src/cpu/x86/vm/sharedRuntime_x86_64.cpp (old version)

  26 #ifndef _WINDOWS
  27 #include "alloca.h"
  28 #endif
  29 #include "asm/macroAssembler.hpp"
  30 #include "asm/macroAssembler.inline.hpp"
  31 #include "code/debugInfoRec.hpp"
  32 #include "code/icBuffer.hpp"
  33 #include "code/vtableStubs.hpp"
  34 #include "interpreter/interpreter.hpp"
  35 #include "oops/compiledICHolder.hpp"
  36 #include "prims/jvmtiRedefineClassesTrace.hpp"
  37 #include "runtime/sharedRuntime.hpp"
  38 #include "runtime/vframeArray.hpp"
  39 #include "vmreg_x86.inline.hpp"
  40 #ifdef COMPILER1
  41 #include "c1/c1_Runtime1.hpp"
  42 #endif
  43 #ifdef COMPILER2
  44 #include "opto/runtime.hpp"
  45 #endif



  46 
  47 #define __ masm->
  48 
  49 const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
  50 
  51 class SimpleRuntimeFrame {
  52 
  53   public:
  54 
  55   // Most of the runtime stubs have this simple frame layout.
  56   // This class exists to make the layout shared in one place.
  57   // Offsets are for compiler stack slots, which are jints.
  58   enum layout {
  59     // The frame sender code expects that rbp will be in the "natural" place and
  60     // will override any oopMap setting for it. We must therefore force the layout
  61     // so that it agrees with the frame sender code.
  62     rbp_off = frame::arg_reg_save_area_bytes/BytesPerInt,
  63     rbp_off2,
  64     return_off, return_off2,
  65     framesize

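A note on units: the enum above counts in jint-sized compiler stack slots, so every 64-bit value (rbp, the return address) occupies two consecutive slots. A minimal standalone sketch of that arithmetic, assuming BytesPerInt == 4 and an illustrative Win64-style 32-byte argument register save area (it is 0 on System V targets):

#include <cstdio>

int main() {
  const int BytesPerInt = 4;
  const int arg_reg_save_area_bytes = 32;                  // assumed Win64-style value
  int rbp_off    = arg_reg_save_area_bytes / BytesPerInt;  // first slot after the save area
  int rbp_off2   = rbp_off + 1;                            // second jint half of rbp
  int return_off = rbp_off2 + 1;                           // return address, also two slots
  std::printf("rbp at byte %d, return pc at byte %d\n",
              rbp_off * BytesPerInt, return_off * BytesPerInt);
  return 0;
}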

 141   static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors = false);
 142   static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false);
 143 
 144   // Offsets into the register save area
 145   // Used by deoptimization when it is managing result register
 146   // values on its own
 147 
 148   static int rax_offset_in_bytes(void)    { return BytesPerInt * rax_off; }
 149   static int rdx_offset_in_bytes(void)    { return BytesPerInt * rdx_off; }
 150   static int rbx_offset_in_bytes(void)    { return BytesPerInt * rbx_off; }
 151   static int xmm0_offset_in_bytes(void)   { return BytesPerInt * xmm0_off; }
 152   static int return_offset_in_bytes(void) { return BytesPerInt * return_off; }
 153 
 154   // During deoptimization only the result registers need to be restored,
 155   // all the other values have already been extracted.
 156   static void restore_result_registers(MacroAssembler* masm);
 157 };
 158 
 159 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
 160   int vect_words = 0;

 161   int off = 0;
 162   int num_xmm_regs = XMMRegisterImpl::number_of_registers;
 163   if (UseAVX < 3) {
 164     num_xmm_regs = num_xmm_regs/2;
 165   }
 166 #ifdef COMPILER2
 167   if (save_vectors) {
 168     assert(UseAVX > 0, "256/512-bit vectors are supported only with AVX");
 169     assert(MaxVectorSize == 64, "only 512bit vectors are supported now");
 170     // Save upper half of YMM registers
 171     vect_words = 16 * num_xmm_regs / wordSize;
 172     if (UseAVX < 3) {

 173       additional_frame_words += vect_words;
 174     }
 175   }
 176 #else
 177   assert(!save_vectors, "vectors are generated only by C2");
 178 #endif
 179 
 180   // Always make the frame size 16-byte aligned
 181   int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
 182                                      reg_save_size*BytesPerInt, num_xmm_regs);
 183   // OopMap frame size is in compiler stack slots (jint's) not bytes or words
 184   int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
 185   // The caller will allocate additional_frame_words
 186   int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt;
 187   // CodeBlob frame size is in words.
 188   int frame_size_in_words = frame_size_in_bytes / wordSize;
 189   *total_frame_words = frame_size_in_words;
 190 
 191   // Save registers, fpu state, and flags.
 192   // We assume caller has already pushed the return address onto the
 193   // stack, so rsp is 8-byte aligned here.
 194   // We push rbp twice in this sequence because we want the real rbp
 195   // to be under the return address like a normal enter.
 196 
 197   __ enter();          // rsp becomes 16-byte aligned here

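The function above juggles three unit systems: bytes, OopMap slots (jints), and CodeBlob words. A self-contained sketch of the conversions, modeling round_to as power-of-two alignment and using an illustrative reg_save_size (the real value comes from the RegisterSaver layout enum, elided here; the real code also rounds to num_xmm_regs rather than a constant 16):

#include <cstdio>

static int round_to(int x, int align) { return (x + align - 1) & ~(align - 1); }

int main() {
  const int wordSize = 8, BytesPerInt = 4;
  int reg_save_size = 208;                    // illustrative, in jint slots
  int additional_frame_words = 0;
  int frame_size_in_bytes = round_to(additional_frame_words * wordSize +
                                     reg_save_size * BytesPerInt, 16);
  int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;  // OopMap units
  int frame_size_in_words = frame_size_in_bytes / wordSize;     // CodeBlob units
  std::printf("bytes=%d slots=%d words=%d\n",
              frame_size_in_bytes, frame_size_in_slots, frame_size_in_words);
  return 0;
}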

 203     // Save upper half of YMM registers (0..num_xmm_regs)
 204     __ subptr(rsp, num_xmm_regs*16);
 205     for (int n = 0; n < num_xmm_regs; n++) {
 206       __ vextractf128h(Address(rsp, off++*16), as_XMMRegister(n));
 207     }
 208   }
 209   if (frame::arg_reg_save_area_bytes != 0) {
 210     // Allocate argument register save area
 211     __ subptr(rsp, frame::arg_reg_save_area_bytes);
 212   }
 213 
 214   // Set an oopmap for the call site.  This oopmap will map all
 215   // oop-registers and debug-info registers as callee-saved.  This
 216   // will allow deoptimization at this safepoint to find all possible
 217   // debug-info recordings, as well as let GC find all oops.
 218 
 219   OopMapSet *oop_maps = new OopMapSet();
 220   OopMap* map = new OopMap(frame_size_in_slots, 0);
 221 
 222 #define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots)

 223 
 224   map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg());
 225   map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg());
 226   map->set_callee_saved(STACK_OFFSET( rdx_off ), rdx->as_VMReg());
 227   map->set_callee_saved(STACK_OFFSET( rbx_off ), rbx->as_VMReg());
 228   // rbp location is known implicitly by the frame sender code, needs no oopmap
 229   // and the location where rbp was saved is ignored
 230   map->set_callee_saved(STACK_OFFSET( rsi_off ), rsi->as_VMReg());
 231   map->set_callee_saved(STACK_OFFSET( rdi_off ), rdi->as_VMReg());
 232   map->set_callee_saved(STACK_OFFSET( r8_off  ), r8->as_VMReg());
 233   map->set_callee_saved(STACK_OFFSET( r9_off  ), r9->as_VMReg());
 234   map->set_callee_saved(STACK_OFFSET( r10_off ), r10->as_VMReg());
 235   map->set_callee_saved(STACK_OFFSET( r11_off ), r11->as_VMReg());
 236   map->set_callee_saved(STACK_OFFSET( r12_off ), r12->as_VMReg());
 237   map->set_callee_saved(STACK_OFFSET( r13_off ), r13->as_VMReg());
 238   map->set_callee_saved(STACK_OFFSET( r14_off ), r14->as_VMReg());
 239   map->set_callee_saved(STACK_OFFSET( r15_off ), r15->as_VMReg());
 240   // For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15,
 241   // on EVEX enabled targets, we get it included in the xsave area
 242   off = xmm0_off;
 243   int delta = xmm1_off - off;
 244   for (int n = 0; n < 16; n++) {
 245     XMMRegister xmm_name = as_XMMRegister(n);
 246     map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg());
 247     off += delta;
 248   }
 249   if (UseAVX > 2) {
 250     // Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets
 251     off = zmm16_off;
 252     delta = zmm17_off - off;
 253     for (int n = 16; n < num_xmm_regs; n++) {
 254       XMMRegister xmm_name = as_XMMRegister(n);
 255       map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg());
 256       off += delta;
 257     }
 258   }
 259 

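The off/delta loops above (xmm0..xmm15, then zmm16..zmm31 on EVEX targets) just walk a fixed-stride sequence of save slots. A toy version with illustrative slot numbers, not the real layout enum values:

#include <cstdio>

int main() {
  int xmm0_off = 160, xmm1_off = 164;   // illustrative slot numbers
  int off = xmm0_off;
  int delta = xmm1_off - xmm0_off;      // slot stride between adjacent registers
  for (int n = 0; n < 16; n++) {
    std::printf("xmm%-2d -> save slot %d\n", n, off);
    off += delta;
  }
  return 0;
}
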
 260   // %%% These should all be a waste but we'll keep things as they were for now
 261   if (true) {
 262     map->set_callee_saved(STACK_OFFSET( raxH_off ), rax->as_VMReg()->next());
 263     map->set_callee_saved(STACK_OFFSET( rcxH_off ), rcx->as_VMReg()->next());
 264     map->set_callee_saved(STACK_OFFSET( rdxH_off ), rdx->as_VMReg()->next());
 265     map->set_callee_saved(STACK_OFFSET( rbxH_off ), rbx->as_VMReg()->next());
 266     // rbp location is known implicitly by the frame sender code, needs no oopmap
 267     map->set_callee_saved(STACK_OFFSET( rsiH_off ), rsi->as_VMReg()->next());
 268     map->set_callee_saved(STACK_OFFSET( rdiH_off ), rdi->as_VMReg()->next());
 269     map->set_callee_saved(STACK_OFFSET( r8H_off  ), r8->as_VMReg()->next());
 270     map->set_callee_saved(STACK_OFFSET( r9H_off  ), r9->as_VMReg()->next());
 271     map->set_callee_saved(STACK_OFFSET( r10H_off ), r10->as_VMReg()->next());
 272     map->set_callee_saved(STACK_OFFSET( r11H_off ), r11->as_VMReg()->next());
 273     map->set_callee_saved(STACK_OFFSET( r12H_off ), r12->as_VMReg()->next());
 274     map->set_callee_saved(STACK_OFFSET( r13H_off ), r13->as_VMReg()->next());
 275     map->set_callee_saved(STACK_OFFSET( r14H_off ), r14->as_VMReg()->next());
 276     map->set_callee_saved(STACK_OFFSET( r15H_off ), r15->as_VMReg()->next());
 277     // For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15,
 278     // on EVEX enabled targets, we get it included in the xsave area
 279     off = xmm0H_off;


 290       for (int n = 16; n < num_xmm_regs; n++) {
 291         XMMRegister xmm_name = as_XMMRegister(n);
 292         map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()->next());
 293         off += delta;
 294       }
 295     }
 296   }
 297 
 298   return map;
 299 }
 300 
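The *H_off entries in the block above exist because VMRegs name 32-bit halves: a 64-bit register is described by the pair (reg->as_VMReg(), reg->as_VMReg()->next()). A toy model of that pairing; the type and names here are ours, not HotSpot's:

#include <cstdio>

struct ToyVMReg {
  int slot;
  ToyVMReg next(int n = 1) const { return {slot + n}; }  // adjacent 32-bit slot
};

int main() {
  ToyVMReg rax_lo{0};
  ToyVMReg rax_hi = rax_lo.next();  // the raxH_off-style high half
  std::printf("low half in slot %d, high half in slot %d\n", rax_lo.slot, rax_hi.slot);
  return 0;
}
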
 301 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
 302   int num_xmm_regs = XMMRegisterImpl::number_of_registers;
 303   if (UseAVX < 3) {
 304     num_xmm_regs = num_xmm_regs/2;
 305   }
 306   if (frame::arg_reg_save_area_bytes != 0) {
 307     // Pop arg register save area
 308     __ addptr(rsp, frame::arg_reg_save_area_bytes);
 309   }
 310 #ifdef COMPILER2
 311   // On EVEX enabled targets everything is handled in pop fpu state
 312   if ((restore_vectors) && (UseAVX < 3)) {
 313     assert(UseAVX > 0, "256/512-bit vectors are supported only with AVX");
 314     assert(MaxVectorSize == 64, "up to 512bit vectors are supported now");
 315     int off = 0;
 316     // Restore upper half of YMM registers (0..num_xmm_regs)
 317     for (int n = 0; n < num_xmm_regs; n++) {
 318       __ vinsertf128h(as_XMMRegister(n), Address(rsp,  off++*16));
 319     }
 320     __ addptr(rsp, num_xmm_regs*16);
 321   }
 322 #else
 323   assert(!restore_vectors, "vectors are generated only by C2");
 324 #endif
 325   // Recover CPU state
 326   __ pop_CPU_state();
 327   // Get the rbp described implicitly by the calling convention (no oopMap)
 328   __ pop(rbp);
 329 }
 330 
 331 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
 332 
 333   // Just restore result register. Only used by deoptimization. By
 334   // now any callee save register that needs to be restored to a c2
 335   // caller of the deoptee has been extracted into the vframeArray
 336   // and will be stuffed into the c2i adapter we create for later
 337   // restoration so only result registers need to be restored here.
 338 
 339   // Restore fp result register
 340   __ movdbl(xmm0, Address(rsp, xmm0_offset_in_bytes()));
 341   // Restore integer result register
 342   __ movptr(rax, Address(rsp, rax_offset_in_bytes()));
 343   __ movptr(rdx, Address(rsp, rdx_offset_in_bytes()));


 638   }
 639 
 640   // Schedule the branch target address early.
 641   __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
 642   __ jmp(rcx);
 643 }
 644 
 645 static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
 646                         address code_start, address code_end,
 647                         Label& L_ok) {
 648   Label L_fail;
 649   __ lea(temp_reg, ExternalAddress(code_start));
 650   __ cmpptr(pc_reg, temp_reg);
 651   __ jcc(Assembler::belowEqual, L_fail);
 652   __ lea(temp_reg, ExternalAddress(code_end));
 653   __ cmpptr(pc_reg, temp_reg);
 654   __ jcc(Assembler::below, L_ok);
 655   __ bind(L_fail);
 656 }
 657 
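range_check falls through to L_fail unless code_start < pc < code_end; both comparisons are unsigned (belowEqual/below). The same logic in plain C++:

#include <cstdint>
#include <cstdio>

static bool in_range(uintptr_t pc, uintptr_t code_start, uintptr_t code_end) {
  if (pc <= code_start) return false;  // jcc(Assembler::belowEqual, L_fail)
  return pc < code_end;                // jcc(Assembler::below, L_ok); else L_fail
}

int main() {
  std::printf("%d\n", in_range(0x1010, 0x1000, 0x2000));  // prints 1
  return 0;
}
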
 658 static void gen_i2c_adapter(MacroAssembler *masm,
 659                             int total_args_passed,
 660                             int comp_args_on_stack,
 661                             const BasicType *sig_bt,
 662                             const VMRegPair *regs) {
 663 
 664   // Note: r13 contains the senderSP on entry. We must preserve it since
 665   // we may do an i2c -> c2i transition if we lose a race where compiled
 666   // code goes non-entrant while we get args ready.
 667   // In addition we use r13 to locate all the interpreter args, because
 668   // we must align the stack to 16 bytes on an i2c entry; otherwise we
 669   // lose the alignment expected by all compiled code, and the register
 670   // save code can segv when fxsave instructions find an improperly
 671   // aligned stack pointer.
 672 
 673   // Adapters can be frameless because they do not require the caller
 674   // to perform additional cleanup work, such as correcting the stack pointer.
 675   // An i2c adapter is frameless because the *caller* frame, which is interpreted,
 676   // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
 677   // even if a callee has modified the stack pointer.
 678   // A c2i adapter is frameless because the *callee* frame, which is interpreted,


 735     comp_words_on_stack = round_to(comp_words_on_stack, 2);
 736     __ subptr(rsp, comp_words_on_stack * wordSize);
 737   }
 738 
 739 
 740   // Ensure compiled code always sees stack at proper alignment
 741   __ andptr(rsp, -16);
 742 
 743   // Push the return address, misaligning the stack in the way the youngest
 744   // frame always sees it with respect to the placement of the call instruction.
 745   __ push(rax);
 746 
 747   // Put saved SP in another register
 748   const Register saved_sp = rax;
 749   __ movptr(saved_sp, r11);
 750 
 751   // Will jump to the compiled code just as if compiled code was doing it.
 752   // Pre-load the register-jump target early, to schedule it better.
 753   __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_offset())));
 754 

 755   // Now generate the shuffle code.  Pick up all register args and move the
 756   // rest through the floating point stack top.
 757   for (int i = 0; i < total_args_passed; i++) {
 758     if (sig_bt[i] == T_VOID) {
 759       // Longs and doubles are passed in native word order, but misaligned
 760       // in the 32-bit build.
 761       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 762       continue;
 763     }
 764 
 765     // Pick up 0, 1 or 2 words from SP+offset.
 766 
 767     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
 768             "scrambled load targets?");
 769     // Load in argument order going down.
 770     int ld_off = (total_args_passed - i)*Interpreter::stackElementSize;
 771     // Point to interpreter value (vs. tag)
 772     int next_off = ld_off - Interpreter::stackElementSize;
 773     //
 774     //

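The ld_off/next_off math above reads interpreter arguments top-down in stackElementSize steps, with next_off pointing at the value half of a 64-bit slot. A standalone sketch for a hypothetical (long, int) signature, assuming Interpreter::stackElementSize == 8:

#include <cstdio>

enum BasicType { T_LONG, T_DOUBLE, T_INT, T_VOID };

int main() {
  const int stackElementSize = 8;                  // assumed x86_64 value
  BasicType sig_bt[] = { T_LONG, T_VOID, T_INT };  // hypothetical (long, int) signature
  const int total_args_passed = 3;
  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) continue;             // unused half of a long/double
    int ld_off   = (total_args_passed - i) * stackElementSize;
    int next_off = ld_off - stackElementSize;      // value (vs. tag) of a 64-bit arg
    std::printf("arg %d: ld_off=%d next_off=%d\n", i, ld_off, next_off);
  }
  return 0;
}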

2668   return nm;
2669 
2670 }
2671 
2672 // This function returns the adjustment size (in number of words) to a c2i adapter
2673 // activation, for use during deoptimization.
2674 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals ) {
2675   return (callee_locals - callee_parameters) * Interpreter::stackElementWords;
2676 }
2677 
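last_frame_adjust is a pure formula, so a tiny worked example suffices; stackElementWords is assumed to be 1 (one word per interpreter stack element on x86_64):

#include <cstdio>

static int last_frame_adjust(int callee_parameters, int callee_locals) {
  const int stackElementWords = 1;  // assumed Interpreter::stackElementWords
  return (callee_locals - callee_parameters) * stackElementWords;
}

int main() {
  // A callee with 2 parameters and 5 locals needs 3 extra words in its
  // interpreter activation beyond what the caller already pushed.
  std::printf("%d extra words\n", last_frame_adjust(2, 5));
  return 0;
}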
2678 
2679 uint SharedRuntime::out_preserve_stack_slots() {
2680   return 0;
2681 }
2682 
2683 //------------------------------generate_deopt_blob----------------------------
2684 void SharedRuntime::generate_deopt_blob() {
2685   // Allocate space for the code
2686   ResourceMark rm;
2687   // Setup code generation tools
2688   CodeBuffer buffer("deopt_blob", 2048, 1024);

2689   MacroAssembler* masm = new MacroAssembler(&buffer);
2690   int frame_size_in_words;
2691   OopMap* map = NULL;
2692   OopMapSet *oop_maps = new OopMapSet();
2693 
2694   // -------------
2695   // This code enters when returning to a de-optimized nmethod.  A return
2696   // address has been pushed on the stack, and return values are in
2697   // registers.
2698   // If we are doing a normal deopt then we were called from the patched
2699   // nmethod from the point we returned to the nmethod. So the return
2700   // address on the stack is wrong by NativeCall::instruction_size
2701   // We will adjust the value so it looks like we have the original return
2702   // address on the stack (like when we eagerly deoptimized).
2703   // In the case of an exception pending when deoptimizing, we enter
2704   // with a return address on the stack that points after the call we patched
2705   // into the exception handler. We have the following register state from,
2706   // e.g., the forward exception stub (see stubGenerator_x86_64.cpp).
2707   //    rax: exception oop
2708   //    rbx: exception handler


2717   // The current frame is compiled code and may contain many inlined
2718   // functions, each with their own JVM state.  We pop the current frame, then
2719   // push all the new frames.  Then we call the C routine unpack_frames() to
2720   // populate these frames.  Finally unpack_frames() returns us the new target
2721   // address.  Notice that callee-save registers are BLOWN here; they have
2722   // already been captured in the vframeArray at the time the return PC was
2723   // patched.
2724   address start = __ pc();
2725   Label cont;
2726 
2727   // Prolog for the non-exception case.
2728 
2729   // Save everything in sight.
2730   map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2731 
2732   // Normal deoptimization.  Save exec mode for unpack_frames.
2733   __ movl(r14, Deoptimization::Unpack_deopt); // callee-saved
2734   __ jmp(cont);
2735 
2736   int reexecute_offset = __ pc() - start;

2737 
2738   // Reexecute case
2739   // the return address is the pc that describes what bci to re-execute at
2740 
2741   // No need to update map as each call to save_live_registers will produce an identical oopmap
2742   (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2743 
2744   __ movl(r14, Deoptimization::Unpack_reexecute); // callee-saved
2745   __ jmp(cont);
2746 

2747   int exception_offset = __ pc() - start;
2748 
2749   // Prolog for exception case
2750 
2751   // All registers are dead at this entry point, except for rax and rdx,
2752   // which contain the exception oop and exception pc
2753   // respectively.  Set them in TLS and fall through to the
2754   // unpack_with_exception_in_tls entry point.
2755 
2756   __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), rdx);
2757   __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), rax);
2758 
2759   int exception_in_tls_offset = __ pc() - start;
2760 
2761   // new implementation because exception oop is now passed in JavaThread
2762 
2763   // Prolog for exception case
2764   // All registers must be preserved because they might be used by LinearScan
2765   // Exception oop and throwing PC are passed in JavaThread
2766   // tos: stack at point of call to method that threw the exception (i.e. only


2812   __ set_last_Java_frame(noreg, noreg, NULL);
2813 #ifdef ASSERT
2814   { Label L;
2815     __ cmpptr(Address(r15_thread,
2816                     JavaThread::last_Java_fp_offset()),
2817             (int32_t)0);
2818     __ jcc(Assembler::equal, L);
2819     __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared");
2820     __ bind(L);
2821   }
2822 #endif // ASSERT
2823   __ mov(c_rarg0, r15_thread);
2824   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));
2825 
2826   // Need to have an oopmap that tells fetch_unroll_info where to
2827   // find any register it might need.
2828   oop_maps->add_gc_map(__ pc() - start, map);
2829 
2830   __ reset_last_Java_frame(false, false);
2831 

2832   // Load UnrollBlock* into rdi
2833   __ mov(rdi, rax);
2834 
2835   Label noException;
2836   __ cmpl(r14, Deoptimization::Unpack_exception);   // Was exception pending?
2837   __ jcc(Assembler::notEqual, noException);
2838   __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset()));
2839   // QQQ this is useless; it was NULL above
2840   __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset()));
2841   __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), (int32_t)NULL_WORD);
2842   __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int32_t)NULL_WORD);
2843 
2844   __ verify_oop(rax);
2845 
2846   // Overwrite the result registers with the exception results.
2847   __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax);
2848   // I think this is useless
2849   __ movptr(Address(rsp, RegisterSaver::rdx_offset_in_bytes()), rdx);
2850 
2851   __ bind(noException);

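The exec mode set in the prologs lives in callee-saved r14 so it survives the fetch_unroll_info call, and the cmpl above dispatches on it. A toy dispatch; the enumerator names mirror Deoptimization's unpack modes, though the exact values are not shown in this webrev:

#include <cstdio>

enum UnpackMode { Unpack_deopt, Unpack_exception, Unpack_uncommon_trap, Unpack_reexecute };

int main() {
  UnpackMode r14 = Unpack_exception;  // set by whichever prolog was entered
  if (r14 == Unpack_exception) {      // cmpl(r14, Deoptimization::Unpack_exception)
    std::puts("copy exception oop/pc from TLS into the saved result registers");
  } else {
    std::puts("noException: leave the saved result registers alone");
  }
  return 0;
}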

2986   // Clear fp AND pc
2987   __ reset_last_Java_frame(true, true);
2988 
2989   // Collect return values
2990   __ movdbl(xmm0, Address(rsp, RegisterSaver::xmm0_offset_in_bytes()));
2991   __ movptr(rax, Address(rsp, RegisterSaver::rax_offset_in_bytes()));
2992   // I think this is useless (throwing pc?)
2993   __ movptr(rdx, Address(rsp, RegisterSaver::rdx_offset_in_bytes()));
2994 
2995   // Pop self-frame.
2996   __ leave();                           // Epilog
2997 
2998   // Jump to interpreter
2999   __ ret(0);
3000 
3001   // Make sure all code is generated
3002   masm->flush();
3003 
3004   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
3005   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);

3006 }
3007 
3008 #ifdef COMPILER2
3009 //------------------------------generate_uncommon_trap_blob--------------------
3010 void SharedRuntime::generate_uncommon_trap_blob() {
3011   // Allocate space for the code
3012   ResourceMark rm;
3013   // Setup code generation tools
3014   CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
3015   MacroAssembler* masm = new MacroAssembler(&buffer);
3016 
3017   assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
3018 
3019   address start = __ pc();
3020 
3021   if (UseRTMLocking) {
3022     // Abort RTM transaction before possible nmethod deoptimization.
3023     __ xabort(0);
3024   }
3025 

src/cpu/x86/vm/sharedRuntime_x86_64.cpp (new version)

  26 #ifndef _WINDOWS
  27 #include "alloca.h"
  28 #endif
  29 #include "asm/macroAssembler.hpp"
  30 #include "asm/macroAssembler.inline.hpp"
  31 #include "code/debugInfoRec.hpp"
  32 #include "code/icBuffer.hpp"
  33 #include "code/vtableStubs.hpp"
  34 #include "interpreter/interpreter.hpp"
  35 #include "oops/compiledICHolder.hpp"
  36 #include "prims/jvmtiRedefineClassesTrace.hpp"
  37 #include "runtime/sharedRuntime.hpp"
  38 #include "runtime/vframeArray.hpp"
  39 #include "vmreg_x86.inline.hpp"
  40 #ifdef COMPILER1
  41 #include "c1/c1_Runtime1.hpp"
  42 #endif
  43 #ifdef COMPILER2
  44 #include "opto/runtime.hpp"
  45 #endif
  46 #if INCLUDE_JVMCI
  47 #include "jvmci/jvmciJavaClasses.hpp"
  48 #endif
  49 
  50 #define __ masm->
  51 
  52 const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
  53 
  54 class SimpleRuntimeFrame {
  55 
  56   public:
  57 
  58   // Most of the runtime stubs have this simple frame layout.
  59   // This class exists to make the layout shared in one place.
  60   // Offsets are for compiler stack slots, which are jints.
  61   enum layout {
  62     // The frame sender code expects that rbp will be in the "natural" place and
  63     // will override any oopMap setting for it. We must therefore force the layout
  64     // so that it agrees with the frame sender code.
  65     rbp_off = frame::arg_reg_save_area_bytes/BytesPerInt,
  66     rbp_off2,
  67     return_off, return_off2,
  68     framesize


 144   static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors = false);
 145   static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false);
 146 
 147   // Offsets into the register save area
 148   // Used by deoptimization when it is managing result register
 149   // values on its own
 150 
 151   static int rax_offset_in_bytes(void)    { return BytesPerInt * rax_off; }
 152   static int rdx_offset_in_bytes(void)    { return BytesPerInt * rdx_off; }
 153   static int rbx_offset_in_bytes(void)    { return BytesPerInt * rbx_off; }
 154   static int xmm0_offset_in_bytes(void)   { return BytesPerInt * xmm0_off; }
 155   static int return_offset_in_bytes(void) { return BytesPerInt * return_off; }
 156 
 157   // During deoptimization only the result registers need to be restored,
 158   // all the other values have already been extracted.
 159   static void restore_result_registers(MacroAssembler* masm);
 160 };
 161 
 162 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
 163   int vect_words = 0;
 164   int ymmhi_offset = -1;
 165   int off = 0;
 166   int num_xmm_regs = XMMRegisterImpl::number_of_registers;
 167   if (UseAVX < 3) {
 168     num_xmm_regs = num_xmm_regs/2;
 169   }
 170 #if defined(COMPILER2) || INCLUDE_JVMCI
 171   if (save_vectors) {
 172     assert(UseAVX > 0, "256/512-bit vectors are supported only with AVX");
 173     assert(MaxVectorSize == 64, "only 512bit vectors are supported now");
 174     // Save upper half of YMM registers
 175     vect_words = 16 * num_xmm_regs / wordSize;
 176     if (UseAVX < 3) {
 177       ymmhi_offset = additional_frame_words;
 178       additional_frame_words += vect_words;
 179     }
 180   }
 181 #else
 182   assert(!save_vectors, "vectors are generated only by C2 and JVMCI");
 183 #endif
 184 
 185   // Always make the frame size 16-byte aligned
 186   int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
 187                                      reg_save_size*BytesPerInt, num_xmm_regs);
 188   // OopMap frame size is in compiler stack slots (jint's) not bytes or words
 189   int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
 190   // The caller will allocate additional_frame_words
 191   int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt;
 192   // CodeBlob frame size is in words.
 193   int frame_size_in_words = frame_size_in_bytes / wordSize;
 194   *total_frame_words = frame_size_in_words;
 195 
 196   // Save registers, fpu state, and flags.
 197   // We assume caller has already pushed the return address onto the
 198   // stack, so rsp is 8-byte aligned here.
 199   // We push rbp twice in this sequence because we want the real rbp
 200   // to be under the return address like a normal enter.
 201 
 202   __ enter();          // rsp becomes 16-byte aligned here


 208     // Save upper half of YMM registers (0..num_xmm_regs)
 209     __ subptr(rsp, num_xmm_regs*16);
 210     for (int n = 0; n < num_xmm_regs; n++) {
 211       __ vextractf128h(Address(rsp, off++*16), as_XMMRegister(n));
 212     }
 213   }
 214   if (frame::arg_reg_save_area_bytes != 0) {
 215     // Allocate argument register save area
 216     __ subptr(rsp, frame::arg_reg_save_area_bytes);
 217   }
 218 
 219   // Set an oopmap for the call site.  This oopmap will map all
 220   // oop-registers and debug-info registers as callee-saved.  This
 221   // will allow deoptimization at this safepoint to find all possible
 222   // debug-info recordings, as well as let GC find all oops.
 223 
 224   OopMapSet *oop_maps = new OopMapSet();
 225   OopMap* map = new OopMap(frame_size_in_slots, 0);
 226 
 227 #define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots)
 228 #define YMMHI_STACK_OFFSET(x) VMRegImpl::stack2reg((x / VMRegImpl::stack_slot_size) + ymmhi_offset)
 229 
 230   map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg());
 231   map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg());
 232   map->set_callee_saved(STACK_OFFSET( rdx_off ), rdx->as_VMReg());
 233   map->set_callee_saved(STACK_OFFSET( rbx_off ), rbx->as_VMReg());
 234   // rbp location is known implicitly by the frame sender code, needs no oopmap
 235   // and the location where rbp was saved is ignored
 236   map->set_callee_saved(STACK_OFFSET( rsi_off ), rsi->as_VMReg());
 237   map->set_callee_saved(STACK_OFFSET( rdi_off ), rdi->as_VMReg());
 238   map->set_callee_saved(STACK_OFFSET( r8_off  ), r8->as_VMReg());
 239   map->set_callee_saved(STACK_OFFSET( r9_off  ), r9->as_VMReg());
 240   map->set_callee_saved(STACK_OFFSET( r10_off ), r10->as_VMReg());
 241   map->set_callee_saved(STACK_OFFSET( r11_off ), r11->as_VMReg());
 242   map->set_callee_saved(STACK_OFFSET( r12_off ), r12->as_VMReg());
 243   map->set_callee_saved(STACK_OFFSET( r13_off ), r13->as_VMReg());
 244   map->set_callee_saved(STACK_OFFSET( r14_off ), r14->as_VMReg());
 245   map->set_callee_saved(STACK_OFFSET( r15_off ), r15->as_VMReg());
 246   // For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15,
 247   // on EVEX enabled targets, we get it included in the xsave area
 248   off = xmm0_off;
 249   int delta = xmm1_off - off;
 250   for (int n = 0; n < 16; n++) {
 251     XMMRegister xmm_name = as_XMMRegister(n);
 252     map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg());
 253     off += delta;
 254   }
 255   if (UseAVX > 2) {
 256     // Obtain xmm16..xmm31 from the XSAVE area on EVEX enabled targets
 257     off = zmm16_off;
 258     delta = zmm17_off - off;
 259     for (int n = 16; n < num_xmm_regs; n++) {
 260       XMMRegister xmm_name = as_XMMRegister(n);
 261       map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg());
 262       off += delta;
 263     }
 264   }
 265 
 266 #if defined(COMPILER2) || INCLUDE_JVMCI
 267   if (save_vectors) {
 268     assert(ymmhi_offset != -1, "save area must exist");
 269     map->set_callee_saved(YMMHI_STACK_OFFSET(  0), xmm0->as_VMReg()->next(4));
 270     map->set_callee_saved(YMMHI_STACK_OFFSET( 16), xmm1->as_VMReg()->next(4));
 271     map->set_callee_saved(YMMHI_STACK_OFFSET( 32), xmm2->as_VMReg()->next(4));
 272     map->set_callee_saved(YMMHI_STACK_OFFSET( 48), xmm3->as_VMReg()->next(4));
 273     map->set_callee_saved(YMMHI_STACK_OFFSET( 64), xmm4->as_VMReg()->next(4));
 274     map->set_callee_saved(YMMHI_STACK_OFFSET( 80), xmm5->as_VMReg()->next(4));
 275     map->set_callee_saved(YMMHI_STACK_OFFSET( 96), xmm6->as_VMReg()->next(4));
 276     map->set_callee_saved(YMMHI_STACK_OFFSET(112), xmm7->as_VMReg()->next(4));
 277     map->set_callee_saved(YMMHI_STACK_OFFSET(128), xmm8->as_VMReg()->next(4));
 278     map->set_callee_saved(YMMHI_STACK_OFFSET(144), xmm9->as_VMReg()->next(4));
 279     map->set_callee_saved(YMMHI_STACK_OFFSET(160), xmm10->as_VMReg()->next(4));
 280     map->set_callee_saved(YMMHI_STACK_OFFSET(176), xmm11->as_VMReg()->next(4));
 281     map->set_callee_saved(YMMHI_STACK_OFFSET(192), xmm12->as_VMReg()->next(4));
 282     map->set_callee_saved(YMMHI_STACK_OFFSET(208), xmm13->as_VMReg()->next(4));
 283     map->set_callee_saved(YMMHI_STACK_OFFSET(224), xmm14->as_VMReg()->next(4));
 284     map->set_callee_saved(YMMHI_STACK_OFFSET(240), xmm15->as_VMReg()->next(4));
 285   }
 286 #endif // COMPILER2 || INCLUDE_JVMCI
 287 
 288   // %%% These should all be a waste but we'll keep things as they were for now
 289   if (true) {
 290     map->set_callee_saved(STACK_OFFSET( raxH_off ), rax->as_VMReg()->next());
 291     map->set_callee_saved(STACK_OFFSET( rcxH_off ), rcx->as_VMReg()->next());
 292     map->set_callee_saved(STACK_OFFSET( rdxH_off ), rdx->as_VMReg()->next());
 293     map->set_callee_saved(STACK_OFFSET( rbxH_off ), rbx->as_VMReg()->next());
 294     // rbp location is known implicitly by the frame sender code, needs no oopmap
 295     map->set_callee_saved(STACK_OFFSET( rsiH_off ), rsi->as_VMReg()->next());
 296     map->set_callee_saved(STACK_OFFSET( rdiH_off ), rdi->as_VMReg()->next());
 297     map->set_callee_saved(STACK_OFFSET( r8H_off  ), r8->as_VMReg()->next());
 298     map->set_callee_saved(STACK_OFFSET( r9H_off  ), r9->as_VMReg()->next());
 299     map->set_callee_saved(STACK_OFFSET( r10H_off ), r10->as_VMReg()->next());
 300     map->set_callee_saved(STACK_OFFSET( r11H_off ), r11->as_VMReg()->next());
 301     map->set_callee_saved(STACK_OFFSET( r12H_off ), r12->as_VMReg()->next());
 302     map->set_callee_saved(STACK_OFFSET( r13H_off ), r13->as_VMReg()->next());
 303     map->set_callee_saved(STACK_OFFSET( r14H_off ), r14->as_VMReg()->next());
 304     map->set_callee_saved(STACK_OFFSET( r15H_off ), r15->as_VMReg()->next());
 305     // For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15,
 306     // on EVEX enabled targets, we get it included in the xsave area
 307     off = xmm0H_off;


 318       for (int n = 16; n < num_xmm_regs; n++) {
 319         XMMRegister xmm_name = as_XMMRegister(n);
 320         map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()->next());
 321         off += delta;
 322       }
 323     }
 324   }
 325 
 326   return map;
 327 }
 328 
 329 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
 330   int num_xmm_regs = XMMRegisterImpl::number_of_registers;
 331   if (UseAVX < 3) {
 332     num_xmm_regs = num_xmm_regs/2;
 333   }
 334   if (frame::arg_reg_save_area_bytes != 0) {
 335     // Pop arg register save area
 336     __ addptr(rsp, frame::arg_reg_save_area_bytes);
 337   }
 338 #if defined(COMPILER2) || INCLUDE_JVMCI
 339   // On EVEX enabled targets everything is handled in pop fpu state
 340   if ((restore_vectors) && (UseAVX < 3)) {
 341     assert(UseAVX > 0, "256/512-bit vectors are supported only with AVX");
 342     assert(MaxVectorSize == 64, "up to 512bit vectors are supported now");
 343     int off = 0;
 344     // Restore upper half of YMM registers (0..num_xmm_regs)
 345     for (int n = 0; n < num_xmm_regs; n++) {
 346       __ vinsertf128h(as_XMMRegister(n), Address(rsp,  off++*16));
 347     }
 348     __ addptr(rsp, num_xmm_regs*16);
 349   }
 350 #else
 351   assert(!restore_vectors, "vectors are generated only by C2 and JVMCI");
 352 #endif
 353   // Recover CPU state
 354   __ pop_CPU_state();
 355   // Get the rbp described implicitly by the calling convention (no oopMap)
 356   __ pop(rbp);
 357 }
 358 
 359 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
 360 
 361   // Just restore result register. Only used by deoptimization. By
 362   // now any callee save register that needs to be restored to a c2
 363   // caller of the deoptee has been extracted into the vframeArray
 364   // and will be stuffed into the c2i adapter we create for later
 365   // restoration so only result registers need to be restored here.
 366 
 367   // Restore fp result register
 368   __ movdbl(xmm0, Address(rsp, xmm0_offset_in_bytes()));
 369   // Restore integer result register
 370   __ movptr(rax, Address(rsp, rax_offset_in_bytes()));
 371   __ movptr(rdx, Address(rsp, rdx_offset_in_bytes()));


 666   }
 667 
 668   // Schedule the branch target address early.
 669   __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
 670   __ jmp(rcx);
 671 }
 672 
 673 static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
 674                         address code_start, address code_end,
 675                         Label& L_ok) {
 676   Label L_fail;
 677   __ lea(temp_reg, ExternalAddress(code_start));
 678   __ cmpptr(pc_reg, temp_reg);
 679   __ jcc(Assembler::belowEqual, L_fail);
 680   __ lea(temp_reg, ExternalAddress(code_end));
 681   __ cmpptr(pc_reg, temp_reg);
 682   __ jcc(Assembler::below, L_ok);
 683   __ bind(L_fail);
 684 }
 685 
 686 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
 687                                     int total_args_passed,
 688                                     int comp_args_on_stack,
 689                                     const BasicType *sig_bt,
 690                                     const VMRegPair *regs) {
 691 
 692   // Note: r13 contains the senderSP on entry. We must preserve it since
 693   // we may do an i2c -> c2i transition if we lose a race where compiled
 694   // code goes non-entrant while we get args ready.
 695   // In addition we use r13 to locate all the interpreter args, because
 696   // we must align the stack to 16 bytes on an i2c entry; otherwise we
 697   // lose the alignment expected by all compiled code, and the register
 698   // save code can segv when fxsave instructions find an improperly
 699   // aligned stack pointer.
 700 
 701   // Adapters can be frameless because they do not require the caller
 702   // to perform additional cleanup work, such as correcting the stack pointer.
 703   // An i2c adapter is frameless because the *caller* frame, which is interpreted,
 704   // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
 705   // even if a callee has modified the stack pointer.
 706   // A c2i adapter is frameless because the *callee* frame, which is interpreted,


 763     comp_words_on_stack = round_to(comp_words_on_stack, 2);
 764     __ subptr(rsp, comp_words_on_stack * wordSize);
 765   }
 766 
 767 
 768   // Ensure compiled code always sees stack at proper alignment
 769   __ andptr(rsp, -16);
 770 
 771   // Push the return address, misaligning the stack in the way the youngest
 772   // frame always sees it with respect to the placement of the call instruction.
 773   __ push(rax);
 774 
 775   // Put saved SP in another register
 776   const Register saved_sp = rax;
 777   __ movptr(saved_sp, r11);
 778 
 779   // Will jump to the compiled code just as if compiled code was doing it.
 780   // Pre-load the register-jump target early, to schedule it better.
 781   __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_offset())));
 782 
 783 #if INCLUDE_JVMCI
 784   if (EnableJVMCI) {
 785     // check if this call should be routed towards a specific entry point
 786     __ cmpptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
 787     Label no_alternative_target;
 788     __ jcc(Assembler::equal, no_alternative_target);
 789     __ movptr(r11, Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
 790     __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
 791     __ bind(no_alternative_target);
 792   }
 793 #endif // INCLUDE_JVMCI
 794 
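The JVMCI block above is a check-and-clear: if the thread has an alternate call target, route the register jump there and null the field so it fires only once. The same shape in plain C++; ToyThread and its field are a model, not the HotSpot JavaThread type:

#include <cstdio>

struct ToyThread { void* jvmci_alternate_call_target; };

static void* pick_entry(ToyThread* t, void* from_compiled_entry) {
  if (t->jvmci_alternate_call_target != nullptr) {  // cmpptr(..., 0); jcc(equal, ...)
    void* target = t->jvmci_alternate_call_target;  // movptr(r11, Address(thread, ...))
    t->jvmci_alternate_call_target = nullptr;       // movptr(Address(thread, ...), 0)
    return target;
  }
  return from_compiled_entry;                       // the preloaded r11 target
}

int main() {
  ToyThread t = { nullptr };
  std::printf("%p\n", pick_entry(&t, (void*)0x1000));  // no alternate: default entry
  return 0;
}
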
 795   // Now generate the shuffle code.  Pick up all register args and move the
 796   // rest through the floating point stack top.
 797   for (int i = 0; i < total_args_passed; i++) {
 798     if (sig_bt[i] == T_VOID) {
 799       // Longs and doubles are passed in native word order, but misaligned
 800       // in the 32-bit build.
 801       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 802       continue;
 803     }
 804 
 805     // Pick up 0, 1 or 2 words from SP+offset.
 806 
 807     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
 808             "scrambled load targets?");
 809     // Load in argument order going down.
 810     int ld_off = (total_args_passed - i)*Interpreter::stackElementSize;
 811     // Point to interpreter value (vs. tag)
 812     int next_off = ld_off - Interpreter::stackElementSize;
 813     //
 814     //


2708   return nm;
2709 
2710 }
2711 
2712 // This function returns the adjustment size (in number of words) to a c2i adapter
2713 // activation, for use during deoptimization.
2714 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals ) {
2715   return (callee_locals - callee_parameters) * Interpreter::stackElementWords;
2716 }
2717 
2718 
2719 uint SharedRuntime::out_preserve_stack_slots() {
2720   return 0;
2721 }
2722 
2723 //------------------------------generate_deopt_blob----------------------------
2724 void SharedRuntime::generate_deopt_blob() {
2725   // Allocate space for the code
2726   ResourceMark rm;
2727   // Setup code generation tools
2728   int pad = 0;
2729 #if INCLUDE_JVMCI
2730   if (EnableJVMCI) {
2731     pad += 512; // Increase the buffer size when compiling for JVMCI
2732   }
2733 #endif
2734   CodeBuffer buffer("deopt_blob", 2048+pad, 1024);
2735   MacroAssembler* masm = new MacroAssembler(&buffer);
2736   int frame_size_in_words;
2737   OopMap* map = NULL;
2738   OopMapSet *oop_maps = new OopMapSet();
2739 
2740   // -------------
2741   // This code enters when returning to a de-optimized nmethod.  A return
2742   // address has been pushed on the stack, and return values are in
2743   // registers.
2744   // If we are doing a normal deopt then we were called from the patched
2745   // nmethod from the point we returned to the nmethod. So the return
2746   // address on the stack is wrong by NativeCall::instruction_size
2747   // We will adjust the value so it looks like we have the original return
2748   // address on the stack (like when we eagerly deoptimized).
2749   // In the case of an exception pending when deoptimizing, we enter
2750   // with a return address on the stack that points after the call we patched
2751   // into the exception handler. We have the following register state from,
2752   // e.g., the forward exception stub (see stubGenerator_x86_64.cpp).
2753   //    rax: exception oop
2754   //    rbx: exception handler


2763   // The current frame is compiled code and may contain many inlined
2764   // functions, each with their own JVM state.  We pop the current frame, then
2765   // push all the new frames.  Then we call the C routine unpack_frames() to
2766   // populate these frames.  Finally unpack_frames() returns us the new target
2767   // address.  Notice that callee-save registers are BLOWN here; they have
2768   // already been captured in the vframeArray at the time the return PC was
2769   // patched.
2770   address start = __ pc();
2771   Label cont;
2772 
2773   // Prolog for the non-exception case.
2774 
2775   // Save everything in sight.
2776   map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2777 
2778   // Normal deoptimization.  Save exec mode for unpack_frames.
2779   __ movl(r14, Deoptimization::Unpack_deopt); // callee-saved
2780   __ jmp(cont);
2781 
2782   int reexecute_offset = __ pc() - start;
2783 #if INCLUDE_JVMCI && !defined(COMPILER1)
2784   if (EnableJVMCI && UseJVMCICompiler) {
2785     // JVMCI does not use this kind of deoptimization
2786     __ should_not_reach_here();
2787   }
2788 #endif
2789 
2790   // Reexecute case
2791   // the return address is the pc that describes what bci to re-execute at
2792 
2793   // No need to update map as each call to save_live_registers will produce an identical oopmap
2794   (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2795 
2796   __ movl(r14, Deoptimization::Unpack_reexecute); // callee-saved
2797   __ jmp(cont);
2798 
2799 #if INCLUDE_JVMCI
2800   Label after_fetch_unroll_info_call;
2801   int implicit_exception_uncommon_trap_offset = 0;
2802   int uncommon_trap_offset = 0;
2803 
2804   if (EnableJVMCI) {
2805     implicit_exception_uncommon_trap_offset = __ pc() - start;
2806 
2807     __ pushptr(Address(r15_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())));
2808     __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())), (int32_t)NULL_WORD);
2809 
2810     uncommon_trap_offset = __ pc() - start;
2811 
2812     // Save everything in sight.
2813     RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2814     // fetch_unroll_info needs to call last_java_frame()
2815     __ set_last_Java_frame(noreg, noreg, NULL);
2816 
2817     __ movl(c_rarg1, Address(r15_thread, in_bytes(JavaThread::pending_deoptimization_offset())));
2818     __ movl(Address(r15_thread, in_bytes(JavaThread::pending_deoptimization_offset())), -1);
2819 
2820     __ movl(r14, (int32_t)Deoptimization::Unpack_reexecute);
2821     __ mov(c_rarg0, r15_thread);
2822     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap)));
2823     oop_maps->add_gc_map( __ pc()-start, map->deep_copy());
2824 
2825     __ reset_last_Java_frame(false, false);
2826 
2827     __ jmp(after_fetch_unroll_info_call);
2828   } // EnableJVMCI
2829 #endif // INCLUDE_JVMCI
2830 
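The uncommon_trap entry above consumes the pending request with a read-and-reset: load the id into the call argument register, then store the -1 "none" sentinel back. In isolation, with the TLS field modeled as a plain int:

#include <cstdio>

int main() {
  int pending_deoptimization = 42;       // illustrative trap request id in TLS
  int c_rarg1 = pending_deoptimization;  // movl(c_rarg1, Address(r15_thread, ...))
  pending_deoptimization = -1;           // movl(Address(r15_thread, ...), -1): consumed
  std::printf("pass trap request %d to Deoptimization::uncommon_trap\n", c_rarg1);
  return 0;
}
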
2831   int exception_offset = __ pc() - start;
2832 
2833   // Prolog for exception case
2834 
2835   // All registers are dead at this entry point, except for rax and rdx,
2836   // which contain the exception oop and exception pc
2837   // respectively.  Set them in TLS and fall through to the
2838   // unpack_with_exception_in_tls entry point.
2839 
2840   __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), rdx);
2841   __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), rax);
2842 
2843   int exception_in_tls_offset = __ pc() - start;
2844 
2845   // new implementation because exception oop is now passed in JavaThread
2846 
2847   // Prolog for exception case
2848   // All registers must be preserved because they might be used by LinearScan
2849   // Exception oop and throwing PC are passed in JavaThread
2850   // tos: stack at point of call to method that threw the exception (i.e. only


2896   __ set_last_Java_frame(noreg, noreg, NULL);
2897 #ifdef ASSERT
2898   { Label L;
2899     __ cmpptr(Address(r15_thread,
2900                     JavaThread::last_Java_fp_offset()),
2901             (int32_t)0);
2902     __ jcc(Assembler::equal, L);
2903     __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared");
2904     __ bind(L);
2905   }
2906 #endif // ASSERT
2907   __ mov(c_rarg0, r15_thread);
2908   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));
2909 
2910   // Need to have an oopmap that tells fetch_unroll_info where to
2911   // find any register it might need.
2912   oop_maps->add_gc_map(__ pc() - start, map);
2913 
2914   __ reset_last_Java_frame(false, false);
2915 
2916 #if INCLUDE_JVMCI
2917   if (EnableJVMCI) {
2918     __ bind(after_fetch_unroll_info_call);
2919   }
2920 #endif
2921 
2922   // Load UnrollBlock* into rdi
2923   __ mov(rdi, rax);
2924 
2925   Label noException;
2926   __ cmpl(r14, Deoptimization::Unpack_exception);   // Was exception pending?
2927   __ jcc(Assembler::notEqual, noException);
2928   __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset()));
2929   // QQQ this is useless; it was NULL above
2930   __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset()));
2931   __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), (int32_t)NULL_WORD);
2932   __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int32_t)NULL_WORD);
2933 
2934   __ verify_oop(rax);
2935 
2936   // Overwrite the result registers with the exception results.
2937   __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax);
2938   // I think this is useless
2939   __ movptr(Address(rsp, RegisterSaver::rdx_offset_in_bytes()), rdx);
2940 
2941   __ bind(noException);


3076   // Clear fp AND pc
3077   __ reset_last_Java_frame(true, true);
3078 
3079   // Collect return values
3080   __ movdbl(xmm0, Address(rsp, RegisterSaver::xmm0_offset_in_bytes()));
3081   __ movptr(rax, Address(rsp, RegisterSaver::rax_offset_in_bytes()));
3082   // I think this is useless (throwing pc?)
3083   __ movptr(rdx, Address(rsp, RegisterSaver::rdx_offset_in_bytes()));
3084 
3085   // Pop self-frame.
3086   __ leave();                           // Epilog
3087 
3088   // Jump to interpreter
3089   __ ret(0);
3090 
3091   // Make sure all code is generated
3092   masm->flush();
3093 
3094   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
3095   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
3096 #if INCLUDE_JVMCI
3097   if (EnableJVMCI) {
3098     _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset);
3099     _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset);
3100   }
3101 #endif
3102 }
3103 
3104 #ifdef COMPILER2
3105 //------------------------------generate_uncommon_trap_blob--------------------
3106 void SharedRuntime::generate_uncommon_trap_blob() {
3107   // Allocate space for the code
3108   ResourceMark rm;
3109   // Setup code generation tools
3110   CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
3111   MacroAssembler* masm = new MacroAssembler(&buffer);
3112 
3113   assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
3114 
3115   address start = __ pc();
3116 
3117   if (UseRTMLocking) {
3118     // Abort RTM transaction before possible nmethod deoptimization.
3119     __ xabort(0);
3120   }
3121 
