1 /*
   2  * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/macroAssembler.hpp"
  27 #include "compiler/disassembler.hpp"
  28 #include "interpreter/interpreter.hpp"
  29 #include "interpreter/interpreterRuntime.hpp"
  30 #include "interpreter/interp_masm.hpp"
  31 #include "interpreter/templateTable.hpp"
  32 #include "memory/universe.hpp"
  33 #include "oops/methodData.hpp"
  34 #include "oops/objArrayKlass.hpp"
  35 #include "oops/oop.inline.hpp"
  36 #include "oops/valueKlass.hpp"
  37 #include "prims/methodHandles.hpp"
  38 #include "runtime/frame.inline.hpp"
  39 #include "runtime/safepointMechanism.hpp"
  40 #include "runtime/sharedRuntime.hpp"
  41 #include "runtime/stubRoutines.hpp"
  42 #include "runtime/synchronizer.hpp"
  43 #include "utilities/macros.hpp"
  44 
  45 #define __ Disassembler::hook<InterpreterMacroAssembler>(__FILE__, __LINE__, _masm)->
  46 
  47 // Global Register Names
  48 static const Register rbcp     = LP64_ONLY(r13) NOT_LP64(rsi);
  49 static const Register rlocals  = LP64_ONLY(r14) NOT_LP64(rdi);
  50 
  51 // Platform-dependent initialization
  52 void TemplateTable::pd_initialize() {
  53   // No x86 specific initialization
  54 }
  55 
  56 // Address Computation: local variables
  57 static inline Address iaddress(int n) {
  58   return Address(rlocals, Interpreter::local_offset_in_bytes(n));
  59 }
  60 
  61 static inline Address laddress(int n) {
  62   return iaddress(n + 1);
  63 }
  64 
  65 #ifndef _LP64
  66 static inline Address haddress(int n) {
  67   return iaddress(n + 0);
  68 }
  69 #endif
  70 
  71 static inline Address faddress(int n) {
  72   return iaddress(n);
  73 }
  74 
  75 static inline Address daddress(int n) {
  76   return laddress(n);
  77 }
  78 
  79 static inline Address aaddress(int n) {
  80   return iaddress(n);
  81 }
  82 
  83 static inline Address iaddress(Register r) {
  84   return Address(rlocals, r, Address::times_ptr);
  85 }
  86 
  87 static inline Address laddress(Register r) {
  88   return Address(rlocals, r, Address::times_ptr, Interpreter::local_offset_in_bytes(1));
  89 }
  90 
  91 #ifndef _LP64
static inline Address haddress(Register r) {
  93   return Address(rlocals, r, Interpreter::stackElementScale(), Interpreter::local_offset_in_bytes(0));
  94 }
  95 #endif
  96 
  97 static inline Address faddress(Register r) {
  98   return iaddress(r);
  99 }
 100 
 101 static inline Address daddress(Register r) {
 102   return laddress(r);
 103 }
 104 
 105 static inline Address aaddress(Register r) {
 106   return iaddress(r);
 107 }
 108 
 109 
 110 // expression stack
 111 // (Note: Must not use symmetric equivalents at_rsp_m1/2 since they store
 112 // data beyond the rsp which is potentially unsafe in an MT environment;
 113 // an interrupt may overwrite that data.)
 114 static inline Address at_rsp   () {
 115   return Address(rsp, 0);
 116 }
 117 
 118 // At top of Java expression stack which may be different than esp().  It
 119 // isn't for category 1 objects.
 120 static inline Address at_tos   () {
 121   return Address(rsp,  Interpreter::expr_offset_in_bytes(0));
 122 }
 123 
 124 static inline Address at_tos_p1() {
 125   return Address(rsp,  Interpreter::expr_offset_in_bytes(1));
 126 }
 127 
 128 static inline Address at_tos_p2() {
 129   return Address(rsp,  Interpreter::expr_offset_in_bytes(2));
 130 }
 131 
 132 // Condition conversion
 133 static Assembler::Condition j_not(TemplateTable::Condition cc) {
 134   switch (cc) {
 135   case TemplateTable::equal        : return Assembler::notEqual;
 136   case TemplateTable::not_equal    : return Assembler::equal;
 137   case TemplateTable::less         : return Assembler::greaterEqual;
 138   case TemplateTable::less_equal   : return Assembler::greater;
 139   case TemplateTable::greater      : return Assembler::lessEqual;
 140   case TemplateTable::greater_equal: return Assembler::less;
 141   }
 142   ShouldNotReachHere();
 143   return Assembler::zero;
 144 }
 145 
 146 
 147 
// Miscellaneous helper routines
// Store an oop (or NULL) at the address described by obj.
// If val == noreg this means store a NULL.
 151 
 152 
 153 static void do_oop_store(InterpreterMacroAssembler* _masm,
 154                          Address dst,
 155                          Register val,
 156                          DecoratorSet decorators = 0) {
 157   assert(val == noreg || val == rax, "parameter is just for looks");
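  // rdx and rbx are passed to store_heap_oop() as scratch registers for the GC
  // barrier and may be clobbered.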
 158   __ store_heap_oop(dst, val, rdx, rbx, noreg, decorators);
 159 }
 160 
 161 static void do_oop_load(InterpreterMacroAssembler* _masm,
 162                         Address src,
 163                         Register dst,
 164                         DecoratorSet decorators = 0) {
 165   __ load_heap_oop(dst, src, rdx, rbx, decorators);
 166 }
 167 
 168 Address TemplateTable::at_bcp(int offset) {
 169   assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
 170   return Address(rbcp, offset);
 171 }
 172 
 173 
 174 void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg,
 175                                    Register temp_reg, bool load_bc_into_bc_reg/*=true*/,
 176                                    int byte_no) {
 177   if (!RewriteBytecodes)  return;
 178   Label L_patch_done;
 179 
 180   switch (bc) {
 181   case Bytecodes::_fast_qputfield:
 182   case Bytecodes::_fast_aputfield:
 183   case Bytecodes::_fast_bputfield:
 184   case Bytecodes::_fast_zputfield:
 185   case Bytecodes::_fast_cputfield:
 186   case Bytecodes::_fast_dputfield:
 187   case Bytecodes::_fast_fputfield:
 188   case Bytecodes::_fast_iputfield:
 189   case Bytecodes::_fast_lputfield:
 190   case Bytecodes::_fast_sputfield:
 191     {
 192       // We skip bytecode quickening for putfield instructions when
 193       // the put_code written to the constant pool cache is zero.
 194       // This is required so that every execution of this instruction
 195       // calls out to InterpreterRuntime::resolve_get_put to do
 196       // additional, required work.
 197       assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
 198       assert(load_bc_into_bc_reg, "we use bc_reg as temp");
 199       __ get_cache_and_index_and_bytecode_at_bcp(temp_reg, bc_reg, temp_reg, byte_no, 1);
 200       __ movl(bc_reg, bc);
 201       __ cmpl(temp_reg, (int) 0);
 202       __ jcc(Assembler::zero, L_patch_done);  // don't patch
 203     }
 204     break;
 205   default:
 206     assert(byte_no == -1, "sanity");
 207     // the pair bytecodes have already done the load.
 208     if (load_bc_into_bc_reg) {
 209       __ movl(bc_reg, bc);
 210     }
 211   }
 212 
 213   if (JvmtiExport::can_post_breakpoint()) {
 214     Label L_fast_patch;
 215     // if a breakpoint is present we can't rewrite the stream directly
 216     __ movzbl(temp_reg, at_bcp(0));
 217     __ cmpl(temp_reg, Bytecodes::_breakpoint);
 218     __ jcc(Assembler::notEqual, L_fast_patch);
 219     __ get_method(temp_reg);
 220     // Let breakpoint table handling rewrite to quicker bytecode
 221     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), temp_reg, rbcp, bc_reg);
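    // In debug builds the assertion code emitted below pushes the bind point of
    // L_patch_done out of short-jump range, so a near jump is used instead.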
 222 #ifndef ASSERT
 223     __ jmpb(L_patch_done);
 224 #else
 225     __ jmp(L_patch_done);
 226 #endif
 227     __ bind(L_fast_patch);
 228   }
 229 
 230 #ifdef ASSERT
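  // Verify that the bytecode being overwritten is still either the original
  // (slow) bytecode or the fast bytecode we are installing.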
 231   Label L_okay;
 232   __ load_unsigned_byte(temp_reg, at_bcp(0));
 233   __ cmpl(temp_reg, (int) Bytecodes::java_code(bc));
 234   __ jcc(Assembler::equal, L_okay);
 235   __ cmpl(temp_reg, bc_reg);
 236   __ jcc(Assembler::equal, L_okay);
 237   __ stop("patching the wrong bytecode");
 238   __ bind(L_okay);
 239 #endif
 240 
 241   // patch bytecode
 242   __ movb(at_bcp(0), bc_reg);
 243   __ bind(L_patch_done);
 244 }
 245 // Individual instructions
 246 
 247 
 248 void TemplateTable::nop() {
 249   transition(vtos, vtos);
 250   // nothing to do
 251 }
 252 
 253 void TemplateTable::shouldnotreachhere() {
 254   transition(vtos, vtos);
 255   __ stop("shouldnotreachhere bytecode");
 256 }
 257 
 258 void TemplateTable::aconst_null() {
 259   transition(vtos, atos);
 260   __ xorl(rax, rax);
 261 }
 262 
 263 void TemplateTable::iconst(int value) {
 264   transition(vtos, itos);
 265   if (value == 0) {
 266     __ xorl(rax, rax);
 267   } else {
 268     __ movl(rax, value);
 269   }
 270 }
 271 
 272 void TemplateTable::lconst(int value) {
 273   transition(vtos, ltos);
 274   if (value == 0) {
 275     __ xorl(rax, rax);
 276   } else {
 277     __ movl(rax, value);
 278   }
 279 #ifndef _LP64
 280   assert(value >= 0, "check this code");
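  // Only lconst_0 and lconst_1 reach here, so the value is non-negative and the
  // high word (rdx on 32-bit) can simply be zeroed.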
 281   __ xorptr(rdx, rdx);
 282 #endif
 283 }
 284 
 285 
 286 
 287 void TemplateTable::fconst(int value) {
 288   transition(vtos, ftos);
 289   if (UseSSE >= 1) {
 290     static float one = 1.0f, two = 2.0f;
 291     switch (value) {
 292     case 0:
 293       __ xorps(xmm0, xmm0);
 294       break;
 295     case 1:
 296       __ movflt(xmm0, ExternalAddress((address) &one));
 297       break;
 298     case 2:
 299       __ movflt(xmm0, ExternalAddress((address) &two));
 300       break;
 301     default:
 302       ShouldNotReachHere();
 303       break;
 304     }
 305   } else {
 306 #ifdef _LP64
 307     ShouldNotReachHere();
 308 #else
 309            if (value == 0) { __ fldz();
 310     } else if (value == 1) { __ fld1();
 311     } else if (value == 2) { __ fld1(); __ fld1(); __ faddp(); // should do a better solution here
 312     } else                 { ShouldNotReachHere();
 313     }
 314 #endif // _LP64
 315   }
 316 }
 317 
 318 void TemplateTable::dconst(int value) {
 319   transition(vtos, dtos);
 320   if (UseSSE >= 2) {
 321     static double one = 1.0;
 322     switch (value) {
 323     case 0:
 324       __ xorpd(xmm0, xmm0);
 325       break;
 326     case 1:
 327       __ movdbl(xmm0, ExternalAddress((address) &one));
 328       break;
 329     default:
 330       ShouldNotReachHere();
 331       break;
 332     }
 333   } else {
 334 #ifdef _LP64
 335     ShouldNotReachHere();
 336 #else
 337            if (value == 0) { __ fldz();
 338     } else if (value == 1) { __ fld1();
 339     } else                 { ShouldNotReachHere();
 340     }
 341 #endif
 342   }
 343 }
 344 
 345 void TemplateTable::bipush() {
 346   transition(vtos, itos);
 347   __ load_signed_byte(rax, at_bcp(1));
 348 }
 349 
 350 void TemplateTable::sipush() {
 351   transition(vtos, itos);
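  // The 2-byte immediate is big-endian in the bytecode stream: load it as a
  // 16-bit value, byte-swap the 32-bit register, then arithmetic-shift right by
  // 16 to sign-extend the operand.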
 352   __ load_unsigned_short(rax, at_bcp(1));
 353   __ bswapl(rax);
 354   __ sarl(rax, 16);
 355 }
 356 
 357 void TemplateTable::ldc(bool wide) {
 358   transition(vtos, vtos);
 359   Register rarg = NOT_LP64(rcx) LP64_ONLY(c_rarg1);
 360   Label call_ldc, notFloat, notClass, notInt, Done;
 361 
 362   if (wide) {
 363     __ get_unsigned_2_byte_index_at_bcp(rbx, 1);
 364   } else {
 365     __ load_unsigned_byte(rbx, at_bcp(1));
 366   }
 367 
 368   __ get_cpool_and_tags(rcx, rax);
 369   const int base_offset = ConstantPool::header_size() * wordSize;
 370   const int tags_offset = Array<u1>::base_offset_in_bytes();
 371 
 372   // get type
 373   __ movzbl(rdx, Address(rax, rbx, Address::times_1, tags_offset));
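  // Mask off the Q-descriptor bit used by Valhalla so the tag comparisons below
  // treat Q-typed class entries like ordinary class entries.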
 374   __ andl(rdx, ~JVM_CONSTANT_QDescBit);
 375 
 376   // unresolved class - get the resolved class
 377   __ cmpl(rdx, JVM_CONSTANT_UnresolvedClass);
 378   __ jccb(Assembler::equal, call_ldc);
 379 
 380   // unresolved class in error state - call into runtime to throw the error
 381   // from the first resolution attempt
 382   __ cmpl(rdx, JVM_CONSTANT_UnresolvedClassInError);
 383   __ jccb(Assembler::equal, call_ldc);
 384 
 385   // resolved class - need to call vm to get java mirror of the class
 386   __ cmpl(rdx, JVM_CONSTANT_Class);
 387   __ jcc(Assembler::notEqual, notClass);
 388 
 389   __ bind(call_ldc);
 390 
 391   __ movl(rarg, wide);
 392   call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), rarg);
 393 
 394   __ push(atos);
 395   __ jmp(Done);
 396 
 397   __ bind(notClass);
 398   __ cmpl(rdx, JVM_CONSTANT_Float);
 399   __ jccb(Assembler::notEqual, notFloat);
 400 
 401   // ftos
 402   __ load_float(Address(rcx, rbx, Address::times_ptr, base_offset));
 403   __ push(ftos);
 404   __ jmp(Done);
 405 
 406   __ bind(notFloat);
 407   __ cmpl(rdx, JVM_CONSTANT_Integer);
 408   __ jccb(Assembler::notEqual, notInt);
 409 
 410   // itos
 411   __ movl(rax, Address(rcx, rbx, Address::times_ptr, base_offset));
 412   __ push(itos);
 413   __ jmp(Done);
 414 
 415   // assume the tag is for condy; if not, the VM runtime will tell us
 416   __ bind(notInt);
 417   condy_helper(Done);
 418 
 419   __ bind(Done);
 420 }
 421 
 422 // Fast path for caching oop constants.
 423 void TemplateTable::fast_aldc(bool wide) {
 424   transition(vtos, atos);
 425 
 426   Register result = rax;
 427   Register tmp = rdx;
 428   Register rarg = NOT_LP64(rcx) LP64_ONLY(c_rarg1);
 429   int index_size = wide ? sizeof(u2) : sizeof(u1);
 430 
 431   Label resolved;
 432 
 433   // We are resolved if the resolved reference cache entry contains a
 434   // non-null object (String, MethodType, etc.)
 435   assert_different_registers(result, tmp);
 436   __ get_cache_index_at_bcp(tmp, 1, index_size);
 437   __ load_resolved_reference_at_index(result, tmp);
 438   __ testptr(result, result);
 439   __ jcc(Assembler::notZero, resolved);
 440 
 441   address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);
 442 
 443   // first time invocation - must resolve first
 444   __ movl(rarg, (int)bytecode());
 445   __ call_VM(result, entry, rarg);
 446   __ bind(resolved);
 447 
 448   { // Check for the null sentinel.
 449     // If we just called the VM, it already did the mapping for us,
 450     // but it's harmless to retry.
 451     Label notNull;
 452     ExternalAddress null_sentinel((address)Universe::the_null_sentinel_addr());
 453     __ movptr(tmp, null_sentinel);
 454     __ cmpoop(tmp, result);
 455     __ jccb(Assembler::notEqual, notNull);
 456     __ xorptr(result, result);  // NULL object reference
 457     __ bind(notNull);
 458   }
 459 
 460   if (VerifyOops) {
 461     __ verify_oop(result);
 462   }
 463 }
 464 
 465 void TemplateTable::ldc2_w() {
 466   transition(vtos, vtos);
 467   Label notDouble, notLong, Done;
 468   __ get_unsigned_2_byte_index_at_bcp(rbx, 1);
 469 
 470   __ get_cpool_and_tags(rcx, rax);
 471   const int base_offset = ConstantPool::header_size() * wordSize;
 472   const int tags_offset = Array<u1>::base_offset_in_bytes();
 473 
 474   // get type
 475   __ movzbl(rdx, Address(rax, rbx, Address::times_1, tags_offset));
 476   __ cmpl(rdx, JVM_CONSTANT_Double);
 477   __ jccb(Assembler::notEqual, notDouble);
 478 
 479   // dtos
 480   __ load_double(Address(rcx, rbx, Address::times_ptr, base_offset));
 481   __ push(dtos);
 482 
 483   __ jmp(Done);
 484   __ bind(notDouble);
 485   __ cmpl(rdx, JVM_CONSTANT_Long);
 486   __ jccb(Assembler::notEqual, notLong);
 487 
 488   // ltos
 489   __ movptr(rax, Address(rcx, rbx, Address::times_ptr, base_offset + 0 * wordSize));
 490   NOT_LP64(__ movptr(rdx, Address(rcx, rbx, Address::times_ptr, base_offset + 1 * wordSize)));
 491   __ push(ltos);
 492   __ jmp(Done);
 493 
 494   __ bind(notLong);
 495   condy_helper(Done);
 496 
 497   __ bind(Done);
 498 }
 499 
 500 void TemplateTable::condy_helper(Label& Done) {
 501   const Register obj = rax;
 502   const Register off = rbx;
 503   const Register flags = rcx;
 504   const Register rarg = NOT_LP64(rcx) LP64_ONLY(c_rarg1);
 505   __ movl(rarg, (int)bytecode());
 506   call_VM(obj, CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc), rarg);
 507 #ifndef _LP64
 508   // borrow rdi from locals
 509   __ get_thread(rdi);
 510   __ get_vm_result_2(flags, rdi);
 511   __ restore_locals();
 512 #else
 513   __ get_vm_result_2(flags, r15_thread);
 514 #endif
 515   // VMr = obj = base address to find primitive value to push
 516   // VMr2 = flags = (tos, off) using format of CPCE::_flags
 517   __ movl(off, flags);
 518   __ andl(off, ConstantPoolCacheEntry::field_index_mask);
 519   const Address field(obj, off, Address::times_1, 0*wordSize);
 520 
 521   // What sort of thing are we loading?
 522   __ shrl(flags, ConstantPoolCacheEntry::tos_state_shift);
 523   __ andl(flags, ConstantPoolCacheEntry::tos_state_mask);
 524 
 525   switch (bytecode()) {
 526   case Bytecodes::_ldc:
 527   case Bytecodes::_ldc_w:
 528     {
 529       // tos in (itos, ftos, stos, btos, ctos, ztos)
 530       Label notInt, notFloat, notShort, notByte, notChar, notBool;
 531       __ cmpl(flags, itos);
 532       __ jcc(Assembler::notEqual, notInt);
 533       // itos
 534       __ movl(rax, field);
 535       __ push(itos);
 536       __ jmp(Done);
 537 
 538       __ bind(notInt);
 539       __ cmpl(flags, ftos);
 540       __ jcc(Assembler::notEqual, notFloat);
 541       // ftos
 542       __ load_float(field);
 543       __ push(ftos);
 544       __ jmp(Done);
 545 
 546       __ bind(notFloat);
 547       __ cmpl(flags, stos);
 548       __ jcc(Assembler::notEqual, notShort);
 549       // stos
 550       __ load_signed_short(rax, field);
 551       __ push(stos);
 552       __ jmp(Done);
 553 
 554       __ bind(notShort);
 555       __ cmpl(flags, btos);
 556       __ jcc(Assembler::notEqual, notByte);
 557       // btos
 558       __ load_signed_byte(rax, field);
 559       __ push(btos);
 560       __ jmp(Done);
 561 
 562       __ bind(notByte);
 563       __ cmpl(flags, ctos);
 564       __ jcc(Assembler::notEqual, notChar);
 565       // ctos
 566       __ load_unsigned_short(rax, field);
 567       __ push(ctos);
 568       __ jmp(Done);
 569 
 570       __ bind(notChar);
 571       __ cmpl(flags, ztos);
 572       __ jcc(Assembler::notEqual, notBool);
 573       // ztos
 574       __ load_signed_byte(rax, field);
 575       __ push(ztos);
 576       __ jmp(Done);
 577 
 578       __ bind(notBool);
 579       break;
 580     }
 581 
 582   case Bytecodes::_ldc2_w:
 583     {
 584       Label notLong, notDouble;
 585       __ cmpl(flags, ltos);
 586       __ jcc(Assembler::notEqual, notLong);
 587       // ltos
 588       // Loading high word first because movptr clobbers rax
 589       NOT_LP64(__ movptr(rdx, field.plus_disp(4)));
 590       __ movptr(rax, field);
 591       __ push(ltos);
 592       __ jmp(Done);
 593 
 594       __ bind(notLong);
 595       __ cmpl(flags, dtos);
 596       __ jcc(Assembler::notEqual, notDouble);
 597       // dtos
 598       __ load_double(field);
 599       __ push(dtos);
 600       __ jmp(Done);
 601 
 602       __ bind(notDouble);
 603       break;
 604     }
 605 
 606   default:
 607     ShouldNotReachHere();
 608   }
 609 
 610   __ stop("bad ldc/condy");
 611 }
 612 
 613 void TemplateTable::locals_index(Register reg, int offset) {
 614   __ load_unsigned_byte(reg, at_bcp(offset));
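  // Locals are laid out at decreasing addresses from rlocals, so the index is
  // negated here and scaled by times_ptr in iaddress(Register) and friends.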
 615   __ negptr(reg);
 616 }
 617 
 618 void TemplateTable::iload() {
 619   iload_internal();
 620 }
 621 
 622 void TemplateTable::nofast_iload() {
 623   iload_internal(may_not_rewrite);
 624 }
 625 
 626 void TemplateTable::iload_internal(RewriteControl rc) {
 627   transition(vtos, itos);
 628   if (RewriteFrequentPairs && rc == may_rewrite) {
 629     Label rewrite, done;
 630     const Register bc = LP64_ONLY(c_rarg3) NOT_LP64(rcx);
 631     LP64_ONLY(assert(rbx != bc, "register damaged"));
 632 
 633     // get next byte
 634     __ load_unsigned_byte(rbx,
 635                           at_bcp(Bytecodes::length_for(Bytecodes::_iload)));
    // if _iload, wait to rewrite to fast_iload2.  We only want to rewrite the
    // last two iloads in a pair.  Comparing against fast_iload means that
    // the next bytecode is neither an iload nor a caload, and therefore
    // this is an iload pair.
 640     __ cmpl(rbx, Bytecodes::_iload);
 641     __ jcc(Assembler::equal, done);
 642 
 643     __ cmpl(rbx, Bytecodes::_fast_iload);
 644     __ movl(bc, Bytecodes::_fast_iload2);
 645 
 646     __ jccb(Assembler::equal, rewrite);
 647 
 648     // if _caload, rewrite to fast_icaload
 649     __ cmpl(rbx, Bytecodes::_caload);
 650     __ movl(bc, Bytecodes::_fast_icaload);
 651     __ jccb(Assembler::equal, rewrite);
 652 
 653     // rewrite so iload doesn't check again.
 654     __ movl(bc, Bytecodes::_fast_iload);
 655 
 656     // rewrite
 657     // bc: fast bytecode
 658     __ bind(rewrite);
 659     patch_bytecode(Bytecodes::_iload, bc, rbx, false);
 660     __ bind(done);
 661   }
 662 
 663   // Get the local value into tos
 664   locals_index(rbx);
 665   __ movl(rax, iaddress(rbx));
 666 }
 667 
 668 void TemplateTable::fast_iload2() {
 669   transition(vtos, itos);
 670   locals_index(rbx);
 671   __ movl(rax, iaddress(rbx));
 672   __ push(itos);
 673   locals_index(rbx, 3);
 674   __ movl(rax, iaddress(rbx));
 675 }
 676 
 677 void TemplateTable::fast_iload() {
 678   transition(vtos, itos);
 679   locals_index(rbx);
 680   __ movl(rax, iaddress(rbx));
 681 }
 682 
 683 void TemplateTable::lload() {
 684   transition(vtos, ltos);
 685   locals_index(rbx);
 686   __ movptr(rax, laddress(rbx));
 687   NOT_LP64(__ movl(rdx, haddress(rbx)));
 688 }
 689 
 690 void TemplateTable::fload() {
 691   transition(vtos, ftos);
 692   locals_index(rbx);
 693   __ load_float(faddress(rbx));
 694 }
 695 
 696 void TemplateTable::dload() {
 697   transition(vtos, dtos);
 698   locals_index(rbx);
 699   __ load_double(daddress(rbx));
 700 }
 701 
 702 void TemplateTable::aload() {
 703   transition(vtos, atos);
 704   locals_index(rbx);
 705   __ movptr(rax, aaddress(rbx));
 706 }
 707 
 708 void TemplateTable::locals_index_wide(Register reg) {
 709   __ load_unsigned_short(reg, at_bcp(2));
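  // The wide index is a big-endian u2: byte-swap and use a logical (unsigned)
  // shift, then negate the index just as locals_index() does.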
 710   __ bswapl(reg);
 711   __ shrl(reg, 16);
 712   __ negptr(reg);
 713 }
 714 
 715 void TemplateTable::wide_iload() {
 716   transition(vtos, itos);
 717   locals_index_wide(rbx);
 718   __ movl(rax, iaddress(rbx));
 719 }
 720 
 721 void TemplateTable::wide_lload() {
 722   transition(vtos, ltos);
 723   locals_index_wide(rbx);
 724   __ movptr(rax, laddress(rbx));
 725   NOT_LP64(__ movl(rdx, haddress(rbx)));
 726 }
 727 
 728 void TemplateTable::wide_fload() {
 729   transition(vtos, ftos);
 730   locals_index_wide(rbx);
 731   __ load_float(faddress(rbx));
 732 }
 733 
 734 void TemplateTable::wide_dload() {
 735   transition(vtos, dtos);
 736   locals_index_wide(rbx);
 737   __ load_double(daddress(rbx));
 738 }
 739 
 740 void TemplateTable::wide_aload() {
 741   transition(vtos, atos);
 742   locals_index_wide(rbx);
 743   __ movptr(rax, aaddress(rbx));
 744 }
 745 
 746 void TemplateTable::index_check(Register array, Register index) {
 747   // Pop ptr into array
 748   __ pop_ptr(array);
 749   index_check_without_pop(array, index);
 750 }
 751 
 752 void TemplateTable::index_check_without_pop(Register array, Register index) {
 753   // destroys rbx
 754   // check array
 755   __ null_check(array, arrayOopDesc::length_offset_in_bytes());
 756   // sign extend index for use by indexed load
 757   __ movl2ptr(index, index);
 758   // check index
 759   __ cmpl(index, Address(array, arrayOopDesc::length_offset_in_bytes()));
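  // An unsigned comparison is used so that negative indices, which appear as
  // very large unsigned values, also fail the bounds check.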
 760   if (index != rbx) {
 761     // ??? convention: move aberrant index into rbx for exception message
 762     assert(rbx != array, "different registers");
 763     __ movl(rbx, index);
 764   }
 765   Label skip;
 766   __ jccb(Assembler::below, skip);
 767   // Pass array to create more detailed exceptions.
 768   __ mov(NOT_LP64(rax) LP64_ONLY(c_rarg1), array);
 769   __ jump(ExternalAddress(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry));
 770   __ bind(skip);
 771 }
 772 
 773 void TemplateTable::iaload() {
 774   transition(itos, itos);
 775   // rax: index
 776   // rdx: array
 777   index_check(rdx, rax); // kills rbx
 778   __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, rax,
 779                     Address(rdx, rax, Address::times_4,
 780                             arrayOopDesc::base_offset_in_bytes(T_INT)),
 781                     noreg, noreg);
 782 }
 783 
 784 void TemplateTable::laload() {
 785   transition(itos, ltos);
 786   // rax: index
 787   // rdx: array
 788   index_check(rdx, rax); // kills rbx
 789   NOT_LP64(__ mov(rbx, rax));
 790   // rbx,: index
 791   __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, noreg /* ltos */,
 792                     Address(rdx, rbx, Address::times_8,
 793                             arrayOopDesc::base_offset_in_bytes(T_LONG)),
 794                     noreg, noreg);
 795 }
 796 
 797 
 798 
 799 void TemplateTable::faload() {
 800   transition(itos, ftos);
 801   // rax: index
 802   // rdx: array
 803   index_check(rdx, rax); // kills rbx
 804   __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, noreg /* ftos */,
 805                     Address(rdx, rax,
 806                             Address::times_4,
 807                             arrayOopDesc::base_offset_in_bytes(T_FLOAT)),
 808                     noreg, noreg);
 809 }
 810 
 811 void TemplateTable::daload() {
 812   transition(itos, dtos);
 813   // rax: index
 814   // rdx: array
 815   index_check(rdx, rax); // kills rbx
 816   __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, noreg /* dtos */,
 817                     Address(rdx, rax,
 818                             Address::times_8,
 819                             arrayOopDesc::base_offset_in_bytes(T_DOUBLE)),
 820                     noreg, noreg);
 821 }
 822 
 823 void TemplateTable::aaload() {
 824   transition(itos, atos);
 825   Register array = rdx;
 826   Register index = rax;
 827 
 828   index_check(array, index); // kills rbx
 829   __ profile_array(rbx, array, rcx);
 830   if (ValueArrayFlatten) {
 831     Label is_flat_array, done;
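    // In a flattened value array the elements are stored inline rather than as
    // oop references, so a regular oop load cannot be used; read_flattened_element()
    // materializes the element and leaves the resulting oop in rax.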
 832     __ test_flattened_array_oop(array, rbx, is_flat_array);
 833     do_oop_load(_masm,
 834                 Address(array, index,
 835                         UseCompressedOops ? Address::times_4 : Address::times_ptr,
 836                         arrayOopDesc::base_offset_in_bytes(T_OBJECT)),
 837                 rax,
 838                 IS_ARRAY);
 839     __ jmp(done);
 840     __ bind(is_flat_array);
 841     __ read_flattened_element(array, index, rbx, rcx, rax);
 842     __ bind(done);
 843   } else {
 844     do_oop_load(_masm,
 845                 Address(array, index,
 846                         UseCompressedOops ? Address::times_4 : Address::times_ptr,
 847                         arrayOopDesc::base_offset_in_bytes(T_OBJECT)),
 848                 rax,
 849                 IS_ARRAY);
 850   }
 851   __ profile_element(rbx, rax, rcx);
 852 }
 853 
 854 void TemplateTable::baload() {
 855   transition(itos, itos);
 856   // rax: index
 857   // rdx: array
 858   index_check(rdx, rax); // kills rbx
 859   __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, rax,
 860                     Address(rdx, rax, Address::times_1, arrayOopDesc::base_offset_in_bytes(T_BYTE)),
 861                     noreg, noreg);
 862 }
 863 
 864 void TemplateTable::caload() {
 865   transition(itos, itos);
 866   // rax: index
 867   // rdx: array
 868   index_check(rdx, rax); // kills rbx
 869   __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, rax,
 870                     Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR)),
 871                     noreg, noreg);
 872 }
 873 
 874 // iload followed by caload frequent pair
 875 void TemplateTable::fast_icaload() {
 876   transition(vtos, itos);
 877   // load index out of locals
 878   locals_index(rbx);
 879   __ movl(rax, iaddress(rbx));
 880 
 881   // rax: index
 882   // rdx: array
 883   index_check(rdx, rax); // kills rbx
 884   __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, rax,
 885                     Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR)),
 886                     noreg, noreg);
 887 }
 888 
 889 
 890 void TemplateTable::saload() {
 891   transition(itos, itos);
 892   // rax: index
 893   // rdx: array
 894   index_check(rdx, rax); // kills rbx
 895   __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, rax,
 896                     Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_SHORT)),
 897                     noreg, noreg);
 898 }
 899 
 900 void TemplateTable::iload(int n) {
 901   transition(vtos, itos);
 902   __ movl(rax, iaddress(n));
 903 }
 904 
 905 void TemplateTable::lload(int n) {
 906   transition(vtos, ltos);
 907   __ movptr(rax, laddress(n));
 908   NOT_LP64(__ movptr(rdx, haddress(n)));
 909 }
 910 
 911 void TemplateTable::fload(int n) {
 912   transition(vtos, ftos);
 913   __ load_float(faddress(n));
 914 }
 915 
 916 void TemplateTable::dload(int n) {
 917   transition(vtos, dtos);
 918   __ load_double(daddress(n));
 919 }
 920 
 921 void TemplateTable::aload(int n) {
 922   transition(vtos, atos);
 923   __ movptr(rax, aaddress(n));
 924 }
 925 
 926 void TemplateTable::aload_0() {
 927   aload_0_internal();
 928 }
 929 
 930 void TemplateTable::nofast_aload_0() {
 931   aload_0_internal(may_not_rewrite);
 932 }
 933 
 934 void TemplateTable::aload_0_internal(RewriteControl rc) {
 935   transition(vtos, atos);
 936   // According to bytecode histograms, the pairs:
 937   //
 938   // _aload_0, _fast_igetfield
 939   // _aload_0, _fast_agetfield
 940   // _aload_0, _fast_fgetfield
 941   //
 942   // occur frequently. If RewriteFrequentPairs is set, the (slow)
 943   // _aload_0 bytecode checks if the next bytecode is either
 944   // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then
 945   // rewrites the current bytecode into a pair bytecode; otherwise it
 946   // rewrites the current bytecode into _fast_aload_0 that doesn't do
 947   // the pair check anymore.
 948   //
 949   // Note: If the next bytecode is _getfield, the rewrite must be
 950   //       delayed, otherwise we may miss an opportunity for a pair.
 951   //
 952   // Also rewrite frequent pairs
 953   //   aload_0, aload_1
 954   //   aload_0, iload_1
  // These bytecodes, which need only a small amount of code, are the most
  // profitable to rewrite.
 957   if (RewriteFrequentPairs && rc == may_rewrite) {
 958     Label rewrite, done;
 959 
 960     const Register bc = LP64_ONLY(c_rarg3) NOT_LP64(rcx);
 961     LP64_ONLY(assert(rbx != bc, "register damaged"));
 962 
 963     // get next byte
 964     __ load_unsigned_byte(rbx, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0)));
 965 
 966     // if _getfield then wait with rewrite
 967     __ cmpl(rbx, Bytecodes::_getfield);
 968     __ jcc(Assembler::equal, done);
 969 
 970     // if _igetfield then rewrite to _fast_iaccess_0
 971     assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
 972     __ cmpl(rbx, Bytecodes::_fast_igetfield);
 973     __ movl(bc, Bytecodes::_fast_iaccess_0);
 974     __ jccb(Assembler::equal, rewrite);
 975 
 976     // if _agetfield then rewrite to _fast_aaccess_0
 977     assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
 978     __ cmpl(rbx, Bytecodes::_fast_agetfield);
 979     __ movl(bc, Bytecodes::_fast_aaccess_0);
 980     __ jccb(Assembler::equal, rewrite);
 981 
 982     // if _fgetfield then rewrite to _fast_faccess_0
 983     assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
 984     __ cmpl(rbx, Bytecodes::_fast_fgetfield);
 985     __ movl(bc, Bytecodes::_fast_faccess_0);
 986     __ jccb(Assembler::equal, rewrite);
 987 
 988     // else rewrite to _fast_aload0
 989     assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == Bytecodes::_aload_0, "fix bytecode definition");
 990     __ movl(bc, Bytecodes::_fast_aload_0);
 991 
 992     // rewrite
 993     // bc: fast bytecode
 994     __ bind(rewrite);
 995     patch_bytecode(Bytecodes::_aload_0, bc, rbx, false);
 996 
 997     __ bind(done);
 998   }
 999 
1000   // Do actual aload_0 (must do this after patch_bytecode which might call VM and GC might change oop).
1001   aload(0);
1002 }
1003 
1004 void TemplateTable::istore() {
1005   transition(itos, vtos);
1006   locals_index(rbx);
1007   __ movl(iaddress(rbx), rax);
1008 }
1009 
1010 
1011 void TemplateTable::lstore() {
1012   transition(ltos, vtos);
1013   locals_index(rbx);
1014   __ movptr(laddress(rbx), rax);
1015   NOT_LP64(__ movptr(haddress(rbx), rdx));
1016 }
1017 
1018 void TemplateTable::fstore() {
1019   transition(ftos, vtos);
1020   locals_index(rbx);
1021   __ store_float(faddress(rbx));
1022 }
1023 
1024 void TemplateTable::dstore() {
1025   transition(dtos, vtos);
1026   locals_index(rbx);
1027   __ store_double(daddress(rbx));
1028 }
1029 
1030 void TemplateTable::astore() {
1031   transition(vtos, vtos);
1032   __ pop_ptr(rax);
1033   locals_index(rbx);
1034   __ movptr(aaddress(rbx), rax);
1035 }
1036 
1037 void TemplateTable::wide_istore() {
1038   transition(vtos, vtos);
1039   __ pop_i();
1040   locals_index_wide(rbx);
1041   __ movl(iaddress(rbx), rax);
1042 }
1043 
1044 void TemplateTable::wide_lstore() {
1045   transition(vtos, vtos);
1046   NOT_LP64(__ pop_l(rax, rdx));
1047   LP64_ONLY(__ pop_l());
1048   locals_index_wide(rbx);
1049   __ movptr(laddress(rbx), rax);
1050   NOT_LP64(__ movl(haddress(rbx), rdx));
1051 }
1052 
1053 void TemplateTable::wide_fstore() {
1054 #ifdef _LP64
1055   transition(vtos, vtos);
1056   __ pop_f(xmm0);
1057   locals_index_wide(rbx);
1058   __ movflt(faddress(rbx), xmm0);
1059 #else
1060   wide_istore();
1061 #endif
1062 }
1063 
1064 void TemplateTable::wide_dstore() {
1065 #ifdef _LP64
1066   transition(vtos, vtos);
1067   __ pop_d(xmm0);
1068   locals_index_wide(rbx);
1069   __ movdbl(daddress(rbx), xmm0);
1070 #else
1071   wide_lstore();
1072 #endif
1073 }
1074 
1075 void TemplateTable::wide_astore() {
1076   transition(vtos, vtos);
1077   __ pop_ptr(rax);
1078   locals_index_wide(rbx);
1079   __ movptr(aaddress(rbx), rax);
1080 }
1081 
1082 void TemplateTable::iastore() {
1083   transition(itos, vtos);
1084   __ pop_i(rbx);
1085   // rax: value
1086   // rbx: index
1087   // rdx: array
1088   index_check(rdx, rbx); // prefer index in rbx
1089   __ access_store_at(T_INT, IN_HEAP | IS_ARRAY,
1090                      Address(rdx, rbx, Address::times_4,
1091                              arrayOopDesc::base_offset_in_bytes(T_INT)),
1092                      rax, noreg, noreg);
1093 }
1094 
1095 void TemplateTable::lastore() {
1096   transition(ltos, vtos);
1097   __ pop_i(rbx);
1098   // rax,: low(value)
1099   // rcx: array
1100   // rdx: high(value)
1101   index_check(rcx, rbx);  // prefer index in rbx,
1102   // rbx,: index
1103   __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY,
1104                      Address(rcx, rbx, Address::times_8,
1105                              arrayOopDesc::base_offset_in_bytes(T_LONG)),
1106                      noreg /* ltos */, noreg, noreg);
1107 }
1108 
1109 
1110 void TemplateTable::fastore() {
1111   transition(ftos, vtos);
1112   __ pop_i(rbx);
1113   // value is in UseSSE >= 1 ? xmm0 : ST(0)
1114   // rbx:  index
1115   // rdx:  array
1116   index_check(rdx, rbx); // prefer index in rbx
1117   __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY,
1118                      Address(rdx, rbx, Address::times_4,
1119                              arrayOopDesc::base_offset_in_bytes(T_FLOAT)),
1120                      noreg /* ftos */, noreg, noreg);
1121 }
1122 
1123 void TemplateTable::dastore() {
1124   transition(dtos, vtos);
1125   __ pop_i(rbx);
1126   // value is in UseSSE >= 2 ? xmm0 : ST(0)
1127   // rbx:  index
1128   // rdx:  array
1129   index_check(rdx, rbx); // prefer index in rbx
1130   __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY,
1131                      Address(rdx, rbx, Address::times_8,
1132                              arrayOopDesc::base_offset_in_bytes(T_DOUBLE)),
1133                      noreg /* dtos */, noreg, noreg);
1134 }
1135 
1136 void TemplateTable::aastore() {
1137   Label is_null, is_flat_array, ok_is_subtype, done;
1138   transition(vtos, vtos);
1139   // stack: ..., array, index, value
1140   __ movptr(rax, at_tos());    // value
1141   __ movl(rcx, at_tos_p1()); // index
1142   __ movptr(rdx, at_tos_p2()); // array
1143 
1144   Address element_address(rdx, rcx,
1145                           UseCompressedOops? Address::times_4 : Address::times_ptr,
1146                           arrayOopDesc::base_offset_in_bytes(T_OBJECT));
1147 
1148   index_check_without_pop(rdx, rcx);     // kills rbx
1149 
1150   __ profile_array(rdi, rdx, rbx);
1151   __ profile_element(rdi, rax, rbx);
1152 
1153   __ testptr(rax, rax);
1154   __ jcc(Assembler::zero, is_null);
1155 
1156   // Move array class to rdi
1157   __ load_klass(rdi, rdx);
1158   if (ValueArrayFlatten) {
1159     __ test_flattened_array_oop(rdx, rbx, is_flat_array);
1160   }
1161 
1162   // Move subklass into rbx
1163   __ load_klass(rbx, rax);
1164   // Move array element superklass into rax
1165   __ movptr(rax, Address(rdi,
1166                          ObjArrayKlass::element_klass_offset()));
1167 
1168   // Generate subtype check.  Blows rcx, rdi
1169   // Superklass in rax.  Subklass in rbx.
1170   // is "rbx <: rax" ? (value subclass <: array element superclass)
1171   __ gen_subtype_check(rbx, ok_is_subtype, false);
1172 
1173   // Come here on failure
1174   // object is at TOS
1175   __ jump(ExternalAddress(Interpreter::_throw_ArrayStoreException_entry));
1176 
1177   // Come here on success
1178   __ bind(ok_is_subtype);
1179 
1180   // Get the value we will store
1181   __ movptr(rax, at_tos());
1182   __ movl(rcx, at_tos_p1()); // index
1183   // Now store using the appropriate barrier
1184   do_oop_store(_masm, element_address, rax, IS_ARRAY);
1185   __ jmp(done);
1186 
1187   // Have a NULL in rax, rdx=array, ecx=index.  Store NULL at ary[idx]
1188   __ bind(is_null);
1189   if (EnableValhalla) {
1190     Label is_null_into_value_array_npe, store_null;
1191 
1192     // No way to store null in null-free array
1193     __ test_null_free_array_oop(rdx, rbx, is_null_into_value_array_npe);
1194     __ jmp(store_null);
1195 
1196     __ bind(is_null_into_value_array_npe);
1197     __ jump(ExternalAddress(Interpreter::_throw_NullPointerException_entry));
1198 
1199     __ bind(store_null);
1200   }
1201   // Store a NULL
1202   do_oop_store(_masm, element_address, noreg, IS_ARRAY);
1203   __ jmp(done);
1204 
1205   if (EnableValhalla) {
1206     Label is_type_ok;
1207     __ bind(is_flat_array); // Store non-null value to flat
1208 
1209     // Simplistic type check...
1210 
1211     // Profile the not-null value's klass.
1212     __ load_klass(rbx, rax);
1213     // Move element klass into rax
1214     __ movptr(rax, Address(rdi, ArrayKlass::element_klass_offset()));
1215     // flat value array needs exact type match
1216     // is "rax == rbx" (value subclass == array element superclass)
1217     __ cmpptr(rax, rbx);
1218     __ jccb(Assembler::equal, is_type_ok);
1219 
1220     __ jump(ExternalAddress(Interpreter::_throw_ArrayStoreException_entry));
1221 
1222     __ bind(is_type_ok);
1223     // rbx: value's klass
1224     // rdx: array
1225     // rdi: array klass
1226     __ test_klass_is_empty_value(rbx, rax, done);
1227 
1228     // calc dst for copy
1229     __ movl(rax, at_tos_p1()); // index
1230     __ data_for_value_array_index(rdx, rdi, rax, rax);
1231 
1232     // ...and src for copy
1233     __ movptr(rcx, at_tos());  // value
1234     __ data_for_oop(rcx, rcx, rbx);
1235 
1236     __ access_value_copy(IN_HEAP, rcx, rax, rbx);
1237   }
1238   // Pop stack arguments
1239   __ bind(done);
1240   __ addptr(rsp, 3 * Interpreter::stackElementSize);
1241 }
1242 
1243 void TemplateTable::bastore() {
1244   transition(itos, vtos);
1245   __ pop_i(rbx);
1246   // rax: value
1247   // rbx: index
1248   // rdx: array
1249   index_check(rdx, rbx); // prefer index in rbx
1250   // Need to check whether array is boolean or byte
1251   // since both types share the bastore bytecode.
1252   __ load_klass(rcx, rdx);
1253   __ movl(rcx, Address(rcx, Klass::layout_helper_offset()));
1254   int diffbit = Klass::layout_helper_boolean_diffbit();
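  // diffbit is set in the layout helper of a T_BOOLEAN array but not in that of
  // a T_BYTE array, so a single test tells the two apart.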
1255   __ testl(rcx, diffbit);
1256   Label L_skip;
1257   __ jccb(Assembler::zero, L_skip);
1258   __ andl(rax, 1);  // if it is a T_BOOLEAN array, mask the stored value to 0/1
1259   __ bind(L_skip);
1260   __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY,
1261                      Address(rdx, rbx,Address::times_1,
1262                              arrayOopDesc::base_offset_in_bytes(T_BYTE)),
1263                      rax, noreg, noreg);
1264 }
1265 
1266 void TemplateTable::castore() {
1267   transition(itos, vtos);
1268   __ pop_i(rbx);
1269   // rax: value
1270   // rbx: index
1271   // rdx: array
1272   index_check(rdx, rbx);  // prefer index in rbx
1273   __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY,
1274                      Address(rdx, rbx, Address::times_2,
1275                              arrayOopDesc::base_offset_in_bytes(T_CHAR)),
1276                      rax, noreg, noreg);
1277 }
1278 
1279 
1280 void TemplateTable::sastore() {
1281   castore();
1282 }
1283 
1284 void TemplateTable::istore(int n) {
1285   transition(itos, vtos);
1286   __ movl(iaddress(n), rax);
1287 }
1288 
1289 void TemplateTable::lstore(int n) {
1290   transition(ltos, vtos);
1291   __ movptr(laddress(n), rax);
1292   NOT_LP64(__ movptr(haddress(n), rdx));
1293 }
1294 
1295 void TemplateTable::fstore(int n) {
1296   transition(ftos, vtos);
1297   __ store_float(faddress(n));
1298 }
1299 
1300 void TemplateTable::dstore(int n) {
1301   transition(dtos, vtos);
1302   __ store_double(daddress(n));
1303 }
1304 
1305 
1306 void TemplateTable::astore(int n) {
1307   transition(vtos, vtos);
1308   __ pop_ptr(rax);
1309   __ movptr(aaddress(n), rax);
1310 }
1311 
1312 void TemplateTable::pop() {
1313   transition(vtos, vtos);
1314   __ addptr(rsp, Interpreter::stackElementSize);
1315 }
1316 
1317 void TemplateTable::pop2() {
1318   transition(vtos, vtos);
1319   __ addptr(rsp, 2 * Interpreter::stackElementSize);
1320 }
1321 
1322 
1323 void TemplateTable::dup() {
1324   transition(vtos, vtos);
1325   __ load_ptr(0, rax);
1326   __ push_ptr(rax);
1327   // stack: ..., a, a
1328 }
1329 
1330 void TemplateTable::dup_x1() {
1331   transition(vtos, vtos);
1332   // stack: ..., a, b
1333   __ load_ptr( 0, rax);  // load b
1334   __ load_ptr( 1, rcx);  // load a
1335   __ store_ptr(1, rax);  // store b
1336   __ store_ptr(0, rcx);  // store a
1337   __ push_ptr(rax);      // push b
1338   // stack: ..., b, a, b
1339 }
1340 
1341 void TemplateTable::dup_x2() {
1342   transition(vtos, vtos);
1343   // stack: ..., a, b, c
1344   __ load_ptr( 0, rax);  // load c
1345   __ load_ptr( 2, rcx);  // load a
1346   __ store_ptr(2, rax);  // store c in a
1347   __ push_ptr(rax);      // push c
1348   // stack: ..., c, b, c, c
1349   __ load_ptr( 2, rax);  // load b
1350   __ store_ptr(2, rcx);  // store a in b
1351   // stack: ..., c, a, c, c
1352   __ store_ptr(1, rax);  // store b in c
1353   // stack: ..., c, a, b, c
1354 }
1355 
1356 void TemplateTable::dup2() {
1357   transition(vtos, vtos);
1358   // stack: ..., a, b
1359   __ load_ptr(1, rax);  // load a
1360   __ push_ptr(rax);     // push a
1361   __ load_ptr(1, rax);  // load b
1362   __ push_ptr(rax);     // push b
1363   // stack: ..., a, b, a, b
1364 }
1365 
1366 
1367 void TemplateTable::dup2_x1() {
1368   transition(vtos, vtos);
1369   // stack: ..., a, b, c
1370   __ load_ptr( 0, rcx);  // load c
1371   __ load_ptr( 1, rax);  // load b
1372   __ push_ptr(rax);      // push b
1373   __ push_ptr(rcx);      // push c
1374   // stack: ..., a, b, c, b, c
1375   __ store_ptr(3, rcx);  // store c in b
1376   // stack: ..., a, c, c, b, c
1377   __ load_ptr( 4, rcx);  // load a
1378   __ store_ptr(2, rcx);  // store a in 2nd c
1379   // stack: ..., a, c, a, b, c
1380   __ store_ptr(4, rax);  // store b in a
1381   // stack: ..., b, c, a, b, c
1382 }
1383 
1384 void TemplateTable::dup2_x2() {
1385   transition(vtos, vtos);
1386   // stack: ..., a, b, c, d
1387   __ load_ptr( 0, rcx);  // load d
1388   __ load_ptr( 1, rax);  // load c
1389   __ push_ptr(rax);      // push c
1390   __ push_ptr(rcx);      // push d
1391   // stack: ..., a, b, c, d, c, d
1392   __ load_ptr( 4, rax);  // load b
1393   __ store_ptr(2, rax);  // store b in d
1394   __ store_ptr(4, rcx);  // store d in b
1395   // stack: ..., a, d, c, b, c, d
1396   __ load_ptr( 5, rcx);  // load a
1397   __ load_ptr( 3, rax);  // load c
1398   __ store_ptr(3, rcx);  // store a in c
1399   __ store_ptr(5, rax);  // store c in a
1400   // stack: ..., c, d, a, b, c, d
1401 }
1402 
1403 void TemplateTable::swap() {
1404   transition(vtos, vtos);
1405   // stack: ..., a, b
1406   __ load_ptr( 1, rcx);  // load a
1407   __ load_ptr( 0, rax);  // load b
1408   __ store_ptr(0, rcx);  // store a in b
1409   __ store_ptr(1, rax);  // store b in a
1410   // stack: ..., b, a
1411 }
1412 
1413 void TemplateTable::iop2(Operation op) {
1414   transition(itos, itos);
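  // The right-hand operand is on TOS in rax; for the non-commutative operations
  // (sub and the shifts) it is moved aside first so the left-hand operand can be
  // popped into rax.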
1415   switch (op) {
1416   case add  :                    __ pop_i(rdx); __ addl (rax, rdx); break;
1417   case sub  : __ movl(rdx, rax); __ pop_i(rax); __ subl (rax, rdx); break;
1418   case mul  :                    __ pop_i(rdx); __ imull(rax, rdx); break;
1419   case _and :                    __ pop_i(rdx); __ andl (rax, rdx); break;
1420   case _or  :                    __ pop_i(rdx); __ orl  (rax, rdx); break;
1421   case _xor :                    __ pop_i(rdx); __ xorl (rax, rdx); break;
1422   case shl  : __ movl(rcx, rax); __ pop_i(rax); __ shll (rax);      break;
1423   case shr  : __ movl(rcx, rax); __ pop_i(rax); __ sarl (rax);      break;
1424   case ushr : __ movl(rcx, rax); __ pop_i(rax); __ shrl (rax);      break;
1425   default   : ShouldNotReachHere();
1426   }
1427 }
1428 
1429 void TemplateTable::lop2(Operation op) {
1430   transition(ltos, ltos);
1431 #ifdef _LP64
1432   switch (op) {
1433   case add  :                    __ pop_l(rdx); __ addptr(rax, rdx); break;
1434   case sub  : __ mov(rdx, rax);  __ pop_l(rax); __ subptr(rax, rdx); break;
1435   case _and :                    __ pop_l(rdx); __ andptr(rax, rdx); break;
1436   case _or  :                    __ pop_l(rdx); __ orptr (rax, rdx); break;
1437   case _xor :                    __ pop_l(rdx); __ xorptr(rax, rdx); break;
1438   default   : ShouldNotReachHere();
1439   }
1440 #else
1441   __ pop_l(rbx, rcx);
1442   switch (op) {
1443     case add  : __ addl(rax, rbx); __ adcl(rdx, rcx); break;
1444     case sub  : __ subl(rbx, rax); __ sbbl(rcx, rdx);
1445                 __ mov (rax, rbx); __ mov (rdx, rcx); break;
1446     case _and : __ andl(rax, rbx); __ andl(rdx, rcx); break;
1447     case _or  : __ orl (rax, rbx); __ orl (rdx, rcx); break;
1448     case _xor : __ xorl(rax, rbx); __ xorl(rdx, rcx); break;
1449     default   : ShouldNotReachHere();
1450   }
1451 #endif
1452 }
1453 
1454 void TemplateTable::idiv() {
1455   transition(itos, itos);
1456   __ movl(rcx, rax);
1457   __ pop_i(rax);
1458   // Note: could xor rax and ecx and compare with (-1 ^ min_int). If
1459   //       they are not equal, one could do a normal division (no correction
1460   //       needed), which may speed up this implementation for the common case.
1461   //       (see also JVM spec., p.243 & p.271)
1462   __ corrected_idivl(rcx);
1463 }
1464 
1465 void TemplateTable::irem() {
1466   transition(itos, itos);
1467   __ movl(rcx, rax);
1468   __ pop_i(rax);
1469   // Note: could xor rax and ecx and compare with (-1 ^ min_int). If
1470   //       they are not equal, one could do a normal division (no correction
1471   //       needed), which may speed up this implementation for the common case.
1472   //       (see also JVM spec., p.243 & p.271)
1473   __ corrected_idivl(rcx);
1474   __ movl(rax, rdx);
1475 }
1476 
1477 void TemplateTable::lmul() {
1478   transition(ltos, ltos);
1479 #ifdef _LP64
1480   __ pop_l(rdx);
1481   __ imulq(rax, rdx);
1482 #else
1483   __ pop_l(rbx, rcx);
1484   __ push(rcx); __ push(rbx);
1485   __ push(rdx); __ push(rax);
1486   __ lmul(2 * wordSize, 0);
1487   __ addptr(rsp, 4 * wordSize);  // take off temporaries
1488 #endif
1489 }
1490 
1491 void TemplateTable::ldiv() {
1492   transition(ltos, ltos);
1493 #ifdef _LP64
1494   __ mov(rcx, rax);
1495   __ pop_l(rax);
1496   // generate explicit div0 check
1497   __ testq(rcx, rcx);
1498   __ jump_cc(Assembler::zero,
1499              ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
1500   // Note: could xor rax and rcx and compare with (-1 ^ min_int). If
1501   //       they are not equal, one could do a normal division (no correction
1502   //       needed), which may speed up this implementation for the common case.
1503   //       (see also JVM spec., p.243 & p.271)
1504   __ corrected_idivq(rcx); // kills rbx
1505 #else
1506   __ pop_l(rbx, rcx);
1507   __ push(rcx); __ push(rbx);
1508   __ push(rdx); __ push(rax);
1509   // check if y = 0
1510   __ orl(rax, rdx);
1511   __ jump_cc(Assembler::zero,
1512              ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
1513   __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::ldiv));
1514   __ addptr(rsp, 4 * wordSize);  // take off temporaries
1515 #endif
1516 }
1517 
1518 void TemplateTable::lrem() {
1519   transition(ltos, ltos);
1520 #ifdef _LP64
1521   __ mov(rcx, rax);
1522   __ pop_l(rax);
1523   __ testq(rcx, rcx);
1524   __ jump_cc(Assembler::zero,
1525              ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
1526   // Note: could xor rax and rcx and compare with (-1 ^ min_int). If
1527   //       they are not equal, one could do a normal division (no correction
1528   //       needed), which may speed up this implementation for the common case.
1529   //       (see also JVM spec., p.243 & p.271)
1530   __ corrected_idivq(rcx); // kills rbx
1531   __ mov(rax, rdx);
1532 #else
1533   __ pop_l(rbx, rcx);
1534   __ push(rcx); __ push(rbx);
1535   __ push(rdx); __ push(rax);
1536   // check if y = 0
1537   __ orl(rax, rdx);
1538   __ jump_cc(Assembler::zero,
1539              ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
1540   __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::lrem));
1541   __ addptr(rsp, 4 * wordSize);
1542 #endif
1543 }
1544 
1545 void TemplateTable::lshl() {
1546   transition(itos, ltos);
1547   __ movl(rcx, rax);                             // get shift count
#ifdef _LP64
1549   __ pop_l(rax);                                 // get shift value
1550   __ shlq(rax);
1551 #else
1552   __ pop_l(rax, rdx);                            // get shift value
1553   __ lshl(rdx, rax);
1554 #endif
1555 }
1556 
1557 void TemplateTable::lshr() {
1558 #ifdef _LP64
1559   transition(itos, ltos);
1560   __ movl(rcx, rax);                             // get shift count
1561   __ pop_l(rax);                                 // get shift value
1562   __ sarq(rax);
1563 #else
1564   transition(itos, ltos);
1565   __ mov(rcx, rax);                              // get shift count
1566   __ pop_l(rax, rdx);                            // get shift value
1567   __ lshr(rdx, rax, true);
1568 #endif
1569 }
1570 
1571 void TemplateTable::lushr() {
1572   transition(itos, ltos);
1573 #ifdef _LP64
1574   __ movl(rcx, rax);                             // get shift count
1575   __ pop_l(rax);                                 // get shift value
1576   __ shrq(rax);
1577 #else
1578   __ mov(rcx, rax);                              // get shift count
1579   __ pop_l(rax, rdx);                            // get shift value
1580   __ lshr(rdx, rax);
1581 #endif
1582 }
1583 
1584 void TemplateTable::fop2(Operation op) {
1585   transition(ftos, ftos);
1586 
1587   if (UseSSE >= 1) {
1588     switch (op) {
1589     case add:
1590       __ addss(xmm0, at_rsp());
1591       __ addptr(rsp, Interpreter::stackElementSize);
1592       break;
1593     case sub:
1594       __ movflt(xmm1, xmm0);
1595       __ pop_f(xmm0);
1596       __ subss(xmm0, xmm1);
1597       break;
1598     case mul:
1599       __ mulss(xmm0, at_rsp());
1600       __ addptr(rsp, Interpreter::stackElementSize);
1601       break;
1602     case div:
1603       __ movflt(xmm1, xmm0);
1604       __ pop_f(xmm0);
1605       __ divss(xmm0, xmm1);
1606       break;
1607     case rem:
1608       // On x86_64 platforms the SharedRuntime::frem method is called to perform the
1609       // modulo operation. The frem method calls the function
1610       // double fmod(double x, double y) in math.h. The documentation of fmod states:
1611       // "If x or y is a NaN, a NaN is returned." without specifying what type of NaN
1612       // (signalling or quiet) is returned.
1613       //
1614       // On x86_32 platforms the FPU is used to perform the modulo operation. The
1615       // reason is that on 32-bit Windows the sign of modulo operations diverges from
      // what is considered the standard (e.g., -0.0f % -3.14f is 0.0f and not -0.0f).
1617       // The fprem instruction used on x86_32 is functionally equivalent to
1618       // SharedRuntime::frem in that it returns a NaN.
1619 #ifdef _LP64
1620       __ movflt(xmm1, xmm0);
1621       __ pop_f(xmm0);
1622       __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2);
1623 #else
1624       __ push_f(xmm0);
1625       __ pop_f();
1626       __ fld_s(at_rsp());
1627       __ fremr(rax);
1628       __ f2ieee();
1629       __ pop(rax);  // pop second operand off the stack
1630       __ push_f();
1631       __ pop_f(xmm0);
1632 #endif
1633       break;
1634     default:
1635       ShouldNotReachHere();
1636       break;
1637     }
1638   } else {
1639 #ifdef _LP64
1640     ShouldNotReachHere();
1641 #else
1642     switch (op) {
1643     case add: __ fadd_s (at_rsp());                break;
1644     case sub: __ fsubr_s(at_rsp());                break;
1645     case mul: __ fmul_s (at_rsp());                break;
1646     case div: __ fdivr_s(at_rsp());                break;
1647     case rem: __ fld_s  (at_rsp()); __ fremr(rax); break;
1648     default : ShouldNotReachHere();
1649     }
1650     __ f2ieee();
1651     __ pop(rax);  // pop second operand off the stack
1652 #endif // _LP64
1653   }
1654 }
1655 
1656 void TemplateTable::dop2(Operation op) {
1657   transition(dtos, dtos);
1658   if (UseSSE >= 2) {
1659     switch (op) {
1660     case add:
1661       __ addsd(xmm0, at_rsp());
1662       __ addptr(rsp, 2 * Interpreter::stackElementSize);
1663       break;
1664     case sub:
1665       __ movdbl(xmm1, xmm0);
1666       __ pop_d(xmm0);
1667       __ subsd(xmm0, xmm1);
1668       break;
1669     case mul:
1670       __ mulsd(xmm0, at_rsp());
1671       __ addptr(rsp, 2 * Interpreter::stackElementSize);
1672       break;
1673     case div:
1674       __ movdbl(xmm1, xmm0);
1675       __ pop_d(xmm0);
1676       __ divsd(xmm0, xmm1);
1677       break;
1678     case rem:
1679       // Similar to fop2(), the modulo operation is performed using the
1680       // SharedRuntime::drem method (on x86_64 platforms) or using the
1681       // FPU (on x86_32 platforms) for the same reasons as mentioned in fop2().
1682 #ifdef _LP64
1683       __ movdbl(xmm1, xmm0);
1684       __ pop_d(xmm0);
1685       __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2);
1686 #else
1687       __ push_d(xmm0);
1688       __ pop_d();
1689       __ fld_d(at_rsp());
1690       __ fremr(rax);
1691       __ d2ieee();
1692       __ pop(rax);
1693       __ pop(rdx);
1694       __ push_d();
1695       __ pop_d(xmm0);
1696 #endif
1697       break;
1698     default:
1699       ShouldNotReachHere();
1700       break;
1701     }
1702   } else {
1703 #ifdef _LP64
1704     ShouldNotReachHere();
1705 #else
1706     switch (op) {
1707     case add: __ fadd_d (at_rsp());                break;
1708     case sub: __ fsubr_d(at_rsp());                break;
1709     case mul: {
1710       Label L_strict;
1711       Label L_join;
1712       const Address access_flags      (rcx, Method::access_flags_offset());
1713       __ get_method(rcx);
1714       __ movl(rcx, access_flags);
1715       __ testl(rcx, JVM_ACC_STRICT);
1716       __ jccb(Assembler::notZero, L_strict);
1717       __ fmul_d (at_rsp());
1718       __ jmpb(L_join);
1719       __ bind(L_strict);
1720       __ fld_x(ExternalAddress(StubRoutines::addr_fpu_subnormal_bias1()));
1721       __ fmulp();
1722       __ fmul_d (at_rsp());
1723       __ fld_x(ExternalAddress(StubRoutines::addr_fpu_subnormal_bias2()));
1724       __ fmulp();
1725       __ bind(L_join);
1726       break;
1727     }
1728     case div: {
1729       Label L_strict;
1730       Label L_join;
1731       const Address access_flags      (rcx, Method::access_flags_offset());
1732       __ get_method(rcx);
1733       __ movl(rcx, access_flags);
1734       __ testl(rcx, JVM_ACC_STRICT);
1735       __ jccb(Assembler::notZero, L_strict);
1736       __ fdivr_d(at_rsp());
1737       __ jmp(L_join);
1738       __ bind(L_strict);
1739       __ fld_x(ExternalAddress(StubRoutines::addr_fpu_subnormal_bias1()));
1740       __ fmul_d (at_rsp());
1741       __ fdivrp();
1742       __ fld_x(ExternalAddress(StubRoutines::addr_fpu_subnormal_bias2()));
1743       __ fmulp();
1744       __ bind(L_join);
1745       break;
1746     }
1747     case rem: __ fld_d  (at_rsp()); __ fremr(rax); break;
1748     default : ShouldNotReachHere();
1749     }
1750     __ d2ieee();
1751     // Pop double precision number from rsp.
1752     __ pop(rax);
1753     __ pop(rdx);
1754 #endif
1755   }
1756 }
1757 
1758 void TemplateTable::ineg() {
1759   transition(itos, itos);
1760   __ negl(rax);
1761 }
1762 
1763 void TemplateTable::lneg() {
1764   transition(ltos, ltos);
1765   LP64_ONLY(__ negq(rax));
1766   NOT_LP64(__ lneg(rdx, rax));
1767 }
1768 
1769 // Note: 'double' and 'long long' have 32-bit alignment on x86.
1770 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
1771   // Use the expression (adr)&(~0xF) to provide a 16-byte aligned address
1772   // for the 128-bit operands of SSE instructions.
1773   jlong *operand = (jlong*)(((intptr_t)adr)&((intptr_t)(~0xF)));
1774   // Store the value to a 128-bit operand.
1775   operand[0] = lo;
1776   operand[1] = hi;
1777   return operand;
1778 }
1779 
1780 // Buffer for 128-bit masks used by SSE instructions.
1781 static jlong float_signflip_pool[2*2];
1782 static jlong double_signflip_pool[2*2];
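     // Each pool is 32 bytes so that double_quadword(), which rounds the passed
     // address down to a 16-byte boundary, always finds a complete 16-byte slot
     // inside the buffer, whatever the pool's own alignment happens to be.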
1783 
1784 void TemplateTable::fneg() {
1785   transition(ftos, ftos);
1786   if (UseSSE >= 1) {
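         // XORing with 0x80000000 in each 32-bit lane flips the IEEE-754 sign bit;
         // only the low lane of xmm0 carries the ftos value.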
1787     static jlong *float_signflip  = double_quadword(&float_signflip_pool[1],  CONST64(0x8000000080000000),  CONST64(0x8000000080000000));
1788     __ xorps(xmm0, ExternalAddress((address) float_signflip));
1789   } else {
1790     LP64_ONLY(ShouldNotReachHere());
1791     NOT_LP64(__ fchs());
1792   }
1793 }
1794 
1795 void TemplateTable::dneg() {
1796   transition(dtos, dtos);
1797   if (UseSSE >= 2) {
1798     static jlong *double_signflip =
1799       double_quadword(&double_signflip_pool[1], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
1800     __ xorpd(xmm0, ExternalAddress((address) double_signflip));
1801   } else {
1802 #ifdef _LP64
1803     ShouldNotReachHere();
1804 #else
1805     __ fchs();
1806 #endif
1807   }
1808 }
1809 
1810 void TemplateTable::iinc() {
1811   transition(vtos, vtos);
1812   __ load_signed_byte(rdx, at_bcp(2)); // get constant
1813   locals_index(rbx);
1814   __ addl(iaddress(rbx), rdx);
1815 }
1816 
1817 void TemplateTable::wide_iinc() {
1818   transition(vtos, vtos);
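       // wide iinc layout: wide, iinc, indexbyte1, indexbyte2, constbyte1, constbyte2.
       // The 32-bit load at bcp + 4 picks up the two constant bytes (plus the two
       // bytes that follow); bswapl and sarl(16) then leave the sign-extended,
       // big-endian 16-bit constant in rdx.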
1819   __ movl(rdx, at_bcp(4)); // get constant
1820   locals_index_wide(rbx);
1821   __ bswapl(rdx); // swap bytes & sign-extend constant
1822   __ sarl(rdx, 16);
1823   __ addl(iaddress(rbx), rdx);
1824   // Note: should probably use only one movl to get both
1825   //       the index and the constant -> fix this
1826 }
1827 
1828 void TemplateTable::convert() {
1829 #ifdef _LP64
1830   // Checking
1831 #ifdef ASSERT
1832   {
1833     TosState tos_in  = ilgl;
1834     TosState tos_out = ilgl;
1835     switch (bytecode()) {
1836     case Bytecodes::_i2l: // fall through
1837     case Bytecodes::_i2f: // fall through
1838     case Bytecodes::_i2d: // fall through
1839     case Bytecodes::_i2b: // fall through
1840     case Bytecodes::_i2c: // fall through
1841     case Bytecodes::_i2s: tos_in = itos; break;
1842     case Bytecodes::_l2i: // fall through
1843     case Bytecodes::_l2f: // fall through
1844     case Bytecodes::_l2d: tos_in = ltos; break;
1845     case Bytecodes::_f2i: // fall through
1846     case Bytecodes::_f2l: // fall through
1847     case Bytecodes::_f2d: tos_in = ftos; break;
1848     case Bytecodes::_d2i: // fall through
1849     case Bytecodes::_d2l: // fall through
1850     case Bytecodes::_d2f: tos_in = dtos; break;
1851     default             : ShouldNotReachHere();
1852     }
1853     switch (bytecode()) {
1854     case Bytecodes::_l2i: // fall through
1855     case Bytecodes::_f2i: // fall through
1856     case Bytecodes::_d2i: // fall through
1857     case Bytecodes::_i2b: // fall through
1858     case Bytecodes::_i2c: // fall through
1859     case Bytecodes::_i2s: tos_out = itos; break;
1860     case Bytecodes::_i2l: // fall through
1861     case Bytecodes::_f2l: // fall through
1862     case Bytecodes::_d2l: tos_out = ltos; break;
1863     case Bytecodes::_i2f: // fall through
1864     case Bytecodes::_l2f: // fall through
1865     case Bytecodes::_d2f: tos_out = ftos; break;
1866     case Bytecodes::_i2d: // fall through
1867     case Bytecodes::_l2d: // fall through
1868     case Bytecodes::_f2d: tos_out = dtos; break;
1869     default             : ShouldNotReachHere();
1870     }
1871     transition(tos_in, tos_out);
1872   }
1873 #endif // ASSERT
1874 
1875   static const int64_t is_nan = 0x8000000000000000L;
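       // 0x8000000000000000 is the "integer indefinite" value that cvttss2siq and
       // cvttsd2siq produce for NaN or out-of-range inputs, so a match below means
       // the conversion has to be redone via the SharedRuntime slow path.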
1876 
1877   // Conversion
1878   switch (bytecode()) {
1879   case Bytecodes::_i2l:
1880     __ movslq(rax, rax);
1881     break;
1882   case Bytecodes::_i2f:
1883     __ cvtsi2ssl(xmm0, rax);
1884     break;
1885   case Bytecodes::_i2d:
1886     __ cvtsi2sdl(xmm0, rax);
1887     break;
1888   case Bytecodes::_i2b:
1889     __ movsbl(rax, rax);
1890     break;
1891   case Bytecodes::_i2c:
1892     __ movzwl(rax, rax);
1893     break;
1894   case Bytecodes::_i2s:
1895     __ movswl(rax, rax);
1896     break;
1897   case Bytecodes::_l2i:
1898     __ movl(rax, rax);
1899     break;
1900   case Bytecodes::_l2f:
1901     __ cvtsi2ssq(xmm0, rax);
1902     break;
1903   case Bytecodes::_l2d:
1904     __ cvtsi2sdq(xmm0, rax);
1905     break;
1906   case Bytecodes::_f2i:
1907   {
1908     Label L;
1909     __ cvttss2sil(rax, xmm0);
1910     __ cmpl(rax, 0x80000000); // NaN or overflow/underflow?
1911     __ jcc(Assembler::notEqual, L);
1912     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1);
1913     __ bind(L);
1914   }
1915     break;
1916   case Bytecodes::_f2l:
1917   {
1918     Label L;
1919     __ cvttss2siq(rax, xmm0);
1920     // NaN or overflow/underflow?
1921     __ cmp64(rax, ExternalAddress((address) &is_nan));
1922     __ jcc(Assembler::notEqual, L);
1923     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1);
1924     __ bind(L);
1925   }
1926     break;
1927   case Bytecodes::_f2d:
1928     __ cvtss2sd(xmm0, xmm0);
1929     break;
1930   case Bytecodes::_d2i:
1931   {
1932     Label L;
1933     __ cvttsd2sil(rax, xmm0);
1934     __ cmpl(rax, 0x80000000); // NaN or overflow/underflow?
1935     __ jcc(Assembler::notEqual, L);
1936     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 1);
1937     __ bind(L);
1938   }
1939     break;
1940   case Bytecodes::_d2l:
1941   {
1942     Label L;
1943     __ cvttsd2siq(rax, xmm0);
1944     // NaN or overflow/underflow?
1945     __ cmp64(rax, ExternalAddress((address) &is_nan));
1946     __ jcc(Assembler::notEqual, L);
1947     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 1);
1948     __ bind(L);
1949   }
1950     break;
1951   case Bytecodes::_d2f:
1952     __ cvtsd2ss(xmm0, xmm0);
1953     break;
1954   default:
1955     ShouldNotReachHere();
1956   }
1957 #else
1958   // Checking
1959 #ifdef ASSERT
1960   { TosState tos_in  = ilgl;
1961     TosState tos_out = ilgl;
1962     switch (bytecode()) {
1963       case Bytecodes::_i2l: // fall through
1964       case Bytecodes::_i2f: // fall through
1965       case Bytecodes::_i2d: // fall through
1966       case Bytecodes::_i2b: // fall through
1967       case Bytecodes::_i2c: // fall through
1968       case Bytecodes::_i2s: tos_in = itos; break;
1969       case Bytecodes::_l2i: // fall through
1970       case Bytecodes::_l2f: // fall through
1971       case Bytecodes::_l2d: tos_in = ltos; break;
1972       case Bytecodes::_f2i: // fall through
1973       case Bytecodes::_f2l: // fall through
1974       case Bytecodes::_f2d: tos_in = ftos; break;
1975       case Bytecodes::_d2i: // fall through
1976       case Bytecodes::_d2l: // fall through
1977       case Bytecodes::_d2f: tos_in = dtos; break;
1978       default             : ShouldNotReachHere();
1979     }
1980     switch (bytecode()) {
1981       case Bytecodes::_l2i: // fall through
1982       case Bytecodes::_f2i: // fall through
1983       case Bytecodes::_d2i: // fall through
1984       case Bytecodes::_i2b: // fall through
1985       case Bytecodes::_i2c: // fall through
1986       case Bytecodes::_i2s: tos_out = itos; break;
1987       case Bytecodes::_i2l: // fall through
1988       case Bytecodes::_f2l: // fall through
1989       case Bytecodes::_d2l: tos_out = ltos; break;
1990       case Bytecodes::_i2f: // fall through
1991       case Bytecodes::_l2f: // fall through
1992       case Bytecodes::_d2f: tos_out = ftos; break;
1993       case Bytecodes::_i2d: // fall through
1994       case Bytecodes::_l2d: // fall through
1995       case Bytecodes::_f2d: tos_out = dtos; break;
1996       default             : ShouldNotReachHere();
1997     }
1998     transition(tos_in, tos_out);
1999   }
2000 #endif // ASSERT
2001 
2002   // Conversion
2003   // (Note: use push(rcx)/pop(rcx) for 1/2-word stack-ptr manipulation)
2004   switch (bytecode()) {
2005     case Bytecodes::_i2l:
2006       __ extend_sign(rdx, rax);
2007       break;
2008     case Bytecodes::_i2f:
2009       if (UseSSE >= 1) {
2010         __ cvtsi2ssl(xmm0, rax);
2011       } else {
2012         __ push(rax);          // store int on tos
2013         __ fild_s(at_rsp());   // load int to ST0
2014         __ f2ieee();           // truncate to float size
2015         __ pop(rcx);           // adjust rsp
2016       }
2017       break;
2018     case Bytecodes::_i2d:
2019       if (UseSSE >= 2) {
2020         __ cvtsi2sdl(xmm0, rax);
2021       } else {
2022         __ push(rax);          // add one slot for d2ieee()
2023         __ push(rax);          // store int on tos
2024         __ fild_s(at_rsp());   // load int to ST0
2025         __ d2ieee();           // truncate to double size
2026         __ pop(rcx);           // adjust rsp
2027         __ pop(rcx);
2028       }
2029       break;
2030     case Bytecodes::_i2b:
2031       __ shll(rax, 24);      // truncate upper 24 bits
2032       __ sarl(rax, 24);      // and sign-extend byte
2033       LP64_ONLY(__ movsbl(rax, rax));
2034       break;
2035     case Bytecodes::_i2c:
2036       __ andl(rax, 0xFFFF);  // truncate upper 16 bits
2037       LP64_ONLY(__ movzwl(rax, rax));
2038       break;
2039     case Bytecodes::_i2s:
2040       __ shll(rax, 16);      // truncate upper 16 bits
2041       __ sarl(rax, 16);      // and sign-extend short
2042       LP64_ONLY(__ movswl(rax, rax));
2043       break;
2044     case Bytecodes::_l2i:
2045       /* nothing to do */
2046       break;
2047     case Bytecodes::_l2f:
2048       // On 64-bit platforms, the cvtsi2ssq instruction is used to convert
2049       // 64-bit long values to floats. On 32-bit platforms it is not possible
2050       // to use that instruction with 64-bit operands, therefore the FPU is
2051       // used to perform the conversion.
2052       __ push(rdx);          // store long on tos
2053       __ push(rax);
2054       __ fild_d(at_rsp());   // load long to ST0
2055       __ f2ieee();           // truncate to float size
2056       __ pop(rcx);           // adjust rsp
2057       __ pop(rcx);
2058       if (UseSSE >= 1) {
2059         __ push_f();
2060         __ pop_f(xmm0);
2061       }
2062       break;
2063     case Bytecodes::_l2d:
2064       // On 32-bit platforms the FPU is used for the conversion because it is
2065       // not possible to use the cvtsi2sdq instruction with 64-bit operands
2066       // on those platforms.
2067       __ push(rdx);          // store long on tos
2068       __ push(rax);
2069       __ fild_d(at_rsp());   // load long to ST0
2070       __ d2ieee();           // truncate to double size
2071       __ pop(rcx);           // adjust rsp
2072       __ pop(rcx);
2073       if (UseSSE >= 2) {
2074         __ push_d();
2075         __ pop_d(xmm0);
2076       }
2077       break;
2078     case Bytecodes::_f2i:
2079       // SharedRuntime::f2i does not differentiate between sNaNs and qNaNs
2080       // as it returns 0 for any NaN.
2081       if (UseSSE >= 1) {
2082         __ push_f(xmm0);
2083       } else {
2084         __ push(rcx);          // reserve space for argument
2085         __ fstp_s(at_rsp());   // pass float argument on stack
2086       }
2087       __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1);
2088       break;
2089     case Bytecodes::_f2l:
2090       // SharedRuntime::f2l does not differentiate between sNaNs and qNaNs
2091       // as it returns 0 for any NaN.
2092       if (UseSSE >= 1) {
2093        __ push_f(xmm0);
2094       } else {
2095         __ push(rcx);          // reserve space for argument
2096         __ fstp_s(at_rsp());   // pass float argument on stack
2097       }
2098       __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1);
2099       break;
2100     case Bytecodes::_f2d:
2101       if (UseSSE < 1) {
2102         /* nothing to do */
2103       } else if (UseSSE == 1) {
2104         __ push_f(xmm0);
2105         __ pop_f();
2106       } else { // UseSSE >= 2
2107         __ cvtss2sd(xmm0, xmm0);
2108       }
2109       break;
2110     case Bytecodes::_d2i:
2111       if (UseSSE >= 2) {
2112         __ push_d(xmm0);
2113       } else {
2114         __ push(rcx);          // reserve space for argument
2115         __ push(rcx);
2116         __ fstp_d(at_rsp());   // pass double argument on stack
2117       }
2118       __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 2);
2119       break;
2120     case Bytecodes::_d2l:
2121       if (UseSSE >= 2) {
2122         __ push_d(xmm0);
2123       } else {
2124         __ push(rcx);          // reserve space for argument
2125         __ push(rcx);
2126         __ fstp_d(at_rsp());   // pass double argument on stack
2127       }
2128       __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 2);
2129       break;
2130     case Bytecodes::_d2f:
2131       if (UseSSE <= 1) {
2132         __ push(rcx);          // reserve space for f2ieee()
2133         __ f2ieee();           // truncate to float size
2134         __ pop(rcx);           // adjust rsp
2135         if (UseSSE == 1) {
2136           // The cvtsd2ss instruction is not available if UseSSE==1, therefore
2137           // the conversion is performed using the FPU in this case.
2138           __ push_f();
2139           __ pop_f(xmm0);
2140         }
2141       } else { // UseSSE >= 2
2142         __ cvtsd2ss(xmm0, xmm0);
2143       }
2144       break;
2145     default             :
2146       ShouldNotReachHere();
2147   }
2148 #endif
2149 }
2150 
2151 void TemplateTable::lcmp() {
2152   transition(ltos, itos);
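       // Result convention: rax = -1 if x < y, 0 if x == y, and 1 if x > y,
       // where x is the first (popped) operand and y is the incoming tos value.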
2153 #ifdef _LP64
2154   Label done;
2155   __ pop_l(rdx);
2156   __ cmpq(rdx, rax);
2157   __ movl(rax, -1);
2158   __ jccb(Assembler::less, done);
2159   __ setb(Assembler::notEqual, rax);
2160   __ movzbl(rax, rax);
2161   __ bind(done);
2162 #else
2163 
2164   // y = rdx:rax
2165   __ pop_l(rbx, rcx);             // get x = rcx:rbx
2166   __ lcmp2int(rcx, rbx, rdx, rax);// rcx := cmp(x, y)
2167   __ mov(rax, rcx);
2168 #endif
2169 }
2170 
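     // For fcmpl/dcmpl unordered_result is -1 (an unordered comparison yields -1);
     // for fcmpg/dcmpg it is +1, matching the JVM specification for these bytecodes.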
2171 void TemplateTable::float_cmp(bool is_float, int unordered_result) {
2172   if ((is_float && UseSSE >= 1) ||
2173       (!is_float && UseSSE >= 2)) {
2174     Label done;
2175     if (is_float) {
2176       // XXX get rid of pop here, use ... reg, mem32
2177       __ pop_f(xmm1);
2178       __ ucomiss(xmm1, xmm0);
2179     } else {
2180       // XXX get rid of pop here, use ... reg, mem64
2181       __ pop_d(xmm1);
2182       __ ucomisd(xmm1, xmm0);
2183     }
2184     if (unordered_result < 0) {
2185       __ movl(rax, -1);
2186       __ jccb(Assembler::parity, done);
2187       __ jccb(Assembler::below, done);
2188       __ setb(Assembler::notEqual, rdx);
2189       __ movzbl(rax, rdx);
2190     } else {
2191       __ movl(rax, 1);
2192       __ jccb(Assembler::parity, done);
2193       __ jccb(Assembler::above, done);
2194       __ movl(rax, 0);
2195       __ jccb(Assembler::equal, done);
2196       __ decrementl(rax);
2197     }
2198     __ bind(done);
2199   } else {
2200 #ifdef _LP64
2201     ShouldNotReachHere();
2202 #else
2203     if (is_float) {
2204       __ fld_s(at_rsp());
2205     } else {
2206       __ fld_d(at_rsp());
2207       __ pop(rdx);
2208     }
2209     __ pop(rcx);
2210     __ fcmp2int(rax, unordered_result < 0);
2211 #endif // _LP64
2212   }
2213 }
2214 
2215 void TemplateTable::branch(bool is_jsr, bool is_wide) {
2216   __ get_method(rcx); // rcx holds method
2217   __ profile_taken_branch(rax, rbx); // rax holds updated MDP, rbx
2218                                      // holds bumped taken count
2219 
2220   const ByteSize be_offset = MethodCounters::backedge_counter_offset() +
2221                              InvocationCounter::counter_offset();
2222   const ByteSize inv_offset = MethodCounters::invocation_counter_offset() +
2223                               InvocationCounter::counter_offset();
2224 
2225   // Load up edx with the branch displacement
2226   if (is_wide) {
2227     __ movl(rdx, at_bcp(1));
2228   } else {
2229     __ load_signed_short(rdx, at_bcp(1));
2230   }
2231   __ bswapl(rdx);
2232 
2233   if (!is_wide) {
2234     __ sarl(rdx, 16);
2235   }
2236   LP64_ONLY(__ movl2ptr(rdx, rdx));
2237 
2238   // Handle all the JSR stuff here, then exit.
2239   // It's much shorter and cleaner than intermingling with the non-JSR
2240   // normal-branch stuff occurring below.
2241   if (is_jsr) {
2242     // Pre-load the next target bytecode into rbx
2243     __ load_unsigned_byte(rbx, Address(rbcp, rdx, Address::times_1, 0));
2244 
2245     // compute return address as bci in rax
2246     __ lea(rax, at_bcp((is_wide ? 5 : 3) -
2247                         in_bytes(ConstMethod::codes_offset())));
2248     __ subptr(rax, Address(rcx, Method::const_offset()));
2249     // Adjust the bcp in r13 by the displacement in rdx
2250     __ addptr(rbcp, rdx);
2251     // jsr returns atos that is not an oop
2252     __ push_i(rax);
2253     __ dispatch_only(vtos, true);
2254     return;
2255   }
2256 
2257   // Normal (non-jsr) branch handling
2258 
2259   // Adjust the bcp in r13 by the displacement in rdx
2260   __ addptr(rbcp, rdx);
2261 
2262   assert(UseLoopCounter || !UseOnStackReplacement,
2263          "on-stack-replacement requires loop counters");
2264   Label backedge_counter_overflow;
2265   Label profile_method;
2266   Label dispatch;
2267   if (UseLoopCounter) {
2268     // increment backedge counter for backward branches
2269     // rax: MDO
2270     // rbx: MDO bumped taken-count
2271     // rcx: method
2272     // rdx: target offset
2273     // r13: target bcp
2274     // r14: locals pointer
2275     __ testl(rdx, rdx);             // check if forward or backward branch
2276     __ jcc(Assembler::positive, dispatch); // count only if backward branch
2277 
2278     // check if MethodCounters exists
2279     Label has_counters;
2280     __ movptr(rax, Address(rcx, Method::method_counters_offset()));
2281     __ testptr(rax, rax);
2282     __ jcc(Assembler::notZero, has_counters);
2283     __ push(rdx);
2284     __ push(rcx);
2285     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters),
2286                rcx);
2287     __ pop(rcx);
2288     __ pop(rdx);
2289     __ movptr(rax, Address(rcx, Method::method_counters_offset()));
2290     __ testptr(rax, rax);
2291     __ jcc(Assembler::zero, dispatch);
2292     __ bind(has_counters);
2293 
2294     if (TieredCompilation) {
2295       Label no_mdo;
2296       int increment = InvocationCounter::count_increment;
2297       if (ProfileInterpreter) {
2298         // Are we profiling?
2299         __ movptr(rbx, Address(rcx, in_bytes(Method::method_data_offset())));
2300         __ testptr(rbx, rbx);
2301         __ jccb(Assembler::zero, no_mdo);
2302         // Increment the MDO backedge counter
2303         const Address mdo_backedge_counter(rbx, in_bytes(MethodData::backedge_counter_offset()) +
2304                                            in_bytes(InvocationCounter::counter_offset()));
2305         const Address mask(rbx, in_bytes(MethodData::backedge_mask_offset()));
2306         __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, rax, false, Assembler::zero,
2307                                    UseOnStackReplacement ? &backedge_counter_overflow : NULL);
2308         __ jmp(dispatch);
2309       }
2310       __ bind(no_mdo);
2311       // Increment backedge counter in MethodCounters*
2312       __ movptr(rcx, Address(rcx, Method::method_counters_offset()));
2313       const Address mask(rcx, in_bytes(MethodCounters::backedge_mask_offset()));
2314       __ increment_mask_and_jump(Address(rcx, be_offset), increment, mask,
2315                                  rax, false, Assembler::zero,
2316                                  UseOnStackReplacement ? &backedge_counter_overflow : NULL);
2317     } else { // not TieredCompilation
2318       // increment counter
2319       __ movptr(rcx, Address(rcx, Method::method_counters_offset()));
2320       __ movl(rax, Address(rcx, be_offset));        // load backedge counter
2321       __ incrementl(rax, InvocationCounter::count_increment); // increment counter
2322       __ movl(Address(rcx, be_offset), rax);        // store counter
2323 
2324       __ movl(rax, Address(rcx, inv_offset));    // load invocation counter
2325 
2326       __ andl(rax, InvocationCounter::count_mask_value); // and the status bits
2327       __ addl(rax, Address(rcx, be_offset));        // add both counters
2328 
2329       if (ProfileInterpreter) {
2330         // Test to see if we should create a method data oop
2331         __ cmp32(rax, Address(rcx, in_bytes(MethodCounters::interpreter_profile_limit_offset())));
2332         __ jcc(Assembler::less, dispatch);
2333 
2334         // if no method data exists, go to profile method
2335         __ test_method_data_pointer(rax, profile_method);
2336 
2337         if (UseOnStackReplacement) {
2338           // check for overflow against rbx which is the MDO taken count
2339           __ cmp32(rbx, Address(rcx, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset())));
2340           __ jcc(Assembler::below, dispatch);
2341 
2342           // When ProfileInterpreter is on, the backedge_count comes
2343           // from the MethodData*, whose value does not get reset on
2344           // the call to frequency_counter_overflow().  To avoid
2345           // excessive calls to the overflow routine while the method is
2346           // being compiled, add a second test to make sure the overflow
2347           // function is called only once every overflow_frequency.
2348           const int overflow_frequency = 1024;
2349           __ andl(rbx, overflow_frequency - 1);
2350           __ jcc(Assembler::zero, backedge_counter_overflow);
2351 
2352         }
2353       } else {
2354         if (UseOnStackReplacement) {
2355           // check for overflow against rax, which is the sum of the
2356           // counters
2357           __ cmp32(rax, Address(rcx, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset())));
2358           __ jcc(Assembler::aboveEqual, backedge_counter_overflow);
2359 
2360         }
2361       }
2362     }
2363     __ bind(dispatch);
2364   }
2365 
2366   // Pre-load the next target bytecode into rbx
2367   __ load_unsigned_byte(rbx, Address(rbcp, 0));
2368 
2369   // continue with the bytecode @ target
2370   // rax: return bci for jsr's, unused otherwise
2371   // rbx: target bytecode
2372   // r13: target bcp
2373   __ dispatch_only(vtos, true);
2374 
2375   if (UseLoopCounter) {
2376     if (ProfileInterpreter) {
2377       // Out-of-line code to allocate method data oop.
2378       __ bind(profile_method);
2379       __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
2380       __ set_method_data_pointer_for_bcp();
2381       __ jmp(dispatch);
2382     }
2383 
2384     if (UseOnStackReplacement) {
2385       // invocation counter overflow
2386       __ bind(backedge_counter_overflow);
2387       __ negptr(rdx);
2388       __ addptr(rdx, rbcp); // branch bcp
2389       // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp)
2390       __ call_VM(noreg,
2391                  CAST_FROM_FN_PTR(address,
2392                                   InterpreterRuntime::frequency_counter_overflow),
2393                  rdx);
2394 
2395       // rax: osr nmethod (osr ok) or NULL (osr not possible)
2396       // rdx: scratch
2397       // r14: locals pointer
2398       // r13: bcp
2399       __ testptr(rax, rax);                        // test result
2400       __ jcc(Assembler::zero, dispatch);         // no osr if null
2401       // nmethod may have been invalidated (VM may block upon call_VM return)
2402       __ cmpb(Address(rax, nmethod::state_offset()), nmethod::in_use);
2403       __ jcc(Assembler::notEqual, dispatch);
2404 
2405       // We have the address of an on stack replacement routine in rax.
2406       // In preparation for invoking it, we must first migrate the locals
2407       // and monitors off of the interpreter frame on the stack.
2408       // Be sure to save the osr nmethod over the migration call;
2409       // it is preserved in rbx.
2410       __ mov(rbx, rax);
2411 
2412       NOT_LP64(__ get_thread(rcx));
2413 
2414       call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin));
2415 
2416       // rax is OSR buffer, move it to expected parameter location
2417       LP64_ONLY(__ mov(j_rarg0, rax));
2418       NOT_LP64(__ mov(rcx, rax));
2419       // We use the j_rarg definitions here because parameter registers differ
2420       // across platforms and we are in the midst of a calling sequence to the
2421       // OSR nmethod, so we do not want a collision. These are NOT parameters.
2422 
2423       const Register retaddr   = LP64_ONLY(j_rarg2) NOT_LP64(rdi);
2424       const Register sender_sp = LP64_ONLY(j_rarg1) NOT_LP64(rdx);
2425 
2426       // pop the interpreter frame
2427       __ movptr(sender_sp, Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize)); // get sender sp
2428       __ leave();                                // remove frame anchor
2429       __ pop(retaddr);                           // get return address
2430       __ mov(rsp, sender_sp);                   // set sp to sender sp
2431       // Ensure compiled code always sees stack at proper alignment
2432       __ andptr(rsp, -(StackAlignmentInBytes));
2433 
2434       // Unlike some other platforms, we need no specialized return from compiled
2435       // code to the interpreter or the call stub.
2436 
2437       // push the return address
2438       __ push(retaddr);
2439 
2440       // and begin the OSR nmethod
2441       __ jmp(Address(rbx, nmethod::osr_entry_point_offset()));
2442     }
2443   }
2444 }
2445 
2446 void TemplateTable::if_0cmp(Condition cc) {
2447   transition(itos, vtos);
2448   // assume branch is more often taken than not (loops use backward branches)
2449   Label not_taken;
2450   __ testl(rax, rax);
2451   __ jcc(j_not(cc), not_taken);
2452   branch(false, false);
2453   __ bind(not_taken);
2454   __ profile_not_taken_branch(rax);
2455 }
2456 
2457 void TemplateTable::if_icmp(Condition cc) {
2458   transition(itos, vtos);
2459   // assume branch is more often taken than not (loops use backward branches)
2460   Label not_taken;
2461   __ pop_i(rdx);
2462   __ cmpl(rdx, rax);
2463   __ jcc(j_not(cc), not_taken);
2464   branch(false, false);
2465   __ bind(not_taken);
2466   __ profile_not_taken_branch(rax);
2467 }
2468 
2469 void TemplateTable::if_nullcmp(Condition cc) {
2470   transition(atos, vtos);
2471   // assume branch is more often taken than not (loops use backward branches)
2472   Label not_taken;
2473   __ testptr(rax, rax);
2474   __ jcc(j_not(cc), not_taken);
2475   branch(false, false);
2476   __ bind(not_taken);
2477   __ profile_not_taken_branch(rax);
2478 }
2479 
2480 void TemplateTable::if_acmp(Condition cc) {
2481   transition(atos, vtos);
2482   // assume branch is more often taken than not (loops use backward branches)
2483   Label taken, not_taken;
2484   __ pop_ptr(rdx);
2485 
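       // Valhalla acmp protocol: identical references are trivially equal; a null on
       // either side or a non-value operand makes the operands trivially unequal;
       // operands of different value klasses are unequal as well; otherwise
       // substitutability is decided by a runtime call.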
2486   const int is_value_mask = markWord::always_locked_pattern;
2487   if (EnableValhalla) {
2488     __ cmpoop(rdx, rax);
2489     __ jcc(Assembler::equal, (cc == equal) ? taken : not_taken);
2490 
2491     // might be substitutable, test if either rax or rdx is null
2492     __ movptr(rbx, rdx);
2493     __ andptr(rbx, rax);
2494     __ testptr(rbx, rbx);
2495     __ jcc(Assembler::zero, (cc == equal) ? not_taken : taken);
2496 
2497     // and both are values ?
2498     __ movptr(rbx, Address(rdx, oopDesc::mark_offset_in_bytes()));
2499     __ andptr(rbx, is_value_mask);
2500     __ movptr(rcx, Address(rax, oopDesc::mark_offset_in_bytes()));
2501     __ andptr(rcx, is_value_mask);
2502     __ andptr(rbx, rcx);
2503     __ cmpl(rbx, is_value_mask);
2504     __ jcc(Assembler::notEqual, (cc == equal) ? not_taken : taken);
2505 
2506     // same value klass ?
2507     __ load_metadata(rbx, rdx);
2508     __ load_metadata(rcx, rax);
2509     __ cmpptr(rbx, rcx);
2510     __ jcc(Assembler::notEqual, (cc == equal) ? not_taken : taken);
2511 
2512     // Know both are the same type, let's test for substitutability...
2513     if (cc == equal) {
2514       invoke_is_substitutable(rax, rdx, taken, not_taken);
2515     } else {
2516       invoke_is_substitutable(rax, rdx, not_taken, taken);
2517     }
2518     __ stop("Not reachable");
2519   }
2520 
2521   __ cmpoop(rdx, rax);
2522   __ jcc(j_not(cc), not_taken);
2523   __ bind(taken);
2524   branch(false, false);
2525   __ bind(not_taken);
2526   __ profile_not_taken_branch(rax);
2527 }
2528 
2529 void TemplateTable::invoke_is_substitutable(Register aobj, Register bobj,
2530                                             Label& is_subst, Label& not_subst) {
2531   __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::is_substitutable), aobj, bobj);
2532   // Restored...rax answer, jmp to outcome...
2533   __ testl(rax, rax);
2534   __ jcc(Assembler::zero, not_subst);
2535   __ jmp(is_subst);
2536 }
2537 
2538 void TemplateTable::ret() {
2539   transition(vtos, vtos);
2540   locals_index(rbx);
2541   LP64_ONLY(__ movslq(rbx, iaddress(rbx))); // get return bci, compute return bcp
2542   NOT_LP64(__ movptr(rbx, iaddress(rbx)));
2543   __ profile_ret(rbx, rcx);
2544   __ get_method(rax);
2545   __ movptr(rbcp, Address(rax, Method::const_offset()));
2546   __ lea(rbcp, Address(rbcp, rbx, Address::times_1,
2547                       ConstMethod::codes_offset()));
2548   __ dispatch_next(vtos, 0, true);
2549 }
2550 
2551 void TemplateTable::wide_ret() {
2552   transition(vtos, vtos);
2553   locals_index_wide(rbx);
2554   __ movptr(rbx, aaddress(rbx)); // get return bci, compute return bcp
2555   __ profile_ret(rbx, rcx);
2556   __ get_method(rax);
2557   __ movptr(rbcp, Address(rax, Method::const_offset()));
2558   __ lea(rbcp, Address(rbcp, rbx, Address::times_1, ConstMethod::codes_offset()));
2559   __ dispatch_next(vtos, 0, true);
2560 }
2561 
2562 void TemplateTable::tableswitch() {
2563   Label default_case, continue_execution;
2564   transition(itos, vtos);
2565 
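       // tableswitch layout after the alignment padding: default offset, low, high,
       // then (high - low + 1) 32-bit jump offsets, all stored big-endian.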
2566   // align r13/rsi
2567   __ lea(rbx, at_bcp(BytesPerInt));
2568   __ andptr(rbx, -BytesPerInt);
2569   // load lo & hi
2570   __ movl(rcx, Address(rbx, BytesPerInt));
2571   __ movl(rdx, Address(rbx, 2 * BytesPerInt));
2572   __ bswapl(rcx);
2573   __ bswapl(rdx);
2574   // check against lo & hi
2575   __ cmpl(rax, rcx);
2576   __ jcc(Assembler::less, default_case);
2577   __ cmpl(rax, rdx);
2578   __ jcc(Assembler::greater, default_case);
2579   // lookup dispatch offset
2580   __ subl(rax, rcx);
2581   __ movl(rdx, Address(rbx, rax, Address::times_4, 3 * BytesPerInt));
2582   __ profile_switch_case(rax, rbx, rcx);
2583   // continue execution
2584   __ bind(continue_execution);
2585   __ bswapl(rdx);
2586   LP64_ONLY(__ movl2ptr(rdx, rdx));
2587   __ load_unsigned_byte(rbx, Address(rbcp, rdx, Address::times_1));
2588   __ addptr(rbcp, rdx);
2589   __ dispatch_only(vtos, true);
2590   // handle default
2591   __ bind(default_case);
2592   __ profile_switch_default(rax);
2593   __ movl(rdx, Address(rbx, 0));
2594   __ jmp(continue_execution);
2595 }
2596 
2597 void TemplateTable::lookupswitch() {
2598   transition(itos, itos);
2599   __ stop("lookupswitch bytecode should have been rewritten");
2600 }
2601 
2602 void TemplateTable::fast_linearswitch() {
2603   transition(itos, vtos);
2604   Label loop_entry, loop, found, continue_execution;
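       // lookupswitch layout after the alignment padding: default offset, npairs,
       // then npairs (match, offset) pairs of 32-bit big-endian words. The key in
       // rax is byte-swapped once so each table entry can be compared directly.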
2605   // bswap rax so we can avoid bswapping the table entries
2606   __ bswapl(rax);
2607   // align r13
2608   __ lea(rbx, at_bcp(BytesPerInt)); // btw: should be able to get rid of
2609                                     // this instruction (change offsets
2610                                     // below)
2611   __ andptr(rbx, -BytesPerInt);
2612   // set counter
2613   __ movl(rcx, Address(rbx, BytesPerInt));
2614   __ bswapl(rcx);
2615   __ jmpb(loop_entry);
2616   // table search
2617   __ bind(loop);
2618   __ cmpl(rax, Address(rbx, rcx, Address::times_8, 2 * BytesPerInt));
2619   __ jcc(Assembler::equal, found);
2620   __ bind(loop_entry);
2621   __ decrementl(rcx);
2622   __ jcc(Assembler::greaterEqual, loop);
2623   // default case
2624   __ profile_switch_default(rax);
2625   __ movl(rdx, Address(rbx, 0));
2626   __ jmp(continue_execution);
2627   // entry found -> get offset
2628   __ bind(found);
2629   __ movl(rdx, Address(rbx, rcx, Address::times_8, 3 * BytesPerInt));
2630   __ profile_switch_case(rcx, rax, rbx);
2631   // continue execution
2632   __ bind(continue_execution);
2633   __ bswapl(rdx);
2634   __ movl2ptr(rdx, rdx);
2635   __ load_unsigned_byte(rbx, Address(rbcp, rdx, Address::times_1));
2636   __ addptr(rbcp, rdx);
2637   __ dispatch_only(vtos, true);
2638 }
2639 
2640 void TemplateTable::fast_binaryswitch() {
2641   transition(itos, vtos);
2642   // Implementation using the following core algorithm:
2643   //
2644   // int binary_search(int key, LookupswitchPair* array, int n) {
2645   //   // Binary search according to "Methodik des Programmierens" by
2646   //   // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985.
2647   //   int i = 0;
2648   //   int j = n;
2649   //   while (i+1 < j) {
2650   //     // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q)
2651   //     // with      Q: for all i: 0 <= i < n: key < a[i]
2652   //     // where a stands for the array and assuming that the (non-existing)
2653   //     // element a[n] is infinitely big.
2654   //     int h = (i + j) >> 1;
2655   //     // i < h < j
2656   //     if (key < array[h].fast_match()) {
2657   //       j = h;
2658   //     } else {
2659   //       i = h;
2660   //     }
2661   //   }
2662   //   // R: a[i] <= key < a[i+1] or Q
2663   //   // (i.e., if key is within array, i is the correct index)
2664   //   return i;
2665   // }
2666 
2667   // Register allocation
2668   const Register key   = rax; // already set (tosca)
2669   const Register array = rbx;
2670   const Register i     = rcx;
2671   const Register j     = rdx;
2672   const Register h     = rdi;
2673   const Register temp  = rsi;
2674 
2675   // Find array start
2676   NOT_LP64(__ save_bcp());
2677 
2678   __ lea(array, at_bcp(3 * BytesPerInt)); // btw: should be able to
2679                                           // get rid of this
2680                                           // instruction (change
2681                                           // offsets below)
2682   __ andptr(array, -BytesPerInt);
2683 
2684   // Initialize i & j
2685   __ xorl(i, i);                            // i = 0;
2686   __ movl(j, Address(array, -BytesPerInt)); // j = length(array);
2687 
2688   // Convert j into native byteordering
2689   __ bswapl(j);
2690 
2691   // And start
2692   Label entry;
2693   __ jmp(entry);
2694 
2695   // binary search loop
2696   {
2697     Label loop;
2698     __ bind(loop);
2699     // int h = (i + j) >> 1;
2700     __ leal(h, Address(i, j, Address::times_1)); // h = i + j;
2701     __ sarl(h, 1);                               // h = (i + j) >> 1;
2702     // if (key < array[h].fast_match()) {
2703     //   j = h;
2704     // } else {
2705     //   i = h;
2706     // }
2707     // Convert array[h].match to native byte-ordering before compare
2708     __ movl(temp, Address(array, h, Address::times_8));
2709     __ bswapl(temp);
2710     __ cmpl(key, temp);
2711     // j = h if (key <  array[h].fast_match())
2712     __ cmov32(Assembler::less, j, h);
2713     // i = h if (key >= array[h].fast_match())
2714     __ cmov32(Assembler::greaterEqual, i, h);
2715     // while (i+1 < j)
2716     __ bind(entry);
2717     __ leal(h, Address(i, 1)); // i+1
2718     __ cmpl(h, j);             // i+1 < j
2719     __ jcc(Assembler::less, loop);
2720   }
2721 
2722   // end of binary search, result index is i (must check again!)
2723   Label default_case;
2724   // Convert array[i].match to native byte-ordering before compare
2725   __ movl(temp, Address(array, i, Address::times_8));
2726   __ bswapl(temp);
2727   __ cmpl(key, temp);
2728   __ jcc(Assembler::notEqual, default_case);
2729 
2730   // entry found -> j = offset
2731   __ movl(j , Address(array, i, Address::times_8, BytesPerInt));
2732   __ profile_switch_case(i, key, array);
2733   __ bswapl(j);
2734   LP64_ONLY(__ movslq(j, j));
2735 
2736   NOT_LP64(__ restore_bcp());
2737   NOT_LP64(__ restore_locals());                           // restore rdi
2738 
2739   __ load_unsigned_byte(rbx, Address(rbcp, j, Address::times_1));
2740   __ addptr(rbcp, j);
2741   __ dispatch_only(vtos, true);
2742 
2743   // default case -> j = default offset
2744   __ bind(default_case);
2745   __ profile_switch_default(i);
2746   __ movl(j, Address(array, -2 * BytesPerInt));
2747   __ bswapl(j);
2748   LP64_ONLY(__ movslq(j, j));
2749 
2750   NOT_LP64(__ restore_bcp());
2751   NOT_LP64(__ restore_locals());
2752 
2753   __ load_unsigned_byte(rbx, Address(rbcp, j, Address::times_1));
2754   __ addptr(rbcp, j);
2755   __ dispatch_only(vtos, true);
2756 }
2757 
2758 void TemplateTable::_return(TosState state) {
2759   transition(state, state);
2760 
2761   assert(_desc->calls_vm(),
2762          "inconsistent calls_vm information"); // call in remove_activation
2763 
2764   if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
2765     assert(state == vtos, "only valid state");
2766     Register robj = LP64_ONLY(c_rarg1) NOT_LP64(rax);
2767     __ movptr(robj, aaddress(0));
2768     __ load_klass(rdi, robj);
2769     __ movl(rdi, Address(rdi, Klass::access_flags_offset()));
2770     __ testl(rdi, JVM_ACC_HAS_FINALIZER);
2771     Label skip_register_finalizer;
2772     __ jcc(Assembler::zero, skip_register_finalizer);
2773 
2774     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), robj);
2775 
2776     __ bind(skip_register_finalizer);
2777   }
2778 
2779   if (SafepointMechanism::uses_thread_local_poll() && _desc->bytecode() != Bytecodes::_return_register_finalizer) {
2780     Label no_safepoint;
2781     NOT_PRODUCT(__ block_comment("Thread-local Safepoint poll"));
2782 #ifdef _LP64
2783     __ testb(Address(r15_thread, Thread::polling_page_offset()), SafepointMechanism::poll_bit());
2784 #else
2785     const Register thread = rdi;
2786     __ get_thread(thread);
2787     __ testb(Address(thread, Thread::polling_page_offset()), SafepointMechanism::poll_bit());
2788 #endif
2789     __ jcc(Assembler::zero, no_safepoint);
2790     __ push(state);
2791     __ call_VM(noreg, CAST_FROM_FN_PTR(address,
2792                                     InterpreterRuntime::at_safepoint));
2793     __ pop(state);
2794     __ bind(no_safepoint);
2795   }
2796 
2797   // Narrow result if state is itos but result type is smaller.
2798   // Need to narrow in the return bytecode rather than in generate_return_entry
2799   // since compiled code callers expect the result to already be narrowed.
2800   if (state == itos) {
2801     __ narrow(rax);
2802   }
2803 
2804   __ remove_activation(state, rbcp, true, true, true);
2805 
2806   __ jmp(rbcp);
2807 }
2808 
2809 // ----------------------------------------------------------------------------
2810 // Volatile variables demand their effects be made known to all CPUs
2811 // in order.  Store buffers on most chips allow reads & writes to
2812 // reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode
2813 // without some kind of memory barrier (i.e., it's not sufficient that
2814 // the interpreter does not reorder volatile references, the hardware
2815 // also must not reorder them).
2816 //
2817 // According to the new Java Memory Model (JMM):
2818 // (1) All volatiles are serialized with respect to each other.  ALSO reads &
2819 //     writes act as acquire & release, so:
2820 // (2) A read cannot let unrelated NON-volatile memory refs that
2821 //     happen after the read float up to before the read.  It's OK for
2822 //     non-volatile memory refs that happen before the volatile read to
2823 //     float down below it.
2824 // (3) Similarly, a volatile write cannot let unrelated NON-volatile
2825 //     memory refs that happen BEFORE the write float down to after the
2826 //     write.  It's OK for non-volatile memory refs that happen after the
2827 //     volatile write to float up before it.
2828 //
2829 // We only put in barriers around volatile refs (they are expensive),
2830 // not _between_ memory refs (that would require us to track the
2831 // flavor of the previous memory refs).  Requirements (2) and (3)
2832 // require some barriers before volatile stores and after volatile
2833 // loads.  These nearly cover requirement (1) but miss the
2834 // volatile-store-volatile-load case.  This final case is placed after
2835 // volatile-stores although it could just as well go before
2836 // volatile-loads.
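     // As an illustration of the volatile-store-volatile-load case, consider two
     // volatile fields a and b, initially 0, and two threads:
     //   Thread 1: a = 1; r1 = b;
     //   Thread 2: b = 1; r2 = a;
     // Without a StoreLoad barrier after each volatile store, both r1 and r2 could
     // observe 0, which requirement (1) above forbids.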
2837 
2838 void TemplateTable::volatile_barrier(Assembler::Membar_mask_bits order_constraint ) {
2839   // Helper function to insert an is-volatile test and memory barrier
2840   __ membar(order_constraint);
2841 }
2842 
2843 void TemplateTable::resolve_cache_and_index(int byte_no,
2844                                             Register cache,
2845                                             Register index,
2846                                             size_t index_size) {
2847   const Register temp = rbx;
2848   assert_different_registers(cache, index, temp);
2849 
2850   Label L_clinit_barrier_slow;
2851   Label resolved;
2852 
2853   Bytecodes::Code code = bytecode();
2854   switch (code) {
2855   case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break;
2856   case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break;
2857   default: break;
2858   }
2859 
2860   assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
2861   __ get_cache_and_index_and_bytecode_at_bcp(cache, index, temp, byte_no, 1, index_size);
2862   __ cmpl(temp, code);  // have we resolved this bytecode?
2863   __ jcc(Assembler::equal, resolved);
2864 
2865   // resolve first time through
2866   // Class initialization barrier slow path lands here as well.
2867   __ bind(L_clinit_barrier_slow);
2868   address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache);
2869   __ movl(temp, code);
2870   __ call_VM(noreg, entry, temp);
2871   // Update registers with resolved info
2872   __ get_cache_and_index_at_bcp(cache, index, 1, index_size);
2873 
2874   __ bind(resolved);
2875 
2876   // Class initialization barrier for static methods
2877   if (VM_Version::supports_fast_class_init_checks() && bytecode() == Bytecodes::_invokestatic) {
2878     const Register method = temp;
2879     const Register klass  = temp;
2880     const Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg);
2881     assert(thread != noreg, "x86_32 not supported");
2882 
2883     __ load_resolved_method_at_index(byte_no, method, cache, index);
2884     __ load_method_holder(klass, method);
2885     __ clinit_barrier(klass, thread, NULL /*L_fast_path*/, &L_clinit_barrier_slow);
2886   }
2887 }
2888 
2889 // The cache and index registers must be set before call
2890 void TemplateTable::load_field_cp_cache_entry(Register obj,
2891                                               Register cache,
2892                                               Register index,
2893                                               Register off,
2894                                               Register flags,
2895                                               bool is_static = false) {
2896   assert_different_registers(cache, index, flags, off);
2897 
2898   ByteSize cp_base_offset = ConstantPoolCache::base_offset();
2899   // Field offset
2900   __ movptr(off, Address(cache, index, Address::times_ptr,
2901                          in_bytes(cp_base_offset +
2902                                   ConstantPoolCacheEntry::f2_offset())));
2903   // Flags
2904   __ movl(flags, Address(cache, index, Address::times_ptr,
2905                          in_bytes(cp_base_offset +
2906                                   ConstantPoolCacheEntry::flags_offset())));
2907 
2908   // klass overwrite register
2909   if (is_static) {
2910     __ movptr(obj, Address(cache, index, Address::times_ptr,
2911                            in_bytes(cp_base_offset +
2912                                     ConstantPoolCacheEntry::f1_offset())));
2913     const int mirror_offset = in_bytes(Klass::java_mirror_offset());
2914     __ movptr(obj, Address(obj, mirror_offset));
2915     __ resolve_oop_handle(obj);
2916   }
2917 }
2918 
2919 void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
2920                                                Register method,
2921                                                Register itable_index,
2922                                                Register flags,
2923                                                bool is_invokevirtual,
2924                                                bool is_invokevfinal, /*unused*/
2925                                                bool is_invokedynamic) {
2926   // setup registers
2927   const Register cache = rcx;
2928   const Register index = rdx;
2929   assert_different_registers(method, flags);
2930   assert_different_registers(method, cache, index);
2931   assert_different_registers(itable_index, flags);
2932   assert_different_registers(itable_index, cache, index);
2933   // determine constant pool cache field offsets
2934   assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant");
2935   const int flags_offset = in_bytes(ConstantPoolCache::base_offset() +
2936                                     ConstantPoolCacheEntry::flags_offset());
2937   // access constant pool cache fields
2938   const int index_offset = in_bytes(ConstantPoolCache::base_offset() +
2939                                     ConstantPoolCacheEntry::f2_offset());
2940 
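       // invokedynamic carries a 4-byte index in the rewritten bytecode;
       // the other invoke bytecodes carry a 2-byte index.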
2941   size_t index_size = (is_invokedynamic ? sizeof(u4) : sizeof(u2));
2942   resolve_cache_and_index(byte_no, cache, index, index_size);
2943   __ load_resolved_method_at_index(byte_no, method, cache, index);
2944 
2945   if (itable_index != noreg) {
2946     // pick up itable or appendix index from f2 also:
2947     __ movptr(itable_index, Address(cache, index, Address::times_ptr, index_offset));
2948   }
2949   __ movl(flags, Address(cache, index, Address::times_ptr, flags_offset));
2950 }
2951 
2952 // The cache and index registers are expected to be set before this call.
2953 // Correct values of the cache and index registers are preserved.
2954 void TemplateTable::jvmti_post_field_access(Register cache,
2955                                             Register index,
2956                                             bool is_static,
2957                                             bool has_tos) {
2958   if (JvmtiExport::can_post_field_access()) {
2959     // Check to see if a field access watch has been set before we take
2960     // the time to call into the VM.
2961     Label L1;
2962     assert_different_registers(cache, index, rax);
2963     __ mov32(rax, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()));
2964     __ testl(rax,rax);
2965     __ jcc(Assembler::zero, L1);
2966 
2967     // cache entry pointer
2968     __ addptr(cache, in_bytes(ConstantPoolCache::base_offset()));
2969     __ shll(index, LogBytesPerWord);
2970     __ addptr(cache, index);
2971     if (is_static) {
2972       __ xorptr(rax, rax);      // NULL object reference
2973     } else {
2974       __ pop(atos);         // Get the object
2975       __ verify_oop(rax);
2976       __ push(atos);        // Restore stack state
2977     }
2978     // rax:    object pointer or NULL
2979     // cache: cache entry pointer
2980     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access),
2981                rax, cache);
2982     __ get_cache_and_index_at_bcp(cache, index, 1);
2983     __ bind(L1);
2984   }
2985 }
2986 
2987 void TemplateTable::pop_and_check_object(Register r) {
2988   __ pop_ptr(r);
2989   __ null_check(r);  // for field access must check obj.
2990   __ verify_oop(r);
2991 }
2992 
2993 void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) {
2994   transition(vtos, vtos);
2995 
2996   const Register cache = rcx;
2997   const Register index = rdx;
2998   const Register obj   = LP64_ONLY(c_rarg3) NOT_LP64(rcx);
2999   const Register off   = rbx;
3000   const Register flags = rax;
3001   const Register bc    = LP64_ONLY(c_rarg3) NOT_LP64(rcx); // uses same reg as obj, so don't mix them
3002   const Register flags2 = rdx;
3003 
3004   resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
3005   jvmti_post_field_access(cache, index, is_static, false);
3006   load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
3007 
3008   const Address field(obj, off, Address::times_1, 0*wordSize);
3009 
3010   Label Done, notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj, notValueType;
3011 
3012   if (!is_static) {
3013     __ movptr(rcx, Address(cache, index, Address::times_ptr,
3014                            in_bytes(ConstantPoolCache::base_offset() +
3015                                     ConstantPoolCacheEntry::f1_offset())));
3016   }
3017 
3018   __ movl(flags2, flags);
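       // flags2 keeps a full copy of the flags word (field index, flattenable and
       // flattened bits) for the Valhalla paths below; flags itself is reduced to
       // the TosState below.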
3019 
3020   __ shrl(flags, ConstantPoolCacheEntry::tos_state_shift);
3021   // Make sure we don't need to mask flags after the above shift
3022   assert(btos == 0, "change code, btos != 0");
3023 
3024   __ andl(flags, ConstantPoolCacheEntry::tos_state_mask);
3025 
3026   __ jcc(Assembler::notZero, notByte);
3027   // btos
3028   if (!is_static) pop_and_check_object(obj);
3029   __ access_load_at(T_BYTE, IN_HEAP, rax, field, noreg, noreg);
3030   __ push(btos);
3031   // Rewrite bytecode to be faster
3032   if (!is_static && rc == may_rewrite) {
3033     patch_bytecode(Bytecodes::_fast_bgetfield, bc, rbx);
3034   }
3035   __ jmp(Done);
3036 
3037   __ bind(notByte);
3038 
3039   __ cmpl(flags, ztos);
3040   __ jcc(Assembler::notEqual, notBool);
3041   // ztos (same code as btos)
3042   if (!is_static) pop_and_check_object(obj);
3043   __ access_load_at(T_BOOLEAN, IN_HEAP, rax, field, noreg, noreg);
3044   __ push(ztos);
3045   // Rewrite bytecode to be faster
3046   if (!is_static && rc == may_rewrite) {
3047     // use btos rewriting, no truncating to t/f bit is needed for getfield.
3048     patch_bytecode(Bytecodes::_fast_bgetfield, bc, rbx);
3049   }
3050   __ jmp(Done);
3051 
3052   __ bind(notBool);
3053   __ cmpl(flags, atos);
3054   __ jcc(Assembler::notEqual, notObj);
3055   // atos
3056   if (!EnableValhalla) {
3057     if (!is_static) pop_and_check_object(obj);
3058     do_oop_load(_masm, field, rax);
3059     __ push(atos);
3060     if (!is_static && rc == may_rewrite) {
3061       patch_bytecode(Bytecodes::_fast_agetfield, bc, rbx);
3062     }
3063     __ jmp(Done);
3064   } else {
3065     if (is_static) {
3066       __ load_heap_oop(rax, field);
3067       Label isFlattenable, uninitialized;
3068       // The code below handles the case where the static field has not been initialized yet
3069       __ test_field_is_flattenable(flags2, rscratch1, isFlattenable);
3070         // Not flattenable case
3071         __ push(atos);
3072         __ jmp(Done);
3073       // Flattenable case, must not return null even if uninitialized
3074       __ bind(isFlattenable);
3075         __ testptr(rax, rax);
3076         __ jcc(Assembler::zero, uninitialized);
3077           __ push(atos);
3078           __ jmp(Done);
3079         __ bind(uninitialized);
3080           __ andl(flags2, ConstantPoolCacheEntry::field_index_mask);
3081           __ call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::uninitialized_static_value_field),
3082                  obj, flags2);
3083           __ verify_oop(rax);
3084           __ push(atos);
3085           __ jmp(Done);
3086     } else {
3087       Label isFlattened, nonnull, isFlattenable, rewriteFlattenable;
3088       __ test_field_is_flattenable(flags2, rscratch1, isFlattenable);
3089         // Non-flattenable field case, also covers the object case
3090         pop_and_check_object(obj);
3091         __ load_heap_oop(rax, field);
3092         __ push(atos);
3093         if (rc == may_rewrite) {
3094           patch_bytecode(Bytecodes::_fast_agetfield, bc, rbx);
3095         }
3096         __ jmp(Done);
3097       __ bind(isFlattenable);
3098         __ test_field_is_flattened(flags2, rscratch1, isFlattened);
3099           // Non-flattened field case
3100           pop_and_check_object(obj);
3101           __ load_heap_oop(rax, field);
3102           __ testptr(rax, rax);
3103           __ jcc(Assembler::notZero, nonnull);
3104             __ andl(flags2, ConstantPoolCacheEntry::field_index_mask);
3105             __ call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::uninitialized_instance_value_field),
3106                        obj, flags2);
3107           __ bind(nonnull);
3108           __ verify_oop(rax);
3109           __ push(atos);
3110           __ jmp(rewriteFlattenable);
3111         __ bind(isFlattened);
3112           __ andl(flags2, ConstantPoolCacheEntry::field_index_mask);
3113           pop_and_check_object(rbx);
3114           call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::read_flattened_field),
3115                   rbx, flags2, rcx);
3116           __ verify_oop(rax);
3117           __ push(atos);
3118       __ bind(rewriteFlattenable);
3119       if (rc == may_rewrite) {
3120         patch_bytecode(Bytecodes::_fast_qgetfield, bc, rbx);
3121       }
3122       __ jmp(Done);
3123     }
3124   }
3125 
3126   __ bind(notObj);
3127 
3128   if (!is_static) pop_and_check_object(obj);
3129 
3130   __ cmpl(flags, itos);
3131   __ jcc(Assembler::notEqual, notInt);
3132   // itos
3133   __ access_load_at(T_INT, IN_HEAP, rax, field, noreg, noreg);
3134   __ push(itos);
3135   // Rewrite bytecode to be faster
3136   if (!is_static && rc == may_rewrite) {
3137     patch_bytecode(Bytecodes::_fast_igetfield, bc, rbx);
3138   }
3139   __ jmp(Done);
3140 
3141   __ bind(notInt);
3142   __ cmpl(flags, ctos);
3143   __ jcc(Assembler::notEqual, notChar);
3144   // ctos
3145   __ access_load_at(T_CHAR, IN_HEAP, rax, field, noreg, noreg);
3146   __ push(ctos);
3147   // Rewrite bytecode to be faster
3148   if (!is_static && rc == may_rewrite) {
3149     patch_bytecode(Bytecodes::_fast_cgetfield, bc, rbx);
3150   }
3151   __ jmp(Done);
3152 
3153   __ bind(notChar);
3154   __ cmpl(flags, stos);
3155   __ jcc(Assembler::notEqual, notShort);
3156   // stos
3157   __ access_load_at(T_SHORT, IN_HEAP, rax, field, noreg, noreg);
3158   __ push(stos);
3159   // Rewrite bytecode to be faster
3160   if (!is_static && rc == may_rewrite) {
3161     patch_bytecode(Bytecodes::_fast_sgetfield, bc, rbx);
3162   }
3163   __ jmp(Done);
3164 
3165   __ bind(notShort);
3166   __ cmpl(flags, ltos);
3167   __ jcc(Assembler::notEqual, notLong);
3168   // ltos
3169     // Generate code as if volatile (x86_32).  There just aren't enough registers to
3170     // save that information and this code is faster than the test.
3171   __ access_load_at(T_LONG, IN_HEAP | MO_RELAXED, noreg /* ltos */, field, noreg, noreg);
3172   __ push(ltos);
3173   // Rewrite bytecode to be faster
3174   LP64_ONLY(if (!is_static && rc == may_rewrite) patch_bytecode(Bytecodes::_fast_lgetfield, bc, rbx));
3175   __ jmp(Done);
3176 
3177   __ bind(notLong);
3178   __ cmpl(flags, ftos);
3179   __ jcc(Assembler::notEqual, notFloat);
3180   // ftos
3181 
3182   __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg);
3183   __ push(ftos);
3184   // Rewrite bytecode to be faster
3185   if (!is_static && rc == may_rewrite) {
3186     patch_bytecode(Bytecodes::_fast_fgetfield, bc, rbx);
3187   }
3188   __ jmp(Done);
3189 
3190   __ bind(notFloat);
3191 #ifdef ASSERT
3192   Label notDouble;
3193   __ cmpl(flags, dtos);
3194   __ jcc(Assembler::notEqual, notDouble);
3195 #endif
3196   // dtos
3197   // MO_RELAXED: even for a volatile field this adds no extra work in the underlying implementation
3198   __ access_load_at(T_DOUBLE, IN_HEAP | MO_RELAXED, noreg /* dtos */, field, noreg, noreg);
3199   __ push(dtos);
3200   // Rewrite bytecode to be faster
3201   if (!is_static && rc == may_rewrite) {
3202     patch_bytecode(Bytecodes::_fast_dgetfield, bc, rbx);
3203   }
3204 #ifdef ASSERT
3205   __ jmp(Done);
3206 
3207   __ bind(notDouble);
3208   __ stop("Bad state");
3209 #endif
3210 
3211   __ bind(Done);
3212   // [jk] not needed currently
3213   // volatile_barrier(Assembler::Membar_mask_bits(Assembler::LoadLoad |
3214   //                                              Assembler::LoadStore));
3215 }
3216 
3217 void TemplateTable::getfield(int byte_no) {
3218   getfield_or_static(byte_no, false);
3219 }
3220 
3221 void TemplateTable::nofast_getfield(int byte_no) {
3222   getfield_or_static(byte_no, false, may_not_rewrite);
3223 }
3224 
3225 void TemplateTable::getstatic(int byte_no) {
3226   getfield_or_static(byte_no, true);
3227 }
3228 
3229 void TemplateTable::withfield() {
3230   transition(vtos, atos);
3231 
3232   Register cache = LP64_ONLY(c_rarg1) NOT_LP64(rcx);
3233   Register index = LP64_ONLY(c_rarg2) NOT_LP64(rdx);
3234 
3235   resolve_cache_and_index(f2_byte, cache, index, sizeof(u2));
3236 
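       // The field update itself is performed by the runtime, which returns the new
       // value instance; the number of stack slots consumed depends on the field type,
       // which is presumably why the stack adjustment is computed by the runtime as well.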
3237   call_VM(rbx, CAST_FROM_FN_PTR(address, InterpreterRuntime::withfield), cache);
3238   // new value type is returned in rbx
3239   // stack adjustment is returned in rax
3240   __ verify_oop(rbx);
3241   __ addptr(rsp, rax);
3242   __ movptr(rax, rbx);
3243 }
3244 
3245 // The registers cache and index expected to be set before call.
3246 // The function may destroy various registers, just not the cache and index registers.
3247 void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) {
3248 
3249   const Register robj = LP64_ONLY(c_rarg2)   NOT_LP64(rax);
3250   const Register RBX  = LP64_ONLY(c_rarg1)   NOT_LP64(rbx);
3251   const Register RCX  = LP64_ONLY(c_rarg3)   NOT_LP64(rcx);
3252   const Register RDX  = LP64_ONLY(rscratch1) NOT_LP64(rdx);
3253 
3254   ByteSize cp_base_offset = ConstantPoolCache::base_offset();
3255 
3256   if (JvmtiExport::can_post_field_modification()) {
3257     // Check to see if a field modification watch has been set before
3258     // we take the time to call into the VM.
3259     Label L1;
3260     assert_different_registers(cache, index, rax);
3261     __ mov32(rax, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()));
3262     __ testl(rax, rax);
3263     __ jcc(Assembler::zero, L1);
3264 
3265     __ get_cache_and_index_at_bcp(robj, RDX, 1);
3266 
3268     if (is_static) {
3269       // Life is simple.  Null out the object pointer.
3270       __ xorl(RBX, RBX);
3271 
3272     } else {
3273       // Life is harder. The stack holds the value on top, followed by
3274       // the object.  We don't know the size of the value, though; it
3275       // could be one or two words depending on its type. As a result,
3276       // we must find the type to determine where the object is.
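           // For example, for putfield of an int the stack is [..., obj, value] and the
           // object reference is one slot below tos; for a long or double the value
           // occupies two slots, so the object reference is two slots below tos.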
3277 #ifndef _LP64
3278       Label two_word, valsize_known;
3279 #endif
3280       __ movl(RCX, Address(robj, RDX,
3281                            Address::times_ptr,
3282                            in_bytes(cp_base_offset +
3283                                      ConstantPoolCacheEntry::flags_offset())));
3284       NOT_LP64(__ mov(rbx, rsp));
3285       __ shrl(RCX, ConstantPoolCacheEntry::tos_state_shift);
3286 
3287       // Make sure we don't need to mask rcx after the above shift
3288       ConstantPoolCacheEntry::verify_tos_state_shift();
3289 #ifdef _LP64
3290       __ movptr(c_rarg1, at_tos_p1());  // initially assume a one word jvalue
3291       __ cmpl(c_rarg3, ltos);
3292       __ cmovptr(Assembler::equal,
3293                  c_rarg1, at_tos_p2()); // ltos (two word jvalue)
3294       __ cmpl(c_rarg3, dtos);
3295       __ cmovptr(Assembler::equal,
3296                  c_rarg1, at_tos_p2()); // dtos (two word jvalue)
3297 #else
3298       __ cmpl(rcx, ltos);
3299       __ jccb(Assembler::equal, two_word);
3300       __ cmpl(rcx, dtos);
3301       __ jccb(Assembler::equal, two_word);
3302       __ addptr(rbx, Interpreter::expr_offset_in_bytes(1)); // one word jvalue (not ltos, dtos)
3303       __ jmpb(valsize_known);
3304 
3305       __ bind(two_word);
3306       __ addptr(rbx, Interpreter::expr_offset_in_bytes(2)); // two words jvalue
3307 
3308       __ bind(valsize_known);
3309       // setup object pointer
3310       __ movptr(rbx, Address(rbx, 0));
3311 #endif
3312     }
3313     // cache entry pointer
3314     __ addptr(robj, in_bytes(cp_base_offset));
3315     __ shll(RDX, LogBytesPerWord);
3316     __ addptr(robj, RDX);
3317     // object (tos)
3318     __ mov(RCX, rsp);
3319     // c_rarg1: object pointer set up above (NULL if static)
3320     // c_rarg2: cache entry pointer
3321     // c_rarg3: jvalue object on the stack
3322     __ call_VM(noreg,
3323                CAST_FROM_FN_PTR(address,
3324                                 InterpreterRuntime::post_field_modification),
3325                RBX, robj, RCX);
3326     __ get_cache_and_index_at_bcp(cache, index, 1);
3327     __ bind(L1);
3328   }
3329 }
3330 
3331 void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) {
3332   transition(vtos, vtos);
3333 
3334   const Register cache = rcx;
3335   const Register index = rdx;
3336   const Register obj   = rcx;
3337   const Register off   = rbx;
3338   const Register flags = rax;
3339   const Register flags2 = rdx;
3340 
3341   resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
3342   jvmti_post_field_mod(cache, index, is_static);
3343   load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
3344 
3345   // [jk] not needed currently
3346   // volatile_barrier(Assembler::Membar_mask_bits(Assembler::LoadStore |
3347   //                                              Assembler::StoreStore));
3348 
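       // A volatile store must be followed by a StoreLoad|StoreStore barrier, so the
       // store sequence is generated twice below: once with the trailing barrier for
       // the volatile case and once without it for the non-volatile case.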
3349   Label notVolatile, Done;
3350   __ movl(rdx, flags);
3351   __ shrl(rdx, ConstantPoolCacheEntry::is_volatile_shift);
3352   __ andl(rdx, 0x1);
3353 
3354   // Check for volatile store
3355   __ testl(rdx, rdx);
3356   __ movl(flags2, flags);
3357   __ jcc(Assembler::zero, notVolatile);
3358 
3359   putfield_or_static_helper(byte_no, is_static, rc, obj, off, flags, flags2);
3360   volatile_barrier(Assembler::Membar_mask_bits(Assembler::StoreLoad |
3361                                                Assembler::StoreStore));
3362   __ jmp(Done);
3363   __ bind(notVolatile);
3364 
3365   putfield_or_static_helper(byte_no, is_static, rc, obj, off, flags, flags2);
3366 
3367   __ bind(Done);
3368 }
3369 
3370 void TemplateTable::putfield_or_static_helper(int byte_no, bool is_static, RewriteControl rc,
3371                                               Register obj, Register off, Register flags, Register flags2) {
3372 
3373   // field addresses
3374   const Address field(obj, off, Address::times_1, 0*wordSize);
3375   NOT_LP64( const Address hi(obj, off, Address::times_1, 1*wordSize);)
3376 
3377   Label notByte, notBool, notInt, notShort, notChar,
3378         notLong, notFloat, notObj, notValueType;
3379   Label Done;
3380 
3381   const Register bc    = LP64_ONLY(c_rarg3) NOT_LP64(rcx);
3382 
3383   __ shrl(flags, ConstantPoolCacheEntry::tos_state_shift);
3384 
3385   assert(btos == 0, "change code, btos != 0");
3386   __ andl(flags, ConstantPoolCacheEntry::tos_state_mask);
3387   __ jcc(Assembler::notZero, notByte);
3388 
3389   // btos
3390   {
3391     __ pop(btos);
3392     if (!is_static) pop_and_check_object(obj);
3393     __ access_store_at(T_BYTE, IN_HEAP, field, rax, noreg, noreg);
3394     if (!is_static && rc == may_rewrite) {
3395       patch_bytecode(Bytecodes::_fast_bputfield, bc, rbx, true, byte_no);
3396     }
3397     __ jmp(Done);
3398   }
3399 
3400   __ bind(notByte);
3401   __ cmpl(flags, ztos);
3402   __ jcc(Assembler::notEqual, notBool);
3403 
3404   // ztos
3405   {
3406     __ pop(ztos);
3407     if (!is_static) pop_and_check_object(obj);
3408     __ access_store_at(T_BOOLEAN, IN_HEAP, field, rax, noreg, noreg);
3409     if (!is_static && rc == may_rewrite) {
3410       patch_bytecode(Bytecodes::_fast_zputfield, bc, rbx, true, byte_no);
3411     }
3412     __ jmp(Done);
3413   }
3414 
3415   __ bind(notBool);
3416   __ cmpl(flags, atos);
3417   __ jcc(Assembler::notEqual, notObj);
3418 
3419   // atos
3420   {
3421     if (!EnableValhalla) {
3422       __ pop(atos);
3423       if (!is_static) pop_and_check_object(obj);
3424       // Store into the field
3425       do_oop_store(_masm, field, rax);
3426       if (!is_static && rc == may_rewrite) {
3427         patch_bytecode(Bytecodes::_fast_aputfield, bc, rbx, true, byte_no);
3428       }
3429       __ jmp(Done);
3430     } else {
3431       __ pop(atos);
3432       if (is_static) {
3433         Label notFlattenable, notBuffered;
3434         __ test_field_is_not_flattenable(flags2, rscratch1, notFlattenable);
3435         __ null_check(rax);
3436         __ bind(notFlattenable);
3437         do_oop_store(_masm, field, rax);
3438         __ jmp(Done);
3439       } else {
3440         Label isFlattenable, isFlattened, notBuffered, notBuffered2, rewriteNotFlattenable, rewriteFlattenable;
3441         __ test_field_is_flattenable(flags2, rscratch1, isFlattenable);
3442         // Not flattenable case, covers not flattenable values and objects
3443         pop_and_check_object(obj);
3444         // Store into the field
3445         do_oop_store(_masm, field, rax);
3446         __ bind(rewriteNotFlattenable);
3447         if (rc == may_rewrite) {
3448           patch_bytecode(Bytecodes::_fast_aputfield, bc, rbx, true, byte_no);
3449         }
3450         __ jmp(Done);
3451         // Implementation of the flattenable semantic
3452         __ bind(isFlattenable);
3453         __ null_check(rax);
3454         __ test_field_is_flattened(flags2, rscratch1, isFlattened);
3455         // Not flattened case
3456         pop_and_check_object(obj);
3457         // Store into the field
3458         do_oop_store(_masm, field, rax);
3459         __ jmp(rewriteFlattenable);
3460         __ bind(isFlattened);
3461         pop_and_check_object(obj);
3462         assert_different_registers(rax, rdx, obj, off);
3463         __ load_klass(rdx, rax);
3464         __ data_for_oop(rax, rax, rdx);
3465         __ addptr(obj, off);
3466         __ access_value_copy(IN_HEAP, rax, obj, rdx);
3467         __ bind(rewriteFlattenable);
3468         if (rc == may_rewrite) {
3469           patch_bytecode(Bytecodes::_fast_qputfield, bc, rbx, true, byte_no);
3470         }
3471         __ jmp(Done);
3472       }
3473     }
3474   }
3475 
3476   __ bind(notObj);
3477   __ cmpl(flags, itos);
3478   __ jcc(Assembler::notEqual, notInt);
3479 
3480   // itos
3481   {
3482     __ pop(itos);
3483     if (!is_static) pop_and_check_object(obj);
3484     __ access_store_at(T_INT, IN_HEAP, field, rax, noreg, noreg);
3485     if (!is_static && rc == may_rewrite) {
3486       patch_bytecode(Bytecodes::_fast_iputfield, bc, rbx, true, byte_no);
3487     }
3488     __ jmp(Done);
3489   }
3490 
3491   __ bind(notInt);
3492   __ cmpl(flags, ctos);
3493   __ jcc(Assembler::notEqual, notChar);
3494 
3495   // ctos
3496   {
3497     __ pop(ctos);
3498     if (!is_static) pop_and_check_object(obj);
3499     __ access_store_at(T_CHAR, IN_HEAP, field, rax, noreg, noreg);
3500     if (!is_static && rc == may_rewrite) {
3501       patch_bytecode(Bytecodes::_fast_cputfield, bc, rbx, true, byte_no);
3502     }
3503     __ jmp(Done);
3504   }
3505 
3506   __ bind(notChar);
3507   __ cmpl(flags, stos);
3508   __ jcc(Assembler::notEqual, notShort);
3509 
3510   // stos
3511   {
3512     __ pop(stos);
3513     if (!is_static) pop_and_check_object(obj);
3514     __ access_store_at(T_SHORT, IN_HEAP, field, rax, noreg, noreg);
3515     if (!is_static && rc == may_rewrite) {
3516       patch_bytecode(Bytecodes::_fast_sputfield, bc, rbx, true, byte_no);
3517     }
3518     __ jmp(Done);
3519   }
3520 
3521   __ bind(notShort);
3522   __ cmpl(flags, ltos);
3523   __ jcc(Assembler::notEqual, notLong);
3524 
3525   // ltos
3526   {
3527     __ pop(ltos);
3528     if (!is_static) pop_and_check_object(obj);
3529     // MO_RELAXED: generate atomic store for the case of volatile field (important for x86_32)
3530     __ access_store_at(T_LONG, IN_HEAP | MO_RELAXED, field, noreg /* ltos*/, noreg, noreg);
3531 #ifdef _LP64
3532     if (!is_static && rc == may_rewrite) {
3533       patch_bytecode(Bytecodes::_fast_lputfield, bc, rbx, true, byte_no);
3534     }
3535 #endif // _LP64
3536     __ jmp(Done);
3537   }
3538 
3539   __ bind(notLong);
3540   __ cmpl(flags, ftos);
3541   __ jcc(Assembler::notEqual, notFloat);
3542 
3543   // ftos
3544   {
3545     __ pop(ftos);
3546     if (!is_static) pop_and_check_object(obj);
3547     __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg);
3548     if (!is_static && rc == may_rewrite) {
3549       patch_bytecode(Bytecodes::_fast_fputfield, bc, rbx, true, byte_no);
3550     }
3551     __ jmp(Done);
3552   }
3553 
3554   __ bind(notFloat);
3555 #ifdef ASSERT
3556   Label notDouble;
3557   __ cmpl(flags, dtos);
3558   __ jcc(Assembler::notEqual, notDouble);
3559 #endif
3560 
3561   // dtos
3562   {
3563     __ pop(dtos);
3564     if (!is_static) pop_and_check_object(obj);
3565     // MO_RELAXED: even for a volatile field this adds no extra work in the underlying implementation
3566     __ access_store_at(T_DOUBLE, IN_HEAP | MO_RELAXED, field, noreg /* dtos */, noreg, noreg);
3567     if (!is_static && rc == may_rewrite) {
3568       patch_bytecode(Bytecodes::_fast_dputfield, bc, rbx, true, byte_no);
3569     }
3570   }
3571 
3572 #ifdef ASSERT
3573   __ jmp(Done);
3574 
3575   __ bind(notDouble);
3576   __ stop("Bad state");
3577 #endif
3578 
3579   __ bind(Done);
3580 }
3581 
3582 void TemplateTable::putfield(int byte_no) {
3583   putfield_or_static(byte_no, false);
3584 }
3585 
3586 void TemplateTable::nofast_putfield(int byte_no) {
3587   putfield_or_static(byte_no, false, may_not_rewrite);
3588 }
3589 
3590 void TemplateTable::putstatic(int byte_no) {
3591   putfield_or_static(byte_no, true);
3592 }
3593 
3594 void TemplateTable::jvmti_post_fast_field_mod() {
3595 
3596   const Register scratch = LP64_ONLY(c_rarg3) NOT_LP64(rcx);
3597 
3598   if (JvmtiExport::can_post_field_modification()) {
3599     // Check to see if a field modification watch has been set before
3600     // we take the time to call into the VM.
3601     Label L2;
3602     __ mov32(scratch, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()));
3603     __ testl(scratch, scratch);
3604     __ jcc(Assembler::zero, L2);
3605     __ pop_ptr(rbx);                  // copy the object pointer from tos
3606     __ verify_oop(rbx);
3607     __ push_ptr(rbx);                 // put the object pointer back on tos
3608     // Save tos values before call_VM() clobbers them. Since we have
3609     // to do it for every data type, we use the saved values as the
3610     // jvalue object.
3611     switch (bytecode()) {          // load values into the jvalue object
3612     case Bytecodes::_fast_qputfield: // fall through
3613     case Bytecodes::_fast_aputfield: __ push_ptr(rax); break;
3614     case Bytecodes::_fast_bputfield: // fall through
3615     case Bytecodes::_fast_zputfield: // fall through
3616     case Bytecodes::_fast_sputfield: // fall through
3617     case Bytecodes::_fast_cputfield: // fall through
3618     case Bytecodes::_fast_iputfield: __ push_i(rax); break;
3619     case Bytecodes::_fast_dputfield: __ push(dtos); break;
3620     case Bytecodes::_fast_fputfield: __ push(ftos); break;
3621     case Bytecodes::_fast_lputfield: __ push_l(rax); break;
3622 
3623     default:
3624       ShouldNotReachHere();
3625     }
3626     __ mov(scratch, rsp);             // points to jvalue on the stack
3627     // access constant pool cache entry
3628     LP64_ONLY(__ get_cache_entry_pointer_at_bcp(c_rarg2, rax, 1));
3629     NOT_LP64(__ get_cache_entry_pointer_at_bcp(rax, rdx, 1));
3630     __ verify_oop(rbx);
3631     // rbx: object pointer copied above
3632     // c_rarg2: cache entry pointer
3633     // c_rarg3: jvalue object on the stack
3634     LP64_ONLY(__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification), rbx, c_rarg2, c_rarg3));
3635     NOT_LP64(__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification), rbx, rax, rcx));
3636 
3637     switch (bytecode()) {             // restore tos values
3638     case Bytecodes::_fast_qputfield: // fall through
3639     case Bytecodes::_fast_aputfield: __ pop_ptr(rax); break;
3640     case Bytecodes::_fast_bputfield: // fall through
3641     case Bytecodes::_fast_zputfield: // fall through
3642     case Bytecodes::_fast_sputfield: // fall through
3643     case Bytecodes::_fast_cputfield: // fall through
3644     case Bytecodes::_fast_iputfield: __ pop_i(rax); break;
3645     case Bytecodes::_fast_dputfield: __ pop(dtos); break;
3646     case Bytecodes::_fast_fputfield: __ pop(ftos); break;
3647     case Bytecodes::_fast_lputfield: __ pop_l(rax); break;
3648     default: break;
3649     }
3650     __ bind(L2);
3651   }
3652 }
3653 
3654 void TemplateTable::fast_storefield(TosState state) {
3655   transition(state, vtos);
3656 
3657   ByteSize base = ConstantPoolCache::base_offset();
3658 
3659   jvmti_post_fast_field_mod();
3660 
3661   // access constant pool cache
3662   __ get_cache_and_index_at_bcp(rcx, rbx, 1);
3663 
3664   // Test for volatile with rdx; note that rdx is the tos register for lputfield.
3665   __ movl(rdx, Address(rcx, rbx, Address::times_ptr,
3666                        in_bytes(base +
3667                                 ConstantPoolCacheEntry::flags_offset())));
3668 
3669   // replace index with field offset from cache entry
3670   __ movptr(rbx, Address(rcx, rbx, Address::times_ptr,
3671                          in_bytes(base + ConstantPoolCacheEntry::f2_offset())));
3672 
3673   // [jk] not needed currently
3674   // volatile_barrier(Assembler::Membar_mask_bits(Assembler::LoadStore |
3675   //                                              Assembler::StoreStore));
3676 
3677   Label notVolatile, Done;
3678   if (bytecode() == Bytecodes::_fast_qputfield) {
3679     __ movl(rscratch2, rdx);  // saving flags for isFlattened test
3680   }
3681 
3682   __ shrl(rdx, ConstantPoolCacheEntry::is_volatile_shift);
3683   __ andl(rdx, 0x1);
3684 
3685   // Get object from stack
3686   pop_and_check_object(rcx);
3687 
3688   // field address
3689   const Address field(rcx, rbx, Address::times_1);
3690 
3691   // Check for volatile store
3692   __ testl(rdx, rdx);
3693   __ jcc(Assembler::zero, notVolatile);
3694 
3695   if (bytecode() == Bytecodes::_fast_qputfield) {
3696     __ movl(rdx, rscratch2);  // restoring flags for isFlattened test
3697   }
3698   fast_storefield_helper(field, rax, rdx);
3699   volatile_barrier(Assembler::Membar_mask_bits(Assembler::StoreLoad |
3700                                                Assembler::StoreStore));
3701   __ jmp(Done);
3702   __ bind(notVolatile);
3703 
3704   if (bytecode() == Bytecodes::_fast_qputfield) {
3705     __ movl(rdx, rscratch2);  // restoring flags for isFlattened test
3706   }
3707   fast_storefield_helper(field, rax, rdx);
3708 
3709   __ bind(Done);
3710 }
3711 
3712 void TemplateTable::fast_storefield_helper(Address field, Register rax, Register flags) {
3713 
3714   // access field
3715   switch (bytecode()) {
3716   case Bytecodes::_fast_qputfield:
3717     {
3718       Label isFlattened, done;
3719       __ null_check(rax);
3720       __ test_field_is_flattened(flags, rscratch1, isFlattened);
3721       // Not flattened case
3722       do_oop_store(_masm, field, rax);
3723       __ jmp(done);
3724       __ bind(isFlattened);
3725       // Flattened case
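           // Copy the source value's field payload directly into the flattened field:
           // rdx <- value klass, rax <- address of the source payload,
           // rcx <- address of the flattened field inside the holder.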
3726       __ load_klass(rdx, rax);
3727       __ data_for_oop(rax, rax, rdx);
3728       __ lea(rcx, field);
3729       __ access_value_copy(IN_HEAP, rax, rcx, rdx);
3730       __ bind(done);
3731     }
3732     break;
3733   case Bytecodes::_fast_aputfield:
3734     {
3735       do_oop_store(_masm, field, rax);
3736     }
3737     break;
3738   case Bytecodes::_fast_lputfield:
3739 #ifdef _LP64
3740     __ access_store_at(T_LONG, IN_HEAP, field, noreg /* ltos */, noreg, noreg);
3741 #else
3742     __ stop("should not be rewritten");
3743 #endif
3744     break;
3745   case Bytecodes::_fast_iputfield:
3746     __ access_store_at(T_INT, IN_HEAP, field, rax, noreg, noreg);
3747     break;
3748   case Bytecodes::_fast_zputfield:
3749     __ access_store_at(T_BOOLEAN, IN_HEAP, field, rax, noreg, noreg);
3750     break;
3751   case Bytecodes::_fast_bputfield:
3752     __ access_store_at(T_BYTE, IN_HEAP, field, rax, noreg, noreg);
3753     break;
3754   case Bytecodes::_fast_sputfield:
3755     __ access_store_at(T_SHORT, IN_HEAP, field, rax, noreg, noreg);
3756     break;
3757   case Bytecodes::_fast_cputfield:
3758     __ access_store_at(T_CHAR, IN_HEAP, field, rax, noreg, noreg);
3759     break;
3760   case Bytecodes::_fast_fputfield:
3761     __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos*/, noreg, noreg);
3762     break;
3763   case Bytecodes::_fast_dputfield:
3764     __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos*/, noreg, noreg);
3765     break;
3766   default:
3767     ShouldNotReachHere();
3768   }
3769 }
3770 
3771 void TemplateTable::fast_accessfield(TosState state) {
3772   transition(atos, state);
3773 
3774   // Do the JVMTI work here to avoid disturbing the register state below
3775   if (JvmtiExport::can_post_field_access()) {
3776     // Check to see if a field access watch has been set before we
3777     // take the time to call into the VM.
3778     Label L1;
3779     __ mov32(rcx, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()));
3780     __ testl(rcx, rcx);
3781     __ jcc(Assembler::zero, L1);
3782     // access constant pool cache entry
3783     LP64_ONLY(__ get_cache_entry_pointer_at_bcp(c_rarg2, rcx, 1));
3784     NOT_LP64(__ get_cache_entry_pointer_at_bcp(rcx, rdx, 1));
3785     __ verify_oop(rax);
3786     __ push_ptr(rax);  // save object pointer before call_VM() clobbers it
3787     LP64_ONLY(__ mov(c_rarg1, rax));
3788     // c_rarg1: object pointer copied above
3789     // c_rarg2: cache entry pointer
3790     LP64_ONLY(__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), c_rarg1, c_rarg2));
3791     NOT_LP64(__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), rax, rcx));
3792     __ pop_ptr(rax); // restore object pointer
3793     __ bind(L1);
3794   }
3795 
3796   // access constant pool cache
3797   __ get_cache_and_index_at_bcp(rcx, rbx, 1);
3798   // replace index with field offset from cache entry
3799   // [jk] not needed currently
3800   // __ movl(rdx, Address(rcx, rbx, Address::times_8,
3801   //                      in_bytes(ConstantPoolCache::base_offset() +
3802   //                               ConstantPoolCacheEntry::flags_offset())));
3803   // __ shrl(rdx, ConstantPoolCacheEntry::is_volatile_shift);
3804   // __ andl(rdx, 0x1);
3805   //
3806   __ movptr(rdx, Address(rcx, rbx, Address::times_ptr,
3807                          in_bytes(ConstantPoolCache::base_offset() +
3808                                   ConstantPoolCacheEntry::f2_offset())));
3809 
3810   // rax: object
3811   __ verify_oop(rax);
3812   __ null_check(rax);
3813   Address field(rax, rdx, Address::times_1);
3814 
3815   // access field
3816   switch (bytecode()) {
3817   case Bytecodes::_fast_qgetfield:
3818     {
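           // Value type field: if it is not flattened, load the oop and, when it is still
           // null (never written), fetch the default value from the runtime; if it is
           // flattened, read it out of the holder via read_flattened_field.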
3819       Label isFlattened, nonnull, Done;
3820       __ movptr(rscratch1, Address(rcx, rbx, Address::times_ptr,
3821                                    in_bytes(ConstantPoolCache::base_offset() +
3822                                             ConstantPoolCacheEntry::flags_offset())));
3823       __ test_field_is_flattened(rscratch1, rscratch2, isFlattened);
3824         // Non-flattened field case
3825         __ movptr(rscratch1, rax);
3826         __ load_heap_oop(rax, field);
3827         __ testptr(rax, rax);
3828         __ jcc(Assembler::notZero, nonnull);
3829           __ movptr(rax, rscratch1);
3830           __ movl(rcx, Address(rcx, rbx, Address::times_ptr,
3831                              in_bytes(ConstantPoolCache::base_offset() +
3832                                       ConstantPoolCacheEntry::flags_offset())));
3833           __ andl(rcx, ConstantPoolCacheEntry::field_index_mask);
3834           __ call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::uninitialized_instance_value_field),
3835                      rax, rcx);
3836         __ bind(nonnull);
3837         __ verify_oop(rax);
3838         __ jmp(Done);
3839       __ bind(isFlattened);
3840         __ push(rdx); // save offset
3841         __ movl(rdx, Address(rcx, rbx, Address::times_ptr,
3842                            in_bytes(ConstantPoolCache::base_offset() +
3843                                     ConstantPoolCacheEntry::flags_offset())));
3844         __ andl(rdx, ConstantPoolCacheEntry::field_index_mask);
3845         __ movptr(rcx, Address(rcx, rbx, Address::times_ptr,
3846                                      in_bytes(ConstantPoolCache::base_offset() +
3847                                               ConstantPoolCacheEntry::f1_offset())));
3848         __ pop(rbx); // restore offset
3849         __ read_flattened_field(rcx, rdx, rbx, rax);
3850       __ bind(Done);
3851       __ verify_oop(rax);
3852     }
3853     break;
3854   case Bytecodes::_fast_agetfield:
3855     do_oop_load(_masm, field, rax);
3856     __ verify_oop(rax);
3857     break;
3858   case Bytecodes::_fast_lgetfield:
3859 #ifdef _LP64
3860     __ access_load_at(T_LONG, IN_HEAP, noreg /* ltos */, field, noreg, noreg);
3861 #else
3862     __ stop("should not be rewritten");
3863 #endif
3864     break;
3865   case Bytecodes::_fast_igetfield:
3866     __ access_load_at(T_INT, IN_HEAP, rax, field, noreg, noreg);
3867     break;
3868   case Bytecodes::_fast_bgetfield:
3869     __ access_load_at(T_BYTE, IN_HEAP, rax, field, noreg, noreg);
3870     break;
3871   case Bytecodes::_fast_sgetfield:
3872     __ access_load_at(T_SHORT, IN_HEAP, rax, field, noreg, noreg);
3873     break;
3874   case Bytecodes::_fast_cgetfield:
3875     __ access_load_at(T_CHAR, IN_HEAP, rax, field, noreg, noreg);
3876     break;
3877   case Bytecodes::_fast_fgetfield:
3878     __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg);
3879     break;
3880   case Bytecodes::_fast_dgetfield:
3881     __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* dtos */, field, noreg, noreg);
3882     break;
3883   default:
3884     ShouldNotReachHere();
3885   }
3886   // [jk] not needed currently
3887   //   Label notVolatile;
3888   //   __ testl(rdx, rdx);
3889   //   __ jcc(Assembler::zero, notVolatile);
3890   //   __ membar(Assembler::LoadLoad);
3891   //   __ bind(notVolatile);
3892 }
3893 
3894 void TemplateTable::fast_xaccess(TosState state) {
3895   transition(vtos, state);
3896 
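       // fast_xaccess implements the rewritten aload_0 + fast getfield pair: the
       // receiver is local 0 and the field's constant pool cache index is read at
       // bcp offset 2, hence the cache lookup below.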
3897   // get receiver
3898   __ movptr(rax, aaddress(0));
3899   // access constant pool cache
3900   __ get_cache_and_index_at_bcp(rcx, rdx, 2);
3901   __ movptr(rbx,
3902             Address(rcx, rdx, Address::times_ptr,
3903                     in_bytes(ConstantPoolCache::base_offset() +
3904                              ConstantPoolCacheEntry::f2_offset())));
3905   // make sure exception is reported in correct bcp range (getfield is
3906   // next instruction)
3907   __ increment(rbcp);
3908   __ null_check(rax);
3909   const Address field = Address(rax, rbx, Address::times_1, 0*wordSize);
3910   switch (state) {
3911   case itos:
3912     __ access_load_at(T_INT, IN_HEAP, rax, field, noreg, noreg);
3913     break;
3914   case atos:
3915     do_oop_load(_masm, field, rax);
3916     __ verify_oop(rax);
3917     break;
3918   case ftos:
3919     __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg);
3920     break;
3921   default:
3922     ShouldNotReachHere();
3923   }
3924 
3925   // [jk] not needed currently
3926   // Label notVolatile;
3927   // __ movl(rdx, Address(rcx, rdx, Address::times_8,
3928   //                      in_bytes(ConstantPoolCache::base_offset() +
3929   //                               ConstantPoolCacheEntry::flags_offset())));
3930   // __ shrl(rdx, ConstantPoolCacheEntry::is_volatile_shift);
3931   // __ testl(rdx, 0x1);
3932   // __ jcc(Assembler::zero, notVolatile);
3933   // __ membar(Assembler::LoadLoad);
3934   // __ bind(notVolatile);
3935 
3936   __ decrement(rbcp);
3937 }
3938 
3939 //-----------------------------------------------------------------------------
3940 // Calls
3941 
3942 void TemplateTable::count_calls(Register method, Register temp) {
3943   // implemented elsewhere
3944   ShouldNotReachHere();
3945 }
3946 
3947 void TemplateTable::prepare_invoke(int byte_no,
3948                                    Register method,  // linked method (or i-klass)
3949                                    Register index,   // itable index, MethodType, etc.
3950                                    Register recv,    // if caller wants to see it
3951                                    Register flags    // if caller wants to test it
3952                                    ) {
3953   // determine flags
3954   const Bytecodes::Code code = bytecode();
3955   const bool is_invokeinterface  = code == Bytecodes::_invokeinterface;
3956   const bool is_invokedynamic    = code == Bytecodes::_invokedynamic;
3957   const bool is_invokehandle     = code == Bytecodes::_invokehandle;
3958   const bool is_invokevirtual    = code == Bytecodes::_invokevirtual;
3959   const bool is_invokespecial    = code == Bytecodes::_invokespecial;
3960   const bool load_receiver       = (recv  != noreg);
3961   const bool save_flags          = (flags != noreg);
3962   assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), "");
3963   assert(save_flags    == (is_invokeinterface || is_invokevirtual), "need flags for vfinal");
3964   assert(flags == noreg || flags == rdx, "");
3965   assert(recv  == noreg || recv  == rcx, "");
3966 
3967   // setup registers & access constant pool cache
3968   if (recv  == noreg)  recv  = rcx;
3969   if (flags == noreg)  flags = rdx;
3970   assert_different_registers(method, index, recv, flags);
3971 
3972   // save 'interpreter return address'
3973   __ save_bcp();
3974 
3975   load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic);
3976 
3977   // maybe push appendix to arguments (just before return address)
3978   if (is_invokedynamic || is_invokehandle) {
3979     Label L_no_push;
3980     __ testl(flags, (1 << ConstantPoolCacheEntry::has_appendix_shift));
3981     __ jcc(Assembler::zero, L_no_push);
3982     // Push the appendix as a trailing parameter.
3983     // This must be done before we get the receiver,
3984     // since the parameter_size includes it.
3985     __ push(rbx);
3986     __ mov(rbx, index);
3987     __ load_resolved_reference_at_index(index, rbx);
3988     __ pop(rbx);
3989     __ push(index);  // push appendix (MethodType, CallSite, etc.)
3990     __ bind(L_no_push);
3991   }
3992 
3993   // load receiver if needed (after appendix is pushed so parameter size is correct)
3994   // Note: no return address pushed yet
3995   if (load_receiver) {
3996     __ movl(recv, flags);
3997     __ andl(recv, ConstantPoolCacheEntry::parameter_size_mask);
3998     const int no_return_pc_pushed_yet = -1;  // argument slot correction before we push return address
3999     const int receiver_is_at_end      = -1;  // back off one slot to get receiver
4000     Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end);
4001     __ movptr(recv, recv_addr);
4002     __ verify_oop(recv);
4003   }
4004 
4005   if (save_flags) {
4006     __ movl(rbcp, flags);
4007   }
4008 
4009   // compute return type
4010   __ shrl(flags, ConstantPoolCacheEntry::tos_state_shift);
4011   // Make sure we don't need to mask flags after the above shift
4012   ConstantPoolCacheEntry::verify_tos_state_shift();
4013   // load return address
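       // (the invoke return entry table is indexed by the callee's result tos state,
       //  which is what remains in flags after the shift above)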
4014   {
4015     const address table_addr = (address) Interpreter::invoke_return_entry_table_for(code);
4016     ExternalAddress table(table_addr);
4017     LP64_ONLY(__ lea(rscratch1, table));
4018     LP64_ONLY(__ movptr(flags, Address(rscratch1, flags, Address::times_ptr)));
4019     NOT_LP64(__ movptr(flags, ArrayAddress(table, Address(noreg, flags, Address::times_ptr))));
4020   }
4021 
4022   // push return address
4023   __ push(flags);
4024 
4025   // Restore flags value from the constant pool cache, and restore the bytecode
4026   // pointer (rsi on 32-bit, r13 on 64-bit), which is needed for later null checks.
4027   if (save_flags) {
4028     __ movl(flags, rbcp);
4029     __ restore_bcp();
4030   }
4031 }
4032 
4033 void TemplateTable::invokevirtual_helper(Register index,
4034                                          Register recv,
4035                                          Register flags) {
4036   // Uses temporary registers rax, rdx
4037   assert_different_registers(index, recv, rax, rdx);
4038   assert(index == rbx, "");
4039   assert(recv  == rcx, "");
4040 
4041   // Test for an invoke of a final method
4042   Label notFinal;
4043   __ movl(rax, flags);
4044   __ andl(rax, (1 << ConstantPoolCacheEntry::is_vfinal_shift));
4045   __ jcc(Assembler::zero, notFinal);
4046 
4047   const Register method = index;  // method must be rbx
4048   assert(method == rbx,
4049          "Method* must be rbx for interpreter calling convention");
4050 
4051   // do the call - the index is actually the method to call
4052   // that is, f2 is a vtable index if !is_vfinal, else f2 is a Method*
4053 
4054   // It's final, need a null check here!
4055   __ null_check(recv);
4056 
4057   // profile this call
4058   __ profile_final_call(rax);
4059   __ profile_arguments_type(rax, method, rbcp, true);
4060 
4061   __ jump_from_interpreted(method, rax);
4062 
4063   __ bind(notFinal);
4064 
4065   // get receiver klass
4066   __ null_check(recv, oopDesc::klass_offset_in_bytes());
4067   __ load_klass(rax, recv);
4068 
4069   // profile this call
4070   __ profile_virtual_call(rax, rlocals, rdx);
4071   // get target Method* & entry point
4072   __ lookup_virtual_method(rax, index, method);
4073   __ profile_called_method(method, rdx, rbcp);
4074 
4075   __ profile_arguments_type(rdx, method, rbcp, true);
4076   __ jump_from_interpreted(method, rdx);
4077 }
4078 
4079 void TemplateTable::invokevirtual(int byte_no) {
4080   transition(vtos, vtos);
4081   assert(byte_no == f2_byte, "use this argument");
4082   prepare_invoke(byte_no,
4083                  rbx,    // method or vtable index
4084                  noreg,  // unused itable index
4085                  rcx, rdx); // recv, flags
4086 
4087   // rbx: index
4088   // rcx: receiver
4089   // rdx: flags
4090 
4091   invokevirtual_helper(rbx, rcx, rdx);
4092 }
4093 
4094 void TemplateTable::invokespecial(int byte_no) {
4095   transition(vtos, vtos);
4096   assert(byte_no == f1_byte, "use this argument");
4097   prepare_invoke(byte_no, rbx, noreg,  // get f1 Method*
4098                  rcx);  // get receiver also for null check
4099   __ verify_oop(rcx);
4100   __ null_check(rcx);
4101   // do the call
4102   __ profile_call(rax);
4103   __ profile_arguments_type(rax, rbx, rbcp, false);
4104   __ jump_from_interpreted(rbx, rax);
4105 }
4106 
4107 void TemplateTable::invokestatic(int byte_no) {
4108   transition(vtos, vtos);
4109   assert(byte_no == f1_byte, "use this argument");
4110   prepare_invoke(byte_no, rbx);  // get f1 Method*
4111   // do the call
4112   __ profile_call(rax);
4113   __ profile_arguments_type(rax, rbx, rbcp, false);
4114   __ jump_from_interpreted(rbx, rax);
4115 }
4116 
4117 
4118 void TemplateTable::fast_invokevfinal(int byte_no) {
4119   transition(vtos, vtos);
4120   assert(byte_no == f2_byte, "use this argument");
4121   __ stop("fast_invokevfinal not used on x86");
4122 }
4123 
4124 
4125 void TemplateTable::invokeinterface(int byte_no) {
4126   transition(vtos, vtos);
4127   assert(byte_no == f1_byte, "use this argument");
4128   prepare_invoke(byte_no, rax, rbx,  // get f1 Klass*, f2 Method*
4129                  rcx, rdx); // recv, flags
4130 
4131   // rax: reference klass (from f1) if interface method
4132   // rbx: method (from f2)
4133   // rcx: receiver
4134   // rdx: flags
4135 
4136   // First check for Object case, then private interface method,
4137   // then regular interface method.
4138 
4139   // Special case of invokeinterface called for virtual method of
4140   // java.lang.Object.  See cpCache.cpp for details.
4141   Label notObjectMethod;
4142   __ movl(rlocals, rdx);
4143   __ andl(rlocals, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift));
4144   __ jcc(Assembler::zero, notObjectMethod);
4145   invokevirtual_helper(rbx, rcx, rdx);
4146   // no return from above
4147   __ bind(notObjectMethod);
4148 
4149   Label no_such_interface; // for receiver subtype check
4150   Register recvKlass; // used for exception processing
4151 
4152   // Check for private method invocation - indicated by vfinal
4153   Label notVFinal;
4154   __ movl(rlocals, rdx);
4155   __ andl(rlocals, (1 << ConstantPoolCacheEntry::is_vfinal_shift));
4156   __ jcc(Assembler::zero, notVFinal);
4157 
4158   // Get receiver klass into rlocals - also a null check
4159   __ null_check(rcx, oopDesc::klass_offset_in_bytes());
4160   __ load_klass(rlocals, rcx);
4161 
4162   Label subtype;
4163   __ check_klass_subtype(rlocals, rax, rbcp, subtype);
4164   // If we get here the typecheck failed
4165   recvKlass = rdx;
4166   __ mov(recvKlass, rlocals); // shuffle receiver class for exception use
4167   __ jmp(no_such_interface);
4168 
4169   __ bind(subtype);
4170 
4171   // do the call - rbx is actually the method to call
4172 
4173   __ profile_final_call(rdx);
4174   __ profile_arguments_type(rdx, rbx, rbcp, true);
4175 
4176   __ jump_from_interpreted(rbx, rdx);
4177   // no return from above
4178   __ bind(notVFinal);
4179 
4180   // Get receiver klass into rdx - also a null check
4181   __ restore_locals();  // restore r14
4182   __ null_check(rcx, oopDesc::klass_offset_in_bytes());
4183   __ load_klass(rdx, rcx);
4184 
4185   Label no_such_method;
4186 
4187   // Preserve method for throw_AbstractMethodErrorVerbose.
4188   __ mov(rcx, rbx);
4189   // Receiver subtype check against REFC.
4190   // Superklass in rax. Subklass in rdx. Blows rcx, rdi.
4191   __ lookup_interface_method(// inputs: rec. class, interface, itable index
4192                              rdx, rax, noreg,
4193                              // outputs: scan temp. reg, scan temp. reg
4194                              rbcp, rlocals,
4195                              no_such_interface,
4196                              /*return_method=*/false);
4197 
4198   // profile this call
4199   __ restore_bcp(); // rbcp was destroyed by receiver type check
4200   __ profile_virtual_call(rdx, rbcp, rlocals);
4201 
4202   // Get declaring interface class from method, and itable index
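       // (the stored value encodes the index as itable_index_max - index, so the
       //  subtract/negate below recovers the actual itable index)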
4203   __ load_method_holder(rax, rbx);
4204   __ movl(rbx, Address(rbx, Method::itable_index_offset()));
4205   __ subl(rbx, Method::itable_index_max);
4206   __ negl(rbx);
4207 
4208   // Preserve recvKlass for throw_AbstractMethodErrorVerbose.
4209   __ mov(rlocals, rdx);
4210   __ lookup_interface_method(// inputs: rec. class, interface, itable index
4211                              rlocals, rax, rbx,
4212                              // outputs: method, scan temp. reg
4213                              rbx, rbcp,
4214                              no_such_interface);
4215 
4216   // rbx: Method* to call
4217   // rcx: receiver
4218   // Check for abstract method error
4219   // Note: This should be done more efficiently via a throw_abstract_method_error
4220   //       interpreter entry point and a conditional jump to it in case of a null
4221   //       method.
4222   __ testptr(rbx, rbx);
4223   __ jcc(Assembler::zero, no_such_method);
4224 
4225   __ profile_called_method(rbx, rbcp, rdx);
4226   __ profile_arguments_type(rdx, rbx, rbcp, true);
4227 
4228   // do the call
4229   // rcx: receiver
4230   // rbx: Method*
4231   __ jump_from_interpreted(rbx, rdx);
4232   __ should_not_reach_here();
4233 
4234   // exception handling code follows...
4235   // note: must restore interpreter registers to canonical
4236   //       state for exception handling to work correctly!
4237 
4238   __ bind(no_such_method);
4239   // throw exception
4240   __ pop(rbx);           // pop return address (pushed by prepare_invoke)
4241   __ restore_bcp();      // rbcp must be correct for exception handler   (was destroyed)
4242   __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
4243   // Pass arguments for generating a verbose error message.
4244 #ifdef _LP64
4245   recvKlass = c_rarg1;
4246   Register method    = c_rarg2;
4247   if (recvKlass != rdx) { __ movq(recvKlass, rdx); }
4248   if (method != rcx)    { __ movq(method, rcx);    }
4249 #else
4250   recvKlass = rdx;
4251   Register method    = rcx;
4252 #endif
4253   __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose),
4254              recvKlass, method);
4255   // The call_VM checks for exception, so we should never return here.
4256   __ should_not_reach_here();
4257 
4258   __ bind(no_such_interface);
4259   // throw exception
4260   __ pop(rbx);           // pop return address (pushed by prepare_invoke)
4261   __ restore_bcp();      // rbcp must be correct for exception handler   (was destroyed)
4262   __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
4263   // Pass arguments for generating a verbose error message.
4264   LP64_ONLY( if (recvKlass != rdx) { __ movq(recvKlass, rdx); } )
4265   __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose),
4266              recvKlass, rax);
4267   // the call_VM checks for exception, so we should never return here.
4268   __ should_not_reach_here();
4269 }
4270 
4271 void TemplateTable::invokehandle(int byte_no) {
4272   transition(vtos, vtos);
4273   assert(byte_no == f1_byte, "use this argument");
4274   const Register rbx_method = rbx;
4275   const Register rax_mtype  = rax;
4276   const Register rcx_recv   = rcx;
4277   const Register rdx_flags  = rdx;
4278 
4279   prepare_invoke(byte_no, rbx_method, rax_mtype, rcx_recv);
4280   __ verify_method_ptr(rbx_method);
4281   __ verify_oop(rcx_recv);
4282   __ null_check(rcx_recv);
4283 
4284   // rax: MethodType object (from cpool->resolved_references[f1], if necessary)
4285   // rbx: MH.invokeExact_MT method (from f2)
4286 
4287   // Note:  rax_mtype is already pushed (if necessary) by prepare_invoke
4288 
4289   // FIXME: profile the LambdaForm also
4290   __ profile_final_call(rax);
4291   __ profile_arguments_type(rdx, rbx_method, rbcp, true);
4292 
4293   __ jump_from_interpreted(rbx_method, rdx);
4294 }
4295 
4296 void TemplateTable::invokedynamic(int byte_no) {
4297   transition(vtos, vtos);
4298   assert(byte_no == f1_byte, "use this argument");
4299 
4300   const Register rbx_method   = rbx;
4301   const Register rax_callsite = rax;
4302 
4303   prepare_invoke(byte_no, rbx_method, rax_callsite);
4304 
4305   // rax: CallSite object (from cpool->resolved_references[f1])
4306   // rbx: MH.linkToCallSite method (from f2)
4307 
4308   // Note:  rax_callsite is already pushed by prepare_invoke
4309 
4310   // %%% should make a type profile for any invokedynamic that takes a ref argument
4311   // profile this call
4312   __ profile_call(rbcp);
4313   __ profile_arguments_type(rdx, rbx_method, rbcp, false);
4314 
4315   __ verify_oop(rax_callsite);
4316 
4317   __ jump_from_interpreted(rbx_method, rdx);
4318 }
4319 
4320 //-----------------------------------------------------------------------------
4321 // Allocation
4322 
4323 void TemplateTable::_new() {
4324   transition(vtos, atos);
4325   __ get_unsigned_2_byte_index_at_bcp(rdx, 1);
4326   Label slow_case;
4327   Label done;
4328 
4329   __ get_cpool_and_tags(rcx, rax);
4330 
4331   // Make sure the class we're about to instantiate has been resolved.
4332   // This is done before loading InstanceKlass to be consistent with the order
4333   // in which the Constant Pool is updated (see ConstantPool::klass_at_put)
4334   const int tags_offset = Array<u1>::base_offset_in_bytes();
4335   __ cmpb(Address(rax, rdx, Address::times_1, tags_offset), JVM_CONSTANT_Class);
4336   __ jcc(Assembler::notEqual, slow_case);
4337 
4338   // get InstanceKlass
4339   __ load_resolved_klass_at_index(rcx, rcx, rdx);
4340 
4341   // make sure klass is initialized & doesn't have finalizer
4342   __ cmpb(Address(rcx, InstanceKlass::init_state_offset()), InstanceKlass::fully_initialized);
4343   __ jcc(Assembler::notEqual, slow_case);
4344 
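       // Fast path: allocate_instance attempts to allocate the instance inline and
       // branches to slow_case if that is not possible.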
4345   __ allocate_instance(rcx, rax, rdx, rbx, true, slow_case);
4346   __ jmp(done);
4347 
4348   // slow case
4349   __ bind(slow_case);
4350 
4351   Register rarg1 = LP64_ONLY(c_rarg1) NOT_LP64(rax);
4352   Register rarg2 = LP64_ONLY(c_rarg2) NOT_LP64(rdx);
4353 
4354   __ get_constant_pool(rarg1);
4355   __ get_unsigned_2_byte_index_at_bcp(rarg2, 1);
4356   call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), rarg1, rarg2);
4357   __ verify_oop(rax);
4358 
4359   // continue
4360   __ bind(done);
4361 }
4362 
4363 void TemplateTable::defaultvalue() {
4364   transition(vtos, atos);
4365 
4366   Label slow_case;
4367   Label done;
4368 
4369   __ get_unsigned_2_byte_index_at_bcp(rdx, 1);
4370   __ get_cpool_and_tags(rcx, rax);
4371 
4372   // Make sure the class we're about to instantiate has been resolved.
4373   // This is done before loading InstanceKlass to be consistent with the order
4374   // in which the Constant Pool is updated (see ConstantPool::klass_at_put)
4375   const int tags_offset = Array<u1>::base_offset_in_bytes();
4376   __ cmpb(Address(rax, rdx, Address::times_1, tags_offset), JVM_CONSTANT_Class);
4377   __ jcc(Assembler::notEqual, slow_case);
4378 
4379   // get InstanceKlass
4380   __ load_resolved_klass_at_index(rcx, rcx, rdx);
4381 
4382   // make sure klass is fully initialized
4383   __ cmpb(Address(rcx, InstanceKlass::init_state_offset()), InstanceKlass::fully_initialized);
4384   __ jcc(Assembler::notEqual, slow_case);
4385 
4386   // have a resolved ValueKlass in rcx, return the default value oop from it
4387   __ get_default_value_oop(rcx, rdx, rax);
4388   __ jmp(done);
4389 
4390   __ bind(slow_case);
4391 
4392   Register rarg1 = LP64_ONLY(c_rarg1) NOT_LP64(rcx);
4393   Register rarg2 = LP64_ONLY(c_rarg2) NOT_LP64(rdx);
4394 
4395   __ get_unsigned_2_byte_index_at_bcp(rarg2, 1);
4396   __ get_constant_pool(rarg1);
4397 
4398   call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::defaultvalue),
4399       rarg1, rarg2);
4400 
4401   __ bind(done);
4402   __ verify_oop(rax);
4403 }
4404 
4405 void TemplateTable::newarray() {
4406   transition(itos, atos);
4407   Register rarg1 = LP64_ONLY(c_rarg1) NOT_LP64(rdx);
4408   __ load_unsigned_byte(rarg1, at_bcp(1));
4409   call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray),
4410           rarg1, rax);
4411 }
4412 
4413 void TemplateTable::anewarray() {
4414   transition(itos, atos);
4415 
4416   Register rarg1 = LP64_ONLY(c_rarg1) NOT_LP64(rcx);
4417   Register rarg2 = LP64_ONLY(c_rarg2) NOT_LP64(rdx);
4418 
4419   __ get_unsigned_2_byte_index_at_bcp(rarg2, 1);
4420   __ get_constant_pool(rarg1);
4421   call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray),
4422           rarg1, rarg2, rax);
4423 }
4424 
4425 void TemplateTable::arraylength() {
4426   transition(atos, itos);
4427   __ null_check(rax, arrayOopDesc::length_offset_in_bytes());
4428   __ movl(rax, Address(rax, arrayOopDesc::length_offset_in_bytes()));
4429 }
4430 
4431 void TemplateTable::checkcast() {
4432   transition(atos, atos);
4433   Label done, is_null, ok_is_subtype, quicked, resolved;
4434   __ testptr(rax, rax); // object is in rax
4435   __ jcc(Assembler::zero, is_null);
4436 
4437   // Get cpool & tags index
4438   __ get_cpool_and_tags(rcx, rdx); // rcx=cpool, rdx=tags array
4439   __ get_unsigned_2_byte_index_at_bcp(rbx, 1); // rbx=index
4440   // See if bytecode has already been quicked
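       // (the Q-descriptor bit is masked off so that both L-type and Q-type class
       //  entries are treated as already quicked)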
4441   __ movzbl(rdx, Address(rdx, rbx,
4442       Address::times_1,
4443       Array<u1>::base_offset_in_bytes()));
4444   __ andl (rdx, ~JVM_CONSTANT_QDescBit);
4445   __ cmpl(rdx, JVM_CONSTANT_Class);
4446   __ jcc(Assembler::equal, quicked);
4447   __ push(atos); // save receiver for result, and for GC
4448   call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
4449 
4450   // vm_result_2 has metadata result
4451 #ifndef _LP64
4452   // borrow rdi from locals
4453   __ get_thread(rdi);
4454   __ get_vm_result_2(rax, rdi);
4455   __ restore_locals();
4456 #else
4457   __ get_vm_result_2(rax, r15_thread);
4458 #endif
4459 
4460   __ pop_ptr(rdx); // restore receiver
4461   __ jmpb(resolved);
4462 
4463   // Get superklass in rax and subklass in rbx
4464   __ bind(quicked);
4465   __ mov(rdx, rax); // Save object in rdx; rax needed for subtype check
4466   __ load_resolved_klass_at_index(rax, rcx, rbx);
4467 
4468   __ bind(resolved);
4469   __ load_klass(rbx, rdx);
4470 
4471   // Generate subtype check.  Blows rcx, rdi.  Object in rdx.
4472   // Superklass in rax.  Subklass in rbx.
4473   __ gen_subtype_check(rbx, ok_is_subtype);
4474 
4475   // Come here on failure
4476   __ push_ptr(rdx);
4477   // object is at TOS
4478   __ jump(ExternalAddress(Interpreter::_throw_ClassCastException_entry));
4479 
4480   // Come here on success
4481   __ bind(ok_is_subtype);
4482   __ mov(rax, rdx); // Restore object in rdx
4483   __ jmp(done);
4484 
4485   __ bind(is_null);
4486 
4487   // Collect counts on whether this check-cast sees NULLs a lot or not.
4488   if (ProfileInterpreter) {
4489     __ profile_null_seen(rcx);
4490   }
4491 
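       // With Valhalla, null cannot be cast to an inline type: if the constant pool
       // entry carries the Q-descriptor bit, throw NullPointerException instead.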
4492   if (EnableValhalla) {
4493     // Get cpool & tags index
4494     __ get_cpool_and_tags(rcx, rdx); // rcx=cpool, rdx=tags array
4495     __ get_unsigned_2_byte_index_at_bcp(rbx, 1); // rbx=index
4496     // See if CP entry is a Q-descriptor
4497     __ movzbl(rcx, Address(rdx, rbx,
4498         Address::times_1,
4499         Array<u1>::base_offset_in_bytes()));
4500     __ andl (rcx, JVM_CONSTANT_QDescBit);
4501     __ cmpl(rcx, JVM_CONSTANT_QDescBit);
4502     __ jcc(Assembler::notEqual, done);
4503     __ jump(ExternalAddress(Interpreter::_throw_NullPointerException_entry));
4504   }
4505 
4506   __ bind(done);
4507 }
4508 
4509 void TemplateTable::instanceof() {
4510   transition(atos, itos);
4511   Label done, is_null, ok_is_subtype, quicked, resolved;
4512   __ testptr(rax, rax);
4513   __ jcc(Assembler::zero, is_null);
4514 
4515   // Get cpool & tags index
4516   __ get_cpool_and_tags(rcx, rdx); // rcx=cpool, rdx=tags array
4517   __ get_unsigned_2_byte_index_at_bcp(rbx, 1); // rbx=index
4518   // See if bytecode has already been quicked
4519   __ movzbl(rdx, Address(rdx, rbx,
4520         Address::times_1,
4521         Array<u1>::base_offset_in_bytes()));
4522   __ andl (rdx, ~JVM_CONSTANT_QDescBit);
4523   __ cmpl(rdx, JVM_CONSTANT_Class);
4524   __ jcc(Assembler::equal, quicked);
4525 
4526   __ push(atos); // save receiver for result, and for GC
4527   call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
4528   // vm_result_2 has metadata result
4529 
4530 #ifndef _LP64
4531   // borrow rdi from locals
4532   __ get_thread(rdi);
4533   __ get_vm_result_2(rax, rdi);
4534   __ restore_locals();
4535 #else
4536   __ get_vm_result_2(rax, r15_thread);
4537 #endif
4538 
4539   __ pop_ptr(rdx); // restore receiver
4540   __ verify_oop(rdx);
4541   __ load_klass(rdx, rdx);
4542   __ jmpb(resolved);
4543 
4544   // Get superklass in rax and subklass in rdx
4545   __ bind(quicked);
4546   __ load_klass(rdx, rax);
4547   __ load_resolved_klass_at_index(rax, rcx, rbx);
4548 
4549   __ bind(resolved);
4550 
4551   // Generate subtype check.  Blows rcx, rdi
4552   // Superklass in rax.  Subklass in rdx.
4553   __ gen_subtype_check(rdx, ok_is_subtype);
4554 
4555   // Come here on failure
4556   __ xorl(rax, rax);
4557   __ jmpb(done);
4558   // Come here on success
4559   __ bind(ok_is_subtype);
4560   __ movl(rax, 1);
4561 
4562   // Collect counts on whether this test sees NULLs a lot or not.
4563   if (ProfileInterpreter) {
4564     __ jmp(done);
4565     __ bind(is_null);
4566     __ profile_null_seen(rcx);
4567   } else {
4568     __ bind(is_null);   // same as 'done'
4569   }
4570   __ bind(done);
4571   // rax = 0: obj == NULL or  obj is not an instanceof the specified klass
4572   // rax = 1: obj != NULL and obj is     an instanceof the specified klass
4573 }
4574 
4575 //----------------------------------------------------------------------------------------------------
4576 // Breakpoints
4577 void TemplateTable::_breakpoint() {
4578   // Note: We get here even if we are single stepping...
4579   // jbug insists on setting breakpoints at every bytecode
4580   // even if we are in single step mode.
4581 
4582   transition(vtos, vtos);
4583 
4584   Register rarg = LP64_ONLY(c_rarg1) NOT_LP64(rcx);
4585 
4586   // get the unpatched byte code
4587   __ get_method(rarg);
4588   __ call_VM(noreg,
4589              CAST_FROM_FN_PTR(address,
4590                               InterpreterRuntime::get_original_bytecode_at),
4591              rarg, rbcp);
4592   __ mov(rbx, rax);  // why?
4593 
4594   // post the breakpoint event
4595   __ get_method(rarg);
4596   __ call_VM(noreg,
4597              CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint),
4598              rarg, rbcp);
4599 
4600   // complete the execution of original bytecode
4601   __ dispatch_only_normal(vtos);
4602 }
4603 
4604 //-----------------------------------------------------------------------------
4605 // Exceptions
4606 
4607 void TemplateTable::athrow() {
4608   transition(atos, vtos);
4609   __ null_check(rax);
4610   __ jump(ExternalAddress(Interpreter::throw_exception_entry()));
4611 }
4612 
4613 //-----------------------------------------------------------------------------
4614 // Synchronization
4615 //
4616 // Note: monitorenter & exit are symmetric routines, which is reflected
4617 //       in the assembly code structure as well
4618 //
4619 // Stack layout:
4620 //
4621 // [expressions  ] <--- rsp               = expression stack top
4622 // ..
4623 // [expressions  ]
4624 // [monitor entry] <--- monitor block top = expression stack bot
4625 // ..
4626 // [monitor entry]
4627 // [frame data   ] <--- monitor block bot
4628 // ...
4629 // [saved rbp    ] <--- rbp
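     //
     // Worked example (illustrative, 64-bit): each monitor entry is a
     // BasicObjectLock, i.e. a displaced mark word slot plus an object pointer.
     // With two nested synchronized regions active, two such entries sit between
     // monitor block top and monitor block bot; monitorenter scans that region
     // top-down for a free or matching slot and, if none exists, grows the block
     // by one entry_size, shifting the expression stack down.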
4630 void TemplateTable::monitorenter() {
4631   transition(atos, vtos);
4632 
4633   // check for NULL object
4634   __ null_check(rax);
4635 
4636   __ resolve(IS_NOT_NULL, rax);
4637 
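       // Inline (value) objects have no identity and can never be locked: their
       // mark word always carries the always_locked pattern. Detect that pattern
       // here and throw IllegalMonitorStateException instead of locking.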
4638   const int is_value_mask = markWord::always_locked_pattern;
4639   Label has_identity;
4640   __ movptr(rbx, Address(rax, oopDesc::mark_offset_in_bytes()));
4641   __ andptr(rbx, is_value_mask);
4642   __ cmpl(rbx, is_value_mask);
4643   __ jcc(Assembler::notEqual, has_identity);
4644   __ call_VM(noreg, CAST_FROM_FN_PTR(address,
4645                      InterpreterRuntime::throw_illegal_monitor_state_exception));
4646   __ should_not_reach_here();
4647   __ bind(has_identity);
4648 
4649   const Address monitor_block_top(
4650         rbp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
4651   const Address monitor_block_bot(
4652         rbp, frame::interpreter_frame_initial_sp_offset * wordSize);
4653   const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
4654 
4655   Label allocated;
4656 
4657   Register rtop = LP64_ONLY(c_rarg3) NOT_LP64(rcx);
4658   Register rbot = LP64_ONLY(c_rarg2) NOT_LP64(rbx);
4659   Register rmon = LP64_ONLY(c_rarg1) NOT_LP64(rdx);
4660 
4661   // initialize entry pointer
4662   __ xorl(rmon, rmon); // points to free slot or NULL
4663 
4664   // find a free slot in the monitor block (result in rmon)
4665   {
4666     Label entry, loop, exit;
4667     __ movptr(rtop, monitor_block_top); // points to current entry,
4668                                         // starting with top-most entry
4669     __ lea(rbot, monitor_block_bot);    // points to word before bottom
4670                                         // of monitor block
4671     __ jmpb(entry);
4672 
4673     __ bind(loop);
4674     // check if current entry is used
4675     __ cmpptr(Address(rtop, BasicObjectLock::obj_offset_in_bytes()), (int32_t) NULL_WORD);
4676     // if not used then remember entry in rmon
4677     __ cmovptr(Assembler::equal, rmon, rtop);   // cmov => cmovptr
4678     // check if current entry is for same object
4679     __ cmpptr(rax, Address(rtop, BasicObjectLock::obj_offset_in_bytes()));
4680     // if same object then stop searching
4681     __ jccb(Assembler::equal, exit);
4682     // otherwise advance to next entry
4683     __ addptr(rtop, entry_size);
4684     __ bind(entry);
4685     // check if bottom reached
4686     __ cmpptr(rtop, rbot);
4687     // if not at bottom then check this entry
4688     __ jcc(Assembler::notEqual, loop);
4689     __ bind(exit);
4690   }
4691 
4692   __ testptr(rmon, rmon); // check if a slot has been found
4693   __ jcc(Assembler::notZero, allocated); // if found, continue with that one
4694 
4695   // allocate one if there's no free slot
4696   {
4697     Label entry, loop;
4698     // 1. compute new pointers          // rsp: old expression stack top
4699     __ movptr(rmon, monitor_block_bot); // rmon: old expression stack bottom
4700     __ subptr(rsp, entry_size);         // move expression stack top
4701     __ subptr(rmon, entry_size);        // move expression stack bottom
4702     __ mov(rtop, rsp);                  // set start value for copy loop
4703     __ movptr(monitor_block_bot, rmon); // set new monitor block bottom
4704     __ jmp(entry);
4705     // 2. move expression stack contents
4706     __ bind(loop);
4707     __ movptr(rbot, Address(rtop, entry_size)); // load expression stack
4708                                                 // word from old location
4709     __ movptr(Address(rtop, 0), rbot);          // and store it at new location
4710     __ addptr(rtop, wordSize);                  // advance to next word
4711     __ bind(entry);
4712     __ cmpptr(rtop, rmon);                      // check if bottom reached
4713     __ jcc(Assembler::notEqual, loop);          // if not at bottom then
4714                                                 // copy next word
4715   }
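
       // E.g. (illustrative): with one expression stack word W at rsp and no free
       // slot, rsp and the stored expression stack bottom both drop by entry_size,
       // W is copied entry_size bytes lower, and the vacated region just above it
       // becomes the new BasicObjectLock entry whose address is left in rmon.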
4716 
4717   // call run-time routine
4718   // rmon: points to monitor entry
4719   __ bind(allocated);
4720 
4721   // Increment bcp to point to the next bytecode, so exception
4722   // handling for async. exceptions works correctly.
4723   // The object has already been popped from the stack, so the
4724   // expression stack looks correct.
4725   __ increment(rbcp);
4726 
4727   // store object
4728   __ movptr(Address(rmon, BasicObjectLock::obj_offset_in_bytes()), rax);
4729   __ lock_object(rmon);
4730 
4731   // check to make sure this monitor doesn't cause stack overflow after locking
4732   __ save_bcp();  // in case of exception
4733   __ generate_stack_overflow_check(0);
4734 
4735   // The bcp has already been incremented. Just need to dispatch to
4736   // next instruction.
4737   __ dispatch_next(vtos);
4738 }
4739 
4740 void TemplateTable::monitorexit() {
4741   transition(atos, vtos);
4742 
4743   // check for NULL object
4744   __ null_check(rax);
4745 
4746   __ resolve(IS_NOT_NULL, rax);
4747 
4748   const int is_value_mask = markWord::always_locked_pattern;
4749   Label has_identity;
4750   __ movptr(rbx, Address(rax, oopDesc::mark_offset_in_bytes()));
4751   __ andptr(rbx, is_value_mask);
4752   __ cmpl(rbx, is_value_mask);
4753   __ jcc(Assembler::notEqual, has_identity);
4754   __ call_VM(noreg, CAST_FROM_FN_PTR(address,
4755                      InterpreterRuntime::throw_illegal_monitor_state_exception));
4756   __ should_not_reach_here();
4757   __ bind(has_identity);
4758 
4759   const Address monitor_block_top(
4760         rbp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
4761   const Address monitor_block_bot(
4762         rbp, frame::interpreter_frame_initial_sp_offset * wordSize);
4763   const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
4764 
4765   Register rtop = LP64_ONLY(c_rarg1) NOT_LP64(rdx);
4766   Register rbot = LP64_ONLY(c_rarg2) NOT_LP64(rbx);
4767 
4768   Label found;
4769 
4770   // find matching slot
4771   {
4772     Label entry, loop;
4773     __ movptr(rtop, monitor_block_top); // points to current entry,
4774                                         // starting with top-most entry
4775     __ lea(rbot, monitor_block_bot);    // points to word before bottom
4776                                         // of monitor block
4777     __ jmpb(entry);
4778 
4779     __ bind(loop);
4780     // check if current entry is for same object
4781     __ cmpptr(rax, Address(rtop, BasicObjectLock::obj_offset_in_bytes()));
4782     // if same object then stop searching
4783     __ jcc(Assembler::equal, found);
4784     // otherwise advance to next entry
4785     __ addptr(rtop, entry_size);
4786     __ bind(entry);
4787     // check if bottom reached
4788     __ cmpptr(rtop, rbot);
4789     // if not at bottom then check this entry
4790     __ jcc(Assembler::notEqual, loop);
4791   }
4792 
4793   // Error handling: no matching entry was found, so unlocking was not block-structured
4794   __ call_VM(noreg, CAST_FROM_FN_PTR(address,
4795                    InterpreterRuntime::throw_illegal_monitor_state_exception));
4796   __ should_not_reach_here();
4797 
4798   // call run-time routine
4799   __ bind(found);
4800   __ push_ptr(rax); // make sure object is on stack (contract with oopMaps)
4801   __ unlock_object(rtop);
4802   __ pop_ptr(rax); // discard object
4803 }
4804 
4805 // Wide instructions
4806 void TemplateTable::wide() {
4807   transition(vtos, vtos);
4808   __ load_unsigned_byte(rbx, at_bcp(1));
4809   ExternalAddress wtable((address)Interpreter::_wentry_point);
4810   __ jump(ArrayAddress(wtable, Address(noreg, rbx, Address::times_ptr)));
4811   // Note: the rbcp increment step is part of the individual wide bytecode implementations
4812 }
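
     // Illustrative example (not generated code): for "wide iload" the bytecode
     // stream is 0xc4 0x15 <u2 index>. The load above puts 0x15 (iload) in rbx,
     // and the indirect jump enters the wide-iload entry of _wentry_point, which
     // reads the 2-byte local index and advances rbcp past the whole instruction.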
4813 
4814 // Multi arrays
4815 void TemplateTable::multianewarray() {
4816   transition(vtos, atos);
4817 
4818   Register rarg = LP64_ONLY(c_rarg1) NOT_LP64(rax);
4819   __ load_unsigned_byte(rax, at_bcp(3)); // get number of dimensions
4820   // last dim is on top of stack; we want the address of the first one:
4821   // first_addr = last_addr + ndims * stackElementSize - 1*wordSize
4822   // (the -wordSize steps back from just past the first dimension to the first dimension itself).
4823   __ lea(rarg, Address(rsp, rax, Interpreter::stackElementScale(), -wordSize));
4824   call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), rarg);
4825   __ load_unsigned_byte(rbx, at_bcp(3));
4826   __ lea(rsp, Address(rsp, rbx, Interpreter::stackElementScale()));  // get rid of counts
4827 }
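
     // Worked example (illustrative, LP64 where a stack element is one word wide):
     // for "new int[2][3]", ndims == 2 and the dimension counts are pushed in
     // source order, so 3 (the last dim) sits at rsp and 2 (the first dim) at
     // rsp + 8. The lea above computes rsp + 2*8 - 8 == rsp + 8, the address of
     // the first dimension, from which the runtime reads all dimension counts.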