1 /*
   2  * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/assembler.hpp"
  27 #include "asm/assembler.inline.hpp"
  28 #include "gc/shared/cardTableBarrierSet.hpp"
  29 #include "gc/shared/collectedHeap.inline.hpp"
  30 #include "interpreter/interpreter.hpp"
  31 #include "memory/resourceArea.hpp"
  32 #include "prims/methodHandles.hpp"
  33 #include "runtime/biasedLocking.hpp"
  34 #include "runtime/objectMonitor.hpp"
  35 #include "runtime/os.hpp"
  36 #include "runtime/sharedRuntime.hpp"
  37 #include "runtime/stubRoutines.hpp"
  38 #include "utilities/macros.hpp"
  39 
  40 #ifdef PRODUCT
  41 #define BLOCK_COMMENT(str) /* nothing */
  42 #define STOP(error) stop(error)
  43 #else
  44 #define BLOCK_COMMENT(str) block_comment(str)
  45 #define STOP(error) block_comment(error); stop(error)
  46 #endif
  47 
  48 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
  49 // Implementation of AddressLiteral
  50 
  51 // A 2-D table for managing compressed displacement(disp8) on EVEX enabled platforms.
// Indexed as [tuple type + mod index][vector length]. Each entry is the
// disp8*N scaling factor N from the Intel SDM compressed-displacement
// tables (Table 4.5 / 4.6): under EVEX encoding an 8-bit displacement is
// implicitly multiplied by N, so a raw displacement can be emitted as a
// single byte only when it is an exact multiple of N and the quotient
// fits in 8 bits (see emit_compressed_disp_byte). A 0 entry marks a
// tuple-type / vector-length combination that is not valid.
unsigned char tuple_table[Assembler::EVEX_ETUP + 1][Assembler::AVX_512bit + 1] = {
  // -----------------Table 4.5 -------------------- //
  16, 32, 64,  // EVEX_FV(0)
  4,  4,  4,   // EVEX_FV(1) - with Evex.b
  16, 32, 64,  // EVEX_FV(2) - with Evex.w
  8,  8,  8,   // EVEX_FV(3) - with Evex.w and Evex.b
  8,  16, 32,  // EVEX_HV(0)
  4,  4,  4,   // EVEX_HV(1) - with Evex.b
  // -----------------Table 4.6 -------------------- //
  16, 32, 64,  // EVEX_FVM(0)
  1,  1,  1,   // EVEX_T1S(0)
  2,  2,  2,   // EVEX_T1S(1)
  4,  4,  4,   // EVEX_T1S(2)
  8,  8,  8,   // EVEX_T1S(3)
  4,  4,  4,   // EVEX_T1F(0)
  8,  8,  8,   // EVEX_T1F(1)
  8,  8,  8,   // EVEX_T2(0)
  0,  16, 16,  // EVEX_T2(1)
  0,  16, 16,  // EVEX_T4(0)
  0,  0,  32,  // EVEX_T4(1)
  0,  0,  32,  // EVEX_T8(0)
  8,  16, 32,  // EVEX_HVM(0)
  4,  8,  16,  // EVEX_QVM(0)
  2,  4,  8,   // EVEX_OVM(0)
  16, 16, 16,  // EVEX_M128(0)
  8,  32, 64,  // EVEX_DUP(0)
  0,  0,  0    // EVEX_NTUP
};
  80 
  81 AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  82   _is_lval = false;
  83   _target = target;
  84   switch (rtype) {
  85   case relocInfo::oop_type:
  86   case relocInfo::metadata_type:
  87     // Oops are a special case. Normally they would be their own section
  88     // but in cases like icBuffer they are literals in the code stream that
  89     // we don't have a section for. We use none so that we get a literal address
  90     // which is always patchable.
  91     break;
  92   case relocInfo::external_word_type:
  93     _rspec = external_word_Relocation::spec(target);
  94     break;
  95   case relocInfo::internal_word_type:
  96     _rspec = internal_word_Relocation::spec(target);
  97     break;
  98   case relocInfo::opt_virtual_call_type:
  99     _rspec = opt_virtual_call_Relocation::spec();
 100     break;
 101   case relocInfo::static_call_type:
 102     _rspec = static_call_Relocation::spec();
 103     break;
 104   case relocInfo::runtime_call_type:
 105     _rspec = runtime_call_Relocation::spec();
 106     break;
 107   case relocInfo::poll_type:
 108   case relocInfo::poll_return_type:
 109     _rspec = Relocation::spec_simple(rtype);
 110     break;
 111   case relocInfo::none:
 112     break;
 113   default:
 114     ShouldNotReachHere();
 115     break;
 116   }
 117 }
 118 
 119 // Implementation of Address
 120 
 121 #ifdef _LP64
 122 
// Array addresses cannot be formed as absolute addresses on 64-bit
// (they would need a 64-bit displacement), so this must never be reached.
Address Address::make_array(ArrayAddress adr) {
  // Not implementable on 64bit machines
  // Should have been handled higher up the call chain.
  ShouldNotReachHere();
  return Address();
}
 129 
 130 // exceedingly dangerous constructor
// exceedingly dangerous constructor
// Builds a base-less, index-less Address whose displacement is 'disp',
// relocated according to 'rtype' with 'loc' as the relocation target.
// Dangerous because the caller must guarantee disp/loc are consistent.
Address::Address(int disp, address loc, relocInfo::relocType rtype) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = disp;
  switch (rtype) {
    case relocInfo::external_word_type:
      _rspec = external_word_Relocation::spec(loc);
      break;
    case relocInfo::internal_word_type:
      _rspec = internal_word_Relocation::spec(loc);
      break;
    case relocInfo::runtime_call_type:
      // runtime_call relocations carry no target; 'loc' is ignored here.
      _rspec = runtime_call_Relocation::spec();
      break;
    case relocInfo::poll_type:
    case relocInfo::poll_return_type:
      _rspec = Relocation::spec_simple(rtype);
      break;
    case relocInfo::none:
      break;
    default:
      ShouldNotReachHere();
  }
}
 157 #else // LP64
 158 
// 32-bit only: combine the array's absolute base address (as the
// displacement) with the index expression's base/index/scale, carrying
// over the base's relocation spec so the embedded address is patchable.
Address Address::make_array(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
  array._rspec = base._rspec;
  return array;
}
 167 
 168 // exceedingly dangerous constructor
// exceedingly dangerous constructor
// 32-bit only: smuggles an absolute address through the displacement
// field (no base, no index) with a caller-supplied relocation spec.
Address::Address(address loc, RelocationHolder spec) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = (intptr_t) loc;
  _rspec = spec;
}
 176 
 177 #endif // _LP64
 178 
 179 
 180 
 181 // Convert the raw encoding form into the form expected by the constructor for
 182 // Address.  An index of 4 (rsp) corresponds to having no index, so convert
 183 // that to noreg for the Address constructor.
 184 Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
 185   RelocationHolder rspec;
 186   if (disp_reloc != relocInfo::none) {
 187     rspec = Relocation::spec_simple(disp_reloc);
 188   }
 189   bool valid_index = index != rsp->encoding();
 190   if (valid_index) {
 191     Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
 192     madr._rspec = rspec;
 193     return madr;
 194   } else {
 195     Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
 196     madr._rspec = rspec;
 197     return madr;
 198   }
 199 }
 200 
 201 // Implementation of Assembler
 202 
 203 int AbstractAssembler::code_fill_byte() {
 204   return (u_char)'\xF4'; // hlt
 205 }
 206 
 207 // make this go away someday
 208 void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
 209   if (rtype == relocInfo::none)
 210     emit_int32(data);
 211   else
 212     emit_data(data, Relocation::spec_simple(rtype), format);
 213 }
 214 
// Emit a 32-bit data word, registering 'rspec' (anchored at the start of
// the current instruction, not at the word itself) so the word can later
// be located and patched.
void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  if (rspec.type() !=  relocInfo::none) {
    #ifdef ASSERT
      check_relocation(rspec, format);
    #endif
    // Do not use AbstractAssembler::relocate, which is not intended for
    // embedded words.  Instead, relocate to the enclosing instruction.

    // hack. call32 is too wide for mask so use disp32
    if (format == call32_operand)
      code_section()->relocate(inst_mark(), rspec, disp32_operand);
    else
      code_section()->relocate(inst_mark(), rspec, format);
  }
  emit_int32(data);
}
 233 
 234 static int encode(Register r) {
 235   int enc = r->encoding();
 236   if (enc >= 8) {
 237     enc -= 8;
 238   }
 239   return enc;
 240 }
 241 
 242 void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
 243   assert(dst->has_byte_register(), "must have byte register");
 244   assert(isByte(op1) && isByte(op2), "wrong opcode");
 245   assert(isByte(imm8), "not a byte");
 246   assert((op1 & 0x01) == 0, "should be 8bit operation");
 247   emit_int8(op1);
 248   emit_int8(op2 | encode(dst));
 249   emit_int8(imm8);
 250 }
 251 
 252 
 253 void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
 254   assert(isByte(op1) && isByte(op2), "wrong opcode");
 255   assert((op1 & 0x01) == 1, "should be 32bit operation");
 256   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
 257   if (is8bit(imm32)) {
 258     emit_int8(op1 | 0x02); // set sign bit
 259     emit_int8(op2 | encode(dst));
 260     emit_int8(imm32 & 0xFF);
 261   } else {
 262     emit_int8(op1);
 263     emit_int8(op2 | encode(dst));
 264     emit_int32(imm32);
 265   }
 266 }
 267 
 268 // Force generation of a 4 byte immediate value even if it fits into 8bit
 269 void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) {
 270   assert(isByte(op1) && isByte(op2), "wrong opcode");
 271   assert((op1 & 0x01) == 1, "should be 32bit operation");
 272   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
 273   emit_int8(op1);
 274   emit_int8(op2 | encode(dst));
 275   emit_int32(imm32);
 276 }
 277 
 278 // immediate-to-memory forms
 279 void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
 280   assert((op1 & 0x01) == 1, "should be 32bit operation");
 281   assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
 282   if (is8bit(imm32)) {
 283     emit_int8(op1 | 0x02); // set sign bit
 284     emit_operand(rm, adr, 1);
 285     emit_int8(imm32 & 0xFF);
 286   } else {
 287     emit_int8(op1);
 288     emit_operand(rm, adr, 4);
 289     emit_int32(imm32);
 290   }
 291 }
 292 
 293 
 294 void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
 295   assert(isByte(op1) && isByte(op2), "wrong opcode");
 296   emit_int8(op1);
 297   emit_int8(op2 | encode(dst) << 3 | encode(src));
 298 }
 299 
 300 
// Predicate twin of emit_compressed_disp_byte(): decides whether 'disp'
// could be emitted as a single (possibly EVEX-compressed disp8*N) byte,
// given the instruction attributes explicitly instead of reading the
// assembler's current _attributes. 'disp' is taken by value, so the
// caller's displacement is never modified.
bool Assembler::query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len,
                                           int cur_tuple_type, int in_size_in_bits, int cur_encoding) {
  int mod_idx = 0;
  // We will test if the displacement fits the compressed format and if so
  // apply the compression to the displacement iff the result is 8-bit.
  if (VM_Version::supports_evex() && is_evex_inst) {
    // Pick the row offset into tuple_table: for some tuple types the
    // disp8*N scale factor also depends on EVEX.w/EVEX.b or the input size.
    switch (cur_tuple_type) {
    case EVEX_FV:
      if ((cur_encoding & VEX_W) == VEX_W) {
        mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2;
      } else {
        mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      }
      break;

    case EVEX_HV:
      mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      break;

    case EVEX_FVM:
      break;

    case EVEX_T1S:
      // Scale factor is the operand's element size.
      switch (in_size_in_bits) {
      case EVEX_8bit:
        break;

      case EVEX_16bit:
        mod_idx = 1;
        break;

      case EVEX_32bit:
        mod_idx = 2;
        break;

      case EVEX_64bit:
        mod_idx = 3;
        break;
      }
      break;

    case EVEX_T1F:
    case EVEX_T2:
    case EVEX_T4:
      mod_idx = (in_size_in_bits == EVEX_64bit) ? 1 : 0;
      break;

    case EVEX_T8:
      break;

    case EVEX_HVM:
      break;

    case EVEX_QVM:
      break;

    case EVEX_OVM:
      break;

    case EVEX_M128:
      break;

    case EVEX_DUP:
      break;

    default:
      assert(0, "no valid evex tuple_table entry");
      break;
    }

    if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) {
      int disp_factor = tuple_table[cur_tuple_type + mod_idx][vector_len];
      if ((disp % disp_factor) == 0) {
        int new_disp = disp / disp_factor;
        if ((-0x80 <= new_disp && new_disp < 0x80)) {
          disp = new_disp;
        }
      } else {
        // Not an exact multiple of N: disp8*N cannot represent it.
        return false;
      }
    }
  }
  // Compressed or not, a one-byte displacement must fit in a signed byte.
  return (-0x80 <= disp && disp < 0x80);
}
 385 
 386 
// Decide whether the displacement can be emitted as a single byte and, for
// EVEX instructions, rewrite 'disp' (in/out) to the compressed disp8*N
// quotient when the raw displacement is an exact multiple of the tuple's
// scale factor N (see tuple_table). Returns true iff the (possibly
// rewritten) displacement fits in a signed byte. Mirrors the logic of
// query_compressed_disp_byte(), but reads the current _attributes.
bool Assembler::emit_compressed_disp_byte(int &disp) {
  int mod_idx = 0;
  // We will test if the displacement fits the compressed format and if so
  // apply the compression to the displacement iff the result is 8-bit.
  if (VM_Version::supports_evex() && _attributes && _attributes->is_evex_instruction()) {
    int evex_encoding = _attributes->get_evex_encoding();
    int tuple_type = _attributes->get_tuple_type();
    // Pick the row offset into tuple_table: for some tuple types the
    // disp8*N scale factor also depends on EVEX.w/EVEX.b or the input size.
    switch (tuple_type) {
    case EVEX_FV:
      if ((evex_encoding & VEX_W) == VEX_W) {
        mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2;
      } else {
        mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      }
      break;

    case EVEX_HV:
      mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      break;

    case EVEX_FVM:
      break;

    case EVEX_T1S:
      // Scale factor is the operand's element size.
      switch (_attributes->get_input_size()) {
      case EVEX_8bit:
        break;

      case EVEX_16bit:
        mod_idx = 1;
        break;

      case EVEX_32bit:
        mod_idx = 2;
        break;

      case EVEX_64bit:
        mod_idx = 3;
        break;
      }
      break;

    case EVEX_T1F:
    case EVEX_T2:
    case EVEX_T4:
      mod_idx = (_attributes->get_input_size() == EVEX_64bit) ? 1 : 0;
      break;

    case EVEX_T8:
      break;

    case EVEX_HVM:
      break;

    case EVEX_QVM:
      break;

    case EVEX_OVM:
      break;

    case EVEX_M128:
      break;

    case EVEX_DUP:
      break;

    default:
      assert(0, "no valid evex tuple_table entry");
      break;
    }

    int vector_len = _attributes->get_vector_len();
    if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) {
      int disp_factor = tuple_table[tuple_type + mod_idx][vector_len];
      if ((disp % disp_factor) == 0) {
        int new_disp = disp / disp_factor;
        if (is8bit(new_disp)) {
          // Commit the compressed displacement; the caller emits this byte.
          disp = new_disp;
        }
      } else {
        // Not an exact multiple of N: disp8*N cannot represent it.
        return false;
      }
    }
  }
  return is8bit(disp);
}
 473 
 474 
// Emit the ModRM byte and, as required, the SIB byte and displacement for
// a memory operand [base + index*scale + disp]. 'reg' supplies the reg
// field of ModRM. Displacements are narrowed to a single byte (possibly
// EVEX disp8*N compressed, see emit_compressed_disp_byte) when possible.
// 'rip_relative_correction' is the number of immediate bytes that will
// follow the displacement, so 64-bit RIP-relative offsets are computed
// against the true next-instruction pc.
void Assembler::emit_operand(Register reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();

  // Encode the registers as needed in the fields they are used in

  int regenc = encode(reg) << 3;
  int indexenc = index->is_valid() ? encode(index) << 3 : 0;
  int baseenc = base->is_valid() ? encode(base) : 0;

  if (base->is_valid()) {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [base + index*scale + disp]
      // rbp/r13 as base cannot use the no-displacement encoding (that
      // ModRM pattern means disp32-only), hence the exclusions below.
      if (disp == 0 && rtype == relocInfo::none  &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x04 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
        // [base + index*scale + imm8]
        // [01 reg 100][ss index base] imm8
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x44 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
        emit_int8(disp & 0xFF);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x84 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base == rsp LP64_ONLY(|| base == r12)) {
      // rsp/r12 as base always needs a SIB byte (base encoding 100
      // in ModRM is the SIB escape).
      // [rsp + disp]
      if (disp == 0 && rtype == relocInfo::none) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_int8(0x04 | regenc);
        emit_int8(0x24);
      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_int8(0x44 | regenc);
        emit_int8(0x24);
        emit_int8(disp & 0xFF);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_int8(0x84 | regenc);
        emit_int8(0x24);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp]
      assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base]
        // [00 reg base]
        emit_int8(0x00 | regenc | baseenc);
      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
        // [base + disp8]
        // [01 reg base] disp8
        emit_int8(0x40 | regenc | baseenc);
        emit_int8(disp & 0xFF);
      } else {
        // [base + disp32]
        // [10 reg base] disp32
        emit_int8(0x80 | regenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [index*scale + disp]
      // [00 reg 100][ss index 101] disp32
      assert(index != rsp, "illegal addressing mode");
      emit_int8(0x04 | regenc);
      emit_int8(scale << 6 | indexenc | 0x05);
      emit_data(disp, rspec, disp32_operand);
    } else if (rtype != relocInfo::none ) {
      // [disp] (64bit) RIP-RELATIVE (32bit) abs
      // [00 000 101] disp32

      emit_int8(0x05 | regenc);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was created by converting the target address minus the pc
      // at the start of the instruction. That needs more correction here.
      // intptr_t disp = target - next_ip;
      assert(inst_mark() != NULL, "must be inside InstructionMark");
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
      int64_t adjusted = disp;
      // Do rip-rel adjustment for 64bit
      LP64_ONLY(adjusted -=  (next_ip - inst_mark()));
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int32_t) adjusted, rspec, disp32_operand);

    } else {
      // 32bit never did this, did everything as the rip-rel/disp code above
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_int8(0x04 | regenc);
      emit_int8(0x25);
      emit_data(disp, rspec, disp32_operand);
    }
  }
}
 592 
 593 void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
 594                              Address::ScaleFactor scale, int disp,
 595                              RelocationHolder const& rspec) {
 596   if (UseAVX > 2) {
 597     int xreg_enc = reg->encoding();
 598     if (xreg_enc > 15) {
 599       XMMRegister new_reg = as_XMMRegister(xreg_enc & 0xf);
 600       emit_operand((Register)new_reg, base, index, scale, disp, rspec);
 601       return;
 602     }
 603   }
 604   emit_operand((Register)reg, base, index, scale, disp, rspec);
 605 }
 606 
 607 // Secret local extension to Assembler::WhichOperand:
 608 #define end_pc_operand (_WhichOperand_limit)
 609 
 610 address Assembler::locate_operand(address inst, WhichOperand which) {
 611   // Decode the given instruction, and return the address of
 612   // an embedded 32-bit operand word.
 613 
 614   // If "which" is disp32_operand, selects the displacement portion
 615   // of an effective address specifier.
 616   // If "which" is imm64_operand, selects the trailing immediate constant.
 617   // If "which" is call32_operand, selects the displacement of a call or jump.
 618   // Caller is responsible for ensuring that there is such an operand,
 619   // and that it is 32/64 bits wide.
 620 
 621   // If "which" is end_pc_operand, find the end of the instruction.
 622 
 623   address ip = inst;
 624   bool is_64bit = false;
 625 
 626   debug_only(bool has_disp32 = false);
 627   int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn
 628 
 629   again_after_prefix:
 630   switch (0xFF & *ip++) {
 631 
 632   // These convenience macros generate groups of "case" labels for the switch.
 633 #define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
 634 #define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
 635              case (x)+4: case (x)+5: case (x)+6: case (x)+7
 636 #define REP16(x) REP8((x)+0): \
 637               case REP8((x)+8)
 638 
 639   case CS_segment:
 640   case SS_segment:
 641   case DS_segment:
 642   case ES_segment:
 643   case FS_segment:
 644   case GS_segment:
 645     // Seems dubious
 646     LP64_ONLY(assert(false, "shouldn't have that prefix"));
 647     assert(ip == inst+1, "only one prefix allowed");
 648     goto again_after_prefix;
 649 
 650   case 0x67:
 651   case REX:
 652   case REX_B:
 653   case REX_X:
 654   case REX_XB:
 655   case REX_R:
 656   case REX_RB:
 657   case REX_RX:
 658   case REX_RXB:
 659     NOT_LP64(assert(false, "64bit prefixes"));
 660     goto again_after_prefix;
 661 
 662   case REX_W:
 663   case REX_WB:
 664   case REX_WX:
 665   case REX_WXB:
 666   case REX_WR:
 667   case REX_WRB:
 668   case REX_WRX:
 669   case REX_WRXB:
 670     NOT_LP64(assert(false, "64bit prefixes"));
 671     is_64bit = true;
 672     goto again_after_prefix;
 673 
 674   case 0xFF: // pushq a; decl a; incl a; call a; jmp a
 675   case 0x88: // movb a, r
 676   case 0x89: // movl a, r
 677   case 0x8A: // movb r, a
 678   case 0x8B: // movl r, a
 679   case 0x8F: // popl a
 680     debug_only(has_disp32 = true);
 681     break;
 682 
 683   case 0x68: // pushq #32
 684     if (which == end_pc_operand) {
 685       return ip + 4;
 686     }
 687     assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
 688     return ip;                  // not produced by emit_operand
 689 
 690   case 0x66: // movw ... (size prefix)
 691     again_after_size_prefix2:
 692     switch (0xFF & *ip++) {
 693     case REX:
 694     case REX_B:
 695     case REX_X:
 696     case REX_XB:
 697     case REX_R:
 698     case REX_RB:
 699     case REX_RX:
 700     case REX_RXB:
 701     case REX_W:
 702     case REX_WB:
 703     case REX_WX:
 704     case REX_WXB:
 705     case REX_WR:
 706     case REX_WRB:
 707     case REX_WRX:
 708     case REX_WRXB:
 709       NOT_LP64(assert(false, "64bit prefix found"));
 710       goto again_after_size_prefix2;
 711     case 0x8B: // movw r, a
 712     case 0x89: // movw a, r
 713       debug_only(has_disp32 = true);
 714       break;
 715     case 0xC7: // movw a, #16
 716       debug_only(has_disp32 = true);
 717       tail_size = 2;  // the imm16
 718       break;
 719     case 0x0F: // several SSE/SSE2 variants
 720       ip--;    // reparse the 0x0F
 721       goto again_after_prefix;
 722     default:
 723       ShouldNotReachHere();
 724     }
 725     break;
 726 
 727   case REP8(0xB8): // movl/q r, #32/#64(oop?)
 728     if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
 729     // these asserts are somewhat nonsensical
 730 #ifndef _LP64
 731     assert(which == imm_operand || which == disp32_operand,
 732            "which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip));
 733 #else
 734     assert((which == call32_operand || which == imm_operand) && is_64bit ||
 735            which == narrow_oop_operand && !is_64bit,
 736            "which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip));
 737 #endif // _LP64
 738     return ip;
 739 
 740   case 0x69: // imul r, a, #32
 741   case 0xC7: // movl a, #32(oop?)
 742     tail_size = 4;
 743     debug_only(has_disp32 = true); // has both kinds of operands!
 744     break;
 745 
 746   case 0x0F: // movx..., etc.
 747     switch (0xFF & *ip++) {
 748     case 0x3A: // pcmpestri
 749       tail_size = 1;
 750     case 0x38: // ptest, pmovzxbw
 751       ip++; // skip opcode
 752       debug_only(has_disp32 = true); // has both kinds of operands!
 753       break;
 754 
 755     case 0x70: // pshufd r, r/a, #8
 756       debug_only(has_disp32 = true); // has both kinds of operands!
 757     case 0x73: // psrldq r, #8
 758       tail_size = 1;
 759       break;
 760 
 761     case 0x12: // movlps
 762     case 0x28: // movaps
 763     case 0x2E: // ucomiss
 764     case 0x2F: // comiss
 765     case 0x54: // andps
 766     case 0x55: // andnps
 767     case 0x56: // orps
 768     case 0x57: // xorps
 769     case 0x58: // addpd
 770     case 0x59: // mulpd
 771     case 0x6E: // movd
 772     case 0x7E: // movd
 773     case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
 774     case 0xFE: // paddd
 775       debug_only(has_disp32 = true);
 776       break;
 777 
 778     case 0xAD: // shrd r, a, %cl
 779     case 0xAF: // imul r, a
 780     case 0xBE: // movsbl r, a (movsxb)
 781     case 0xBF: // movswl r, a (movsxw)
 782     case 0xB6: // movzbl r, a (movzxb)
 783     case 0xB7: // movzwl r, a (movzxw)
 784     case REP16(0x40): // cmovl cc, r, a
 785     case 0xB0: // cmpxchgb
 786     case 0xB1: // cmpxchg
 787     case 0xC1: // xaddl
 788     case 0xC7: // cmpxchg8
 789     case REP16(0x90): // setcc a
 790       debug_only(has_disp32 = true);
 791       // fall out of the switch to decode the address
 792       break;
 793 
 794     case 0xC4: // pinsrw r, a, #8
 795       debug_only(has_disp32 = true);
 796     case 0xC5: // pextrw r, r, #8
 797       tail_size = 1;  // the imm8
 798       break;
 799 
 800     case 0xAC: // shrd r, a, #8
 801       debug_only(has_disp32 = true);
 802       tail_size = 1;  // the imm8
 803       break;
 804 
 805     case REP16(0x80): // jcc rdisp32
 806       if (which == end_pc_operand)  return ip + 4;
 807       assert(which == call32_operand, "jcc has no disp32 or imm");
 808       return ip;
 809     default:
 810       ShouldNotReachHere();
 811     }
 812     break;
 813 
 814   case 0x81: // addl a, #32; addl r, #32
 815     // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
 816     // on 32bit in the case of cmpl, the imm might be an oop
 817     tail_size = 4;
 818     debug_only(has_disp32 = true); // has both kinds of operands!
 819     break;
 820 
 821   case 0x83: // addl a, #8; addl r, #8
 822     // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
 823     debug_only(has_disp32 = true); // has both kinds of operands!
 824     tail_size = 1;
 825     break;
 826 
 827   case 0x9B:
 828     switch (0xFF & *ip++) {
 829     case 0xD9: // fnstcw a
 830       debug_only(has_disp32 = true);
 831       break;
 832     default:
 833       ShouldNotReachHere();
 834     }
 835     break;
 836 
 837   case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
 838   case REP4(0x10): // adc...
 839   case REP4(0x20): // and...
 840   case REP4(0x30): // xor...
 841   case REP4(0x08): // or...
 842   case REP4(0x18): // sbb...
 843   case REP4(0x28): // sub...
 844   case 0xF7: // mull a
 845   case 0x8D: // lea r, a
 846   case 0x87: // xchg r, a
 847   case REP4(0x38): // cmp...
 848   case 0x85: // test r, a
 849     debug_only(has_disp32 = true); // has both kinds of operands!
 850     break;
 851 
 852   case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
 853   case 0xC6: // movb a, #8
 854   case 0x80: // cmpb a, #8
 855   case 0x6B: // imul r, a, #8
 856     debug_only(has_disp32 = true); // has both kinds of operands!
 857     tail_size = 1; // the imm8
 858     break;
 859 
 860   case 0xC4: // VEX_3bytes
 861   case 0xC5: // VEX_2bytes
 862     assert((UseAVX > 0), "shouldn't have VEX prefix");
 863     assert(ip == inst+1, "no prefixes allowed");
 864     // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions
 865     // but they have prefix 0x0F and processed when 0x0F processed above.
 866     //
 867     // In 32-bit mode the VEX first byte C4 and C5 alias onto LDS and LES
 868     // instructions (these instructions are not supported in 64-bit mode).
 869     // To distinguish them bits [7:6] are set in the VEX second byte since
 870     // ModRM byte can not be of the form 11xxxxxx in 32-bit mode. To set
 871     // those VEX bits REX and vvvv bits are inverted.
 872     //
 873     // Fortunately C2 doesn't generate these instructions so we don't need
 874     // to check for them in product version.
 875 
 876     // Check second byte
 877     NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));
 878 
 879     int vex_opcode;
 880     // First byte
 881     if ((0xFF & *inst) == VEX_3bytes) {
 882       vex_opcode = VEX_OPCODE_MASK & *ip;
 883       ip++; // third byte
 884       is_64bit = ((VEX_W & *ip) == VEX_W);
 885     } else {
 886       vex_opcode = VEX_OPCODE_0F;
 887     }
 888     ip++; // opcode
 889     // To find the end of instruction (which == end_pc_operand).
 890     switch (vex_opcode) {
 891       case VEX_OPCODE_0F:
 892         switch (0xFF & *ip) {
 893         case 0x70: // pshufd r, r/a, #8
 894         case 0x71: // ps[rl|ra|ll]w r, #8
 895         case 0x72: // ps[rl|ra|ll]d r, #8
 896         case 0x73: // ps[rl|ra|ll]q r, #8
 897         case 0xC2: // cmp[ps|pd|ss|sd] r, r, r/a, #8
 898         case 0xC4: // pinsrw r, r, r/a, #8
 899         case 0xC5: // pextrw r/a, r, #8
 900         case 0xC6: // shufp[s|d] r, r, r/a, #8
 901           tail_size = 1;  // the imm8
 902           break;
 903         }
 904         break;
 905       case VEX_OPCODE_0F_3A:
 906         tail_size = 1;
 907         break;
 908     }
 909     ip++; // skip opcode
 910     debug_only(has_disp32 = true); // has both kinds of operands!
 911     break;
 912 
 913   case 0x62: // EVEX_4bytes
 914     assert(VM_Version::supports_evex(), "shouldn't have EVEX prefix");
 915     assert(ip == inst+1, "no prefixes allowed");
 916     // no EVEX collisions, all instructions that have 0x62 opcodes
 917     // have EVEX versions and are subopcodes of 0x66
 918     ip++; // skip P0 and exmaine W in P1
 919     is_64bit = ((VEX_W & *ip) == VEX_W);
 920     ip++; // move to P2
 921     ip++; // skip P2, move to opcode
 922     // To find the end of instruction (which == end_pc_operand).
 923     switch (0xFF & *ip) {
 924     case 0x22: // pinsrd r, r/a, #8
 925     case 0x61: // pcmpestri r, r/a, #8
 926     case 0x70: // pshufd r, r/a, #8
 927     case 0x73: // psrldq r, #8
 928       tail_size = 1;  // the imm8
 929       break;
 930     default:
 931       break;
 932     }
 933     ip++; // skip opcode
 934     debug_only(has_disp32 = true); // has both kinds of operands!
 935     break;
 936 
 937   case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
 938   case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
 939   case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
 940   case 0xDD: // fld_d a; fst_d a; fstp_d a
 941   case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
 942   case 0xDF: // fild_d a; fistp_d a
 943   case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
 944   case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
 945   case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
 946     debug_only(has_disp32 = true);
 947     break;
 948 
 949   case 0xE8: // call rdisp32
 950   case 0xE9: // jmp  rdisp32
 951     if (which == end_pc_operand)  return ip + 4;
 952     assert(which == call32_operand, "call has no disp32 or imm");
 953     return ip;
 954 
 955   case 0xF0:                    // Lock
 956     assert(os::is_MP(), "only on MP");
 957     goto again_after_prefix;
 958 
 959   case 0xF3:                    // For SSE
 960   case 0xF2:                    // For SSE2
 961     switch (0xFF & *ip++) {
 962     case REX:
 963     case REX_B:
 964     case REX_X:
 965     case REX_XB:
 966     case REX_R:
 967     case REX_RB:
 968     case REX_RX:
 969     case REX_RXB:
 970     case REX_W:
 971     case REX_WB:
 972     case REX_WX:
 973     case REX_WXB:
 974     case REX_WR:
 975     case REX_WRB:
 976     case REX_WRX:
 977     case REX_WRXB:
 978       NOT_LP64(assert(false, "found 64bit prefix"));
 979       ip++;
 980     default:
 981       ip++;
 982     }
 983     debug_only(has_disp32 = true); // has both kinds of operands!
 984     break;
 985 
 986   default:
 987     ShouldNotReachHere();
 988 
 989 #undef REP8
 990 #undef REP16
 991   }
 992 
 993   assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
 994 #ifdef _LP64
 995   assert(which != imm_operand, "instruction is not a movq reg, imm64");
 996 #else
 997   // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
 998   assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
 999 #endif // LP64
1000   assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");
1001 
1002   // parse the output of emit_operand
1003   int op2 = 0xFF & *ip++;
1004   int base = op2 & 0x07;
1005   int op3 = -1;
1006   const int b100 = 4;
1007   const int b101 = 5;
1008   if (base == b100 && (op2 >> 6) != 3) {
1009     op3 = 0xFF & *ip++;
1010     base = op3 & 0x07;   // refetch the base
1011   }
1012   // now ip points at the disp (if any)
1013 
1014   switch (op2 >> 6) {
1015   case 0:
1016     // [00 reg  100][ss index base]
1017     // [00 reg  100][00   100  esp]
1018     // [00 reg base]
1019     // [00 reg  100][ss index  101][disp32]
1020     // [00 reg  101]               [disp32]
1021 
1022     if (base == b101) {
1023       if (which == disp32_operand)
1024         return ip;              // caller wants the disp32
1025       ip += 4;                  // skip the disp32
1026     }
1027     break;
1028 
1029   case 1:
1030     // [01 reg  100][ss index base][disp8]
1031     // [01 reg  100][00   100  esp][disp8]
1032     // [01 reg base]               [disp8]
1033     ip += 1;                    // skip the disp8
1034     break;
1035 
1036   case 2:
1037     // [10 reg  100][ss index base][disp32]
1038     // [10 reg  100][00   100  esp][disp32]
1039     // [10 reg base]               [disp32]
1040     if (which == disp32_operand)
1041       return ip;                // caller wants the disp32
1042     ip += 4;                    // skip the disp32
1043     break;
1044 
1045   case 3:
1046     // [11 reg base]  (not a memory addressing mode)
1047     break;
1048   }
1049 
1050   if (which == end_pc_operand) {
1051     return ip + tail_size;
1052   }
1053 
1054 #ifdef _LP64
1055   assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
1056 #else
1057   assert(which == imm_operand, "instruction has only an imm field");
1058 #endif // LP64
1059   return ip;
1060 }
1061 
// Return the address of the first byte after the instruction starting at
// 'inst'; the instruction's length is the difference between the two.
address Assembler::locate_next_instruction(address inst) {
  // Secretly share code with locate_operand:
  return locate_operand(inst, end_pc_operand);
}
1066 
1067 
1068 #ifdef ASSERT
1069 void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
1070   address inst = inst_mark();
1071   assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
1072   address opnd;
1073 
1074   Relocation* r = rspec.reloc();
1075   if (r->type() == relocInfo::none) {
1076     return;
1077   } else if (r->is_call() || format == call32_operand) {
1078     // assert(format == imm32_operand, "cannot specify a nonzero format");
1079     opnd = locate_operand(inst, call32_operand);
1080   } else if (r->is_data()) {
1081     assert(format == imm_operand || format == disp32_operand
1082            LP64_ONLY(|| format == narrow_oop_operand), "format ok");
1083     opnd = locate_operand(inst, (WhichOperand)format);
1084   } else {
1085     assert(format == imm_operand, "cannot specify a format");
1086     return;
1087   }
1088   assert(opnd == pc(), "must put operand where relocs can find it");
1089 }
1090 #endif // ASSERT
1091 
// Emit ModRM/SIB/disp bytes for 'adr' with 'reg' in the ModRM reg field.
// Restricted to the low eight registers: no REX prefix is emitted here, so
// extended (r8-r15) base/index/reg encodings are rejected by assert.
void Assembler::emit_operand32(Register reg, Address adr) {
  assert(reg->encoding() < 8, "no extended registers");
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}
1098 
// Emit ModRM/SIB/disp bytes for 'adr' with 'reg' in the ModRM reg field.
// rip_relative_correction: number of instruction bytes still to follow the
// displacement (e.g. a trailing immediate) — callers pass the immediate size
// so a rip-relative disp32 can be adjusted to the true end of instruction
// (see addb/addw/andl call sites passing 1, 2 and 4).
void Assembler::emit_operand(Register reg, Address adr,
                             int rip_relative_correction) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec,
               rip_relative_correction);
}
1105 
// Emit ModRM/SIB/disp bytes for 'adr' with XMM register 'reg' in the reg field.
void Assembler::emit_operand(XMMRegister reg, Address adr) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}
1110 
1111 // MMX operations
// Emit ModRM/SIB/disp bytes for an MMX register operand. MMX registers never
// need REX, so extended base/index registers are rejected; the register is
// funneled through the Register overload via a cast of its encoding.
void Assembler::emit_operand(MMXRegister reg, Address adr) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}
1116 
1117 // work around gcc (3.2.1-7a) bug
// Same as emit_operand(MMXRegister, Address) but with swapped parameter
// order — exists only to work around a gcc (3.2.1-7a) overload bug (see
// the comment above in the original file).
void Assembler::emit_operand(Address adr, MMXRegister reg) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}
1122 
1123 
// Emit a two-byte x87 arithmetic instruction: opcode byte b1, then b2 with
// the FPU stack index i (0..7) added into its low three bits.
void Assembler::emit_farith(int b1, int b2, int i) {
  assert(isByte(b1) && isByte(b2), "wrong opcode");
  assert(0 <= i &&  i < 8, "illegal stack offset");
  emit_int8(b1);
  emit_int8(b2 + i);
}
1130 
1131 
1132 // Now the Assembler instructions (identical for 32/64 bits)
1133 
// adc r/m32, imm32. Group-1 immediate opcode 0x81; rdx (encoding 2) supplies
// the /2 = ADC opcode extension in the ModRM reg field. emit_arith_operand
// chooses the final encoding (presumably the sign-extended 0x83 short form
// for 8-bit immediates — confirm in emit_arith_operand).
void Assembler::adcl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rdx, dst, imm32);
}
1139 
// adc r/m32, r32 (opcode 0x11 /r).
void Assembler::adcl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x11);
  emit_operand(src, dst);
}
1146 
// adc r32, imm32. 0xD0 = 0xC0 | (/2 << 3): register form of the ADC
// opcode extension for group-1 opcode 0x81.
void Assembler::adcl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD0, dst, imm32);
}
1151 
// adc r32, r/m32 (opcode 0x13 /r).
void Assembler::adcl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x13);
  emit_operand(dst, src);
}
1158 
// adc r32, r32 (opcode 0x13, register-direct form).
void Assembler::adcl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x13, 0xC0, dst, src);
}
1163 
// add r/m32, imm32. Group-1 opcode 0x81 with rax (encoding 0) supplying the
// /0 = ADD opcode extension; final encoding chosen by emit_arith_operand.
void Assembler::addl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rax, dst, imm32);
}
1169 
// add r/m8, imm8 (opcode 0x80 /0). The '1' passed to emit_operand is the
// size of the trailing imm8, for rip-relative displacement correction.
void Assembler::addb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x80);
  emit_operand(rax, dst, 1);
  emit_int8(imm8);
}
1177 
// add r/m16, imm16: 0x66 operand-size prefix (emitted before any REX from
// prefix()), then group-1 opcode 0x81 /0 and the trailing imm16 (size 2
// passed to emit_operand for rip-relative correction).
void Assembler::addw(Address dst, int imm16) {
  InstructionMark im(this);
  emit_int8(0x66);
  prefix(dst);
  emit_int8((unsigned char)0x81);
  emit_operand(rax, dst, 2);
  emit_int16(imm16);
}
1186 
// add r/m32, r32 (opcode 0x01 /r).
void Assembler::addl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x01);
  emit_operand(src, dst);
}
1193 
// add r32, imm32. 0xC0 = register form of the /0 = ADD extension.
void Assembler::addl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC0, dst, imm32);
}
1198 
// add r32, r/m32 (opcode 0x03 /r).
void Assembler::addl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x03);
  emit_operand(dst, src);
}
1205 
// add r32, r32 (opcode 0x03, register-direct form).
void Assembler::addl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}
1210 
// Emit a 4-byte multi-byte NOP (0F 1F /0 with a disp8 addressing form),
// used as alignment padding on CPUs that support it.
void Assembler::addr_nop_4() {
  assert(UseAddressNop, "no CPU support");
  // 4 bytes: NOP DWORD PTR [EAX+0]
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
  emit_int8(0);    // 8-bits offset (1 byte)
}
1219 
// Emit a 5-byte multi-byte NOP (0F 1F /0 with SIB byte and disp8).
void Assembler::addr_nop_5() {
  assert(UseAddressNop, "no CPU support");
  // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_int8(0);    // 8-bits offset (1 byte)
}
1229 
// Emit a 7-byte multi-byte NOP (0F 1F /0 with a disp32 addressing form).
void Assembler::addr_nop_7() {
  assert(UseAddressNop, "no CPU support");
  // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8((unsigned char)0x80);
                   // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
  emit_int32(0);   // 32-bits offset (4 bytes)
}
1239 
// Emit an 8-byte multi-byte NOP (0F 1F /0 with SIB byte and disp32).
void Assembler::addr_nop_8() {
  assert(UseAddressNop, "no CPU support");
  // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8((unsigned char)0x84);
                   // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_int32(0);   // 32-bits offset (4 bytes)
}
1250 
// addsd xmm, xmm — scalar double add (F2 0F 58, register-direct form).
void Assembler::addsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_int8((unsigned char)(0xC0 | encode));
}
1259 
// addsd xmm, m64 — scalar double add from memory (F2 0F 58).
void Assembler::addsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_operand(dst, src);
}
1270 
// addss xmm, xmm — scalar float add (F3 0F 58, register-direct form).
void Assembler::addss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_int8((unsigned char)(0xC0 | encode));
}
1278 
// addss xmm, m32 — scalar float add from memory (F3 0F 58).
void Assembler::addss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_operand(dst, src);
}
1288 
// aesdec xmm, m128 — one AES decryption round (66 0F 38 DE).
void Assembler::aesdec(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDE);
  emit_operand(dst, src);
}
1297 
1298 void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
1299   assert(VM_Version::supports_aes(), "");
1300   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1301   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1302   emit_int8((unsigned char)0xDE);
1303   emit_int8(0xC0 | encode);
1304 }
1305 
// aesdeclast xmm, m128 — final AES decryption round (66 0F 38 DF).
void Assembler::aesdeclast(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDF);
  emit_operand(dst, src);
}
1314 
// aesdeclast xmm, xmm — final AES decryption round (66 0F 38 DF, reg form).
void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDF);
  emit_int8((unsigned char)(0xC0 | encode));
}
1322 
// aesenc xmm, m128 — one AES encryption round (66 0F 38 DC).
void Assembler::aesenc(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDC);
  emit_operand(dst, src);
}
1331 
1332 void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
1333   assert(VM_Version::supports_aes(), "");
1334   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
1335   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
1336   emit_int8((unsigned char)0xDC);
1337   emit_int8(0xC0 | encode);
1338 }
1339 
// aesenclast xmm, m128 — final AES encryption round (66 0F 38 DD).
void Assembler::aesenclast(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDD);
  emit_operand(dst, src);
}
1348 
// aesenclast xmm, xmm — final AES encryption round (66 0F 38 DD, reg form).
void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDD);
  emit_int8((unsigned char)(0xC0 | encode));
}
1356 
1357 void Assembler::andl(Address dst, int32_t imm32) {
1358   InstructionMark im(this);
1359   prefix(dst);
1360   emit_int8((unsigned char)0x81);
1361   emit_operand(rsp, dst, 4);
1362   emit_int32(imm32);
1363 }
1364 
// and r32, imm32. 0xE0 = 0xC0 | (/4 << 3): register form of the AND
// extension for group-1 opcode 0x81.
void Assembler::andl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE0, dst, imm32);
}
1369 
// and r32, r/m32 (opcode 0x23 /r).
void Assembler::andl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x23);
  emit_operand(dst, src);
}
1376 
// and r32, r32 (opcode 0x23, register-direct form).
void Assembler::andl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x23, 0xC0, dst, src);
}
1381 
// andn r32, r32, r32 — BMI1: dst = ~src1 & src2 (VEX.0F38 F2 /r).
void Assembler::andnl(Register dst, Register src1, Register src2) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF2);
  emit_int8((unsigned char)(0xC0 | encode));
}
1389 
// andn r32, r32, m32 — BMI1 memory form (VEX.0F38 F2 /r).
void Assembler::andnl(Register dst, Register src1, Address src2) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF2);
  emit_operand(dst, src2);
}
1398 
// bsf r32, r32 — bit scan forward (0F BC /r).
void Assembler::bsfl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBC);
  emit_int8((unsigned char)(0xC0 | encode));
}
1405 
// bsr r32, r32 — bit scan reverse (0F BD /r).
void Assembler::bsrl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBD);
  emit_int8((unsigned char)(0xC0 | encode));
}
1412 
// bswap r32 — byte-swap a 32-bit register (0F C8+rd).
void Assembler::bswapl(Register reg) { // bswap
  int encode = prefix_and_encode(reg->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)(0xC8 | encode));
}
1418 
// blsi r32, r32 — BMI1 extract lowest set bit (VEX.0F38 F3; rbx/encoding 3
// supplies the /3 = BLSI opcode extension).
void Assembler::blsil(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}
1426 
// blsi r32, m32 — BMI1 memory form (VEX.0F38 F3 /3 via rbx).
void Assembler::blsil(Register dst, Address src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_operand(rbx, src);
}
1435 
// blsmsk r32, r32 — BMI1 mask up to lowest set bit (VEX.0F38 F3; rdx/
// encoding 2 supplies the /2 = BLSMSK opcode extension).
void Assembler::blsmskl(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}
1443 
// blsmsk r32, m32 — BMI1 memory form (VEX.0F38 F3 /2 via rdx).
void Assembler::blsmskl(Register dst, Address src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_operand(rdx, src);
}
1452 
// blsr r32, r32 — BMI1 reset lowest set bit (VEX.0F38 F3; rcx/encoding 1
// supplies the /1 = BLSR opcode extension).
void Assembler::blsrl(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}
1460 
// blsr r32, m32 — BMI1 memory form (VEX.0F38 F3 /1 via rcx).
void Assembler::blsrl(Register dst, Address src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_operand(rcx, src);
}
1469 
// call rel32 to a label (opcode 0xE8). If the label is already bound the
// (backward) displacement is computed directly; otherwise a zero disp32 is
// emitted and a patch entry is registered so the branch is fixed up when
// the label binds.
void Assembler::call(Label& L, relocInfo::relocType rtype) {
  // suspect disp32 is always good
  int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);

  if (L.is_bound()) {
    const int long_size = 5;  // total length of E8 + disp32
    int offs = (int)( target(L) - pc() );
    assert(offs <= 0, "assembler error");  // bound target must be behind us
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    emit_int8((unsigned char)0xE8);
    // disp is relative to the end of the 5-byte instruction
    emit_data(offs - long_size, rtype, operand);
  } else {
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    L.add_patch_at(code(), locator());

    emit_int8((unsigned char)0xE8);
    emit_data(int(0), rtype, operand);  // placeholder, patched at bind time
  }
}
1491 
// call r32/r64 — indirect call through a register (FF /2; 0xD0 = 0xC0|/2<<3).
void Assembler::call(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xD0 | encode));
}
1497 
1498 
// call m32/m64 — indirect call through memory (FF /2; rdx encoding 2
// supplies the /2 opcode extension in the ModRM reg field).
void Assembler::call(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_int8((unsigned char)0xFF);
  emit_operand(rdx, adr);
}
1505 
// call rel32 to an absolute address (opcode 0xE8) with relocation info.
// The 32-bit displacement is relative to the end of the instruction.
void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xE8);
  intptr_t disp = entry - (pc() + sizeof(int32_t));
  // Entry is NULL in case of a scratch emit.
  assert(entry == NULL || is_simm32(disp), "disp=" INTPTR_FORMAT " must be 32bit offset (call2)", disp);
  // Technically, should use call32_operand, but this format is
  // implied by the fact that we're emitting a call instruction.

  int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
  emit_data((int) disp, rspec, operand);
}
1518 
// cdq — sign-extend eax into edx:eax (opcode 0x99).
void Assembler::cdql() {
  emit_int8((unsigned char)0x99);
}
1522 
// cld — clear the direction flag (opcode 0xFC).
void Assembler::cld() {
  emit_int8((unsigned char)0xFC);
}
1526 
// cmovcc r32, r32 — conditional move (0F 40+cc, register-direct form).
void Assembler::cmovl(Condition cc, Register dst, Register src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8(0x40 | cc);
  emit_int8((unsigned char)(0xC0 | encode));
}
1534 
1535 
1536 void Assembler::cmovl(Condition cc, Register dst, Address src) {
1537   NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
1538   prefix(src, dst);
1539   emit_int8(0x0F);
1540   emit_int8(0x40 | cc);
1541   emit_operand(dst, src);
1542 }
1543 
// cmp r/m8, imm8 (opcode 0x80; rdi/encoding 7 supplies the /7 = CMP
// extension). The '1' passed to emit_operand is the trailing imm8 size.
void Assembler::cmpb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x80);
  emit_operand(rdi, dst, 1);
  emit_int8(imm8);
}
1551 
1552 void Assembler::cmpl(Address dst, int32_t imm32) {
1553   InstructionMark im(this);
1554   prefix(dst);
1555   emit_int8((unsigned char)0x81);
1556   emit_operand(rdi, dst, 4);
1557   emit_int32(imm32);
1558 }
1559 
// cmp r32, imm32. 0xF8 = 0xC0 | (/7 << 3): register form of the CMP
// extension for group-1 opcode 0x81.
void Assembler::cmpl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF8, dst, imm32);
}
1564 
// cmp r32, r32 (opcode 0x3B, register-direct form).
void Assembler::cmpl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}
1569 
// cmp r32, r/m32 (opcode 0x3B /r).
void Assembler::cmpl(Register dst, Address  src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8((unsigned char)0x3B);
  emit_operand(dst, src);
}
1576 
// cmp r/m16, imm16: 0x66 operand-size prefix then group-1 opcode 0x81 /7.
// No REX prefix is emitted, so extended base/index registers are rejected
// by assert; the '2' passed to emit_operand is the trailing imm16 size.
void Assembler::cmpw(Address dst, int imm16) {
  InstructionMark im(this);
  assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
  emit_int8(0x66);
  emit_int8((unsigned char)0x81);
  emit_operand(rdi, dst, 2);
  emit_int16(imm16);
}
1585 
1586 // The 32-bit cmpxchg compares the value at adr with the contents of rax,
1587 // and stores reg into adr if so; otherwise, the value at adr is loaded into rax,.
1588 // The ZF is set if the compared values were equal, and cleared otherwise.
// cmpxchg r/m32, r32 (0F B1 /r) — see the contract comment above:
// compares [adr] with rax and stores reg on match, setting ZF accordingly.
void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
  InstructionMark im(this);
  prefix(adr, reg);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xB1);
  emit_operand(reg, adr);
}
1596 
1597 // The 8-bit cmpxchg compares the value at adr with the contents of rax,
1598 // and stores reg into adr if so; otherwise, the value at adr is loaded into rax,.
1599 // The ZF is set if the compared values were equal, and cleared otherwise.
// cmpxchg r/m8, r8 (0F B0 /r). The 'true' passed to prefix() marks a
// byte-register instruction (affects REX emission for byte registers).
void Assembler::cmpxchgb(Register reg, Address adr) { // cmpxchg
  InstructionMark im(this);
  prefix(adr, reg, true);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xB0);
  emit_operand(reg, adr);
}
1607 
1608 void Assembler::comisd(XMMRegister dst, Address src) {
1609   // NOTE: dbx seems to decode this as comiss even though the
1610   // 0x66 is there. Strangly ucomisd comes out correct
1611   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1612   InstructionMark im(this);
1613   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);;
1614   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1615   attributes.set_rex_vex_w_reverted();
1616   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
1617   emit_int8(0x2F);
1618   emit_operand(dst, src);
1619 }
1620 
// comisd xmm, xmm — ordered scalar double compare setting EFLAGS (66 0F 2F).
void Assembler::comisd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x2F);
  emit_int8((unsigned char)(0xC0 | encode));
}
1629 
// comiss xmm, m32 — ordered scalar float compare setting EFLAGS (0F 2F).
void Assembler::comiss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x2F);
  emit_operand(dst, src);
}
1639 
// comiss xmm, xmm — ordered scalar float compare setting EFLAGS (0F 2F).
void Assembler::comiss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x2F);
  emit_int8((unsigned char)(0xC0 | encode));
}
1647 
// cpuid — CPU identification (0F A2).
void Assembler::cpuid() {
  emit_int8(0x0F);
  emit_int8((unsigned char)0xA2);
}
1652 
1653 // Opcode / Instruction                      Op /  En  64 - Bit Mode     Compat / Leg Mode Description                  Implemented
1654 // F2 0F 38 F0 / r       CRC32 r32, r / m8   RM        Valid             Valid             Accumulate CRC32 on r / m8.  v
1655 // F2 REX 0F 38 F0 / r   CRC32 r32, r / m8*  RM        Valid             N.E.              Accumulate CRC32 on r / m8.  -
1656 // F2 REX.W 0F 38 F0 / r CRC32 r64, r / m8   RM        Valid             N.E.              Accumulate CRC32 on r / m8.  -
1657 //
1658 // F2 0F 38 F1 / r       CRC32 r32, r / m16  RM        Valid             Valid             Accumulate CRC32 on r / m16. v
1659 //
1660 // F2 0F 38 F1 / r       CRC32 r32, r / m32  RM        Valid             Valid             Accumulate CRC32 on r / m32. v
1661 //
1662 // F2 REX.W 0F 38 F1 / r CRC32 r64, r / m64  RM        Valid             N.E.              Accumulate CRC32 on r / m64. v
// Accumulate CRC32 of a register operand into 'crc'.
// Emits F2 [REX.W] 0F 38 F0/F1 /r depending on operand width:
//   sizeInBytes == 1       -> opcode F0 (byte form, w bit clear)
//   sizeInBytes == 2 or 4  -> opcode F1 (word/dword form)
//   sizeInBytes == 8       -> opcode F1 with REX.W (64-bit only)
void Assembler::crc32(Register crc, Register v, int8_t sizeInBytes) {
  assert(VM_Version::supports_sse4_2(), "");
  int8_t w = 0x01;          // w selects F0 (byte) vs F1 (word/dword/qword)
  Prefix p = Prefix_EMPTY;

  // F2 is the mandatory prefix and must precede any REX prefix.
  emit_int8((int8_t)0xF2);
  switch (sizeInBytes) {
  case 1:
    w = 0;
    break;
  case 2:
  case 4:
    break;
  LP64_ONLY(case 8:)
    // This instruction is not valid in 32 bits
    // Note:
    // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
    //
    // Page B - 72   Vol. 2C says
    // qwreg2 to qwreg            1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : 11 qwreg1 qwreg2
    // mem64 to qwreg             1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : mod qwreg r / m
    //                                                                            F0!!!
    // while 3 - 208 Vol. 2A
    // F2 REX.W 0F 38 F1 / r       CRC32 r64, r / m64             RM         Valid      N.E.Accumulate CRC32 on r / m64.
    //
    // the 0 on a last bit is reserved for a different flavor of this instruction :
    // F2 REX.W 0F 38 F0 / r       CRC32 r64, r / m8              RM         Valid      N.E.Accumulate CRC32 on r / m8.
    p = REX_W;
    break;
  default:
    assert(0, "Unsupported value for a sizeInBytes argument");
    break;
  }
  // REX prefix (possibly REX.W) is only needed/emitted on 64-bit.
  LP64_ONLY(prefix(crc, v, p);)
  emit_int8((int8_t)0x0F);
  emit_int8(0x38);
  emit_int8((int8_t)(0xF0 | w));
  // ModRM: mod=11 (register-direct), reg=crc, rm=v.
  emit_int8(0xC0 | ((crc->encoding() & 0x7) << 3) | (v->encoding() & 7));
}
1702 
// Accumulate CRC32 of a memory operand into 'crc'.
// Same encoding scheme as the register form above:
// F2 [REX.W] 0F 38 F0/F1, followed by a memory ModRM/SIB/disp.
void Assembler::crc32(Register crc, Address adr, int8_t sizeInBytes) {
  assert(VM_Version::supports_sse4_2(), "");
  InstructionMark im(this);
  int8_t w = 0x01;          // w selects F0 (byte) vs F1 (word/dword/qword)
  Prefix p = Prefix_EMPTY;

  // Mandatory F2 prefix; must come before the REX prefix.
  emit_int8((int8_t)0xF2);
  switch (sizeInBytes) {
  case 1:
    w = 0;
    break;
  case 2:
  case 4:
    break;
  LP64_ONLY(case 8:)
    // This instruction is not valid in 32 bits
    p = REX_W;
    break;
  default:
    assert(0, "Unsupported value for a sizeInBytes argument");
    break;
  }
  LP64_ONLY(prefix(crc, adr, p);)
  emit_int8((int8_t)0x0F);
  emit_int8(0x38);
  emit_int8((int8_t)(0xF0 | w));
  emit_operand(crc, adr);
}
1731 
1732 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
1733   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1734   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
1735   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1736   emit_int8((unsigned char)0xE6);
1737   emit_int8((unsigned char)(0xC0 | encode));
1738 }
1739 
1740 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
1741   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1742   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
1743   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1744   emit_int8(0x5B);
1745   emit_int8((unsigned char)(0xC0 | encode));
1746 }
1747 
1748 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
1749   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1750   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1751   attributes.set_rex_vex_w_reverted();
1752   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1753   emit_int8(0x5A);
1754   emit_int8((unsigned char)(0xC0 | encode));
1755 }
1756 
1757 void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
1758   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1759   InstructionMark im(this);
1760   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1761   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1762   attributes.set_rex_vex_w_reverted();
1763   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1764   emit_int8(0x5A);
1765   emit_operand(dst, src);
1766 }
1767 
1768 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
1769   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1770   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1771   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1772   emit_int8(0x2A);
1773   emit_int8((unsigned char)(0xC0 | encode));
1774 }
1775 
1776 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
1777   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1778   InstructionMark im(this);
1779   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1780   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1781   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1782   emit_int8(0x2A);
1783   emit_operand(dst, src);
1784 }
1785 
1786 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
1787   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1788   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1789   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1790   emit_int8(0x2A);
1791   emit_int8((unsigned char)(0xC0 | encode));
1792 }
1793 
1794 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
1795   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1796   InstructionMark im(this);
1797   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1798   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1799   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1800   emit_int8(0x2A);
1801   emit_operand(dst, src);
1802 }
1803 
1804 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
1805   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1806   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1807   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1808   emit_int8(0x2A);
1809   emit_int8((unsigned char)(0xC0 | encode));
1810 }
1811 
1812 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
1813   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1814   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1815   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1816   emit_int8(0x5A);
1817   emit_int8((unsigned char)(0xC0 | encode));
1818 }
1819 
1820 void Assembler::cvtss2sd(XMMRegister dst, Address src) {
1821   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1822   InstructionMark im(this);
1823   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1824   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1825   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1826   emit_int8(0x5A);
1827   emit_operand(dst, src);
1828 }
1829 
1830 
1831 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
1832   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1833   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1834   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1835   emit_int8(0x2C);
1836   emit_int8((unsigned char)(0xC0 | encode));
1837 }
1838 
1839 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
1840   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1841   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1842   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1843   emit_int8(0x2C);
1844   emit_int8((unsigned char)(0xC0 | encode));
1845 }
1846 
1847 void Assembler::cvttpd2dq(XMMRegister dst, XMMRegister src) {
1848   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1849   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
1850   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
1851   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
1852   emit_int8((unsigned char)0xE6);
1853   emit_int8((unsigned char)(0xC0 | encode));
1854 }
1855 
// Decrement a 32-bit memory operand: FF /1.
void Assembler::decl(Address dst) {
  // Don't use it directly. Use MacroAssembler::decrement() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0xFF);
  // rcx (encoding 1) supplies the /1 opcode extension in the ModRM reg field.
  emit_operand(rcx, dst);
}
1863 
1864 void Assembler::divsd(XMMRegister dst, Address src) {
1865   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1866   InstructionMark im(this);
1867   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1868   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1869   attributes.set_rex_vex_w_reverted();
1870   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1871   emit_int8(0x5E);
1872   emit_operand(dst, src);
1873 }
1874 
1875 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
1876   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1877   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1878   attributes.set_rex_vex_w_reverted();
1879   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1880   emit_int8(0x5E);
1881   emit_int8((unsigned char)(0xC0 | encode));
1882 }
1883 
1884 void Assembler::divss(XMMRegister dst, Address src) {
1885   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1886   InstructionMark im(this);
1887   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1888   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1889   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1890   emit_int8(0x5E);
1891   emit_operand(dst, src);
1892 }
1893 
1894 void Assembler::divss(XMMRegister dst, XMMRegister src) {
1895   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1896   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1897   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1898   emit_int8(0x5E);
1899   emit_int8((unsigned char)(0xC0 | encode));
1900 }
1901 
// Empty MMX state: 0F 77.
void Assembler::emms() {
  NOT_LP64(assert(VM_Version::supports_mmx(), ""));
  emit_int8(0x0F);
  emit_int8(0x77);
}
1907 
// Halt the processor: F4.
void Assembler::hlt() {
  emit_int8((unsigned char)0xF4);
}
1911 
1912 void Assembler::idivl(Register src) {
1913   int encode = prefix_and_encode(src->encoding());
1914   emit_int8((unsigned char)0xF7);
1915   emit_int8((unsigned char)(0xF8 | encode));
1916 }
1917 
1918 void Assembler::divl(Register src) { // Unsigned
1919   int encode = prefix_and_encode(src->encoding());
1920   emit_int8((unsigned char)0xF7);
1921   emit_int8((unsigned char)(0xF0 | encode));
1922 }
1923 
1924 void Assembler::imull(Register src) {
1925   int encode = prefix_and_encode(src->encoding());
1926   emit_int8((unsigned char)0xF7);
1927   emit_int8((unsigned char)(0xE8 | encode));
1928 }
1929 
1930 void Assembler::imull(Register dst, Register src) {
1931   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1932   emit_int8(0x0F);
1933   emit_int8((unsigned char)0xAF);
1934   emit_int8((unsigned char)(0xC0 | encode));
1935 }
1936 
1937 
1938 void Assembler::imull(Register dst, Register src, int value) {
1939   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1940   if (is8bit(value)) {
1941     emit_int8(0x6B);
1942     emit_int8((unsigned char)(0xC0 | encode));
1943     emit_int8(value & 0xFF);
1944   } else {
1945     emit_int8(0x69);
1946     emit_int8((unsigned char)(0xC0 | encode));
1947     emit_int32(value);
1948   }
1949 }
1950 
1951 void Assembler::imull(Register dst, Address src) {
1952   InstructionMark im(this);
1953   prefix(src, dst);
1954   emit_int8(0x0F);
1955   emit_int8((unsigned char) 0xAF);
1956   emit_operand(dst, src);
1957 }
1958 
1959 
// Increment a 32-bit memory operand: FF /0.
void Assembler::incl(Address dst) {
  // Don't use it directly. Use MacroAssembler::increment() instead.
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0xFF);
  // rax (encoding 0) supplies the /0 opcode extension in the ModRM reg field.
  emit_operand(rax, dst);
}
1967 
// Conditional jump to label L.
// Bound label: emits the 2-byte short form (70+cc, disp8) when maybe_short
// and the displacement fits, otherwise the 6-byte long form (0F 80+cc, disp32).
// Unbound label: always emits the long form with a zero displacement and
// registers a patch site to be fixed up when L is bound.
void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {
  InstructionMark im(this);
  assert((0 <= cc) && (cc < 16), "illegal cc");
  if (L.is_bound()) {
    address dst = target(L);
    assert(dst != NULL, "jcc most probably wrong");

    const int short_size = 2;
    const int long_size = 6;
    // Displacement is relative to the end of the instruction, hence the
    // short_size/long_size adjustments below.
    intptr_t offs = (intptr_t)dst - (intptr_t)pc();
    if (maybe_short && is8bit(offs - short_size)) {
      // 0111 tttn #8-bit disp
      emit_int8(0x70 | cc);
      emit_int8((offs - short_size) & 0xFF);
    } else {
      // 0000 1111 1000 tttn #32-bit disp
      assert(is_simm32(offs - long_size),
             "must be 32bit offset (call4)");
      emit_int8(0x0F);
      emit_int8((unsigned char)(0x80 | cc));
      emit_int32(offs - long_size);
    }
  } else {
    // Note: could eliminate cond. jumps to this jump if condition
    //       is the same however, seems to be rather unlikely case.
    // Note: use jccb() if label to be bound is very close to get
    //       an 8-bit displacement
    L.add_patch_at(code(), locator());
    emit_int8(0x0F);
    emit_int8((unsigned char)(0x80 | cc));
    emit_int32(0);
  }
}
2001 
2002 void Assembler::jccb(Condition cc, Label& L) {
2003   if (L.is_bound()) {
2004     const int short_size = 2;
2005     address entry = target(L);
2006 #ifdef ASSERT
2007     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
2008     intptr_t delta = short_branch_delta();
2009     if (delta != 0) {
2010       dist += (dist < 0 ? (-delta) :delta);
2011     }
2012     assert(is8bit(dist), "Dispacement too large for a short jmp");
2013 #endif
2014     intptr_t offs = (intptr_t)entry - (intptr_t)pc();
2015     // 0111 tttn #8-bit disp
2016     emit_int8(0x70 | cc);
2017     emit_int8((offs - short_size) & 0xFF);
2018   } else {
2019     InstructionMark im(this);
2020     L.add_patch_at(code(), locator());
2021     emit_int8(0x70 | cc);
2022     emit_int8(0);
2023   }
2024 }
2025 
// Indirect jump through a memory operand: FF /4.
void Assembler::jmp(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_int8((unsigned char)0xFF);
  // rsp (encoding 4) supplies the /4 opcode extension in the ModRM reg field.
  emit_operand(rsp, adr);
}
2032 
// Unconditional jump to label L.
// Bound label: 2-byte short form (EB, disp8) when maybe_short and the
// displacement fits, otherwise the 5-byte long form (E9, disp32).
// Unbound label: long form with zero displacement plus a patch site.
void Assembler::jmp(Label& L, bool maybe_short) {
  if (L.is_bound()) {
    address entry = target(L);
    assert(entry != NULL, "jmp most probably wrong");
    InstructionMark im(this);
    const int short_size = 2;
    const int long_size = 5;
    // Displacement is relative to the end of the instruction.
    intptr_t offs = entry - pc();
    if (maybe_short && is8bit(offs - short_size)) {
      emit_int8((unsigned char)0xEB);
      emit_int8((offs - short_size) & 0xFF);
    } else {
      emit_int8((unsigned char)0xE9);
      emit_int32(offs - long_size);
    }
  } else {
    // By default, forward jumps are always 32-bit displacements, since
    // we can't yet know where the label will be bound.  If you're sure that
    // the forward jump will not run beyond 256 bytes, use jmpb to
    // force an 8-bit displacement.
    InstructionMark im(this);
    L.add_patch_at(code(), locator());
    emit_int8((unsigned char)0xE9);
    emit_int32(0);
  }
}
2059 
2060 void Assembler::jmp(Register entry) {
2061   int encode = prefix_and_encode(entry->encoding());
2062   emit_int8((unsigned char)0xFF);
2063   emit_int8((unsigned char)(0xE0 | encode));
2064 }
2065 
// Unconditional jump to an absolute code address: E9 disp32 with relocation.
void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xE9);
  assert(dest != NULL, "must have a target");
  // Displacement is relative to the end of the 5-byte instruction.
  intptr_t disp = dest - (pc() + sizeof(int32_t));
  assert(is_simm32(disp), "must be 32bit offset (jmp)");
  // Record relocation info so the displacement survives code relocation.
  emit_data(disp, rspec.reloc(), call32_operand);
}
2074 
2075 void Assembler::jmpb(Label& L) {
2076   if (L.is_bound()) {
2077     const int short_size = 2;
2078     address entry = target(L);
2079     assert(entry != NULL, "jmp most probably wrong");
2080 #ifdef ASSERT
2081     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
2082     intptr_t delta = short_branch_delta();
2083     if (delta != 0) {
2084       dist += (dist < 0 ? (-delta) :delta);
2085     }
2086     assert(is8bit(dist), "Dispacement too large for a short jmp");
2087 #endif
2088     intptr_t offs = entry - pc();
2089     emit_int8((unsigned char)0xEB);
2090     emit_int8((offs - short_size) & 0xFF);
2091   } else {
2092     InstructionMark im(this);
2093     L.add_patch_at(code(), locator());
2094     emit_int8((unsigned char)0xEB);
2095     emit_int8(0);
2096   }
2097 }
2098 
// Load the MXCSR register from memory: 0F AE /2.
// With AVX enabled a VEX-encoded form is emitted; otherwise the legacy
// SSE encoding is used.
void Assembler::ldmxcsr( Address src) {
  if (UseAVX > 0 ) {
    InstructionMark im(this);
    InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
    vex_prefix(src, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
    emit_int8((unsigned char)0xAE);
    // Register 2 supplies the /2 opcode extension in the ModRM reg field.
    emit_operand(as_Register(2), src);
  } else {
    NOT_LP64(assert(VM_Version::supports_sse(), ""));
    InstructionMark im(this);
    prefix(src);
    emit_int8(0x0F);
    emit_int8((unsigned char)0xAE);
    emit_operand(as_Register(2), src);
  }
}
2115 
// Load effective address (32-bit form): 8D /r.
void Assembler::leal(Register dst, Address src) {
  InstructionMark im(this);
#ifdef _LP64
  // 0x67 address-size override forces 32-bit address computation on 64-bit.
  emit_int8(0x67); // addr32
  prefix(src, dst);
#endif // LP64
  emit_int8((unsigned char)0x8D);
  emit_operand(dst, src);
}
2125 
// Load fence: 0F AE E8.
void Assembler::lfence() {
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAE);
  emit_int8((unsigned char)0xE8);
}
2131 
// LOCK prefix (F0) for the following instruction.
void Assembler::lock() {
  emit_int8((unsigned char)0xF0);
}
2135 
// Count leading zeros: F3 0F BD /r.
// Without LZCNT support the F3 prefix is ignored and the bytes decode as BSR,
// hence the assert. The F3 prefix must be emitted before any REX prefix.
void Assembler::lzcntl(Register dst, Register src) {
  assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
  emit_int8((unsigned char)0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBD);
  emit_int8((unsigned char)(0xC0 | encode));
}
2144 
// Emit mfence instruction: 0F AE F0 (full memory fence).
void Assembler::mfence() {
  NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAE);
  emit_int8((unsigned char)0xF0);
}
2152 
// Pointer-sized register move: movq on 64-bit, movl on 32-bit.
void Assembler::mov(Register dst, Register src) {
  LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
}
2156 
2157 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
2158   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2159   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2160   InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2161   attributes.set_rex_vex_w_reverted();
2162   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2163   emit_int8(0x28);
2164   emit_int8((unsigned char)(0xC0 | encode));
2165 }
2166 
2167 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
2168   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2169   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2170   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2171   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2172   emit_int8(0x28);
2173   emit_int8((unsigned char)(0xC0 | encode));
2174 }
2175 
2176 void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
2177   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2178   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2179   int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2180   emit_int8(0x16);
2181   emit_int8((unsigned char)(0xC0 | encode));
2182 }
2183 
// Move byte from memory into a byte register: 8A /r.
void Assembler::movb(Register dst, Address src) {
  NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
  InstructionMark im(this);
  // The 'true' flag requests byte-register prefix handling.
  prefix(src, dst, true);
  emit_int8((unsigned char)0x8A);
  emit_operand(dst, src);
}
2191 
2192 void Assembler::movddup(XMMRegister dst, XMMRegister src) {
2193   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
2194   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2195   InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2196   attributes.set_rex_vex_w_reverted();
2197   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2198   emit_int8(0x12);
2199   emit_int8(0xC0 | encode);
2200 }
2201 
2202 void Assembler::kmovbl(KRegister dst, Register src) {
2203   assert(VM_Version::supports_avx512dq(), "");
2204   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2205   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2206   emit_int8((unsigned char)0x92);
2207   emit_int8((unsigned char)(0xC0 | encode));
2208 }
2209 
2210 void Assembler::kmovbl(Register dst, KRegister src) {
2211   assert(VM_Version::supports_avx512dq(), "");
2212   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2213   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2214   emit_int8((unsigned char)0x93);
2215   emit_int8((unsigned char)(0xC0 | encode));
2216 }
2217 
2218 void Assembler::kmovwl(KRegister dst, Register src) {
2219   assert(VM_Version::supports_evex(), "");
2220   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2221   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2222   emit_int8((unsigned char)0x92);
2223   emit_int8((unsigned char)(0xC0 | encode));
2224 }
2225 
2226 void Assembler::kmovwl(Register dst, KRegister src) {
2227   assert(VM_Version::supports_evex(), "");
2228   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2229   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2230   emit_int8((unsigned char)0x93);
2231   emit_int8((unsigned char)(0xC0 | encode));
2232 }
2233 
2234 void Assembler::kmovwl(KRegister dst, Address src) {
2235   assert(VM_Version::supports_evex(), "");
2236   InstructionMark im(this);
2237   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2238   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2239   emit_int8((unsigned char)0x90);
2240   emit_operand((Register)dst, src);
2241 }
2242 
2243 void Assembler::kmovdl(KRegister dst, Register src) {
2244   assert(VM_Version::supports_avx512bw(), "");
2245   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2246   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2247   emit_int8((unsigned char)0x92);
2248   emit_int8((unsigned char)(0xC0 | encode));
2249 }
2250 
2251 void Assembler::kmovdl(Register dst, KRegister src) {
2252   assert(VM_Version::supports_avx512bw(), "");
2253   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2254   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2255   emit_int8((unsigned char)0x93);
2256   emit_int8((unsigned char)(0xC0 | encode));
2257 }
2258 
2259 void Assembler::kmovql(KRegister dst, KRegister src) {
2260   assert(VM_Version::supports_avx512bw(), "");
2261   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2262   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2263   emit_int8((unsigned char)0x90);
2264   emit_int8((unsigned char)(0xC0 | encode));
2265 }
2266 
2267 void Assembler::kmovql(KRegister dst, Address src) {
2268   assert(VM_Version::supports_avx512bw(), "");
2269   InstructionMark im(this);
2270   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2271   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2272   emit_int8((unsigned char)0x90);
2273   emit_operand((Register)dst, src);
2274 }
2275 
2276 void Assembler::kmovql(Address dst, KRegister src) {
2277   assert(VM_Version::supports_avx512bw(), "");
2278   InstructionMark im(this);
2279   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2280   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2281   emit_int8((unsigned char)0x90);
2282   emit_operand((Register)src, dst);
2283 }
2284 
2285 void Assembler::kmovql(KRegister dst, Register src) {
2286   assert(VM_Version::supports_avx512bw(), "");
2287   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2288   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2289   emit_int8((unsigned char)0x92);
2290   emit_int8((unsigned char)(0xC0 | encode));
2291 }
2292 
2293 void Assembler::kmovql(Register dst, KRegister src) {
2294   assert(VM_Version::supports_avx512bw(), "");
2295   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2296   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2297   emit_int8((unsigned char)0x93);
2298   emit_int8((unsigned char)(0xC0 | encode));
2299 }
2300 
2301 void Assembler::knotwl(KRegister dst, KRegister src) {
2302   assert(VM_Version::supports_evex(), "");
2303   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2304   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2305   emit_int8((unsigned char)0x44);
2306   emit_int8((unsigned char)(0xC0 | encode));
2307 }
2308 
2309 // This instruction produces ZF or CF flags
2310 void Assembler::kortestbl(KRegister src1, KRegister src2) {
2311   assert(VM_Version::supports_avx512dq(), "");
2312   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2313   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2314   emit_int8((unsigned char)0x98);
2315   emit_int8((unsigned char)(0xC0 | encode));
2316 }
2317 
2318 // This instruction produces ZF or CF flags
2319 void Assembler::kortestwl(KRegister src1, KRegister src2) {
2320   assert(VM_Version::supports_evex(), "");
2321   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2322   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2323   emit_int8((unsigned char)0x98);
2324   emit_int8((unsigned char)(0xC0 | encode));
2325 }
2326 
2327 // This instruction produces ZF or CF flags
2328 void Assembler::kortestdl(KRegister src1, KRegister src2) {
2329   assert(VM_Version::supports_avx512bw(), "");
2330   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2331   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2332   emit_int8((unsigned char)0x98);
2333   emit_int8((unsigned char)(0xC0 | encode));
2334 }
2335 
2336 // This instruction produces ZF or CF flags
2337 void Assembler::kortestql(KRegister src1, KRegister src2) {
2338   assert(VM_Version::supports_avx512bw(), "");
2339   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2340   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2341   emit_int8((unsigned char)0x98);
2342   emit_int8((unsigned char)(0xC0 | encode));
2343 }
2344 
2345 // This instruction produces ZF or CF flags
2346 void Assembler::ktestql(KRegister src1, KRegister src2) {
2347   assert(VM_Version::supports_avx512bw(), "");
2348   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2349   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2350   emit_int8((unsigned char)0x99);
2351   emit_int8((unsigned char)(0xC0 | encode));
2352 }
2353 
2354 void Assembler::ktestq(KRegister src1, KRegister src2) {
2355   assert(VM_Version::supports_avx512bw(), "");
2356   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2357   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2358   emit_int8((unsigned char)0x99);
2359   emit_int8((unsigned char)(0xC0 | encode));
2360 }
2361 
2362 void Assembler::ktestd(KRegister src1, KRegister src2) {
2363   assert(VM_Version::supports_avx512bw(), "");
2364   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2365   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2366   emit_int8((unsigned char)0x99);
2367   emit_int8((unsigned char)(0xC0 | encode));
2368 }
2369 
2370 void Assembler::movb(Address dst, int imm8) {
2371   InstructionMark im(this);
2372    prefix(dst);
2373   emit_int8((unsigned char)0xC6);
2374   emit_operand(rax, dst, 1);
2375   emit_int8(imm8);
2376 }
2377 
2378 
2379 void Assembler::movb(Address dst, Register src) {
2380   assert(src->has_byte_register(), "must have byte register");
2381   InstructionMark im(this);
2382   prefix(dst, src, true);
2383   emit_int8((unsigned char)0x88);
2384   emit_operand(src, dst);
2385 }
2386 
// MOVD xmm, r32 -- move 32 bits from a GP register into an XMM register
// (66 0F 6E). On 32-bit VMs this requires SSE2.
void Assembler::movdl(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  // GP source is re-typed as an XMM register purely for prefix encoding.
  int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6E);
  emit_int8((unsigned char)(0xC0 | encode));
}
2394 
// MOVD r32, xmm -- move 32 bits from an XMM register into a GP register
// (66 0F 7E). The 7E form encodes the XMM register in the reg field,
// so operands are swapped for prefix computation.
void Assembler::movdl(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  // swap src/dst to get correct prefix
  int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x7E);
  emit_int8((unsigned char)(0xC0 | encode));
}
2403 
// MOVD xmm, mem32 -- load 32 bits from memory into an XMM register (66 0F 6E).
void Assembler::movdl(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  // EVEX tuple T1S with 32-bit element controls disp8 compression scaling.
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6E);
  emit_operand(dst, src);
}
2413 
// MOVD mem32, xmm -- store the low 32 bits of an XMM register (66 0F 7E).
void Assembler::movdl(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x7E);
  emit_operand(src, dst);
}
2423 
// MOVDQA xmm, xmm -- aligned 128-bit move between XMM registers (66 0F 6F).
void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // On AVX-512 without VL support, register-to-register moves must use the
  // full 512-bit vector length.
  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_int8((unsigned char)(0xC0 | encode));
}
2432 
// MOVDQA xmm, mem128 -- aligned 128-bit load (66 0F 6F).
void Assembler::movdqa(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_operand(dst, src);
}
2442 
// MOVDQU xmm, mem128 -- unaligned 128-bit load (F3 0F 6F).
void Assembler::movdqu(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_operand(dst, src);
}
2452 
// MOVDQU xmm, xmm -- 128-bit register-to-register move (F3 0F 6F).
void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_int8((unsigned char)(0xC0 | encode));
}
2460 
// MOVDQU mem128, xmm -- unaligned 128-bit store (F3 0F 7F).
void Assembler::movdqu(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.reset_is_clear_context(); // stores must not zero/merge masked lanes
  simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x7F);
  emit_operand(src, dst);
}
2471 
// Move Unaligned 256bit Vector
// VMOVDQU ymm, ymm -- 256-bit register-to-register move (VEX.256.F3 0F 6F).
void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
  assert(UseAVX > 0, "");
  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_int8((unsigned char)(0xC0 | encode));
}
2480 
// VMOVDQU ymm, mem256 -- unaligned 256-bit load (VEX.256.F3 0F 6F).
void Assembler::vmovdqu(XMMRegister dst, Address src) {
  assert(UseAVX > 0, "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_operand(dst, src);
}
2490 
// VMOVDQU mem256, ymm -- unaligned 256-bit store (VEX.256.F3 0F 7F).
void Assembler::vmovdqu(Address dst, XMMRegister src) {
  assert(UseAVX > 0, "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.reset_is_clear_context(); // stores must not zero/merge masked lanes
  // swap src<->dst for encoding
  assert(src != xnoreg, "sanity");
  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x7F);
  emit_operand(src, dst);
}
2503 
// Move Unaligned EVEX enabled Vector (programmable : 8,16,32,64)
// EVMOVDQU8 dst, src -- byte-granular unaligned move between vectors.
// Without AVX512BW (_legacy_mode_bw) falls back to the F2-prefixed encoding,
// otherwise uses the F3 (vmovdqu8) form.
void Assembler::evmovdqub(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_int8((unsigned char)(0xC0 | encode));
}
2514 
// EVMOVDQU8 dst, mem -- byte-granular unaligned vector load (opcode 0F 6F).
// Prefix selection mirrors the register form: F2 in legacy-BW mode, else F3.
void Assembler::evmovdqub(XMMRegister dst, Address src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_is_evex_instruction();
  vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_operand(dst, src);
}
2526 
// EVMOVDQU8 mem, src -- byte-granular unaligned vector store (opcode 0F 7F).
void Assembler::evmovdqub(Address dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(src != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_is_evex_instruction();
  vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
  emit_int8(0x7F);
  emit_operand(src, dst);
}
2539 
// EVMOVDQU8 dst {mask}, mem -- opmask-predicated byte-granular load.
// NOTE(review): uses the F2 prefix unconditionally here, unlike the unmasked
// forms which switch on _legacy_mode_bw -- presumably safe because the
// avx512vlbw guard rules legacy mode out; confirm against callers.
void Assembler::evmovdqub(XMMRegister dst, KRegister mask, Address src, int vector_len) {
  assert(VM_Version::supports_avx512vlbw(), "");
  assert(is_vector_masking(), "");    // For stub code use only
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_embedded_opmask_register_specifier(mask); // k-register in EVEX.aaa
  attributes.set_is_evex_instruction();
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_operand(dst, src);
}
2552 
// EVMOVDQU16 dst, mem -- word-granular unaligned vector load (W1, opcode 0F 6F).
// F2 prefix in legacy-BW mode, F3 (vmovdqu16) otherwise.
void Assembler::evmovdquw(XMMRegister dst, Address src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_is_evex_instruction();
  int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
  vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_operand(dst, src);
}
2564 
// EVMOVDQU16 dst {mask}, mem -- opmask-predicated word-granular load.
// NOTE(review): F2 prefix is hard-coded here (no _legacy_mode_bw switch);
// presumably fine under the avx512vlbw guard -- confirm.
void Assembler::evmovdquw(XMMRegister dst, KRegister mask, Address src, int vector_len) {
  assert(is_vector_masking(), ""); // stub-code-only masked form
  assert(VM_Version::supports_avx512vlbw(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_embedded_opmask_register_specifier(mask); // k-register in EVEX.aaa
  attributes.set_is_evex_instruction();
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_operand(dst, src);
}
2577 
// EVMOVDQU16 mem, src -- word-granular unaligned vector store (W1, opcode 0F 7F).
void Assembler::evmovdquw(Address dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(src != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_is_evex_instruction();
  int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
  vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
  emit_int8(0x7F);
  emit_operand(src, dst);
}
2590 
// EVMOVDQU16 mem {mask}, src -- opmask-predicated word-granular store.
void Assembler::evmovdquw(Address dst, KRegister mask, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx512vlbw(), "");
  assert(src != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.reset_is_clear_context(); // masked store: merge, never zero, untouched lanes
  attributes.set_embedded_opmask_register_specifier(mask); // k-register in EVEX.aaa
  attributes.set_is_evex_instruction();
  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x7F);
  emit_operand(src, dst);
}
2604 
// EVMOVDQU32 dst, src -- dword-granular vector move (EVEX.F3 W0, opcode 0F 6F).
void Assembler::evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_int8((unsigned char)(0xC0 | encode));
}
2613 
// EVMOVDQU32 dst, mem -- dword-granular unaligned vector load.
void Assembler::evmovdqul(XMMRegister dst, Address src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false , /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_is_evex_instruction();
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_operand(dst, src);
}
2624 
// EVMOVDQU32 mem, src -- dword-granular unaligned vector store (opcode 0F 7F).
void Assembler::evmovdqul(Address dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(src != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.reset_is_clear_context(); // stores must not zero/merge masked lanes
  attributes.set_is_evex_instruction();
  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x7F);
  emit_operand(src, dst);
}
2637 
// EVMOVDQU64 dst, src -- qword-granular vector move (EVEX.F3 W1, opcode 0F 6F).
void Assembler::evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_int8((unsigned char)(0xC0 | encode));
}
2646 
// EVMOVDQU64 dst, mem -- qword-granular unaligned vector load.
void Assembler::evmovdquq(XMMRegister dst, Address src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_is_evex_instruction();
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6F);
  emit_operand(dst, src);
}
2657 
// EVMOVDQU64 mem, src -- qword-granular unaligned vector store (opcode 0F 7F).
void Assembler::evmovdquq(Address dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(src != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.reset_is_clear_context(); // stores must not zero/merge masked lanes
  attributes.set_is_evex_instruction();
  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x7F);
  emit_operand(src, dst);
}
2670 
// Uses zero extension on 64bit

// MOVL reg32, imm32 -- load a 32-bit immediate (opcode B8+rd).
void Assembler::movl(Register dst, int32_t imm32) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)(0xB8 | encode)); // register number folded into the opcode byte
  emit_int32(imm32);
}
2678 
// MOVL reg32, reg32 -- register-to-register move (opcode 8B /r).
void Assembler::movl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8((unsigned char)0x8B);
  emit_int8((unsigned char)(0xC0 | encode));
}
2684 
// MOVL reg32, mem32 -- load from memory (opcode 8B /r).
void Assembler::movl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8((unsigned char)0x8B);
  emit_operand(dst, src);
}
2691 
// MOVL mem32, imm32 -- store a 32-bit immediate to memory (opcode C7 /0).
void Assembler::movl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0xC7);
  emit_operand(rax, dst, 4); // rax encodes the /0 extension; 4 = trailing imm bytes
  emit_int32(imm32);
}
2699 
// MOVL mem32, reg32 -- store to memory (opcode 89 /r).
void Assembler::movl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8((unsigned char)0x89);
  emit_operand(src, dst);
}
2706 
// New cpus require to use movsd and movss to avoid partial register stall
// when loading from memory. But for old Opteron use movlpd instead of movsd.
// The selection is done in MacroAssembler::movdbl() and movflt().

// MOVLPD xmm, mem64 -- load 64 bits into the low half of an XMM register
// (66 0F 12); high half of dst is preserved (dst is also the merge source).
void Assembler::movlpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted(); // W bit may be dropped when re-encoding legacy form
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x12);
  emit_operand(dst, src);
}
2720 
// MOVQ mmx, mem64 -- MMX 64-bit load (opcode 0F 6F); no prefixes needed.
void Assembler::movq( MMXRegister dst, Address src ) {
  assert( VM_Version::supports_mmx(), "" );
  emit_int8(0x0F);
  emit_int8(0x6F);
  emit_operand(dst, src);
}
2727 
// MOVQ mem64, mmx -- MMX 64-bit store (opcode 0F 7F).
void Assembler::movq( Address dst, MMXRegister src ) {
  assert( VM_Version::supports_mmx(), "" );
  emit_int8(0x0F);
  emit_int8(0x7F);
  // workaround gcc (3.2.1-7a) bug
  // In that version of gcc with only an emit_operand(MMX, Address)
  // gcc will tail jump and try and reverse the parameters completely
  // obliterating dst in the process. By having a version available
  // that doesn't need to swap the args at the tail jump the bug is
  // avoided.
  emit_operand(dst, src);
}
2740 
// MOVQ xmm, mem64 -- load 64 bits into an XMM register (F3 0F 7E).
void Assembler::movq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x7E);
  emit_operand(dst, src);
}
2751 
// MOVQ mem64, xmm -- store the low 64 bits of an XMM register (66 0F D6).
void Assembler::movq(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD6);
  emit_operand(src, dst);
}
2762 
// MOVSX reg32, mem8 -- sign-extending byte load (opcode 0F BE).
void Assembler::movsbl(Register dst, Address src) { // movsxb
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBE);
  emit_operand(dst, src);
}
2770 
// MOVSX reg32, reg8 -- sign-extend a byte register (opcode 0F BE).
void Assembler::movsbl(Register dst, Register src) { // movsxb
  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
  // src is a byte register: the final 'true' requests byte-register prefixing.
  int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBE);
  emit_int8((unsigned char)(0xC0 | encode));
}
2778 
// MOVSD xmm, xmm -- move scalar double between XMM registers (F2 0F 10).
void Assembler::movsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x10);
  emit_int8((unsigned char)(0xC0 | encode));
}
2787 
// MOVSD xmm, mem64 -- load a scalar double from memory (F2 0F 10).
void Assembler::movsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x10);
  emit_operand(dst, src);
}
2798 
// MOVSD mem64, xmm -- store a scalar double to memory (F2 0F 11).
void Assembler::movsd(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.reset_is_clear_context(); // stores must not zero/merge masked lanes
  attributes.set_rex_vex_w_reverted();
  simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x11);
  emit_operand(src, dst);
}
2810 
// MOVSS xmm, xmm -- move scalar single between XMM registers (F3 0F 10).
void Assembler::movss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x10);
  emit_int8((unsigned char)(0xC0 | encode));
}
2818 
// MOVSS xmm, mem32 -- load a scalar single from memory (F3 0F 10).
void Assembler::movss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x10);
  emit_operand(dst, src);
}
2828 
// MOVSS mem32, xmm -- store a scalar single to memory (F3 0F 11).
void Assembler::movss(Address dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  attributes.reset_is_clear_context(); // stores must not zero/merge masked lanes
  simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x11);
  emit_operand(src, dst);
}
2839 
// MOVSX reg32, mem16 -- sign-extending word load (opcode 0F BF).
void Assembler::movswl(Register dst, Address src) { // movsxw
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBF);
  emit_operand(dst, src);
}
2847 
// MOVSX reg32, reg16 -- sign-extend a word register (opcode 0F BF).
void Assembler::movswl(Register dst, Register src) { // movsxw
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBF);
  emit_int8((unsigned char)(0xC0 | encode));
}
2854 
// MOVW mem16, imm16 -- store a 16-bit immediate (66 C7 /0).
void Assembler::movw(Address dst, int imm16) {
  InstructionMark im(this);

  emit_int8(0x66); // switch to 16-bit mode
  prefix(dst);
  emit_int8((unsigned char)0xC7);
  emit_operand(rax, dst, 2); // rax encodes the /0 extension; 2 = trailing imm bytes
  emit_int16(imm16);
}
2864 
// MOVW reg16, mem16 -- 16-bit load (66 8B /r).
void Assembler::movw(Register dst, Address src) {
  InstructionMark im(this);
  emit_int8(0x66); // operand-size override: 16-bit operation
  prefix(src, dst);
  emit_int8((unsigned char)0x8B);
  emit_operand(dst, src);
}
2872 
// MOVW mem16, reg16 -- 16-bit store (66 89 /r).
void Assembler::movw(Address dst, Register src) {
  InstructionMark im(this);
  emit_int8(0x66); // operand-size override: 16-bit operation
  prefix(dst, src);
  emit_int8((unsigned char)0x89);
  emit_operand(src, dst);
}
2880 
// MOVZX reg32, mem8 -- zero-extending byte load (opcode 0F B6).
void Assembler::movzbl(Register dst, Address src) { // movzxb
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xB6);
  emit_operand(dst, src);
}
2888 
// MOVZX reg32, reg8 -- zero-extend a byte register (opcode 0F B6).
void Assembler::movzbl(Register dst, Register src) { // movzxb
  NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
  // src is a byte register: the final 'true' requests byte-register prefixing.
  int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xB6);
  emit_int8(0xC0 | encode);
}
2896 
// MOVZX reg32, mem16 -- zero-extending word load (opcode 0F B7).
void Assembler::movzwl(Register dst, Address src) { // movzxw
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xB7);
  emit_operand(dst, src);
}
2904 
// MOVZX reg32, reg16 -- zero-extend a word register (opcode 0F B7).
void Assembler::movzwl(Register dst, Register src) { // movzxw
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xB7);
  emit_int8(0xC0 | encode);
}
2911 
// MUL mem32 -- unsigned multiply of EAX by a memory operand (opcode F7 /4).
void Assembler::mull(Address src) {
  InstructionMark im(this);
  prefix(src);
  emit_int8((unsigned char)0xF7);
  emit_operand(rsp, src); // rsp encodes the /4 opcode-extension field
}
2918 
// MUL reg32 -- unsigned multiply of EAX by a register (opcode F7 /4).
void Assembler::mull(Register src) {
  int encode = prefix_and_encode(src->encoding());
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xE0 | encode)); // ModRM = 11 100 reg (/4, register-direct)
}
2924 
// MULSD xmm, mem64 -- scalar double multiply with a memory operand (F2 0F 59).
void Assembler::mulsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_operand(dst, src);
}
2935 
// MULSD xmm, xmm -- scalar double multiply (F2 0F 59).
void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_int8((unsigned char)(0xC0 | encode));
}
2944 
// MULSS xmm, mem32 -- scalar single multiply with a memory operand (F3 0F 59).
void Assembler::mulss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_operand(dst, src);
}
2954 
// MULSS xmm, xmm -- scalar single multiply (F3 0F 59).
void Assembler::mulss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_int8((unsigned char)(0xC0 | encode));
}
2962 
// NEG reg32 -- two's-complement negate (opcode F7 /3).
void Assembler::negl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xD8 | encode)); // ModRM = 11 011 reg (/3, register-direct)
}
2968 
2969 void Assembler::nop(int i) {
2970 #ifdef ASSERT
2971   assert(i > 0, " ");
2972   // The fancy nops aren't currently recognized by debuggers making it a
2973   // pain to disassemble code while debugging. If asserts are on clearly
2974   // speed is not an issue so simply use the single byte traditional nop
2975   // to do alignment.
2976 
2977   for (; i > 0 ; i--) emit_int8((unsigned char)0x90);
2978   return;
2979 
2980 #endif // ASSERT
2981 
2982   if (UseAddressNop && VM_Version::is_intel()) {
2983     //
2984     // Using multi-bytes nops "0x0F 0x1F [address]" for Intel
2985     //  1: 0x90
2986     //  2: 0x66 0x90
2987     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
2988     //  4: 0x0F 0x1F 0x40 0x00
2989     //  5: 0x0F 0x1F 0x44 0x00 0x00
2990     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2991     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2992     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2993     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2994     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2995     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2996 
2997     // The rest coding is Intel specific - don't use consecutive address nops
2998 
2999     // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3000     // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3001     // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3002     // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3003 
3004     while(i >= 15) {
3005       // For Intel don't generate consecutive addess nops (mix with regular nops)
3006       i -= 15;
3007       emit_int8(0x66);   // size prefix
3008       emit_int8(0x66);   // size prefix
3009       emit_int8(0x66);   // size prefix
3010       addr_nop_8();
3011       emit_int8(0x66);   // size prefix
3012       emit_int8(0x66);   // size prefix
3013       emit_int8(0x66);   // size prefix
3014       emit_int8((unsigned char)0x90);
3015                          // nop
3016     }
3017     switch (i) {
3018       case 14:
3019         emit_int8(0x66); // size prefix
3020       case 13:
3021         emit_int8(0x66); // size prefix
3022       case 12:
3023         addr_nop_8();
3024         emit_int8(0x66); // size prefix
3025         emit_int8(0x66); // size prefix
3026         emit_int8(0x66); // size prefix
3027         emit_int8((unsigned char)0x90);
3028                          // nop
3029         break;
3030       case 11:
3031         emit_int8(0x66); // size prefix
3032       case 10:
3033         emit_int8(0x66); // size prefix
3034       case 9:
3035         emit_int8(0x66); // size prefix
3036       case 8:
3037         addr_nop_8();
3038         break;
3039       case 7:
3040         addr_nop_7();
3041         break;
3042       case 6:
3043         emit_int8(0x66); // size prefix
3044       case 5:
3045         addr_nop_5();
3046         break;
3047       case 4:
3048         addr_nop_4();
3049         break;
3050       case 3:
3051         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
3052         emit_int8(0x66); // size prefix
3053       case 2:
3054         emit_int8(0x66); // size prefix
3055       case 1:
3056         emit_int8((unsigned char)0x90);
3057                          // nop
3058         break;
3059       default:
3060         assert(i == 0, " ");
3061     }
3062     return;
3063   }
3064   if (UseAddressNop && VM_Version::is_amd()) {
3065     //
3066     // Using multi-bytes nops "0x0F 0x1F [address]" for AMD.
3067     //  1: 0x90
3068     //  2: 0x66 0x90
3069     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
3070     //  4: 0x0F 0x1F 0x40 0x00
3071     //  5: 0x0F 0x1F 0x44 0x00 0x00
3072     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
3073     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3074     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3075     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3076     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3077     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3078 
3079     // The rest coding is AMD specific - use consecutive address nops
3080 
3081     // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
3082     // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
3083     // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3084     // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3085     // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3086     //     Size prefixes (0x66) are added for larger sizes
3087 
3088     while(i >= 22) {
3089       i -= 11;
3090       emit_int8(0x66); // size prefix
3091       emit_int8(0x66); // size prefix
3092       emit_int8(0x66); // size prefix
3093       addr_nop_8();
3094     }
3095     // Generate first nop for size between 21-12
3096     switch (i) {
3097       case 21:
3098         i -= 1;
3099         emit_int8(0x66); // size prefix
3100       case 20:
3101       case 19:
3102         i -= 1;
3103         emit_int8(0x66); // size prefix
3104       case 18:
3105       case 17:
3106         i -= 1;
3107         emit_int8(0x66); // size prefix
3108       case 16:
3109       case 15:
3110         i -= 8;
3111         addr_nop_8();
3112         break;
3113       case 14:
3114       case 13:
3115         i -= 7;
3116         addr_nop_7();
3117         break;
3118       case 12:
3119         i -= 6;
3120         emit_int8(0x66); // size prefix
3121         addr_nop_5();
3122         break;
3123       default:
3124         assert(i < 12, " ");
3125     }
3126 
3127     // Generate second nop for size between 11-1
3128     switch (i) {
3129       case 11:
3130         emit_int8(0x66); // size prefix
3131       case 10:
3132         emit_int8(0x66); // size prefix
3133       case 9:
3134         emit_int8(0x66); // size prefix
3135       case 8:
3136         addr_nop_8();
3137         break;
3138       case 7:
3139         addr_nop_7();
3140         break;
3141       case 6:
3142         emit_int8(0x66); // size prefix
3143       case 5:
3144         addr_nop_5();
3145         break;
3146       case 4:
3147         addr_nop_4();
3148         break;
3149       case 3:
3150         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
3151         emit_int8(0x66); // size prefix
3152       case 2:
3153         emit_int8(0x66); // size prefix
3154       case 1:
3155         emit_int8((unsigned char)0x90);
3156                          // nop
3157         break;
3158       default:
3159         assert(i == 0, " ");
3160     }
3161     return;
3162   }
3163 
3164   if (UseAddressNop && VM_Version::is_zx()) {
3165     //
3166     // Using multi-bytes nops "0x0F 0x1F [address]" for ZX
3167     //  1: 0x90
3168     //  2: 0x66 0x90
3169     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
3170     //  4: 0x0F 0x1F 0x40 0x00
3171     //  5: 0x0F 0x1F 0x44 0x00 0x00
3172     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
3173     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3174     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3175     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3176     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3177     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3178 
3179     // The rest coding is ZX specific - don't use consecutive address nops
3180 
3181     // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3182     // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3183     // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3184     // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3185 
3186     while (i >= 15) {
3187       // For ZX don't generate consecutive addess nops (mix with regular nops)
3188       i -= 15;
3189       emit_int8(0x66);   // size prefix
3190       emit_int8(0x66);   // size prefix
3191       emit_int8(0x66);   // size prefix
3192       addr_nop_8();
3193       emit_int8(0x66);   // size prefix
3194       emit_int8(0x66);   // size prefix
3195       emit_int8(0x66);   // size prefix
3196       emit_int8((unsigned char)0x90);
3197                          // nop
3198     }
3199     switch (i) {
3200       case 14:
3201         emit_int8(0x66); // size prefix
3202       case 13:
3203         emit_int8(0x66); // size prefix
3204       case 12:
3205         addr_nop_8();
3206         emit_int8(0x66); // size prefix
3207         emit_int8(0x66); // size prefix
3208         emit_int8(0x66); // size prefix
3209         emit_int8((unsigned char)0x90);
3210                          // nop
3211         break;
3212       case 11:
3213         emit_int8(0x66); // size prefix
3214       case 10:
3215         emit_int8(0x66); // size prefix
3216       case 9:
3217         emit_int8(0x66); // size prefix
3218       case 8:
3219         addr_nop_8();
3220         break;
3221       case 7:
3222         addr_nop_7();
3223         break;
3224       case 6:
3225         emit_int8(0x66); // size prefix
3226       case 5:
3227         addr_nop_5();
3228         break;
3229       case 4:
3230         addr_nop_4();
3231         break;
3232       case 3:
3233         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
3234         emit_int8(0x66); // size prefix
3235       case 2:
3236         emit_int8(0x66); // size prefix
3237       case 1:
3238         emit_int8((unsigned char)0x90);
3239                          // nop
3240         break;
3241       default:
3242         assert(i == 0, " ");
3243     }
3244     return;
3245   }
3246 
3247   // Using nops with size prefixes "0x66 0x90".
3248   // From AMD Optimization Guide:
3249   //  1: 0x90
3250   //  2: 0x66 0x90
3251   //  3: 0x66 0x66 0x90
3252   //  4: 0x66 0x66 0x66 0x90
3253   //  5: 0x66 0x66 0x90 0x66 0x90
3254   //  6: 0x66 0x66 0x90 0x66 0x66 0x90
3255   //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
3256   //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
3257   //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
3258   // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
3259   //
3260   while(i > 12) {
3261     i -= 4;
3262     emit_int8(0x66); // size prefix
3263     emit_int8(0x66);
3264     emit_int8(0x66);
3265     emit_int8((unsigned char)0x90);
3266                      // nop
3267   }
3268   // 1 - 12 nops
3269   if(i > 8) {
3270     if(i > 9) {
3271       i -= 1;
3272       emit_int8(0x66);
3273     }
3274     i -= 3;
3275     emit_int8(0x66);
3276     emit_int8(0x66);
3277     emit_int8((unsigned char)0x90);
3278   }
3279   // 1 - 8 nops
3280   if(i > 4) {
3281     if(i > 6) {
3282       i -= 1;
3283       emit_int8(0x66);
3284     }
3285     i -= 3;
3286     emit_int8(0x66);
3287     emit_int8(0x66);
3288     emit_int8((unsigned char)0x90);
3289   }
3290   switch (i) {
3291     case 4:
3292       emit_int8(0x66);
3293     case 3:
3294       emit_int8(0x66);
3295     case 2:
3296       emit_int8(0x66);
3297     case 1:
3298       emit_int8((unsigned char)0x90);
3299       break;
3300     default:
3301       assert(i == 0, " ");
3302   }
3303 }
3304 
// NOT r/m32 — one's-complement negate of a 32-bit register.
// Encoding: F7 /2 (the 0xD0 ModRM base puts reg field = 2).
void Assembler::notl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xD0 | encode));
}
3310 
// OR m32, imm32 — encoding 81 /1 (rcx's register number 1 supplies the
// /1 opcode extension to emit_arith_operand).
void Assembler::orl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rcx, dst, imm32);
}
3316 
// OR r32, imm32 — encoding 81 /1 (0xC8 ModRM base = mod 11, reg field 1).
void Assembler::orl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC8, dst, imm32);
}
3321 
// OR r32, m32 — encoding 0B /r.
void Assembler::orl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x0B);
  emit_operand(dst, src);
}
3328 
// OR r32, r32 — encoding 0B /r (register-direct form via emit_arith).
void Assembler::orl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x0B, 0xC0, dst, src);
}
3333 
// OR m32, r32 — encoding 09 /r.
void Assembler::orl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x09);
  emit_operand(src, dst);
}
3340 
// PACKUSWB xmm, m128 — pack signed words into unsigned bytes with
// saturation. Encoding: 66 0F 67 /r. Memory form is only emitted under
// AVX (see assert): legacy SSE form would require 16-byte alignment.
void Assembler::packuswb(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x67);
  emit_operand(dst, src);
}
3351 
// PACKUSWB xmm, xmm — pack signed words into unsigned bytes with
// saturation. Encoding: 66 0F 67 /r.
void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x67);
  emit_int8((unsigned char)(0xC0 | encode));
}
3359 
// VPACKUSWB — AVX/AVX-512 three-operand form of PACKUSWB.
// Encoding: VEX/EVEX.66.0F 67 /r; vector_len selects 128/256/512-bit.
void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "some form of AVX must be enabled");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x67);
  emit_int8((unsigned char)(0xC0 | encode));
}
3367 
3368 void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
3369   assert(VM_Version::supports_avx2(), "");
3370   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3371   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3372   emit_int8(0x00);
3373   emit_int8(0xC0 | encode);
3374   emit_int8(imm8);
3375 }
3376 
3377 void Assembler::vperm2i128(XMMRegister dst,  XMMRegister nds, XMMRegister src, int imm8) {
3378   assert(VM_Version::supports_avx2(), "");
3379   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3380   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3381   emit_int8(0x46);
3382   emit_int8(0xC0 | encode);
3383   emit_int8(imm8);
3384 }
3385 
3386 void Assembler::vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) {
3387   assert(VM_Version::supports_avx(), "");
3388   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3389   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3390   emit_int8(0x06);
3391   emit_int8(0xC0 | encode);
3392   emit_int8(imm8);
3393 }
3394 
3395 
// PAUSE — spin-loop hint. Encoding: F3 90 (REP-prefixed NOP).
void Assembler::pause() {
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)0x90);
}
3400 
// UD2 — guaranteed undefined-instruction trap. Encoding: 0F 0B.
void Assembler::ud2() {
  emit_int8(0x0F);
  emit_int8(0x0B);
}
3405 
// PCMPESTRI xmm, m128, imm8 — packed compare explicit-length strings,
// return index. Encoding: 66 0F 3A 61 /r ib (SSE4.2).
void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
  assert(VM_Version::supports_sse4_2(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x61);
  emit_operand(dst, src);
  emit_int8(imm8);
}
3415 
// PCMPESTRI xmm, xmm, imm8 — register form; see Address overload above.
// Encoding: 66 0F 3A 61 /r ib (SSE4.2).
void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sse4_2(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x61);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(imm8);
}
3424 
// PCMPEQB xmm, xmm — byte-wise equality compare; equal lanes become all-ones
// in dst, non-equal lanes are zeroed. Encoding: 66 0F 74 /r (SSE2).
void Assembler::pcmpeqb(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse2(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x74);
  emit_int8((unsigned char)(0xC0 | encode));
}
3433 
// VPCMPEQB — AVX three-operand byte-wise equality compare; equal lanes become
// all-ones in dst, non-equal lanes are zeroed. Encoding: VEX.66.0F 74 /r.
void Assembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x74);
  emit_int8((unsigned char)(0xC0 | encode));
}
3442 
// EVEX VPCMPEQB k, xmm, xmm — byte-wise equality compare writing the result
// mask into opmask register kdst. Encoding: EVEX.66.0F 74 /r (AVX-512BW).
void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx512bw(), "");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x74);
  emit_int8((unsigned char)(0xC0 | encode));
}
3452 
// EVEX VPCMPGTB k, xmm, m — signed byte-wise greater-than compare writing the
// result mask into kdst. Encoding: EVEX.66.0F 64 /r (AVX-512VL+BW).
void Assembler::evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx512vlbw(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_is_evex_instruction();
  int dst_enc = kdst->encoding();
  // The mask register number is re-wrapped as a Register purely to reuse the
  // ModRM/SIB emission helper; only the encoding bits matter here.
  vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x64);
  emit_operand(as_Register(dst_enc), src);
}
3464 
// EVEX VPCMPGTB k{mask}, xmm, m — merge-masked variant: only lanes enabled in
// `mask` (embedded as the EVEX opmask specifier) are compared into kdst.
// Encoding: EVEX.66.0F 64 /r (AVX-512VL+BW).
void Assembler::evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) {
  assert(is_vector_masking(), "");
  assert(VM_Version::supports_avx512vlbw(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.reset_is_clear_context();
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.set_is_evex_instruction();
  int dst_enc = kdst->encoding();
  vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x64);
  emit_operand(as_Register(dst_enc), src);
}
3479 
// EVEX VPCMPUW k, xmm, xmm, imm8 — unsigned word compare with predicate `vcc`
// (eq/lt/le/...), result mask written to kdst.
// Encoding: EVEX.66.0F3A.W1 3E /r ib (AVX-512VL+BW).
void Assembler::evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len) {
  assert(VM_Version::supports_avx512vlbw(), "");
  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x3E);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(vcc);
}
3489 
// EVEX VPCMPUW k{mask}, xmm, xmm, imm8 — merge-masked variant of the above;
// `mask` is embedded as the EVEX opmask specifier.
// Encoding: EVEX.66.0F3A.W1 3E /r ib (AVX-512VL+BW).
void Assembler::evpcmpuw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len) {
  assert(is_vector_masking(), "");
  assert(VM_Version::supports_avx512vlbw(), "");
  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  attributes.reset_is_clear_context();
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x3E);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(vcc);
}
3502 
3503 void Assembler::evpcmpuw(KRegister kdst, XMMRegister nds, Address src, ComparisonPredicate vcc, int vector_len) {
3504   assert(VM_Version::supports_avx512vlbw(), "");
3505   InstructionMark im(this);
3506   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3507   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3508   attributes.set_is_evex_instruction();
3509   int dst_enc = kdst->encoding();
3510   vex_prefix(src, nds->encoding(), kdst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3511   emit_int8(0x3E);
3512   emit_operand(as_Register(dst_enc), src);
3513   emit_int8(vcc);
3514 }
3515 
// EVEX VPCMPEQB k, xmm, m — byte-wise equality compare against memory,
// result mask written to kdst. Encoding: EVEX.66.0F 74 /r (AVX-512BW).
void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx512bw(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_is_evex_instruction();
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  int dst_enc = kdst->encoding();
  vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x74);
  emit_operand(as_Register(dst_enc), src);
}
3527 
// EVEX VPCMPEQB k{mask}, xmm, m — merge-masked byte-wise equality compare
// against memory; `mask` is embedded as the EVEX opmask specifier.
// Encoding: EVEX.66.0F 74 /r (AVX-512VL+BW). Stub-code use only (see assert).
void Assembler::evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx512vlbw(), "");
  assert(is_vector_masking(), "");    // For stub code use only
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_reg_mask */ false, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.reset_is_clear_context();
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.set_is_evex_instruction();
  vex_prefix(src, nds->encoding(), kdst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x74);
  emit_operand(as_Register(kdst->encoding()), src);
}
3541 
// PCMPEQW xmm, xmm — word-wise equality compare; equal lanes become all-ones
// in dst, non-equal lanes are zeroed. Encoding: 66 0F 75 /r (SSE2).
void Assembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse2(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x75);
  emit_int8((unsigned char)(0xC0 | encode));
}
3550 
// VPCMPEQW — AVX three-operand word-wise equality compare; equal lanes become
// all-ones in dst, non-equal lanes are zeroed. Encoding: VEX.66.0F 75 /r.
void Assembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x75);
  emit_int8((unsigned char)(0xC0 | encode));
}
3559 
// EVEX VPCMPEQW k, xmm, xmm — word-wise equality compare writing the result
// mask into kdst. Encoding: EVEX.66.0F 75 /r (AVX-512BW).
void Assembler::evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx512bw(), "");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x75);
  emit_int8((unsigned char)(0xC0 | encode));
}
3569 
// EVEX VPCMPEQW k, xmm, m — word-wise equality compare against memory,
// result mask written to kdst. Encoding: EVEX.66.0F 75 /r (AVX-512BW).
void Assembler::evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx512bw(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_is_evex_instruction();
  int dst_enc = kdst->encoding();
  vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x75);
  emit_operand(as_Register(dst_enc), src);
}
3581 
// PCMPEQD xmm, xmm — dword-wise equality compare; equal lanes become all-ones
// in dst, non-equal lanes are zeroed. Encoding: 66 0F 76 /r (SSE2).
void Assembler::pcmpeqd(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse2(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x76);
  emit_int8((unsigned char)(0xC0 | encode));
}
3590 
// VPCMPEQD — AVX three-operand dword-wise equality compare; equal lanes become
// all-ones in dst, non-equal lanes are zeroed. Encoding: VEX.66.0F 76 /r.
void Assembler::vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x76);
  emit_int8((unsigned char)(0xC0 | encode));
}
3599 
// EVEX VPCMPEQD k, xmm, xmm — dword-wise equality compare writing the result
// mask into kdst. Encoding: EVEX.66.0F 76 /r (AVX-512F).
void Assembler::evpcmpeqd(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  attributes.reset_is_clear_context();
  int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x76);
  emit_int8((unsigned char)(0xC0 | encode));
}
3610 
// EVEX VPCMPEQD k, xmm, m — dword-wise equality compare against memory,
// result mask written to kdst. Encoding: EVEX.66.0F 76 /r (AVX-512F).
void Assembler::evpcmpeqd(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  attributes.reset_is_clear_context();
  attributes.set_is_evex_instruction();
  int dst_enc = kdst->encoding();
  vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x76);
  emit_operand(as_Register(dst_enc), src);
}
3623 
// PCMPEQQ xmm, xmm — qword-wise equality compare; equal lanes become all-ones
// in dst, non-equal lanes are zeroed. Encoding: 66 0F 38 29 /r (SSE4.1).
void Assembler::pcmpeqq(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x29);
  emit_int8((unsigned char)(0xC0 | encode));
}
3632 
// VPCMPEQQ — AVX three-operand qword-wise equality compare; equal lanes become
// all-ones in dst, non-equal lanes are zeroed. Encoding: VEX.66.0F38 29 /r.
void Assembler::vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x29);
  emit_int8((unsigned char)(0xC0 | encode));
}
3641 
// EVEX VPCMPEQQ k, xmm, xmm — qword-wise equality compare writing the result
// mask into kdst. Encoding: EVEX.66.0F38.W1 29 /r (AVX-512F).
void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.reset_is_clear_context();
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x29);
  emit_int8((unsigned char)(0xC0 | encode));
}
3652 
// EVEX VPCMPEQQ k, xmm, m — qword-wise equality compare against memory,
// result mask written to kdst. Encoding: EVEX.66.0F38.W1 29 /r (AVX-512F).
void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.reset_is_clear_context();
  attributes.set_is_evex_instruction();
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  int dst_enc = kdst->encoding();
  vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x29);
  emit_operand(as_Register(dst_enc), src);
}
3666 
// PMOVMSKB r32, xmm — move the sign bits of each byte lane into a GPR bitmask.
// Encoding: 66 0F D7 /r (SSE2).
void Assembler::pmovmskb(Register dst, XMMRegister src) {
  assert(VM_Version::supports_sse2(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  // GPR destination is re-wrapped as an XMMRegister so the SIMD prefix helper
  // can encode its register number; only the encoding bits matter.
  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD7);
  emit_int8((unsigned char)(0xC0 | encode));
}
3674 
// VPMOVMSKB r32, ymm — 256-bit byte sign-bit extraction into a GPR bitmask.
// Encoding: VEX.256.66.0F D7 /r (AVX2).
void Assembler::vpmovmskb(Register dst, XMMRegister src) {
  assert(VM_Version::supports_avx2(), "");
  InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD7);
  emit_int8((unsigned char)(0xC0 | encode));
}
3682 
// PEXTRD r32, xmm, imm8 — extract dword lane imm8 into a GPR.
// Encoding: 66 0F 3A 16 /r ib (SSE4.1). Note operand roles are swapped in the
// prefix helper because the ModRM reg field holds the XMM source.
void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x16);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(imm8);
}
3691 
// PEXTRD m32, xmm, imm8 — extract dword lane imm8 to memory.
// Encoding: 66 0F 3A 16 /r ib (SSE4.1).
void Assembler::pextrd(Address dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x16);
  emit_operand(src, dst);
  emit_int8(imm8);
}
3701 
// PEXTRQ r64, xmm, imm8 — extract qword lane imm8 into a GPR.
// Encoding: 66 REX.W 0F 3A 16 /r ib (SSE4.1; rex_w selects the qword form).
void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x16);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(imm8);
}
3710 
// PEXTRQ m64, xmm, imm8 — extract qword lane imm8 to memory.
// Encoding: 66 REX.W 0F 3A 16 /r ib (SSE4.1).
void Assembler::pextrq(Address dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x16);
  emit_operand(src, dst);
  emit_int8(imm8);
}
3720 
// PEXTRW r32, xmm, imm8 — extract word lane imm8 into a GPR (zero-extended).
// Encoding: 66 0F C5 /r ib — the SSE2 register-only form.
void Assembler::pextrw(Register dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sse2(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xC5);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(imm8);
}
3729 
// PEXTRW m16, xmm, imm8 — extract word lane imm8 to memory; uses the SSE4.1
// 0F 3A 15 form (the 0F C5 form above cannot take a memory destination).
void Assembler::pextrw(Address dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
  simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8((unsigned char)0x15);
  emit_operand(src, dst);
  emit_int8(imm8);
}
3739 
// PEXTRB m8, xmm, imm8 — extract byte lane imm8 to memory.
// Encoding: 66 0F 3A 14 /r ib (SSE4.1).
void Assembler::pextrb(Address dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
  simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x14);
  emit_operand(src, dst);
  emit_int8(imm8);
}
3749 
// PINSRD xmm, r32, imm8: insert a dword from a GPR at the position selected by imm8
// (66 0F 3A 22 /r ib, SSE4.1). dst is passed as nds too since the other lanes merge from dst.
void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x22);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(imm8);
}
3758 
// PINSRD xmm, m32, imm8: insert a dword loaded from memory (66 0F 3A 22 /r ib, SSE4.1).
// NOTE(review): no InstructionMark here — confirm the Address never carries a relocation.
void Assembler::pinsrd(XMMRegister dst, Address src, int imm8) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x22);
  emit_operand(dst,src);
  emit_int8(imm8);
}
3768 
// PINSRQ xmm, r64, imm8: insert a qword from a GPR (66 REX.W 0F 3A 22 /r ib, SSE4.1).
void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
  assert(VM_Version::supports_sse4_1(), "");
  // rex_w widens the insert to 64 bits.
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x22);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(imm8);
}
3777 
// PINSRQ xmm, m64, imm8: insert a qword loaded from memory (66 REX.W 0F 3A 22 /r ib, SSE4.1).
// NOTE(review): no InstructionMark here — confirm the Address never carries a relocation.
void Assembler::pinsrq(XMMRegister dst, Address src, int imm8) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x22);
  emit_operand(dst, src);
  emit_int8(imm8);
}
3787 
// PINSRW xmm, r32, imm8: insert the low word of a GPR (66 0F C4 /r ib, SSE2).
void Assembler::pinsrw(XMMRegister dst, Register src, int imm8) {
  assert(VM_Version::supports_sse2(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xC4);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(imm8);
}
3796 
// PINSRW xmm, m16, imm8: insert a word loaded from memory (66 0F C4 /r ib, SSE2).
// NOTE(review): no InstructionMark here — confirm the Address never carries a relocation.
void Assembler::pinsrw(XMMRegister dst, Address src, int imm8) {
  assert(VM_Version::supports_sse2(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xC4);
  emit_operand(dst, src);
  emit_int8(imm8);
}
3806 
// PINSRB xmm, m8, imm8: insert a byte loaded from memory (66 0F 3A 20 /r ib, SSE4.1).
// NOTE(review): no InstructionMark here — confirm the Address never carries a relocation.
void Assembler::pinsrb(XMMRegister dst, Address src, int imm8) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x20);
  emit_operand(dst, src);
  emit_int8(imm8);
}
3816 
// PMOVZXBW xmm, m64: zero-extend 8 packed bytes from memory to 8 words
// (66 0F 38 30 /r, SSE4.1; EVEX tuple HVM — half-vector memory operand).
void Assembler::pmovzxbw(XMMRegister dst, Address src) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x30);
  emit_operand(dst, src);
}
3826 
// PMOVZXBW xmm, xmm: zero-extend the low 8 bytes of src to 8 words (66 0F 38 30 /r, SSE4.1).
void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x30);
  emit_int8((unsigned char)(0xC0 | encode));
}
3834 
// VPMOVZXBW: vector-length-parameterized byte-to-word zero extension from memory
// (VEX/EVEX.66.0F38 30 /r).
void Assembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  assert(dst != xnoreg, "sanity");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x30);
  emit_operand(dst, src);
}
3845 
// EVEX-masked VPMOVZXBW {k}: zero-extend bytes from memory under opmask `mask`
// (EVEX.66.0F38.W0 30 /r; requires AVX512VL+BW).
void Assembler::evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len) {
  assert(is_vector_masking(), "");
  assert(VM_Version::supports_avx512vlbw(), "");
  assert(dst != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.set_is_evex_instruction();
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x30);
  emit_operand(dst, src);
}
3859 
// VPMOVWB m, xmm: truncate packed words to bytes and store to memory
// (EVEX.F3.0F38.W0 30 /r; requires AVX512VL+BW). Destination is memory, so the
// register operand goes in the reg field and dst in r/m (note the swapped emit_operand args).
void Assembler::evpmovwb(Address dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx512vlbw(), "");
  assert(src != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.set_is_evex_instruction();
  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x30);
  emit_operand(src, dst);
}
3871 
// Masked VPMOVWB m{k}, xmm: word-to-byte truncating store under opmask `mask`.
// reset_is_clear_context selects merging (not zeroing) semantics for masked-out lanes.
void Assembler::evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len) {
  assert(is_vector_masking(), "");
  assert(VM_Version::supports_avx512vlbw(), "");
  assert(src != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
  attributes.reset_is_clear_context();
  attributes.set_embedded_opmask_register_specifier(mask);
  attributes.set_is_evex_instruction();
  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x30);
  emit_operand(src, dst);
}
3886 
// generic
// POP r: one-byte opcode 0x58 + register encoding (REX emitted by prefix_and_encode when needed).
void Assembler::pop(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8(0x58 | encode);
}
3892 
// POPCNT r32, m32: population count of a memory dword (F3 0F B8 /r).
// The mandatory F3 prefix must precede any REX prefix, hence it is emitted before prefix().
void Assembler::popcntl(Register dst, Address src) {
  assert(VM_Version::supports_popcnt(), "must support");
  InstructionMark im(this);
  emit_int8((unsigned char)0xF3);
  prefix(src, dst);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xB8);
  emit_operand(dst, src);
}
3902 
// POPCNT r32, r32 (F3 0F B8 /r). F3 is emitted before the (possible) REX prefix.
void Assembler::popcntl(Register dst, Register src) {
  assert(VM_Version::supports_popcnt(), "must support");
  emit_int8((unsigned char)0xF3);
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xB8);
  emit_int8((unsigned char)(0xC0 | encode));
}
3911 
// VPOPCNTD: per-dword population count (EVEX.66.0F38.W0 55 /r; AVX512_VPOPCNTDQ).
void Assembler::vpopcntd(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_vpopcntdq(), "must support vpopcntdq feature");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x55);
  emit_int8((unsigned char)(0xC0 | encode));
}
3920 
// POPF/POPFQ: pop flags register (single-byte opcode 9D).
void Assembler::popf() {
  emit_int8((unsigned char)0x9D);
}
3924 
3925 #ifndef _LP64 // no 32bit push/pop on amd64
// POP m32: pop the stack top into memory (8F /0; rax supplies the /0 digit).
// 32-bit only — the enclosing #ifndef _LP64 excludes this on amd64.
void Assembler::popl(Address dst) {
  // NOTE: this will adjust stack by 8byte on 64bits
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x8F);
  emit_operand(rax, dst);
}
3933 #endif
3934 
// Shared helper for the prefetch family: emit any address/REX prefix plus the 0F escape byte.
void Assembler::prefetch_prefix(Address src) {
  prefix(src);
  emit_int8(0x0F);
}
3939 
// PREFETCHNTA m8 (0F 18 /0): non-temporal prefetch hint. rax supplies the /0 digit.
void Assembler::prefetchnta(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_int8(0x18);
  emit_operand(rax, src); // 0, src
}
3947 
// PREFETCH m8 (3DNow!, 0F 0D /0): prefetch-for-read hint. rax supplies the /0 digit.
void Assembler::prefetchr(Address src) {
  assert(VM_Version::supports_3dnow_prefetch(), "must support");
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_int8(0x0D);
  emit_operand(rax, src); // 0, src
}
3955 
// PREFETCHT0 m8 (0F 18 /1): prefetch into all cache levels. rcx supplies the /1 digit.
void Assembler::prefetcht0(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_int8(0x18);
  emit_operand(rcx, src); // 1, src
}
3963 
// PREFETCHT1 m8 (0F 18 /2): prefetch into L2 and up. rdx supplies the /2 digit.
void Assembler::prefetcht1(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_int8(0x18);
  emit_operand(rdx, src); // 2, src
}
3971 
// PREFETCHT2 m8 (0F 18 /3): prefetch into L3 and up. rbx supplies the /3 digit.
void Assembler::prefetcht2(Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_int8(0x18);
  emit_operand(rbx, src); // 3, src
}
3979 
// PREFETCHW m8 (0F 0D /1): prefetch with intent to write. rcx supplies the /1 digit.
void Assembler::prefetchw(Address src) {
  assert(VM_Version::supports_3dnow_prefetch(), "must support");
  InstructionMark im(this);
  prefetch_prefix(src);
  emit_int8(0x0D);
  emit_operand(rcx, src); // 1, src
}
3987 
// Emit a raw one-byte prefix (e.g. REX, segment override) from the Prefix enum.
void Assembler::prefix(Prefix p) {
  emit_int8(p);
}
3991 
// PSHUFB xmm, xmm: byte shuffle of dst using src as the control mask (66 0F 38 00 /r, SSSE3).
void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_ssse3(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x00);
  emit_int8((unsigned char)(0xC0 | encode));
}
3999 
// VPSHUFB: three-operand byte shuffle; 128-bit needs AVX, 256-bit needs AVX2.
void Assembler::vpshufb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
         vector_len == AVX_256bit? VM_Version::supports_avx2() :
         0, "");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x00);
  emit_int8((unsigned char)(0xC0 | encode));
}
4009 
// PSHUFB xmm, m128: byte shuffle with the control mask loaded from memory (66 0F 38 00 /r, SSSE3).
void Assembler::pshufb(XMMRegister dst, Address src) {
  assert(VM_Version::supports_ssse3(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x00);
  emit_operand(dst, src);
}
4019 
// PSHUFD xmm, xmm, imm8: dword shuffle controlled by `mode` (66 0F 70 /r ib, SSE2).
void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  // On AVX-512 parts without VL extensions, only 512-bit EVEX is legal — widen the encoding.
  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x70);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(mode & 0xFF);
}
4030 
// VPSHUFD: vector-length-parameterized dword shuffle; 128-bit needs AVX, 256-bit needs AVX2.
void Assembler::vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_len) {
  assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
         vector_len == AVX_256bit? VM_Version::supports_avx2() :
         0, "");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x70);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(mode & 0xFF);
}
4042 
// PSHUFD xmm, m128, imm8: dword shuffle with a memory source (66 0F 70 /r ib).
// Memory form is only emitted under AVX, where the 16-byte alignment rule of legacy SSE does not apply.
void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x70);
  emit_operand(dst, src);
  emit_int8(mode & 0xFF);
}
4055 
// PSHUFLW xmm, xmm, imm8: shuffle the low four words, upper qword copied through
// (F2 0F 70 /r ib, SSE2).
void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x70);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(mode & 0xFF);
}
4065 
// PSHUFLW xmm, m128, imm8: low-word shuffle with a memory source (F2 0F 70 /r ib).
// Memory form is only emitted under AVX (no 16-byte alignment requirement).
void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
  assert(isByte(mode), "invalid value");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x70);
  emit_operand(dst, src);
  emit_int8(mode & 0xFF);
}
// VSHUFI64X2: shuffle 128-bit lanes selected by imm8 (EVEX.66.0F3A.W1 43 /r ib).
void Assembler::evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
  assert(VM_Version::supports_evex(), "requires EVEX support");
  assert(vector_len == Assembler::AVX_256bit || vector_len == Assembler::AVX_512bit, "");
  // vex_w is always true here given the supports_evex() assert above (W1 form).
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x43);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(imm8 & 0xFF);
}
4088 
void Assembler::psrldq(XMMRegister dst, int shift) {
  // Shift RIGHT the 128-bit value in dst by `shift` bytes (PSRLDQ, 66 0F 73 /3 ib).
  // (The previous comment said "shift left" — that was a copy-paste from pslldq.)
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  // xmm3 in the reg field selects the /3 opcode extension.
  int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x73);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift);
}
4098 
void Assembler::pslldq(XMMRegister dst, int shift) {
  // Shift left 128 bit value in dst XMMRegister by shift number of bytes (PSLLDQ, 66 0F 73 /7 ib).
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
  // XMM7 is for /7 encoding: 66 0F 73 /7 ib
  int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x73);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift);
}
4109 
// PTEST xmm, m128: set ZF/CF from AND/ANDN of the operands (66 0F 38 17 /r, SSE4.1).
// Memory form only under AVX (legacy SSE would require 16-byte alignment).
void Assembler::ptest(XMMRegister dst, Address src) {
  assert(VM_Version::supports_sse4_1(), "");
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x17);
  emit_operand(dst, src);
}
4119 
// PTEST xmm, xmm (66 0F 38 17 /r, SSE4.1). legacy_mode: PTEST has no EVEX form.
void Assembler::ptest(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x17);
  emit_int8((unsigned char)(0xC0 | encode));
}
4127 
// VPTEST ymm, m256 (VEX.256.66.0F38 17 /r).
void Assembler::vptest(XMMRegister dst, Address src) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  assert(dst != xnoreg, "sanity");
  // swap src<->dst for encoding
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x17);
  emit_operand(dst, src);
}
4138 
// VPTEST ymm, ymm (VEX.256.66.0F38 17 /r).
void Assembler::vptest(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x17);
  emit_int8((unsigned char)(0xC0 | encode));
}
4146 
// PUNPCKLBW xmm, m128: interleave low bytes of dst and memory source (66 0F 60 /r, SSE2).
// Memory form only under AVX (legacy SSE would require 16-byte alignment).
void Assembler::punpcklbw(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x60);
  emit_operand(dst, src);
}
4157 
// PUNPCKLBW xmm, xmm: interleave low bytes (66 0F 60 /r, SSE2).
void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x60);
  emit_int8((unsigned char)(0xC0 | encode));
}
4165 
// PUNPCKLDQ xmm, m128: interleave low dwords with a memory source (66 0F 62 /r, SSE2).
// Memory form only under AVX (legacy SSE would require 16-byte alignment).
void Assembler::punpckldq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x62);
  emit_operand(dst, src);
}
4176 
// PUNPCKLDQ xmm, xmm: interleave low dwords (66 0F 62 /r, SSE2).
void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x62);
  emit_int8((unsigned char)(0xC0 | encode));
}
4184 
// PUNPCKLQDQ xmm, xmm: interleave low qwords (66 0F 6C /r, SSE2).
void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  // The EVEX form is W1 while VEX/legacy ignore W — mark W as revertible for non-EVEX emission.
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6C);
  emit_int8((unsigned char)(0xC0 | encode));
}
4193 
// PUSH imm32 (opcode 68 id).
void Assembler::push(int32_t imm32) {
  // in 64bits we push 64bits onto the stack but only
  // take a 32bit immediate
  emit_int8(0x68);
  emit_int32(imm32);
}
4200 
// PUSH r: one-byte opcode 0x50 + register encoding (REX emitted by prefix_and_encode when needed).
void Assembler::push(Register src) {
  int encode = prefix_and_encode(src->encoding());

  emit_int8(0x50 | encode);
}
4206 
// PUSHF/PUSHFQ: push flags register (single-byte opcode 9C).
void Assembler::pushf() {
  emit_int8((unsigned char)0x9C);
}
4210 
4211 #ifndef _LP64 // no 32bit push/pop on amd64
// PUSH m32: push a memory operand (FF /6; rsi supplies the /6 digit).
// 32-bit only — the enclosing #ifndef _LP64 excludes this on amd64.
void Assembler::pushl(Address src) {
  // Note this will push 64bit on 64bit
  InstructionMark im(this);
  prefix(src);
  emit_int8((unsigned char)0xFF);
  emit_operand(rsi, src);
}
4219 #endif
4220 
4221 void Assembler::rcll(Register dst, int imm8) {
4222   assert(isShiftCount(imm8), "illegal shift count");
4223   int encode = prefix_and_encode(dst->encoding());
4224   if (imm8 == 1) {
4225     emit_int8((unsigned char)0xD1);
4226     emit_int8((unsigned char)(0xD0 | encode));
4227   } else {
4228     emit_int8((unsigned char)0xC1);
4229     emit_int8((unsigned char)0xD0 | encode);
4230     emit_int8(imm8);
4231   }
4232 }
4233 
// RCPPS xmm, xmm: approximate reciprocal of packed single floats (0F 53 /r, SSE).
void Assembler::rcpps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x53);
  emit_int8((unsigned char)(0xC0 | encode));
}
4241 
// RCPSS xmm, xmm: approximate reciprocal of a scalar single float (F3 0F 53 /r, SSE).
void Assembler::rcpss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x53);
  emit_int8((unsigned char)(0xC0 | encode));
}
4249 
// RDTSC: read the time-stamp counter into EDX:EAX (0F 31).
void Assembler::rdtsc() {
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)0x31);
}
4254 
// copies data from [esi] to [edi] using rcx pointer sized words
// generic
// REP MOVSQ on 64-bit (F3 REX.W A5), REP MOVSD on 32-bit (F3 A5).
void Assembler::rep_mov() {
  emit_int8((unsigned char)0xF3);
  // MOVSQ
  LP64_ONLY(prefix(REX_W));
  emit_int8((unsigned char)0xA5);
}
4263 
// sets rcx bytes with rax, value at [edi]
// REP STOSB (F3 AA). NOTE(review): the REX.W prefix has no effect on the byte-sized
// STOSB operation itself — presumably emitted for uniformity with rep_stos; confirm.
void Assembler::rep_stosb() {
  emit_int8((unsigned char)0xF3); // REP
  LP64_ONLY(prefix(REX_W));
  emit_int8((unsigned char)0xAA); // STOSB
}
4270 
// sets rcx pointer sized words with rax, value at [edi]
// generic
void Assembler::rep_stos() {
  emit_int8((unsigned char)0xF3); // REP
  LP64_ONLY(prefix(REX_W));       // LP64:STOSQ, LP32:STOSD
  emit_int8((unsigned char)0xAB);
}
4278 
// scans rcx pointer sized words at [edi] for occurrence of rax
// generic
// REPNE SCASQ on 64-bit (F2 REX.W AF), REPNE SCASD on 32-bit (F2 AF).
void Assembler::repne_scan() { // repne_scan
  emit_int8((unsigned char)0xF2);
  // SCASQ
  LP64_ONLY(prefix(REX_W));
  emit_int8((unsigned char)0xAF);
}
4287 
4288 #ifdef _LP64
// scans rcx 4 byte words at [edi] for occurrence of eax
// generic
// REPNE SCASD (F2 AF) — no REX.W, so this scans dwords even on 64-bit.
void Assembler::repne_scanl() { // repne_scan
  emit_int8((unsigned char)0xF2);
  // SCASL
  emit_int8((unsigned char)0xAF);
}
4296 #endif
4297 
4298 void Assembler::ret(int imm16) {
4299   if (imm16 == 0) {
4300     emit_int8((unsigned char)0xC3);
4301   } else {
4302     emit_int8((unsigned char)0xC2);
4303     emit_int16(imm16);
4304   }
4305 }
4306 
// SAHF: load AH into the low flags byte (opcode 9E).
// Deliberately disallowed in 64-bit builds (LAHF/SAHF availability is CPU-dependent there).
void Assembler::sahf() {
#ifdef _LP64
  // Not supported in 64bit mode
  ShouldNotReachHere();
#endif
  emit_int8((unsigned char)0x9E);
}
4314 
4315 void Assembler::sarl(Register dst, int imm8) {
4316   int encode = prefix_and_encode(dst->encoding());
4317   assert(isShiftCount(imm8), "illegal shift count");
4318   if (imm8 == 1) {
4319     emit_int8((unsigned char)0xD1);
4320     emit_int8((unsigned char)(0xF8 | encode));
4321   } else {
4322     emit_int8((unsigned char)0xC1);
4323     emit_int8((unsigned char)(0xF8 | encode));
4324     emit_int8(imm8);
4325   }
4326 }
4327 
// SAR r32, CL: arithmetic right shift by the count in CL (D3 /7).
void Assembler::sarl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xF8 | encode));
}
4333 
// SBB m32, imm32: subtract with borrow into memory (81 /3 id, or the sign-extended
// 83 /3 ib short form chosen by emit_arith_operand; rbx supplies the /3 digit).
void Assembler::sbbl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rbx, dst, imm32);
}
4339 
// SBB r32, imm32 (81 /3 id family; emit_arith picks the short 83 form when it fits).
void Assembler::sbbl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD8, dst, imm32);
}
4344 
4345 
// SBB r32, m32: subtract memory operand with borrow (1B /r).
void Assembler::sbbl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x1B);
  emit_operand(dst, src);
}
4352 
// SBB r32, r32 (1B /r).
void Assembler::sbbl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x1B, 0xC0, dst, src);
}
4357 
4358 void Assembler::setb(Condition cc, Register dst) {
4359   assert(0 <= cc && cc < 16, "illegal cc");
4360   int encode = prefix_and_encode(dst->encoding(), true);
4361   emit_int8(0x0F);
4362   emit_int8((unsigned char)0x90 | cc);
4363   emit_int8((unsigned char)(0xC0 | encode));
4364 }
4365 
// PALIGNR xmm, xmm, imm8: concatenate and byte-align right by imm8 (66 0F 3A 0F /r ib, SSSE3).
void Assembler::palignr(XMMRegister dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_ssse3(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(imm8);
}
4374 
// VPALIGNR: three-operand byte alignment; 128-bit needs AVX, 256-bit needs AVX2.
void Assembler::vpalignr(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
  assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
         vector_len == AVX_256bit? VM_Version::supports_avx2() :
         0, "");
  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(imm8);
}
4385 
// PBLENDW xmm, xmm, imm8: blend words under the imm8 bit mask (66 0F 3A 0E /r ib, SSE4.1).
void Assembler::pblendw(XMMRegister dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8((unsigned char)0x0E);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(imm8);
}
4394 
// SHA1RNDS4 xmm, xmm, imm8: four SHA-1 rounds (NP 0F 3A CC /r ib; SHA extensions).
// SHA instructions are legacy-encoded only, hence rex_prefix_and_encode.
void Assembler::sha1rnds4(XMMRegister dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sha(), "");
  int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3A, /* rex_w */ false);
  emit_int8((unsigned char)0xCC);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8((unsigned char)imm8);
}
4402 
// SHA1NEXTE xmm, xmm (NP 0F 38 C8 /r; SHA extensions).
void Assembler::sha1nexte(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sha(), "");
  int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
  emit_int8((unsigned char)0xC8);
  emit_int8((unsigned char)(0xC0 | encode));
}
4409 
// SHA1MSG1 xmm, xmm (NP 0F 38 C9 /r; SHA extensions).
void Assembler::sha1msg1(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sha(), "");
  int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
  emit_int8((unsigned char)0xC9);
  emit_int8((unsigned char)(0xC0 | encode));
}
4416 
// SHA1MSG2 xmm, xmm (NP 0F 38 CA /r; SHA extensions).
void Assembler::sha1msg2(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sha(), "");
  int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
  emit_int8((unsigned char)0xCA);
  emit_int8((unsigned char)(0xC0 | encode));
}
4423 
// xmm0 is implicit additional source to this instruction.
// SHA256RNDS2 xmm, xmm, <XMM0> (NP 0F 38 CB /r; SHA extensions).
void Assembler::sha256rnds2(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sha(), "");
  int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
  emit_int8((unsigned char)0xCB);
  emit_int8((unsigned char)(0xC0 | encode));
}
4431 
// SHA256MSG1 xmm, xmm (NP 0F 38 CC /r; SHA extensions).
void Assembler::sha256msg1(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sha(), "");
  int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
  emit_int8((unsigned char)0xCC);
  emit_int8((unsigned char)(0xC0 | encode));
}
4438 
// SHA256MSG2 xmm, xmm (NP 0F 38 CD /r; SHA extensions).
void Assembler::sha256msg2(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sha(), "");
  int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
  emit_int8((unsigned char)0xCD);
  emit_int8((unsigned char)(0xC0 | encode));
}
4445 
4446 
// shll dst, imm8 — 32-bit shift left logical by immediate.
// Uses the shorter D1 /4 encoding (no immediate byte) when the count is
// exactly 1, otherwise C1 /4 ib. ModRM reg field /4 => 0xE0 | encode.
void Assembler::shll(Register dst, int imm8) {
  assert(isShiftCount(imm8), "illegal shift count");
  int encode = prefix_and_encode(dst->encoding());
  if (imm8 == 1 ) {
    emit_int8((unsigned char)0xD1);
    emit_int8((unsigned char)(0xE0 | encode));
  } else {
    emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xE0 | encode));
    emit_int8(imm8);
  }
}

// shll dst — 32-bit shift left logical by CL: D3 /4.
void Assembler::shll(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xE0 | encode));
}

// shrl dst, imm8 — 32-bit shift right logical by immediate: C1 /5 ib.
// (Unlike shll above, no D1 short form is used for a count of 1.)
void Assembler::shrl(Register dst, int imm8) {
  assert(isShiftCount(imm8), "illegal shift count");
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xC1);
  emit_int8((unsigned char)(0xE8 | encode));
  emit_int8(imm8);
}

// shrl dst — 32-bit shift right logical by CL: D3 /5.
void Assembler::shrl(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xE8 | encode));
}
4479 
// smovl — string move, opcode A5. Without an operand-size prefix this is
// the doubleword (32-bit) MOVS form: copies [esi]/[rsi] to [edi]/[rdi].
void Assembler::smovl() {
  emit_int8((unsigned char)0xA5);
}
4484 
// sqrtsd dst, src — scalar double-precision square root: F2 0F 51 /r.
void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x51);
  emit_int8((unsigned char)(0xC0 | encode));
}

// sqrtsd dst, mem — memory-source form; EVEX tuple T1S, 64-bit input size.
void Assembler::sqrtsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x51);
  emit_operand(dst, src);
}

// sqrtss dst, src — scalar single-precision square root: F3 0F 51 /r.
void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x51);
  emit_int8((unsigned char)(0xC0 | encode));
}

// std — set the direction flag (DF=1): opcode FD.
void Assembler::std() {
  emit_int8((unsigned char)0xFD);
}

// sqrtss dst, mem — memory-source form; EVEX tuple T1S, 32-bit input size.
void Assembler::sqrtss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x51);
  emit_operand(dst, src);
}
4526 
// stmxcsr mem — store the MXCSR control/status register: 0F AE /3.
// When AVX is available the VEX-encoded form is emitted instead; in both
// cases as_Register(3) supplies the /3 opcode-extension in the ModRM byte.
void Assembler::stmxcsr( Address dst) {
  if (UseAVX > 0 ) {
    assert(VM_Version::supports_avx(), "");
    InstructionMark im(this);
    InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
    vex_prefix(dst, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
    emit_int8((unsigned char)0xAE);
    emit_operand(as_Register(3), dst);
  } else {
    NOT_LP64(assert(VM_Version::supports_sse(), ""));
    InstructionMark im(this);
    prefix(dst);
    emit_int8(0x0F);
    emit_int8((unsigned char)0xAE);
    emit_operand(as_Register(3), dst);
  }
}
4544 
// subl mem, imm32 — rbp here is a dummy register supplying the /5
// opcode-extension for the SUB group (81 /5).
void Assembler::subl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rbp, dst, imm32);
}

// subl mem, reg: 29 /r.
void Assembler::subl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x29);
  emit_operand(src, dst);
}

// subl reg, imm32: 81 /5 id (emit_arith may select a shorter encoding).
void Assembler::subl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE8, dst, imm32);
}

// Force generation of a 4 byte immediate value even if it fits into 8bit
void Assembler::subl_imm32(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith_imm32(0x81, 0xE8, dst, imm32);
}

// subl reg, mem: 2B /r.
void Assembler::subl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x2B);
  emit_operand(dst, src);
}

// subl reg, reg: 2B /r, register-direct form.
void Assembler::subl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x2B, 0xC0, dst, src);
}
4580 
// subsd dst, src — scalar double-precision subtract: F2 0F 5C /r.
void Assembler::subsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_int8((unsigned char)(0xC0 | encode));
}

// subsd dst, mem — memory-source form; EVEX tuple T1S, 64-bit input size.
void Assembler::subsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_operand(dst, src);
}

// subss dst, src — scalar single-precision subtract: F3 0F 5C /r.
void Assembler::subss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true , /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_int8((unsigned char)(0xC0 | encode));
}

// subss dst, mem — memory-source form; EVEX tuple T1S, 32-bit input size.
void Assembler::subss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_operand(dst, src);
}
4618 
// testb reg, imm8: F6 /0 ib — byte register form; the 'true' argument to
// prefix_and_encode requests byte-register prefix handling.
void Assembler::testb(Register dst, int imm8) {
  NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
  (void) prefix_and_encode(dst->encoding(), true);
  emit_arith_b(0xF6, 0xC0, dst, imm8);
}

// testb mem, imm8: F6 /0 ib — rax supplies the /0 opcode extension; the
// trailing 1 tells emit_operand that one immediate byte follows the
// operand (rip-relative displacement correction).
void Assembler::testb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0xF6);
  emit_operand(rax, dst, 1);
  emit_int8(imm8);
}

// testl reg, imm32: uses the one-byte-shorter A9 id form when the target
// is rax/eax (encode == 0), otherwise F7 /0 id.
void Assembler::testl(Register dst, int32_t imm32) {
  // not using emit_arith because test
  // doesn't support sign-extension of
  // 8bit operands
  int encode = dst->encoding();
  if (encode == 0) {
    emit_int8((unsigned char)0xA9);
  } else {
    encode = prefix_and_encode(encode);
    emit_int8((unsigned char)0xF7);
    emit_int8((unsigned char)(0xC0 | encode));
  }
  emit_int32(imm32);
}

// testl reg, reg: 85 /r.
void Assembler::testl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x85, 0xC0, dst, src);
}

// testl reg, mem: 85 /r.
void Assembler::testl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8((unsigned char)0x85);
  emit_operand(dst, src);
}
4659 
4660 void Assembler::tzcntl(Register dst, Register src) {
4661   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
4662   emit_int8((unsigned char)0xF3);
4663   int encode = prefix_and_encode(dst->encoding(), src->encoding());
4664   emit_int8(0x0F);
4665   emit_int8((unsigned char)0xBC);
4666   emit_int8((unsigned char)0xC0 | encode);
4667 }
4668 
// tzcntq dst, src: F3 REX.W 0F BC /r — 64-bit trailing zero count (BMI1).
// The mandatory F3 prefix is emitted before the REX.W prefix.
void Assembler::tzcntq(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
  emit_int8((unsigned char)0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBC);
  emit_int8((unsigned char)(0xC0 | encode));
}
4677 
// ucomisd dst, mem — unordered compare scalar double, sets EFLAGS:
// 66 0F 2E /r; EVEX tuple T1S, 64-bit input size.
void Assembler::ucomisd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x2E);
  emit_operand(dst, src);
}

// ucomisd dst, src — register form: 66 0F 2E /r.
void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x2E);
  emit_int8((unsigned char)(0xC0 | encode));
}

// ucomiss dst, mem — unordered compare scalar single, sets EFLAGS:
// NP 0F 2E /r; EVEX tuple T1S, 32-bit input size.
void Assembler::ucomiss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x2E);
  emit_operand(dst, src);
}

// ucomiss dst, src — register form: NP 0F 2E /r.
void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x2E);
  emit_int8((unsigned char)(0xC0 | encode));
}
4715 
// xabort imm8 — abort an RTM transaction with the given status: C6 F8 ib.
void Assembler::xabort(int8_t imm8) {
  emit_int8((unsigned char)0xC6);
  emit_int8((unsigned char)0xF8);
  emit_int8((unsigned char)(imm8 & 0xFF));
}
4721 
// xaddb mem, reg — exchange-and-add, byte form: 0F C0 /r.
void Assembler::xaddb(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src, true);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xC0);
  emit_operand(src, dst);
}

// xaddw mem, reg — word form: 66 0F C1 /r (operand-size prefix first).
void Assembler::xaddw(Address dst, Register src) {
  InstructionMark im(this);
  emit_int8(0x66);
  prefix(dst, src);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xC1);
  emit_operand(src, dst);
}

// xaddl mem, reg — doubleword form: 0F C1 /r.
void Assembler::xaddl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xC1);
  emit_operand(src, dst);
}
4746 
// xbegin — start an RTM transaction: C7 F8 rel32.
// If the abort label is already bound, emit the displacement relative to
// the end of this 6-byte instruction (2 opcode bytes + 4-byte rel32);
// otherwise register a patch site on the label and emit a 0 placeholder.
void Assembler::xbegin(Label& abort, relocInfo::relocType rtype) {
  InstructionMark im(this);
  relocate(rtype);
  if (abort.is_bound()) {
    address entry = target(abort);
    assert(entry != NULL, "abort entry NULL");
    intptr_t offset = entry - pc();
    emit_int8((unsigned char)0xC7);
    emit_int8((unsigned char)0xF8);
    emit_int32(offset - 6); // 2 opcode + 4 address
  } else {
    abort.add_patch_at(code(), locator());
    emit_int8((unsigned char)0xC7);
    emit_int8((unsigned char)0xF8);
    emit_int32(0);
  }
}
4764 
// xchgb reg, mem — exchange, byte form: 86 /r. xchg with a memory operand
// carries an implicit lock on x86.
void Assembler::xchgb(Register dst, Address src) { // xchg
  InstructionMark im(this);
  prefix(src, dst, true);
  emit_int8((unsigned char)0x86);
  emit_operand(dst, src);
}

// xchgw reg, mem — word form: 66 87 /r.
void Assembler::xchgw(Register dst, Address src) { // xchg
  InstructionMark im(this);
  emit_int8(0x66);
  prefix(src, dst);
  emit_int8((unsigned char)0x87);
  emit_operand(dst, src);
}

// xchgl reg, mem — doubleword form: 87 /r.
void Assembler::xchgl(Register dst, Address src) { // xchg
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8((unsigned char)0x87);
  emit_operand(dst, src);
}

// xchgl reg, reg — register-direct form: 87 /r.
void Assembler::xchgl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8((unsigned char)0x87);
  emit_int8((unsigned char)(0xC0 | encode));
}
4792 
// xend — commit an RTM transaction: 0F 01 D5.
void Assembler::xend() {
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)0x01);
  emit_int8((unsigned char)0xD5);
}

// xgetbv — read an extended control register (selected by ECX) into
// EDX:EAX: 0F 01 D0.
void Assembler::xgetbv() {
  emit_int8(0x0F);
  emit_int8(0x01);
  emit_int8((unsigned char)0xD0);
}
4804 
// xorl reg, imm32: 81 /6 id (emit_arith may select a shorter encoding).
void Assembler::xorl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF0, dst, imm32);
}

// xorl reg, mem: 33 /r.
void Assembler::xorl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x33);
  emit_operand(dst, src);
}

// xorl reg, reg: 33 /r, register-direct form.
void Assembler::xorl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x33, 0xC0, dst, src);
}

// xorb reg, mem — byte form: 32 /r.
void Assembler::xorb(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x32);
  emit_operand(dst, src);
}
4828 
// AVX 3-operands scalar float-point arithmetic instructions

// vaddsd dst, nds, mem: VEX.F2.0F 58 /r; EVEX tuple T1S, 64-bit input.
void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_operand(dst, src);
}

// vaddsd dst, nds, src — register form: VEX.F2.0F 58 /r.
void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_int8((unsigned char)(0xC0 | encode));
}

// vaddss dst, nds, mem: VEX.F3.0F 58 /r; EVEX tuple T1S, 32-bit input.
void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_operand(dst, src);
}

// vaddss dst, nds, src — register form: VEX.F3.0F 58 /r.
void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_int8((unsigned char)(0xC0 | encode));
}

// vdivsd dst, nds, mem: VEX.F2.0F 5E /r; EVEX tuple T1S, 64-bit input.
void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5E);
  emit_operand(dst, src);
}

// vdivsd dst, nds, src — register form: VEX.F2.0F 5E /r.
void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5E);
  emit_int8((unsigned char)(0xC0 | encode));
}

// vdivss dst, nds, mem: VEX.F3.0F 5E /r; EVEX tuple T1S, 32-bit input.
void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5E);
  emit_operand(dst, src);
}

// vdivss dst, nds, src — register form: VEX.F3.0F 5E /r.
void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5E);
  emit_int8((unsigned char)(0xC0 | encode));
}
4906 
// vfmadd231sd dst, src1, src2 — fused multiply-add, dst = src1*src2 + dst:
// VEX.66.0F38.W1 B9 /r (the W bit selects the sd form).
void Assembler::vfmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  assert(VM_Version::supports_fma(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xB9);
  emit_int8((unsigned char)(0xC0 | encode));
}

// vfmadd231ss dst, src1, src2 — single-precision variant:
// VEX.66.0F38.W0 B9 /r (same opcode as sd; W=0 selects the ss form).
void Assembler::vfmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  assert(VM_Version::supports_fma(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xB9);
  emit_int8((unsigned char)(0xC0 | encode));
}
4922 
// vmulsd dst, nds, mem: VEX.F2.0F 59 /r; EVEX tuple T1S, 64-bit input.
void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_operand(dst, src);
}

// vmulsd dst, nds, src — register form: VEX.F2.0F 59 /r.
void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_int8((unsigned char)(0xC0 | encode));
}

// vmulss dst, nds, mem: VEX.F3.0F 59 /r; EVEX tuple T1S, 32-bit input.
void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_operand(dst, src);
}

// vmulss dst, nds, src — register form: VEX.F3.0F 59 /r.
void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_int8((unsigned char)(0xC0 | encode));
}

// vsubsd dst, nds, mem: VEX.F2.0F 5C /r; EVEX tuple T1S, 64-bit input.
void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_operand(dst, src);
}

// vsubsd dst, nds, src — register form: VEX.F2.0F 5C /r.
void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_int8((unsigned char)(0xC0 | encode));
}

// vsubss dst, nds, mem: VEX.F3.0F 5C /r; EVEX tuple T1S, 32-bit input.
void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_operand(dst, src);
}

// vsubss dst, nds, src — register form: VEX.F3.0F 5C /r.
void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_int8((unsigned char)(0xC0 | encode));
}
4998 
//====================VECTOR ARITHMETIC=====================================

// Float-point vector arithmetic

// addpd dst, src — packed double-precision add: 66 0F 58 /r.
void Assembler::addpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_int8((unsigned char)(0xC0 | encode));
}

// addpd dst, mem — memory-source form; EVEX tuple FV, 64-bit input size.
void Assembler::addpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_operand(dst, src);
}
5022 
5023 
5024 void Assembler::addps(XMMRegister dst, XMMRegister src) {
5025   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5026   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5027   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5028   emit_int8(0x58);
5029   emit_int8((unsigned char)(0xC0 | encode));
5030 }
5031 
// vaddpd dst, nds, src — packed double add, register form: VEX.66.0F 58 /r.
void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_int8((unsigned char)(0xC0 | encode));
}

// vaddps dst, nds, src — packed single add, register form: VEX.0F 58 /r.
void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_int8((unsigned char)(0xC0 | encode));
}

// vaddpd dst, nds, mem — memory form; EVEX tuple FV, 64-bit input size.
void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_operand(dst, src);
}

// vaddps dst, nds, mem — memory form; EVEX tuple FV, 32-bit input size.
void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_operand(dst, src);
}
5069 
// subpd dst, src — packed double-precision subtract: 66 0F 5C /r.
void Assembler::subpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_int8((unsigned char)(0xC0 | encode));
}
5078 
5079 void Assembler::subps(XMMRegister dst, XMMRegister src) {
5080   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5081   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5082   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5083   emit_int8(0x5C);
5084   emit_int8((unsigned char)(0xC0 | encode));
5085 }
5086 
// vsubpd dst, nds, src — packed double subtract, register form:
// VEX.66.0F 5C /r.
void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_int8((unsigned char)(0xC0 | encode));
}

// vsubps dst, nds, src — packed single subtract, register form: VEX.0F 5C /r.
void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_int8((unsigned char)(0xC0 | encode));
}

// vsubpd dst, nds, mem — memory form; EVEX tuple FV, 64-bit input size.
void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_operand(dst, src);
}

// vsubps dst, nds, mem — memory form; EVEX tuple FV, 32-bit input size.
void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5C);
  emit_operand(dst, src);
}
5124 
// MULPD: packed double-precision multiply, legacy SSE two-operand form
// (66 0F 59 /r); dst is both source and destination.
void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_int8((unsigned char)(0xC0 | encode));
}
5133 
// MULPD with a memory source operand (66 0F 59 /r).
void Assembler::mulpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_operand(dst, src);
}
5144 
// MULPS: packed single-precision multiply, legacy SSE two-operand form
// (0F 59 /r, no SIMD prefix).
// NOTE(review): MULPS itself is SSE1; the SSE2 guard is stricter than the
// ISA requires — presumably the project's chosen baseline. Confirm before
// relaxing.
void Assembler::mulps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_int8((unsigned char)(0xC0 | encode));
}
5152 
// VMULPD: packed double multiply, three-operand register form (66 0F 59 /r).
void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_int8((unsigned char)(0xC0 | encode));
}
5161 
// VMULPS: packed single multiply, three-operand register form (0F 59 /r).
void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_int8((unsigned char)(0xC0 | encode));
}
5169 
// VMULPD with a memory source operand; 64-bit element size for EVEX
// address attributes.
void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_operand(dst, src);
}
5180 
// VMULPS with a memory source operand; 32-bit element size for EVEX
// address attributes.
void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x59);
  emit_operand(dst, src);
}
5190 
// VFMADD231PD: fused multiply-add, packed double, register form
// (VEX/EVEX.66.0F38.W1 B8 /r). Requires FMA3 support.
void Assembler::vfmadd231pd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len) {
  assert(VM_Version::supports_fma(), "");
  // vex_w is unconditionally 1: FMA encodings use W to select the pd form.
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xB8);
  emit_int8((unsigned char)(0xC0 | encode));
}
5198 
// VFMADD231PS: fused multiply-add, packed single, register form
// (VEX/EVEX.66.0F38.W0 B8 /r).
void Assembler::vfmadd231ps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len) {
  assert(VM_Version::supports_fma(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xB8);
  emit_int8((unsigned char)(0xC0 | encode));
}
5206 
// VFMADD231PD with a memory multiplicand; 64-bit element size for EVEX
// address attributes.
void Assembler::vfmadd231pd(XMMRegister dst, XMMRegister src1, Address src2, int vector_len) {
  assert(VM_Version::supports_fma(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xB8);
  emit_operand(dst, src2);
}
5216 
// VFMADD231PS with a memory multiplicand; 32-bit element size for EVEX
// address attributes.
void Assembler::vfmadd231ps(XMMRegister dst, XMMRegister src1, Address src2, int vector_len) {
  assert(VM_Version::supports_fma(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xB8);
  emit_operand(dst, src2);
}
5226 
// DIVPD: packed double-precision divide, legacy SSE two-operand form
// (66 0F 5E /r).
void Assembler::divpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5E);
  emit_int8((unsigned char)(0xC0 | encode));
}
5235 
// DIVPS: packed single-precision divide, legacy SSE two-operand form
// (0F 5E /r). Guard mirrors the other ps emitters in this file.
void Assembler::divps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5E);
  emit_int8((unsigned char)(0xC0 | encode));
}
5243 
// VDIVPD: packed double divide, three-operand register form (66 0F 5E /r).
void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5E);
  emit_int8((unsigned char)(0xC0 | encode));
}
5252 
// VDIVPS: packed single divide, three-operand register form (0F 5E /r).
void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5E);
  emit_int8((unsigned char)(0xC0 | encode));
}
5260 
// VDIVPD with a memory divisor; 64-bit element size for EVEX address
// attributes.
void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5E);
  emit_operand(dst, src);
}
5271 
// VDIVPS with a memory divisor; 32-bit element size for EVEX address
// attributes.
void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x5E);
  emit_operand(dst, src);
}
5281 
// VSQRTPD: packed double square root, register form (66 0F 51 /r).
// Unary: the vvvv/nds slot is 0 (unused).
void Assembler::vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x51);
  emit_int8((unsigned char)(0xC0 | encode));
}
5290 
// VSQRTPD with a memory source; 64-bit element size for EVEX address
// attributes.
void Assembler::vsqrtpd(XMMRegister dst, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x51);
  emit_operand(dst, src);
}
5301 
// VSQRTPS: packed single square root, register form (0F 51 /r). Unary:
// nds slot is 0.
void Assembler::vsqrtps(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x51);
  emit_int8((unsigned char)(0xC0 | encode));
}
5309 
5310 void Assembler::vsqrtps(XMMRegister dst, Address src, int vector_len) {
5311   assert(VM_Version::supports_avx(), "");
5312   InstructionMark im(this);
5313   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5314   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5315   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5316   emit_int8(0x51);
5317   emit_operand(dst, src);
5318 }
5319 
// ANDPD: packed-double bitwise AND, legacy SSE two-operand form
// (66 0F 54 /r). The DQ legacy-mode flag gates the EVEX form of the
// bitwise-logical pd/ps instructions (AVX512DQ).
void Assembler::andpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x54);
  emit_int8((unsigned char)(0xC0 | encode));
}
5328 
// ANDPS: packed-single bitwise AND, legacy SSE two-operand form (0F 54 /r).
void Assembler::andps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x54);
  emit_int8((unsigned char)(0xC0 | encode));
}
5336 
// ANDPS with a memory source; 32-bit element size for EVEX address
// attributes.
void Assembler::andps(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x54);
  emit_operand(dst, src);
}
5346 
// ANDPD with a memory source; 64-bit element size for EVEX address
// attributes.
void Assembler::andpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x54);
  emit_operand(dst, src);
}
5357 
// VANDPD: packed-double bitwise AND, three-operand register form
// (66 0F 54 /r); DQ legacy-mode flag as in andpd.
void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x54);
  emit_int8((unsigned char)(0xC0 | encode));
}
5366 
// VANDPS: packed-single bitwise AND, three-operand register form (0F 54 /r).
void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x54);
  emit_int8((unsigned char)(0xC0 | encode));
}
5374 
// VANDPD with a memory source; 64-bit element size for EVEX address
// attributes.
void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x54);
  emit_operand(dst, src);
}
5385 
// VANDPS with a memory source; 32-bit element size for EVEX address
// attributes.
void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x54);
  emit_operand(dst, src);
}
5395 
// UNPCKHPD: interleave high packed doubles, legacy SSE two-operand form
// (66 0F 15 /r).
void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x15);
  emit_int8((unsigned char)(0xC0 | encode));
}
5404 
// UNPCKLPD: interleave low packed doubles, legacy SSE two-operand form
// (66 0F 14 /r).
void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x14);
  emit_int8((unsigned char)(0xC0 | encode));
}
5413 
// XORPD: packed-double bitwise XOR, legacy SSE two-operand form
// (66 0F 57 /r); DQ legacy-mode flag as in andpd.
void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x57);
  emit_int8((unsigned char)(0xC0 | encode));
}
5422 
// XORPS: packed-single bitwise XOR, legacy SSE two-operand form (0F 57 /r).
void Assembler::xorps(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x57);
  emit_int8((unsigned char)(0xC0 | encode));
}
5430 
// XORPD with a memory source; 64-bit element size for EVEX address
// attributes.
void Assembler::xorpd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x57);
  emit_operand(dst, src);
}
5441 
// XORPS with a memory source; 32-bit element size for EVEX address
// attributes.
void Assembler::xorps(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x57);
  emit_operand(dst, src);
}
5451 
// VXORPD: packed-double bitwise XOR, three-operand register form
// (66 0F 57 /r).
void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x57);
  emit_int8((unsigned char)(0xC0 | encode));
}
5460 
// VXORPS: packed-single bitwise XOR, three-operand register form (0F 57 /r).
void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x57);
  emit_int8((unsigned char)(0xC0 | encode));
}
5468 
// VXORPD with a memory source; 64-bit element size for EVEX address
// attributes.
void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x57);
  emit_operand(dst, src);
}
5479 
// VXORPS with a memory source; 32-bit element size for EVEX address
// attributes.
void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8(0x57);
  emit_operand(dst, src);
}
5489 
5490 // Integer vector arithmetic
// VPHADDW: packed horizontal word add, three-operand register form
// (VEX.66.0F38 01 /r). Legacy-mode/no-mask/no-VL: no EVEX form is emitted.
void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  // Precedence note: this parses as (avx && 128-bit) || avx2 — AVX1 alone
  // only covers the 128-bit form; 256-bit requires AVX2.
  assert(VM_Version::supports_avx() && (vector_len == 0) ||
         VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x01);
  emit_int8((unsigned char)(0xC0 | encode));
}
5499 
// VPHADDD: packed horizontal dword add, three-operand register form
// (VEX.66.0F38 02 /r). Same assert precedence note as vphaddw:
// (avx && 128-bit) || avx2.
void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx() && (vector_len == 0) ||
         VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x02);
  emit_int8((unsigned char)(0xC0 | encode));
}
5508 
// PADDB: packed byte add, legacy SSE two-operand form (66 0F FC /r).
// Byte/word ops need AVX512BW for their EVEX form, hence _legacy_mode_bw.
void Assembler::paddb(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFC);
  emit_int8((unsigned char)(0xC0 | encode));
}
5516 
// PADDW: packed word add, legacy SSE two-operand form (66 0F FD /r).
void Assembler::paddw(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFD);
  emit_int8((unsigned char)(0xC0 | encode));
}
5524 
// PADDD: packed dword add, legacy SSE two-operand form (66 0F FE /r).
void Assembler::paddd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFE);
  emit_int8((unsigned char)(0xC0 | encode));
}
5532 
// PADDD with a memory source (66 0F FE /r).
// NOTE(review): unlike the other memory-form emitters here (e.g. vpaddd
// Address), no set_address_attributes() call is made — confirm whether this
// path can ever take the EVEX encoding and relies on default address
// attributes.
void Assembler::paddd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFE);
  emit_operand(dst, src);
}
5541 
// PADDQ: packed qword add, legacy SSE two-operand form (66 0F D4 /r).
void Assembler::paddq(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD4);
  emit_int8((unsigned char)(0xC0 | encode));
}
5550 
// PHADDW: packed horizontal word add, SSSE3 two-operand form
// (66 0F 38 01 /r). Legacy-only: no EVEX form.
void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse3(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x01);
  emit_int8((unsigned char)(0xC0 | encode));
}
5558 
// PHADDD: packed horizontal dword add, SSSE3 two-operand form
// (66 0F 38 02 /r). Legacy-only: no EVEX form.
void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse3(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x02);
  emit_int8((unsigned char)(0xC0 | encode));
}
5566 
// VPADDB: packed byte add, three-operand register form (66 0F FC /r).
void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFC);
  emit_int8((unsigned char)(0xC0 | encode));
}
5574 
// VPADDW: packed word add, three-operand register form (66 0F FD /r).
void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFD);
  emit_int8((unsigned char)(0xC0 | encode));
}
5582 
// VPADDD: packed dword add, three-operand register form (66 0F FE /r).
void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFE);
  emit_int8((unsigned char)(0xC0 | encode));
}
5590 
// VPADDQ: packed qword add, three-operand register form (66 0F D4 /r).
void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD4);
  emit_int8((unsigned char)(0xC0 | encode));
}
5599 
// VPADDB with a memory source. Byte/word ops use the full-vector-memory
// (EVEX_FVM) tuple type with no fixed element size.
void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFC);
  emit_operand(dst, src);
}
5609 
// VPADDW with a memory source; EVEX_FVM/EVEX_NObit as for vpaddb.
void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFD);
  emit_operand(dst, src);
}
5619 
// VPADDD with a memory source; 32-bit element size for EVEX address
// attributes.
void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFE);
  emit_operand(dst, src);
}
5629 
// VPADDQ with a memory source; 64-bit element size for EVEX address
// attributes.
void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD4);
  emit_operand(dst, src);
}
5640 
// PSUBB: packed byte subtract, legacy SSE two-operand form (66 0F F8 /r).
void Assembler::psubb(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xF8);
  emit_int8((unsigned char)(0xC0 | encode));
}
5648 
// PSUBW: packed word subtract, legacy SSE two-operand form (66 0F F9 /r).
void Assembler::psubw(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xF9);
  emit_int8((unsigned char)(0xC0 | encode));
}
5656 
5657 void Assembler::psubd(XMMRegister dst, XMMRegister src) {
5658   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5659   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5660   emit_int8((unsigned char)0xFA);
5661   emit_int8((unsigned char)(0xC0 | encode));
5662 }
5663 
// psubq: subtract packed 64-bit integers, dst -= src (66 0F FB /r).
// W tracks EVEX availability and is revertible for the legacy/VEX form.
void Assembler::psubq(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFB);
  emit_int8((unsigned char)(0xC0 | encode));
}
5672 
// vpsubb: three-operand packed byte subtract, dst = nds - src (VEX/EVEX 66 0F F8 /r).
void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xF8);
  emit_int8((unsigned char)(0xC0 | encode));
}
5680 
// vpsubw: three-operand packed word subtract, dst = nds - src (VEX/EVEX 66 0F F9 /r).
void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xF9);
  emit_int8((unsigned char)(0xC0 | encode));
}
5688 
// vpsubd: three-operand packed dword subtract, dst = nds - src (VEX/EVEX 66 0F FA /r).
void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFA);
  emit_int8((unsigned char)(0xC0 | encode));
}
5696 
// vpsubq: three-operand packed qword subtract, dst = nds - src (VEX/EVEX 66 0F FB /r).
// W follows EVEX availability and is revertible for the non-EVEX encoding.
void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFB);
  emit_int8((unsigned char)(0xC0 | encode));
}
5705 
// vpsubb with memory source: dst = nds - [src] (VEX/EVEX 66 0F F8 /r).
void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  // Full-vector-mem tuple: byte elements, no broadcast element size.
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xF8);
  emit_operand(dst, src);
}
5715 
// vpsubw with memory source: dst = nds - [src] (VEX/EVEX 66 0F F9 /r).
void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xF9);
  emit_operand(dst, src);
}
5725 
// vpsubd with memory source: dst = nds - [src] (VEX/EVEX 66 0F FA /r).
void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  // Full-vector tuple with 32-bit elements for EVEX disp8 compression.
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFA);
  emit_operand(dst, src);
}
5735 
// vpsubq with memory source: dst = nds - [src] (VEX/EVEX 66 0F FB /r).
// W follows EVEX availability and is revertible for the non-EVEX encoding.
void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xFB);
  emit_operand(dst, src);
}
5746 
5747 void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
5748   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5749   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5750   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5751   emit_int8((unsigned char)0xD5);
5752   emit_int8((unsigned char)(0xC0 | encode));
5753 }
5754 
// pmulld: multiply packed 32-bit integers, keep low dwords; dst *= src.
// SSE4.1 instruction (66 0F 38 40 /r).
void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x40);
  emit_int8((unsigned char)(0xC0 | encode));
}
5762 
// vpmullw: three-operand packed word multiply (low), dst = nds * src (VEX/EVEX 66 0F D5 /r).
void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD5);
  emit_int8((unsigned char)(0xC0 | encode));
}
5770 
// vpmulld: three-operand packed dword multiply (low), dst = nds * src (VEX/EVEX 66 0F 38 40 /r).
void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x40);
  emit_int8((unsigned char)(0xC0 | encode));
}
5778 
// vpmullq: packed qword multiply (low), dst = nds * src.
// AVX-512DQ only: EVEX.66.0F38.W1 40 /r — hence forced EVEX encoding.
void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 2, "requires some form of EVEX");
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x40);
  emit_int8((unsigned char)(0xC0 | encode));
}
5787 
// vpmullw with memory source: dst = nds * [src] (low words; VEX/EVEX 66 0F D5 /r).
void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD5);
  emit_operand(dst, src);
}
5797 
// vpmulld with memory source: dst = nds * [src] (low dwords; VEX/EVEX 66 0F 38 40 /r).
void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x40);
  emit_operand(dst, src);
}
5807 
// vpmullq with memory source: dst = nds * [src] (low qwords).
// AVX-512DQ only: EVEX.66.0F38.W1 40 /r — forced EVEX encoding.
void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 2, "requires some form of EVEX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_is_evex_instruction();
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x40);
  emit_operand(dst, src);
}
5818 
5819 // Shift packed integers left by specified number of bits.
// psllw: shift packed words left by an immediate count (66 0F 71 /6 ib).
void Assembler::psllw(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  // XMM6 is for /6 encoding: 66 0F 71 /6 ib
  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x71);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}
5829 
// pslld: shift packed dwords left by an immediate count (66 0F 72 /6 ib).
void Assembler::pslld(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  // XMM6 is for /6 encoding: 66 0F 72 /6 ib
  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x72);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}
5839 
5840 void Assembler::psllq(XMMRegister dst, int shift) {
5841   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5842   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5843   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
5844   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5845   emit_int8(0x73);
5846   emit_int8((unsigned char)(0xC0 | encode));
5847   emit_int8(shift & 0xFF);
5848 }
5849 
// psllw: shift packed words left by the count in an XMM register (66 0F F1 /r).
void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xF1);
  emit_int8((unsigned char)(0xC0 | encode));
}
5857 
// pslld: shift packed dwords left by the count in an XMM register (66 0F F2 /r).
void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xF2);
  emit_int8((unsigned char)(0xC0 | encode));
}
5865 
// psllq: shift packed qwords left by the count in an XMM register (66 0F F3 /r).
// W tracks EVEX availability and is revertible for the non-EVEX form.
void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}
5874 
// vpsllw: dst = src << shift for packed words, immediate count (VEX/EVEX 66 0F 71 /6 ib).
void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  // XMM6 is for /6 encoding: 66 0F 71 /6 ib
  int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x71);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}
5884 
// vpslld: dst = src << shift for packed dwords, immediate count (VEX/EVEX 66 0F 72 /6 ib).
void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  // XMM6 is for /6 encoding: 66 0F 72 /6 ib
  int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x72);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}
5895 
// vpsllq: dst = src << shift for packed qwords, immediate count (VEX/EVEX 66 0F 73 /6 ib).
// W follows EVEX availability and is revertible for the non-EVEX encoding.
void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  // XMM6 is for /6 encoding: 66 0F 73 /6 ib
  int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x73);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}
5906 
// vpsllw: dst = src << shift for packed words, count in XMM register (VEX/EVEX 66 0F F1 /r).
void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xF1);
  emit_int8((unsigned char)(0xC0 | encode));
}
5914 
// vpslld: dst = src << shift for packed dwords, count in XMM register (VEX/EVEX 66 0F F2 /r).
void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xF2);
  emit_int8((unsigned char)(0xC0 | encode));
}
5922 
// vpsllq: dst = src << shift for packed qwords, count in XMM register (VEX/EVEX 66 0F F3 /r).
// W follows EVEX availability and is revertible for the non-EVEX encoding.
void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}
5931 
5932 // Shift packed integers logically right by specified number of bits.
// psrlw: logical right shift of packed words by an immediate count (66 0F 71 /2 ib).
void Assembler::psrlw(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  // XMM2 is for /2 encoding: 66 0F 71 /2 ib
  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x71);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}
5942 
// psrld: logical right shift of packed dwords by an immediate count (66 0F 72 /2 ib).
void Assembler::psrld(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  // XMM2 is for /2 encoding: 66 0F 72 /2 ib
  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x72);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}
5952 
// psrlq: logical right shift of packed qwords by an immediate count (66 0F 73 /2 ib).
// W tracks EVEX availability and is revertible for the non-EVEX form.
void Assembler::psrlq(XMMRegister dst, int shift) {
  // Do not confuse it with psrldq SSE2 instruction which
  // shifts 128 bit value in xmm register by number of bytes.
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  // XMM2 is for /2 encoding: 66 0F 73 /2 ib
  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x73);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}
5965 
5966 void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
5967   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5968   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5969   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5970   emit_int8((unsigned char)0xD1);
5971   emit_int8((unsigned char)(0xC0 | encode));
5972 }
5973 
// psrld: logical right shift of packed dwords by the count in an XMM register (66 0F D2 /r).
void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD2);
  emit_int8((unsigned char)(0xC0 | encode));
}
5981 
// psrlq: logical right shift of packed qwords by the count in an XMM register (66 0F D3 /r).
// W tracks EVEX availability and is revertible for the non-EVEX form.
void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xC0 | encode));
}
5990 
// vpsrlw: dst = src >> shift (logical) for packed words, immediate count (VEX/EVEX 66 0F 71 /2 ib).
void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  // XMM2 is for /2 encoding: 66 0F 71 /2 ib
  int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x71);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}
6000 
// vpsrld: dst = src >> shift (logical) for packed dwords, immediate count (VEX/EVEX 66 0F 72 /2 ib).
void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  // XMM2 is for /2 encoding: 66 0F 72 /2 ib
  int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x72);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}
6010 
// vpsrlq: dst = src >> shift (logical) for packed qwords, immediate count (VEX/EVEX 66 0F 73 /2 ib).
// W follows EVEX availability and is revertible for the non-EVEX encoding.
void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  // XMM2 is for /2 encoding: 66 0F 73 /2 ib
  int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x73);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}
6021 
// vpsrlw: dst = src >> shift (logical) for packed words, count in XMM register (VEX/EVEX 66 0F D1 /r).
void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD1);
  emit_int8((unsigned char)(0xC0 | encode));
}
6029 
// vpsrld: dst = src >> shift (logical) for packed dwords, count in XMM register (VEX/EVEX 66 0F D2 /r).
void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD2);
  emit_int8((unsigned char)(0xC0 | encode));
}
6037 
// vpsrlq: dst = src >> shift (logical) for packed qwords, count in XMM register (VEX/EVEX 66 0F D3 /r).
// W follows EVEX availability and is revertible for the non-EVEX encoding.
void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xC0 | encode));
}
6046 
6047 // Shift packed integers arithmetically right by specified number of bits.
// psraw: arithmetic right shift of packed words by an immediate count (66 0F 71 /4 ib).
void Assembler::psraw(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  // XMM4 is for /4 encoding: 66 0F 71 /4 ib
  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x71);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}
6057 
// psrad: arithmetic right shift of packed dwords by an immediate count (66 0F 72 /4 ib).
void Assembler::psrad(XMMRegister dst, int shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  // XMM4 is for /4 encoding: 66 0F 72 /4 ib
  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x72);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}
6067 
// psraw: arithmetic right shift of packed words by the count in an XMM register (66 0F E1 /r).
void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xE1);
  emit_int8((unsigned char)(0xC0 | encode));
}
6075 
// psrad: arithmetic right shift of packed dwords by the count in an XMM register (66 0F E2 /r).
void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xE2);
  emit_int8((unsigned char)(0xC0 | encode));
}
6083 
// vpsraw: dst = src >> shift (arithmetic) for packed words, immediate count (VEX/EVEX 66 0F 71 /4 ib).
void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  // XMM4 is for /4 encoding: 66 0F 71 /4 ib
  int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x71);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}
6093 
// vpsrad: dst = src >> shift (arithmetic) for packed dwords, immediate count (VEX/EVEX 66 0F 72 /4 ib).
void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  // XMM4 is for /4 encoding: 66 0F 72 /4 ib
  int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x72);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(shift & 0xFF);
}
6103 
// vpsraw: dst = src >> shift (arithmetic) for packed words, count in XMM register (VEX/EVEX 66 0F E1 /r).
void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xE1);
  emit_int8((unsigned char)(0xC0 | encode));
}
6111 
// vpsrad: dst = src >> shift (arithmetic) for packed dwords, count in XMM register (VEX/EVEX 66 0F E2 /r).
void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xE2);
  emit_int8((unsigned char)(0xC0 | encode));
}
6119 
6120 
6121 // logical operations packed integers
6122 void Assembler::pand(XMMRegister dst, XMMRegister src) {
6123   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6124   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6125   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6126   emit_int8((unsigned char)0xDB);
6127   emit_int8((unsigned char)(0xC0 | encode));
6128 }
6129 
// vpand: three-operand bitwise AND, dst = nds & src (VEX/EVEX 66 0F DB /r).
void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xDB);
  emit_int8((unsigned char)(0xC0 | encode));
}
6137 
// vpand with memory source: dst = nds & [src] (VEX/EVEX 66 0F DB /r).
void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xDB);
  emit_operand(dst, src);
}
6147 
// pandn: bitwise AND-NOT of packed data, dst = ~dst & src (66 0F DF /r).
// W tracks EVEX availability and is revertible for the non-EVEX form.
void Assembler::pandn(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xDF);
  emit_int8((unsigned char)(0xC0 | encode));
}
6156 
// por: bitwise OR of packed data, dst |= src (66 0F EB /r).
void Assembler::por(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xEB);
  emit_int8((unsigned char)(0xC0 | encode));
}
6164 
// vpor: three-operand bitwise OR, dst = nds | src (VEX/EVEX 66 0F EB /r).
void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xEB);
  emit_int8((unsigned char)(0xC0 | encode));
}
6172 
// vpor with memory source: dst = nds | [src] (VEX/EVEX 66 0F EB /r).
void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xEB);
  emit_operand(dst, src);
}
6182 
6183 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
6184   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6185   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6186   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6187   emit_int8((unsigned char)0xEF);
6188   emit_int8((unsigned char)(0xC0 | encode));
6189 }
6190 
// vpxor: three-operand bitwise XOR, dst = nds ^ src (VEX/EVEX 66 0F EF /r).
void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xEF);
  emit_int8((unsigned char)(0xC0 | encode));
}
6198 
// VPXOR with a memory source operand: dst = nds ^ [src].
// Encoded as VEX/EVEX.66.0F EF /r; EVEX_FV/32-bit address attributes feed
// EVEX disp8*N displacement compression.
void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(UseAVX > 0, "requires some form of AVX");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xEF);  // opcode
  emit_operand(dst, src);          // ModRM + SIB/displacement for the memory operand
}
6208 
// VPXORQ (register form): quadword-element XOR, EVEX-only encoding
// (EVEX.66.0F.W1 EF /r); vex_w=true selects the 64-bit element form.
void Assembler::evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_evex(), "requires EVEX support");
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xEF);  // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM: register-direct
}
6217 
// VPXORQ with a memory source: dst = nds ^ [src], EVEX-only
// (EVEX.66.0F.W1 EF /r); EVEX_FV/64-bit address attributes feed disp8*N
// displacement compression.
void Assembler::evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  assert(VM_Version::supports_evex(), "requires EVEX support");
  assert(dst != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xEF);  // opcode
  emit_operand(dst, src);          // ModRM + SIB/displacement for the memory operand
}
6229 
6230 
6231 // vinserti forms
6232 
// VINSERTI128 (register form): insert a 128-bit integer lane of src into
// dst at the position selected by imm8 (VEX.66.0F3A 38 /r ib, AVX2).
void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_avx2(), "");
  assert(imm8 <= 0x01, "imm8: %u", imm8);
  // On AVX-512 chips lacking VL, widen to 512-bit encoding.
  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x38);  // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM: register-direct
  // 0x00 - insert into lower 128 bits
  // 0x01 - insert into upper 128 bits
  emit_int8(imm8 & 0x01);
}
6245 
// VINSERTI128 with a memory source: insert 128 bits loaded from src into
// dst at the position selected by imm8 (VEX.66.0F3A 38 /r ib, AVX2).
void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
  assert(VM_Version::supports_avx2(), "");
  assert(dst != xnoreg, "sanity");
  assert(imm8 <= 0x01, "imm8: %u", imm8);
  // On AVX-512 chips lacking VL, widen to 512-bit encoding.
  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x38);  // opcode
  emit_operand(dst, src);  // ModRM + SIB/displacement for the memory operand
  // 0x00 - insert into lower 128 bits
  // 0x01 - insert into upper 128 bits
  emit_int8(imm8 & 0x01);
}
6261 
// VINSERTI32X4 (register form): insert a 128-bit integer lane into a
// 512-bit destination at the quadrant selected by imm8
// (EVEX.66.0F3A.W0 38 /r ib).
void Assembler::vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_evex(), "");
  assert(imm8 <= 0x03, "imm8: %u", imm8);
  InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x38);  // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM: register-direct
  // 0x00 - insert into q0 128 bits (0..127)
  // 0x01 - insert into q1 128 bits (128..255)
  // 0x02 - insert into q2 128 bits (256..383)
  // 0x03 - insert into q3 128 bits (384..511)
  emit_int8(imm8 & 0x03);
}
6275 
6276 void Assembler::vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6277   assert(VM_Version::supports_avx(), "");
6278   assert(dst != xnoreg, "sanity");
6279   assert(imm8 <= 0x03, "imm8: %u", imm8);
6280   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
6281   InstructionMark im(this);
6282   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6283   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6284   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6285   emit_int8(0x18);
6286   emit_operand(dst, src);
6287   // 0x00 - insert into q0 128 bits (0..127)
6288   // 0x01 - insert into q1 128 bits (128..255)
6289   // 0x02 - insert into q2 128 bits (256..383)
6290   // 0x03 - insert into q3 128 bits (384..511)
6291   emit_int8(imm8 & 0x03);
6292 }
6293 
6294 void Assembler::vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6295   assert(VM_Version::supports_evex(), "");
6296   assert(imm8 <= 0x01, "imm8: %u", imm8);
6297   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6298   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6299   emit_int8(0x38);
6300   emit_int8((unsigned char)(0xC0 | encode));
6301   // 0x00 - insert into lower 256 bits
6302   // 0x01 - insert into upper 256 bits
6303   emit_int8(imm8 & 0x01);
6304 }
6305 
6306 
6307 // vinsertf forms
6308 
// VINSERTF128 (register form): insert a 128-bit lane of src into dst at
// the position selected by imm8 (VEX.66.0F3A 18 /r ib, AVX).
void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_avx(), "");
  assert(imm8 <= 0x01, "imm8: %u", imm8);
  // On AVX-512 chips lacking VL, widen to 512-bit encoding.
  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x18);  // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM: register-direct
  // 0x00 - insert into lower 128 bits
  // 0x01 - insert into upper 128 bits
  emit_int8(imm8 & 0x01);
}
6321 
// VINSERTF128 with a memory source: insert 128 bits loaded from src into
// dst at the position selected by imm8 (VEX.66.0F3A 18 /r ib, AVX).
void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
  assert(VM_Version::supports_avx(), "");
  assert(dst != xnoreg, "sanity");
  assert(imm8 <= 0x01, "imm8: %u", imm8);
  // On AVX-512 chips lacking VL, widen to 512-bit encoding.
  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x18);  // opcode
  emit_operand(dst, src);  // ModRM + SIB/displacement for the memory operand
  // 0x00 - insert into lower 128 bits
  // 0x01 - insert into upper 128 bits
  emit_int8(imm8 & 0x01);
}
6337 
// VINSERTF32X4 (register form): insert a 128-bit lane into a 512-bit
// destination at the quadrant selected by imm8 (EVEX.66.0F3A.W0 18 /r ib).
void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_evex(), "");
  assert(imm8 <= 0x03, "imm8: %u", imm8);
  InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x18);  // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM: register-direct
  // 0x00 - insert into q0 128 bits (0..127)
  // 0x01 - insert into q1 128 bits (128..255)
  // 0x02 - insert into q2 128 bits (256..383)
  // 0x03 - insert into q3 128 bits (384..511)
  emit_int8(imm8 & 0x03);
}
6351 
// VINSERTF32X4 with a memory source: insert 128 bits loaded from src into
// dst at the quadrant selected by imm8 (66.0F3A 18 /r ib).
void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
  assert(VM_Version::supports_avx(), "");
  assert(dst != xnoreg, "sanity");
  assert(imm8 <= 0x03, "imm8: %u", imm8);
  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x18);  // opcode
  emit_operand(dst, src);  // ModRM + SIB/displacement for the memory operand
  // 0x00 - insert into q0 128 bits (0..127)
  // 0x01 - insert into q1 128 bits (128..255)
  // 0x02 - insert into q2 128 bits (256..383)
  // 0x03 - insert into q3 128 bits (384..511)
  emit_int8(imm8 & 0x03);
}
6369 
// VINSERTF64X4 (register form): insert a 256-bit lane into a 512-bit
// destination, position selected by imm8 (EVEX.512.66.0F3A.W1 1A /r ib).
void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_evex(), "");
  assert(imm8 <= 0x01, "imm8: %u", imm8);
  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x1A);  // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM: register-direct
  // 0x00 - insert into lower 256 bits
  // 0x01 - insert into upper 256 bits
  emit_int8(imm8 & 0x01);
}
6381 
// VINSERTF64X4 with a memory source: insert 256 bits loaded from src into
// dst, position selected by imm8 (EVEX.512.66.0F3A.W1 1A /r ib).
void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
  assert(VM_Version::supports_evex(), "");
  assert(dst != xnoreg, "sanity");
  assert(imm8 <= 0x01, "imm8: %u", imm8);
  InstructionMark im(this);
  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_64bit);
  vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x1A);  // opcode
  emit_operand(dst, src);  // ModRM + SIB/displacement for the memory operand
  // 0x00 - insert into lower 256 bits
  // 0x01 - insert into upper 256 bits
  emit_int8(imm8 & 0x01);
}
6396 
6397 
6398 // vextracti forms
6399 
6400 void Assembler::vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6401   assert(VM_Version::supports_avx(), "");
6402   assert(imm8 <= 0x01, "imm8: %u", imm8);
6403   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6404   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6405   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6406   emit_int8(0x39);
6407   emit_int8((unsigned char)(0xC0 | encode));
6408   // 0x00 - extract from lower 128 bits
6409   // 0x01 - extract from upper 128 bits
6410   emit_int8(imm8 & 0x01);
6411 }
6412 
// VEXTRACTI128 to memory: store the 128-bit lane of src selected by imm8
// at [dst] (VEX.66.0F3A 39 /r ib, AVX2).
void Assembler::vextracti128(Address dst, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_avx2(), "");
  assert(src != xnoreg, "sanity");
  assert(imm8 <= 0x01, "imm8: %u", imm8);
  // On AVX-512 chips lacking VL, widen to 512-bit encoding.
  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
  attributes.reset_is_clear_context();
  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x39);  // opcode
  emit_operand(src, dst);  // ModRM + SIB/displacement; src is the reg field
  // 0x00 - extract from lower 128 bits
  // 0x01 - extract from upper 128 bits
  emit_int8(imm8 & 0x01);
}
6429 
// VEXTRACTI32X4 (register form): extract the 128-bit lane of src selected
// by imm8 into dst (66.0F3A 39 /r ib).
void Assembler::vextracti32x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_avx(), "");
  assert(imm8 <= 0x03, "imm8: %u", imm8);
  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x39);  // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM: register-direct
  // 0x00 - extract from bits 127:0
  // 0x01 - extract from bits 255:128
  // 0x02 - extract from bits 383:256
  // 0x03 - extract from bits 511:384
  emit_int8(imm8 & 0x03);
}
6444 
// VEXTRACTI32X4 to memory: store the 128-bit lane of src selected by imm8
// at [dst] (EVEX.66.0F3A.W0 39 /r ib).
void Assembler::vextracti32x4(Address dst, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_evex(), "");
  assert(src != xnoreg, "sanity");
  assert(imm8 <= 0x03, "imm8: %u", imm8);
  InstructionMark im(this);
  InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
  attributes.reset_is_clear_context();
  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x39);  // opcode
  emit_operand(src, dst);  // ModRM + SIB/displacement; src is the reg field
  // 0x00 - extract from bits 127:0
  // 0x01 - extract from bits 255:128
  // 0x02 - extract from bits 383:256
  // 0x03 - extract from bits 511:384
  emit_int8(imm8 & 0x03);
}
6462 
// VEXTRACTI64X2: extract the 128-bit (2x64-bit) lane of src selected by
// imm8 into dst (EVEX.512.66.0F3A.W1 39 /r ib, requires AVX512DQ).
void Assembler::vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_avx512dq(), "");
  assert(imm8 <= 0x03, "imm8: %u", imm8);
  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x39);  // opcode (W1 selects the 64x2 form)
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM: register-direct
  // 0x00 - extract from bits 127:0
  // 0x01 - extract from bits 255:128
  // 0x02 - extract from bits 383:256
  // 0x03 - extract from bits 511:384
  emit_int8(imm8 & 0x03);
}
6476 
// VEXTRACTI64X4: extract the 256-bit half of src selected by imm8 into
// dst (EVEX.512.66.0F3A.W1 3B /r ib).
void Assembler::vextracti64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_evex(), "");
  assert(imm8 <= 0x01, "imm8: %u", imm8);
  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x3B);  // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM: register-direct
  // 0x00 - extract from lower 256 bits
  // 0x01 - extract from upper 256 bits
  emit_int8(imm8 & 0x01);
}
6488 
6489 
6490 // vextractf forms
6491 
// VEXTRACTF128 (register form): extract the 128-bit lane of src selected
// by imm8 into dst (VEX.66.0F3A 19 /r ib, AVX).
void Assembler::vextractf128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_avx(), "");
  assert(imm8 <= 0x01, "imm8: %u", imm8);
  // On AVX-512 chips lacking VL, widen to 512-bit encoding.
  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x19);  // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM: register-direct
  // 0x00 - extract from lower 128 bits
  // 0x01 - extract from upper 128 bits
  emit_int8(imm8 & 0x01);
}
6504 
// VEXTRACTF128 to memory: store the 128-bit lane of src selected by imm8
// at [dst] (VEX.66.0F3A 19 /r ib, AVX).
void Assembler::vextractf128(Address dst, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_avx(), "");
  assert(src != xnoreg, "sanity");
  assert(imm8 <= 0x01, "imm8: %u", imm8);
  // On AVX-512 chips lacking VL, widen to 512-bit encoding.
  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
  attributes.reset_is_clear_context();
  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x19);  // opcode
  emit_operand(src, dst);  // ModRM + SIB/displacement; src is the reg field
  // 0x00 - extract from lower 128 bits
  // 0x01 - extract from upper 128 bits
  emit_int8(imm8 & 0x01);
}
6521 
// VEXTRACTF32X4 (register form): extract the 128-bit lane of src selected
// by imm8 into dst (66.0F3A 19 /r ib).
void Assembler::vextractf32x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_avx(), "");
  assert(imm8 <= 0x03, "imm8: %u", imm8);
  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x19);  // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM: register-direct
  // 0x00 - extract from bits 127:0
  // 0x01 - extract from bits 255:128
  // 0x02 - extract from bits 383:256
  // 0x03 - extract from bits 511:384
  emit_int8(imm8 & 0x03);
}
6536 
// VEXTRACTF32X4 to memory: store the 128-bit lane of src selected by imm8
// at [dst] (EVEX.66.0F3A.W0 19 /r ib).
void Assembler::vextractf32x4(Address dst, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_evex(), "");
  assert(src != xnoreg, "sanity");
  assert(imm8 <= 0x03, "imm8: %u", imm8);
  InstructionMark im(this);
  InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
  attributes.reset_is_clear_context();
  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x19);  // opcode
  emit_operand(src, dst);  // ModRM + SIB/displacement; src is the reg field
  // 0x00 - extract from bits 127:0
  // 0x01 - extract from bits 255:128
  // 0x02 - extract from bits 383:256
  // 0x03 - extract from bits 511:384
  emit_int8(imm8 & 0x03);
}
6554 
// VEXTRACTF64X2: extract the 128-bit (2x64-bit) lane of src selected by
// imm8 into dst (EVEX.512.66.0F3A.W1 19 /r ib, requires AVX512DQ).
void Assembler::vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_avx512dq(), "");
  assert(imm8 <= 0x03, "imm8: %u", imm8);
  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x19);  // opcode (W1 selects the 64x2 form)
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM: register-direct
  // 0x00 - extract from bits 127:0
  // 0x01 - extract from bits 255:128
  // 0x02 - extract from bits 383:256
  // 0x03 - extract from bits 511:384
  emit_int8(imm8 & 0x03);
}
6568 
// VEXTRACTF64X4 (register form): extract the 256-bit half of src selected
// by imm8 into dst (EVEX.512.66.0F3A.W1 1B /r ib).
void Assembler::vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_evex(), "");
  assert(imm8 <= 0x01, "imm8: %u", imm8);
  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x1B);  // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM: register-direct
  // 0x00 - extract from lower 256 bits
  // 0x01 - extract from upper 256 bits
  emit_int8(imm8 & 0x01);
}
6580 
// VEXTRACTF64X4 to memory: store the 256-bit half of src selected by imm8
// at [dst] (EVEX.512.66.0F3A.W1 1B /r ib).
void Assembler::vextractf64x4(Address dst, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_evex(), "");
  assert(src != xnoreg, "sanity");
  assert(imm8 <= 0x01, "imm8: %u", imm8);
  InstructionMark im(this);
  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T4,/* input_size_in_bits */  EVEX_64bit);
  attributes.reset_is_clear_context();
  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x1B);  // opcode
  emit_operand(src, dst);  // ModRM + SIB/displacement; src is the reg field
  // 0x00 - extract from lower 256 bits
  // 0x01 - extract from upper 256 bits
  emit_int8(imm8 & 0x01);
}
6596 
6597 
6598 // legacy word/dword replicate
6599 void Assembler::vpbroadcastw(XMMRegister dst, XMMRegister src) {
6600   assert(VM_Version::supports_avx2(), "");
6601   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6602   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6603   emit_int8(0x79);
6604   emit_int8((unsigned char)(0xC0 | encode));
6605 }
6606 
6607 void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
6608   assert(VM_Version::supports_avx2(), "");
6609   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6610   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6611   emit_int8(0x58);
6612   emit_int8((unsigned char)(0xC0 | encode));
6613 }
6614 
6615 
6616 // xmm/mem sourced byte/word/dword/qword replicate
6617 
6618 // duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
// VPBROADCASTB from xmm: replicate the low byte of src across dst
// (66.0F38 78 /r).
void Assembler::evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x78);  // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM: register-direct
}
6626 
// VPBROADCASTB from memory: replicate the byte at [src] across dst
// (66.0F38 78 /r); T1S/8-bit attributes feed EVEX disp8*N compression.
void Assembler::evpbroadcastb(XMMRegister dst, Address src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(dst != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
  // swap src<->dst for encoding
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x78);  // opcode
  emit_operand(dst, src);  // ModRM + SIB/displacement for the memory operand
}
6638 
6639 // duplicate 2-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
// VPBROADCASTW from xmm: replicate the low word of src across dst
// (66.0F38 79 /r).
void Assembler::evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x79);  // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM: register-direct
}
6647 
// VPBROADCASTW from memory: replicate the word at [src] across dst
// (66.0F38 79 /r); T1S/16-bit attributes feed EVEX disp8*N compression.
void Assembler::evpbroadcastw(XMMRegister dst, Address src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(dst != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
  // swap src<->dst for encoding
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x79);  // opcode
  emit_operand(dst, src);  // ModRM + SIB/displacement for the memory operand
}
6659 
6660 // duplicate 4-byte integer data from src into programmed locations in dest : requires AVX512VL
// VPBROADCASTD from xmm: replicate the low dword of src across dst
// (66.0F38 58 /r).
void Assembler::evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x58);  // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM: register-direct
}
6668 
// VPBROADCASTD from memory: replicate the dword at [src] across dst
// (66.0F38 58 /r); T1S/32-bit attributes feed EVEX disp8*N compression.
void Assembler::evpbroadcastd(XMMRegister dst, Address src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(dst != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  // swap src<->dst for encoding
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x58);  // opcode
  emit_operand(dst, src);  // ModRM + SIB/displacement for the memory operand
}
6680 
6681 // duplicate 8-byte integer data from src into programmed locations in dest : requires AVX512VL
// VPBROADCASTQ from xmm: replicate the low qword of src across dst
// (66.0F38.W1 59 /r); W bit set only when EVEX is available, and marked
// revertible so the encoder can fall back when needed.
void Assembler::evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x59);  // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM: register-direct
}
6690 
// VPBROADCASTQ from memory: replicate the qword at [src] across dst
// (66.0F38.W1 59 /r); T1S/64-bit attributes feed EVEX disp8*N compression.
void Assembler::evpbroadcastq(XMMRegister dst, Address src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(dst != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  // swap src<->dst for encoding
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x59);  // opcode
  emit_operand(dst, src);  // ModRM + SIB/displacement for the memory operand
}
6703 
6704 
6705 // scalar single/double precision replicate
6706 
6707 // duplicate single precision data from src into programmed locations in dest : requires AVX512VL
// VBROADCASTSS from xmm: replicate the low single-precision element of
// src across dst (66.0F38 18 /r).
void Assembler::evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x18);  // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM: register-direct
}
6715 
// VBROADCASTSS from memory: replicate the single-precision value at [src]
// across dst (66.0F38 18 /r); T1S/32-bit attributes feed EVEX disp8*N
// compression.
void Assembler::evpbroadcastss(XMMRegister dst, Address src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(dst != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  // swap src<->dst for encoding
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x18);  // opcode
  emit_operand(dst, src);  // ModRM + SIB/displacement for the memory operand
}
6727 
6728 // duplicate double precision data from src into programmed locations in dest : requires AVX512VL
// VBROADCASTSD from xmm: replicate the low double-precision element of
// src across dst (66.0F38.W1 19 /r); W bit revertible for encoder fallback.
void Assembler::evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_rex_vex_w_reverted();
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x19);  // opcode
  emit_int8((unsigned char)(0xC0 | encode));  // ModRM: register-direct
}
6737 
// VBROADCASTSD from memory: replicate the double-precision value at [src]
// across dst (66.0F38.W1 19 /r); T1S/64-bit attributes feed EVEX disp8*N
// compression.
void Assembler::evpbroadcastsd(XMMRegister dst, Address src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  assert(dst != xnoreg, "sanity");
  InstructionMark im(this);
  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  // swap src<->dst for encoding
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x19);  // opcode
  emit_operand(dst, src);  // ModRM + SIB/displacement for the memory operand
}
6750 
6751 
6752 // gpr source broadcast forms
6753 
6754 // duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
6755 void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) {
6756   assert(VM_Version::supports_evex(), "");
6757   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6758   attributes.set_is_evex_instruction();
6759   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6760   emit_int8(0x7A);
6761   emit_int8((unsigned char)(0xC0 | encode));
6762 }
6763 
6764 // duplicate 2-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
6765 void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) {
6766   assert(VM_Version::supports_evex(), "");
6767   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6768   attributes.set_is_evex_instruction();
6769   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6770   emit_int8(0x7B);
6771   emit_int8((unsigned char)(0xC0 | encode));
6772 }
6773 
6774 // duplicate 4-byte integer data from src into programmed locations in dest : requires AVX512VL
// EVEX.66.0F38.W0 7C /r — broadcast the low dword of GPR src into every
// dword lane of dst.  Register-direct form only.
void Assembler::evpbroadcastd(XMMRegister dst, Register src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x7C);
  emit_int8((unsigned char)(0xC0 | encode));
}
6783 
6784 // duplicate 8-byte integer data from src into programmed locations in dest : requires AVX512VL
// EVEX.66.0F38.W1 7C /r — same opcode byte as evpbroadcastd; vex_w=true
// selects the 64-bit (VPBROADCASTQ) form.  Register-direct only.
void Assembler::evpbroadcastq(XMMRegister dst, Register src, int vector_len) {
  assert(VM_Version::supports_evex(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8(0x7C);
  emit_int8((unsigned char)(0xC0 | encode));
}
6793 
6794 
6795 // Carry-Less Multiplication Quadword
// PCLMULQDQ xmm, xmm, imm8 — 66 0F 3A 44 /r ib.  'mask' (imm8) selects which
// quadwords of dst/src are multiplied.  Legacy/SSE encoding (dst is also nds).
void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
  assert(VM_Version::supports_clmul(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x44);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8((unsigned char)mask);
}
6804 
6805 // Carry-Less Multiplication Quadword
// VPCLMULQDQ xmm, xmm, xmm, imm8 — VEX.128.66.0F3A.WIG 44 /r ib.
// Three-operand AVX form of pclmulqdq above.
void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
  assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x44);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8((unsigned char)mask);
}
6814 
// EVEX-encoded VPCLMULQDQ with selectable vector length (VPCLMULQDQ CPU
// feature) — EVEX.66.0F3A.WIG 44 /r ib.
void Assembler::evpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask, int vector_len) {
  assert(VM_Version::supports_vpclmulqdq(), "Requires vector carryless multiplication support");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8(0x44);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8((unsigned char)mask);
}
6824 
// VZEROUPPER — VEX.128.0F.WIG 77.  Emitted only when the CPU profile wants it
// (avoids AVX->SSE transition penalties); a no-emit otherwise.
void Assembler::vzeroupper() {
  if (VM_Version::supports_vzeroupper()) {
    InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
    // Encode result discarded: no ModRM byte follows the opcode.
    (void)vex_prefix_and_encode(0, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
    emit_int8(0x77);
  }
}
6832 
6833 #ifndef _LP64
6834 // 32bit only pieces of the assembler
6835 
// CMP r32, imm32 — 81 /7 id with a relocated immediate.  ModRM 0xF8|reg is
// mod=11, reg-field digit 7 (/7 selects CMP in the 0x81 group).
void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
  // NO PREFIX AS NEVER 64BIT
  InstructionMark im(this);
  emit_int8((unsigned char)0x81);
  emit_int8((unsigned char)(0xF8 | src1->encoding()));
  emit_data(imm32, rspec, 0);
}
6843 
// CMP m32, imm32 — 81 /7 id with a relocated immediate.  'rdi' (encoding 7)
// supplies the /7 reg-field digit, it is not an operand.
void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
  // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs)
  InstructionMark im(this);
  emit_int8((unsigned char)0x81);
  emit_operand(rdi, src1);
  emit_data(imm32, rspec, 0);
}
6851 
6852 // The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax,
6853 // and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded
6854 // into rdx:rax.  The ZF is set if the compared values were equal, and cleared otherwise.
// CMPXCHG8B m64 — 0F C7 /1 ('rcx' supplies the /1 reg-field digit).
void Assembler::cmpxchg8(Address adr) {
  InstructionMark im(this);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xC7);
  emit_operand(rcx, adr);
}
6861 
// DEC r32 — one-byte 48+rd form; 32-bit only (on x86-64 bytes 48-4F are REX
// prefixes, hence this lives under #ifndef _LP64).
void Assembler::decl(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementl() instead.
 emit_int8(0x48 | dst->encoding());
}
6866 
6867 #endif // _LP64
6868 
6869 // 64bit typically doesn't use the x87 but needs to for the trig funcs
6870 
// FABS — D9 E1: ST(0) = |ST(0)|.
void Assembler::fabs() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xE1);
}
6875 
// FADD ST(0), ST(i) — D8 C0+i.
void Assembler::fadd(int i) {
  emit_farith(0xD8, 0xC0, i);
}
6879 
// FADD m64fp — DC /0 ('rax' supplies the /0 reg-field digit).
void Assembler::fadd_d(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDC);
  emit_operand32(rax, src);
}
6885 
// FADD m32fp — D8 /0.
void Assembler::fadd_s(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD8);
  emit_operand32(rax, src);
}
6891 
// FADD ST(i), ST(0) — DC C0+i ("accumulate into ST(i)").
void Assembler::fadda(int i) {
  emit_farith(0xDC, 0xC0, i);
}
6895 
// FADDP ST(i), ST(0), then pop — DE C0+i.
void Assembler::faddp(int i) {
  emit_farith(0xDE, 0xC0, i);
}
6899 
// FCHS — D9 E0: negate ST(0).
void Assembler::fchs() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xE0);
}
6904 
// FCOM ST(i) — D8 D0+i: compare ST(0) with ST(i).
void Assembler::fcom(int i) {
  emit_farith(0xD8, 0xD0, i);
}
6908 
// FCOMP ST(i) — D8 D8+i: compare ST(0) with ST(i), then pop.
void Assembler::fcomp(int i) {
  emit_farith(0xD8, 0xD8, i);
}
6912 
// FCOMP m64fp — DC /3 ('rbx' supplies the /3 digit).
void Assembler::fcomp_d(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDC);
  emit_operand32(rbx, src);
}
6918 
// FCOMP m32fp — D8 /3.
void Assembler::fcomp_s(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD8);
  emit_operand32(rbx, src);
}
6924 
// FCOMPP — DE D9: compare ST(0) with ST(1) and pop both.
void Assembler::fcompp() {
  emit_int8((unsigned char)0xDE);
  emit_int8((unsigned char)0xD9);
}
6929 
// FCOS — D9 FF: ST(0) = cos(ST(0)).
void Assembler::fcos() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xFF);
}
6934 
// FDECSTP — D9 F6: decrement the x87 stack-top pointer.
void Assembler::fdecstp() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xF6);
}
6939 
// FDIV ST(0), ST(i) — D8 F0+i.
void Assembler::fdiv(int i) {
  emit_farith(0xD8, 0xF0, i);
}
6943 
// FDIV m64fp — DC /6 ('rsi' supplies the /6 digit).
void Assembler::fdiv_d(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDC);
  emit_operand32(rsi, src);
}
6949 
// FDIV m32fp — D8 /6.
void Assembler::fdiv_s(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD8);
  emit_operand32(rsi, src);
}
6955 
// DC F8+i — divide with ST(i) as destination (see manual-erratum note below).
void Assembler::fdiva(int i) {
  emit_farith(0xDC, 0xF8, i);
}
6959 
6960 // Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
6961 //       is erroneous for some of the floating-point instructions below.
6962 
// DE F8+i.
void Assembler::fdivp(int i) {
  emit_farith(0xDE, 0xF8, i);                    // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
}
6966 
// D8 F8+i — reversed divide, ST(0) destination.
void Assembler::fdivr(int i) {
  emit_farith(0xD8, 0xF8, i);
}
6970 
// FDIVR m64fp — DC /7 ('rdi' supplies the /7 digit).
void Assembler::fdivr_d(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDC);
  emit_operand32(rdi, src);
}
6976 
// FDIVR m32fp — D8 /7.
void Assembler::fdivr_s(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD8);
  emit_operand32(rdi, src);
}
6982 
// DC F0+i — reversed divide with ST(i) as destination.
void Assembler::fdivra(int i) {
  emit_farith(0xDC, 0xF0, i);
}
6986 
// DE F0+i.
void Assembler::fdivrp(int i) {
  emit_farith(0xDE, 0xF0, i);                    // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
}
6990 
// FFREE ST(i) — DD C0+i: mark ST(i) empty.
void Assembler::ffree(int i) {
  emit_farith(0xDD, 0xC0, i);
}
6994 
// FILD m64int — DF /5 ('rbp' supplies the /5 digit).
void Assembler::fild_d(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDF);
  emit_operand32(rbp, adr);
}
7000 
// FILD m32int — DB /0.
void Assembler::fild_s(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDB);
  emit_operand32(rax, adr);
}
7006 
// FINCSTP — D9 F7: increment the x87 stack-top pointer.
void Assembler::fincstp() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xF7);
}
7011 
// FINIT — 9B (FWAIT) followed by DB E3 (FNINIT): the waiting form.
void Assembler::finit() {
  emit_int8((unsigned char)0x9B);
  emit_int8((unsigned char)0xDB);
  emit_int8((unsigned char)0xE3);
}
7017 
// FIST m32int — DB /2 ('rdx' supplies the /2 digit).
void Assembler::fist_s(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDB);
  emit_operand32(rdx, adr);
}
7023 
// FISTP m64int — DF /7.
void Assembler::fistp_d(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDF);
  emit_operand32(rdi, adr);
}
7029 
// FISTP m32int — DB /3.
void Assembler::fistp_s(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDB);
  emit_operand32(rbx, adr);
}
7035 
// FLD1 — D9 E8: push +1.0.
void Assembler::fld1() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xE8);
}
7040 
// FLD m64fp — DD /0.
void Assembler::fld_d(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDD);
  emit_operand32(rax, adr);
}
7046 
// FLD m32fp — D9 /0.
void Assembler::fld_s(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD9);
  emit_operand32(rax, adr);
}
7052 
7053 
// FLD ST(index) — D9 C0+index: push a copy of ST(index).
void Assembler::fld_s(int index) {
  emit_farith(0xD9, 0xC0, index);
}
7057 
// FLD m80fp (extended precision) — DB /5.
void Assembler::fld_x(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDB);
  emit_operand32(rbp, adr);
}
7063 
// FLDCW m16 — D9 /5: load the x87 control word.
void Assembler::fldcw(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD9);
  emit_operand32(rbp, src);
}
7069 
// FLDENV m — D9 /4 ('rsp' supplies the /4 digit): load the x87 environment.
void Assembler::fldenv(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD9);
  emit_operand32(rsp, src);
}
7075 
// FLDLG2 — D9 EC: push log10(2).
void Assembler::fldlg2() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xEC);
}
7080 
// FLDLN2 — D9 ED: push ln(2).
void Assembler::fldln2() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xED);
}
7085 
// FLDZ — D9 EE: push +0.0.
void Assembler::fldz() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xEE);
}
7090 
// Natural log of ST(0): push ln(2), swap so x is on top below it, then
// FYL2X computes ln(2) * log2(x) = ln(x).
void Assembler::flog() {
  fldln2();
  fxch();
  fyl2x();
}
7096 
// Base-10 log of ST(0): log10(2) * log2(x) = log10(x) via FYL2X.
void Assembler::flog10() {
  fldlg2();
  fxch();
  fyl2x();
}
7102 
// FMUL ST(0), ST(i) — D8 C8+i.
void Assembler::fmul(int i) {
  emit_farith(0xD8, 0xC8, i);
}
7106 
// FMUL m64fp — DC /1 ('rcx' supplies the /1 digit).
void Assembler::fmul_d(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDC);
  emit_operand32(rcx, src);
}
7112 
// FMUL m32fp — D8 /1.
void Assembler::fmul_s(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD8);
  emit_operand32(rcx, src);
}
7118 
// FMUL ST(i), ST(0) — DC C8+i.
void Assembler::fmula(int i) {
  emit_farith(0xDC, 0xC8, i);
}
7122 
// FMULP ST(i), ST(0), then pop — DE C8+i.
void Assembler::fmulp(int i) {
  emit_farith(0xDE, 0xC8, i);
}
7126 
// FNSAVE m — DD /6: store the full x87 state (no wait prefix).
void Assembler::fnsave(Address dst) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDD);
  emit_operand32(rsi, dst);
}
7132 
// Stores the x87 control word: 9B (FWAIT) + D9 /7 (FNSTCW) — i.e. this is the
// waiting FSTCW form despite the "n" in the method name.
void Assembler::fnstcw(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0x9B);
  emit_int8((unsigned char)0xD9);
  emit_operand32(rdi, src);
}
7139 
// FNSTSW AX — DF E0: copy the x87 status word into AX.
void Assembler::fnstsw_ax() {
  emit_int8((unsigned char)0xDF);
  emit_int8((unsigned char)0xE0);
}
7144 
// FPREM — D9 F8: partial remainder (truncating), ST(0) % ST(1).
void Assembler::fprem() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xF8);
}
7149 
// FPREM1 — D9 F5: IEEE partial remainder (round-to-nearest).
void Assembler::fprem1() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xF5);
}
7154 
// FRSTOR m — DD /4: restore the full x87 state saved by fnsave.
void Assembler::frstor(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDD);
  emit_operand32(rsp, src);
}
7160 
// FSIN — D9 FE: ST(0) = sin(ST(0)).
void Assembler::fsin() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xFE);
}
7165 
// FSQRT — D9 FA: ST(0) = sqrt(ST(0)).
void Assembler::fsqrt() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xFA);
}
7170 
// FST m64fp — DD /2: store ST(0) without popping.
void Assembler::fst_d(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDD);
  emit_operand32(rdx, adr);
}
7176 
// FST m32fp — D9 /2.
void Assembler::fst_s(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD9);
  emit_operand32(rdx, adr);
}
7182 
// FSTP m64fp — DD /3: store ST(0) and pop.
void Assembler::fstp_d(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDD);
  emit_operand32(rbx, adr);
}
7188 
// FSTP ST(index) — DD D8+index: copy ST(0) into ST(index) and pop.
void Assembler::fstp_d(int index) {
  emit_farith(0xDD, 0xD8, index);
}
7192 
// FSTP m32fp — D9 /3.
void Assembler::fstp_s(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD9);
  emit_operand32(rbx, adr);
}
7198 
// FSTP m80fp (extended precision) — DB /7.
void Assembler::fstp_x(Address adr) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDB);
  emit_operand32(rdi, adr);
}
7204 
// FSUB ST(0), ST(i) — D8 E0+i.
void Assembler::fsub(int i) {
  emit_farith(0xD8, 0xE0, i);
}
7208 
// FSUB m64fp — DC /4.
void Assembler::fsub_d(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDC);
  emit_operand32(rsp, src);
}
7214 
// FSUB m32fp — D8 /4.
void Assembler::fsub_s(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD8);
  emit_operand32(rsp, src);
}
7220 
// DC E8+i — subtract with ST(i) as destination (see manual-erratum note below).
void Assembler::fsuba(int i) {
  emit_farith(0xDC, 0xE8, i);
}
7224 
// DE E8+i.
void Assembler::fsubp(int i) {
  emit_farith(0xDE, 0xE8, i);                    // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
}
7228 
// D8 E8+i — reversed subtract, ST(0) destination.
void Assembler::fsubr(int i) {
  emit_farith(0xD8, 0xE8, i);
}
7232 
// FSUBR m64fp — DC /5.
void Assembler::fsubr_d(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xDC);
  emit_operand32(rbp, src);
}
7238 
// FSUBR m32fp — D8 /5.
void Assembler::fsubr_s(Address src) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xD8);
  emit_operand32(rbp, src);
}
7244 
// DC E0+i — reversed subtract with ST(i) as destination.
void Assembler::fsubra(int i) {
  emit_farith(0xDC, 0xE0, i);
}
7248 
// DE E0+i.
void Assembler::fsubrp(int i) {
  emit_farith(0xDE, 0xE0, i);                    // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
}
7252 
// tan(ST(0)): D9 F2 is FPTAN, which leaves tan(x) and pushes 1.0;
// the trailing DD D8 (FSTP ST(0)) pops that 1.0 so tan(x) ends in ST(0).
void Assembler::ftan() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xF2);
  emit_int8((unsigned char)0xDD);
  emit_int8((unsigned char)0xD8);
}
7259 
// FTST — D9 E4: compare ST(0) with +0.0.
void Assembler::ftst() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xE4);
}
7264 
// FUCOMI ST(0), ST(i) — DB E8+i: unordered compare, sets EFLAGS directly.
void Assembler::fucomi(int i) {
  // make sure the instruction is supported (introduced for P6, together with cmov)
  guarantee(VM_Version::supports_cmov(), "illegal instruction");
  emit_farith(0xDB, 0xE8, i);
}
7270 
// FUCOMIP ST(0), ST(i) — DF E8+i: like fucomi, then pop.
void Assembler::fucomip(int i) {
  // make sure the instruction is supported (introduced for P6, together with cmov)
  guarantee(VM_Version::supports_cmov(), "illegal instruction");
  emit_farith(0xDF, 0xE8, i);
}
7276 
// FWAIT/WAIT — 9B: wait for pending unmasked x87 exceptions.
void Assembler::fwait() {
  emit_int8((unsigned char)0x9B);
}
7280 
// FXCH ST(i) — D9 C8+i: exchange ST(0) and ST(i).
void Assembler::fxch(int i) {
  emit_farith(0xD9, 0xC8, i);
}
7284 
// FYL2X — D9 F1: ST(1) = ST(1) * log2(ST(0)), then pop.
void Assembler::fyl2x() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xF1);
}
7289 
// FRNDINT — D9 FC: round ST(0) to integer per the current rounding mode.
void Assembler::frndint() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xFC);
}
7294 
// F2XM1 — D9 F0: ST(0) = 2^ST(0) - 1.
void Assembler::f2xm1() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xF0);
}
7299 
// FLDL2E — D9 EA: push log2(e).
void Assembler::fldl2e() {
  emit_int8((unsigned char)0xD9);
  emit_int8((unsigned char)0xEA);
}
7304 
7305 // SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
7306 static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };
7307 // SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding.
7308 static int simd_opc[4] = { 0,    0, 0x38, 0x3A };
7309 
7310 // Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding.
7311 void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
7312   if (pre > 0) {
7313     emit_int8(simd_pre[pre]);
7314   }
7315   if (rex_w) {
7316     prefixq(adr, xreg);
7317   } else {
7318     prefix(adr, xreg);
7319   }
7320   if (opc > 0) {
7321     emit_int8(0x0F);
7322     int opc2 = simd_opc[opc];
7323     if (opc2 > 0) {
7324       emit_int8(opc2);
7325     }
7326   }
7327 }
7328 
7329 int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
7330   if (pre > 0) {
7331     emit_int8(simd_pre[pre]);
7332   }
7333   int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) : prefix_and_encode(dst_enc, src_enc);
7334   if (opc > 0) {
7335     emit_int8(0x0F);
7336     int opc2 = simd_opc[opc];
7337     if (opc2 > 0) {
7338       emit_int8(opc2);
7339     }
7340   }
7341   return encode;
7342 }
7343 
7344 
// Emit a VEX prefix (2- or 3-byte form) from the current instruction
// attributes.  The 3-byte form is required whenever B/X/W must be expressed
// or an opcode map other than 0F is used; otherwise the shorter C5 form is
// emitted.  R/X/B and vvvv are stored inverted per the VEX spec.
void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc) {
  int vector_len = _attributes->get_vector_len();
  bool vex_w = _attributes->is_rex_vex_w();
  if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
    prefix(VEX_3bytes);

    // byte 1: ~R ~X ~B | mmmmm (opcode map)
    int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
    byte1 = (~byte1) & 0xE0;
    byte1 |= opc;
    emit_int8(byte1);

    // byte 2: W | ~vvvv | L | pp
    int byte2 = ((~nds_enc) & 0xf) << 3;
    byte2 |= (vex_w ? VEX_W : 0) | ((vector_len > 0) ? 4 : 0) | pre;
    emit_int8(byte2);
  } else {
    prefix(VEX_2bytes);

    // single payload byte: ~R | ~vvvv | L | pp
    int byte1 = vex_r ? VEX_R : 0;
    byte1 = (~byte1) & 0x80;
    byte1 |= ((~nds_enc) & 0xf) << 3;
    byte1 |= ((vector_len > 0 ) ? 4 : 0) | pre;
    emit_int8(byte1);
  }
}
7369 
7370 // This is a 4 byte encoding
// This is a 4 byte encoding: 62h escape plus three payload bytes (P0-P2)
// assembled from the current instruction attributes.
void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v, int nds_enc, VexSimdPrefix pre, VexOpcode opc){
  // EVEX 0x62 prefix
  prefix(EVEX_4bytes);
  bool vex_w = _attributes->is_rex_vex_w();
  int evex_encoding = (vex_w ? VEX_W : 0);
  // EVEX.b is not currently used for broadcast of single element or data rounding modes
  _attributes->set_evex_encoding(evex_encoding);

  // P0: byte 2, initialized to RXBR`00mm
  // (bits are stored inverted, hence the (~byte2) below)
  int byte2 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0) | (evex_r ? EVEX_Rb : 0);
  byte2 = (~byte2) & 0xF0;
  // confine opc opcode extensions in mm bits to lower two bits
  // of form {0F, 0F_38, 0F_3A}
  byte2 |= opc;
  emit_int8(byte2);

  // P1: byte 3 as Wvvvv1pp
  int byte3 = ((~nds_enc) & 0xf) << 3;
  // p[10] is always 1
  byte3 |= EVEX_F;
  byte3 |= (vex_w & 1) << 7;
  // confine pre opcode extensions in pp bits to lower two bits
  // of form {66, F3, F2}
  byte3 |= pre;
  emit_int8(byte3);

  // P2: byte 4 as zL'Lbv'aaa
  // kregs are implemented in the low 3 bits as aaa (hard code k1, it will be initialized for now)
  int byte4 = (_attributes->is_no_reg_mask()) ?
              0 :
              _attributes->get_embedded_opmask_register_specifier();
  // EVEX.v` for extending EVEX.vvvv or VIDX
  byte4 |= (evex_v ? 0: EVEX_V);
  // third is EVEX.b for broadcast actions
  byte4 |= (_attributes->is_extended_context() ? EVEX_Rb : 0);
  // fourth EVEX.L'L for vector length : 0 is 128, 1 is 256, 2 is 512, currently we do not support 1024
  byte4 |= ((_attributes->get_vector_len())& 0x3) << 5;
  // last is EVEX.z for zero/merge actions
  if (_attributes->is_no_reg_mask() == false) {
    byte4 |= (_attributes->is_clear_context() ? EVEX_Z : 0);
  }
  emit_int8(byte4);
}
7415 
// Memory-operand prefix driver: decides between VEX and EVEX encoding based
// on UseAVX level, the attributes, and register-bank usage, then emits the
// chosen prefix.  B/X come from the address (base/index need REX).
void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) {
  bool vex_r = ((xreg_enc & 8) == 8) ? 1 : 0;
  bool vex_b = adr.base_needs_rex();
  bool vex_x = adr.index_needs_rex();
  set_attributes(attributes);
  attributes->set_current_assembler(this);

  // if vector length is turned off, revert to AVX for vectors smaller than 512-bit
  if (UseAVX > 2 && _legacy_mode_vl && attributes->uses_vl()) {
    switch (attributes->get_vector_len()) {
    case AVX_128bit:
    case AVX_256bit:
      attributes->set_is_legacy_mode();
      break;
    }
  }

  // For pure EVEX check and see if this instruction
  // is allowed in legacy mode and has resources which will
  // fit in it.  Pure EVEX instructions will use set_is_evex_instruction in their definition,
  // else that field is set when we encode to EVEX
  if (UseAVX > 2 && !attributes->is_legacy_mode() &&
      !_is_managed && !attributes->is_evex_instruction()) {
    if (!_legacy_mode_vl && attributes->get_vector_len() != AVX_512bit) {
      // 64-bit only: registers 16-31 force EVEX; 32-bit has no upper bank.
      bool check_register_bank = NOT_IA32(true) IA32_ONLY(false);
      if (check_register_bank) {
        // check nds_enc and xreg_enc for upper bank usage
        if (nds_enc < 16 && xreg_enc < 16) {
          attributes->set_is_legacy_mode();
        }
      } else {
        attributes->set_is_legacy_mode();
      }
    }
  }

  // _is_managed is a one-shot flag; consume it for this instruction.
  _is_managed = false;
  if (UseAVX > 2 && !attributes->is_legacy_mode())
  {
    bool evex_r = (xreg_enc >= 16);
    bool evex_v = (nds_enc >= 16);
    attributes->set_is_evex_instruction();
    evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
  } else {
    // W was only needed for the EVEX form; drop it for the legacy encoding.
    if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
      attributes->set_rex_vex_w(false);
    }
    vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
  }
}
7466 
// Register-form prefix driver: decides between VEX and EVEX, emits the
// prefix, and returns the dst/src low-3-bit fields for the caller's ModRM
// byte.  R extends dst, B extends src, X doubles as the src bank extender
// in the EVEX case.
int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) {
  bool vex_r = ((dst_enc & 8) == 8) ? 1 : 0;
  bool vex_b = ((src_enc & 8) == 8) ? 1 : 0;
  bool vex_x = false;
  set_attributes(attributes);
  attributes->set_current_assembler(this);
  // 64-bit only: registers 16-31 force EVEX; 32-bit has no upper bank.
  bool check_register_bank = NOT_IA32(true) IA32_ONLY(false);

  // if vector length is turned off, revert to AVX for vectors smaller than 512-bit
  if (UseAVX > 2 && _legacy_mode_vl && attributes->uses_vl()) {
    switch (attributes->get_vector_len()) {
    case AVX_128bit:
    case AVX_256bit:
      if (check_register_bank) {
        if (dst_enc >= 16 || nds_enc >= 16 || src_enc >= 16) {
          // up propagate arithmetic instructions to meet RA requirements
          attributes->set_vector_len(AVX_512bit);
        } else {
          attributes->set_is_legacy_mode();
        }
      } else {
        attributes->set_is_legacy_mode();
      }
      break;
    }
  }

  // For pure EVEX check and see if this instruction
  // is allowed in legacy mode and has resources which will
  // fit in it.  Pure EVEX instructions will use set_is_evex_instruction in their definition,
  // else that field is set when we encode to EVEX
  if (UseAVX > 2 && !attributes->is_legacy_mode() &&
      !_is_managed && !attributes->is_evex_instruction()) {
    if (!_legacy_mode_vl && attributes->get_vector_len() != AVX_512bit) {
      if (check_register_bank) {
        // check dst_enc, nds_enc and src_enc for upper bank usage
        if (dst_enc < 16 && nds_enc < 16 && src_enc < 16) {
          attributes->set_is_legacy_mode();
        }
      } else {
        attributes->set_is_legacy_mode();
      }
    }
  }

  // _is_managed is a one-shot flag; consume it for this instruction.
  _is_managed = false;
  if (UseAVX > 2 && !attributes->is_legacy_mode())
  {
    bool evex_r = (dst_enc >= 16);
    bool evex_v = (nds_enc >= 16);
    // can use vex_x as bank extender on rm encoding
    vex_x = (src_enc >= 16);
    attributes->set_is_evex_instruction();
    evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
  } else {
    // W was only needed for the EVEX form; drop it for the legacy encoding.
    if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
      attributes->set_rex_vex_w(false);
    }
    vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
  }

  // return modrm byte components for operands
  return (((dst_enc & 7) << 3) | (src_enc & 7));
}
7531 
7532 
// Memory-operand SIMD prefix dispatch: VEX/EVEX when AVX is enabled,
// otherwise the legacy REX + escape-byte encoding (nds must then be absent
// or equal to xreg, since SSE has no third operand).
void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
                            VexOpcode opc, InstructionAttr *attributes) {
  if (UseAVX > 0) {
    int xreg_enc = xreg->encoding();
    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
    vex_prefix(adr, nds_enc, xreg_enc, pre, opc, attributes);
  } else {
    assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
    rex_prefix(adr, xreg, pre, opc, attributes->is_rex_vex_w());
  }
}
7544 
// Register-form SIMD prefix dispatch; returns ModRM reg/rm fields.
// Falls back to legacy REX encoding when AVX is disabled (nds must then
// coincide with dst or src, or be absent).
int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
                                      VexOpcode opc, InstructionAttr *attributes) {
  int dst_enc = dst->encoding();
  int src_enc = src->encoding();
  if (UseAVX > 0) {
    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, attributes);
  } else {
    assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
    return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, attributes->is_rex_vex_w());
  }
}
7557 
// VCMPPD — VEX.66.0F C2 /r ib; 'cop' is the comparison predicate (low 4
// bits).  Restricted to AVX-only CPUs here (the !supports_evex assert).
void Assembler::cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  assert(!VM_Version::supports_evex(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xC2);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8((unsigned char)(0xF & cop));
}
7567 
// VBLENDVPD — VEX.66.0F3A 4B /r /is4: the fourth (mask) register src2 is
// encoded in bits 7:4 of the trailing immediate byte.
void Assembler::blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  assert(!VM_Version::supports_evex(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8((unsigned char)0x4B);
  emit_int8((unsigned char)(0xC0 | encode));
  int src2_enc = src2->encoding();
  emit_int8((unsigned char)(0xF0 & src2_enc<<4));
}
7578 
// VCMPPS — VEX.0F C2 /r ib (no SIMD prefix); 'cop' is the predicate.
void Assembler::cmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  assert(!VM_Version::supports_evex(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int8((unsigned char)0xC2);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8((unsigned char)(0xF & cop));
}
7588 
// VBLENDVPS — VEX.66.0F3A 4A /r /is4: src2 encoded in imm8 bits 7:4.
void Assembler::blendvps(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
  assert(VM_Version::supports_avx(), "");
  assert(!VM_Version::supports_evex(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8((unsigned char)0x4A);
  emit_int8((unsigned char)(0xC0 | encode));
  int src2_enc = src2->encoding();
  emit_int8((unsigned char)(0xF0 & src2_enc<<4));
}
7599 
// VPBLENDD — VEX.66.0F3A 02 /r ib (AVX2): imm8 selects dword lanes.
void Assembler::vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
  assert(VM_Version::supports_avx2(), "");
  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
  emit_int8((unsigned char)0x02);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8((unsigned char)imm8);
}
7608 
// SHLX r32, r/m32, r32 — VEX.66.0F38.W0 F7 /r (BMI2).  Shift count (src2)
// travels in the VEX.vvvv field, hence it is passed as the 'nds' encoding.
void Assembler::shlxl(Register dst, Register src1, Register src2) {
  assert(VM_Version::supports_bmi2(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xC0 | encode));
}
7616 
// SHLX r64, r/m64, r64 — VEX.66.0F38.W1 F7 /r (BMI2); 64-bit via vex_w.
void Assembler::shlxq(Register dst, Register src1, Register src2) {
  assert(VM_Version::supports_bmi2(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xC0 | encode));
}
7624 
7625 #ifndef _LP64
7626 
// INC r32 — one-byte 40+rd form; 32-bit only (40-47 are REX bytes on x86-64).
void Assembler::incl(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementl() instead.
  emit_int8(0x40 | dst->encoding());
}
7631 
// On 32-bit, plain lea is just the 32-bit form.
void Assembler::lea(Register dst, Address src) {
  leal(dst, src);
}
7635 
// MOV m32, imm32 — C7 /0 with a relocated immediate ('rax' = digit 0).
void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xC7);
  emit_operand(rax, dst);
  emit_data((int)imm32, rspec, 0);
}
7642 
// MOV r32, imm32 — B8+rd with a relocated immediate.
void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)(0xB8 | encode));
  emit_data((int)imm32, rspec, 0);
}
7649 
// POPA — 61: pop all general registers (32-bit mode only).
void Assembler::popa() { // 32bit
  emit_int8(0x61);
}
7653 
// PUSH imm32 — 68 id with a relocated immediate.
void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_int8(0x68);
  emit_data(imm32, rspec, 0);
}
7659 
// PUSHA — 60: push all general registers (32-bit mode only).
void Assembler::pusha() { // 32bit
  emit_int8(0x60);
}
7663 
// SETNE r8 — 0F 95; ModRM 0xE0|reg is mod=11 (the reg-field bits are
// don't-care for SETcc).
void Assembler::set_byte_if_not_zero(Register dst) {
  emit_int8(0x0F);
  emit_int8((unsigned char)0x95);
  emit_int8((unsigned char)(0xE0 | dst->encoding()));
}
7669 
// SHLD r32, r32, CL — 0F A5 /r (count in CL).
void Assembler::shldl(Register dst, Register src) {
  emit_int8(0x0F);
  emit_int8((unsigned char)0xA5);
  emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
}
7675 
7676 // 0F A4 / r ib
// SHLD r32, r32, imm8 — 0F A4 /r ib.
void Assembler::shldl(Register dst, Register src, int8_t imm8) {
  emit_int8(0x0F);
  emit_int8((unsigned char)0xA4);
  emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
  emit_int8(imm8);
}
7683 
// SHRD r32, r32, CL — 0F AD /r (count in CL).
void Assembler::shrdl(Register dst, Register src) {
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAD);
  emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
}
7689 
7690 #else // LP64
7691 
// SETNZ r8 (0x0F 0x95 /r): set the low byte of dst to 1 if ZF == 0.
// 64-bit variant: may need a REX prefix (byteinst = true) so that
// encodings 4-7 address spl/bpl/sil/dil instead of ah/ch/dh/bh.
void Assembler::set_byte_if_not_zero(Register dst) {
  int enc = prefix_and_encode(dst->encoding(), true);
  emit_int8(0x0F);
  emit_int8((unsigned char)0x95);
  emit_int8((unsigned char)(0xE0 | enc));
}
7698 
7699 // 64bit only pieces of the assembler
7700 // This should only be used by 64bit instructions that can use rip-relative
7701 // it cannot be used by instructions that want an immediate value.
7702 
// Returns true if 'adr' can be addressed rip-relatively (i.e. the
// displacement from anywhere in the code cache fits in a signed 32-bit
// field); false means the caller must materialize a 64-bit literal.
bool Assembler::reachable(AddressLiteral adr) {
  int64_t disp;
  // None will force a 64bit literal to the code stream. Likely a placeholder
  // for something that will be patched later and we need to certain it will
  // always be reachable.
  if (adr.reloc() == relocInfo::none) {
    return false;
  }
  if (adr.reloc() == relocInfo::internal_word_type) {
    // This should be rip relative and easily reachable.
    return true;
  }
  if (adr.reloc() == relocInfo::virtual_call_type ||
      adr.reloc() == relocInfo::opt_virtual_call_type ||
      adr.reloc() == relocInfo::static_call_type ||
      adr.reloc() == relocInfo::static_stub_type ) {
    // This should be rip relative within the code cache and easily
    // reachable until we get huge code caches. (At which point
    // ic code is going to have issues).
    return true;
  }
  if (adr.reloc() != relocInfo::external_word_type &&
      adr.reloc() != relocInfo::poll_return_type &&  // these are really external_word but need special
      adr.reloc() != relocInfo::poll_type &&         // relocs to identify them
      adr.reloc() != relocInfo::runtime_call_type ) {
    return false;
  }

  // Stress the correction code
  if (ForceUnreachable) {
    // Must be runtimecall reloc, see if it is in the codecache
    // Flipping stuff in the codecache to be unreachable causes issues
    // with things like inline caches where the additional instructions
    // are not handled.
    if (CodeCache::find_blob(adr._target) == NULL) {
      return false;
    }
  }
  // For external_word_type/runtime_call_type if it is reachable from where we
  // are now (possibly a temp buffer) and where we might end up
  // anywhere in the codeCache then we are always reachable.
  // This would have to change if we ever save/restore shared code
  // to be more pessimistic.
  // Check reachability from both extremes of the code cache; if both
  // fit in simm32 the target is reachable from any point in between.
  disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
  if (!is_simm32(disp)) return false;
  disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
  if (!is_simm32(disp)) return false;

  disp = (int64_t)adr._target - ((int64_t)pc() + sizeof(int));

  // Because rip relative is a disp + address_of_next_instruction and we
  // don't know the value of address_of_next_instruction we apply a fudge factor
  // to make sure we will be ok no matter the size of the instruction we get placed into.
  // We don't have to fudge the checks above here because they are already worst case.

  // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp , 4-byte literal
  // + 4 because better safe than sorry.
  const int fudge = 12 + 4;
  if (disp < 0) {
    disp -= fudge;
  } else {
    disp += fudge;
  }
  return is_simm32(disp);
}
7768 
// Check if the polling page is not reachable from the code cache using rip-relative
// addressing. Checked against both code cache bounds so the answer is
// valid regardless of where the emitting code ends up.
bool Assembler::is_polling_page_far() {
  intptr_t addr = (intptr_t)os::get_polling_page();
  return ForceUnreachable ||
         !is_simm32(addr - (intptr_t)CodeCache::low_bound()) ||
         !is_simm32(addr - (intptr_t)CodeCache::high_bound());
}
7777 
7778 void Assembler::emit_data64(jlong data,
7779                             relocInfo::relocType rtype,
7780                             int format) {
7781   if (rtype == relocInfo::none) {
7782     emit_int64(data);
7783   } else {
7784     emit_data64(data, Relocation::spec_simple(rtype), format);
7785   }
7786 }
7787 
// Emit a 64-bit data word with relocation info attached to the
// enclosing instruction (marked by InstructionMark).
void Assembler::emit_data64(jlong data,
                            RelocationHolder const& rspec,
                            int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(imm_operand == format, "must be immediate");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  // Do not use AbstractAssembler::relocate, which is not intended for
  // embedded words.  Instead, relocate to the enclosing instruction.
  code_section()->relocate(inst_mark(), rspec, format);
#ifdef ASSERT
  check_relocation(rspec, format);
#endif
  emit_int64(data);
}
7802 
// Emit REX.B if the register is r8-r15; for byte instructions also emit
// a bare REX for encodings 4-7 so they mean spl/bpl/sil/dil rather than
// ah/ch/dh/bh. Returns the low 3 bits of the encoding for the ModRM byte.
int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
  if (reg_enc >= 8) {
    prefix(REX_B);
    reg_enc -= 8;
  } else if (byteinst && reg_enc >= 4) {
    prefix(REX);
  }
  return reg_enc;
}
7812 
7813 int Assembler::prefixq_and_encode(int reg_enc) {
7814   if (reg_enc < 8) {
7815     prefix(REX_W);
7816   } else {
7817     prefix(REX_WB);
7818     reg_enc -= 8;
7819   }
7820   return reg_enc;
7821 }
7822 
// Emit the REX prefix needed for a two-register operand pair (REX.R for
// the dst/reg field, REX.B for the src/rm field, or bare REX for byte
// registers 4-7), and return the packed ModRM reg/rm bits (dst in bits 5:3).
int Assembler::prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte) {
  if (dst_enc < 8) {
    if (src_enc >= 8) {
      prefix(REX_B);
      src_enc -= 8;
    } else if ((src_is_byte && src_enc >= 4) || (dst_is_byte && dst_enc >= 4)) {
      prefix(REX);
    }
  } else {
    if (src_enc < 8) {
      prefix(REX_R);
    } else {
      prefix(REX_RB);
      src_enc -= 8;
    }
    dst_enc -= 8;
  }
  return dst_enc << 3 | src_enc;
}
7842 
// 64-bit variant of the two-register prefix: always emits REX.W, plus
// REX.R/REX.B as needed for extended registers. Returns the packed
// ModRM reg/rm bits (dst in bits 5:3).
int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
  if (dst_enc < 8) {
    if (src_enc < 8) {
      prefix(REX_W);
    } else {
      prefix(REX_WB);
      src_enc -= 8;
    }
  } else {
    if (src_enc < 8) {
      prefix(REX_WR);
    } else {
      prefix(REX_WRB);
      src_enc -= 8;
    }
    dst_enc -= 8;
  }
  return dst_enc << 3 | src_enc;
}
7862 
// Emit REX.B if the register is one of r8-r15 (rm-field operand).
void Assembler::prefix(Register reg) {
  if (reg->encoding() >= 8) {
    prefix(REX_B);
  }
}
7868 
// Merge REX.R (dst) and REX.B (src) into a caller-supplied base prefix
// and emit it, unless the result is the empty prefix.
void Assembler::prefix(Register dst, Register src, Prefix p) {
  if (src->encoding() >= 8) {
    p = (Prefix)(p | REX_B);
  }
  if (dst->encoding() >= 8) {
    p = (Prefix)( p | REX_R);
  }
  if (p != Prefix_EMPTY) {
    // do not generate an empty prefix
    prefix(p);
  }
}
7881 
// Emit REX prefix bits for a register + memory operand pair where the
// caller supplies a base prefix. Note: an extended index register
// (REX.X) is not supported here — the REX_B emitted for the base is
// deliberately not merged into 'p', matching the existing callers.
void Assembler::prefix(Register dst, Address adr, Prefix p) {
  if (adr.base_needs_rex()) {
    if (adr.index_needs_rex()) {
      assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
    } else {
      prefix(REX_B);
    }
  } else {
    if (adr.index_needs_rex()) {
      assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
    }
  }
  if (dst->encoding() >= 8) {
    p = (Prefix)(p | REX_R);
  }
  if (p != Prefix_EMPTY) {
    // do not generate an empty prefix
    prefix(p);
  }
}
7902 
7903 void Assembler::prefix(Address adr) {
7904   if (adr.base_needs_rex()) {
7905     if (adr.index_needs_rex()) {
7906       prefix(REX_XB);
7907     } else {
7908       prefix(REX_B);
7909     }
7910   } else {
7911     if (adr.index_needs_rex()) {
7912       prefix(REX_X);
7913     }
7914   }
7915 }
7916 
7917 void Assembler::prefixq(Address adr) {
7918   if (adr.base_needs_rex()) {
7919     if (adr.index_needs_rex()) {
7920       prefix(REX_WXB);
7921     } else {
7922       prefix(REX_WB);
7923     }
7924   } else {
7925     if (adr.index_needs_rex()) {
7926       prefix(REX_WX);
7927     } else {
7928       prefix(REX_W);
7929     }
7930   }
7931 }
7932 
7933 
// Emit the REX prefix for a register (reg field) + memory operand pair:
// REX.R for an extended reg, REX.B/REX.X for extended base/index, and a
// bare REX for byte-register encodings 4-7 (spl/bpl/sil/dil).
void Assembler::prefix(Address adr, Register reg, bool byteinst) {
  if (reg->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_XB);
      } else {
        prefix(REX_B);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_X);
      } else if (byteinst && reg->encoding() >= 4 ) {
        prefix(REX);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_RXB);
      } else {
        prefix(REX_RB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_RX);
      } else {
        prefix(REX_R);
      }
    }
  }
}
7965 
// 64-bit variant of the register + memory prefix: always includes REX.W,
// plus REX.R/REX.X/REX.B as needed for extended registers.
void Assembler::prefixq(Address adr, Register src) {
  if (src->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WXB);
      } else {
        prefix(REX_WB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WX);
      } else {
        prefix(REX_W);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WRXB);
      } else {
        prefix(REX_WRB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WRX);
      } else {
        prefix(REX_WR);
      }
    }
  }
}
7997 
// Emit the REX prefix for an XMM register (reg field) + memory operand:
// REX.R for xmm8-xmm15, REX.B/REX.X for extended base/index registers.
void Assembler::prefix(Address adr, XMMRegister reg) {
  if (reg->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_XB);
      } else {
        prefix(REX_B);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_X);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_RXB);
      } else {
        prefix(REX_RB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_RX);
      } else {
        prefix(REX_R);
      }
    }
  }
}
8027 
// REX.W variant of the XMM register + memory prefix (used for 64-bit
// GPR<->XMM forms): always includes REX.W, plus REX.R/REX.X/REX.B.
void Assembler::prefixq(Address adr, XMMRegister src) {
  if (src->encoding() < 8) {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WXB);
      } else {
        prefix(REX_WB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WX);
      } else {
        prefix(REX_W);
      }
    }
  } else {
    if (adr.base_needs_rex()) {
      if (adr.index_needs_rex()) {
        prefix(REX_WRXB);
      } else {
        prefix(REX_WRB);
      }
    } else {
      if (adr.index_needs_rex()) {
        prefix(REX_WRX);
      } else {
        prefix(REX_WR);
      }
    }
  }
}
8059 
// ADC r64, imm32 (0x81 /2, base modrm 0xD0).
void Assembler::adcq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xD0, dst, imm32);
}
8064 
// ADC r64, m64 (0x13 /r).
void Assembler::adcq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x13);
  emit_operand(dst, src);
}
8071 
// ADC r64, r64 (0x13 /r).
void Assembler::adcq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x13, 0xC0, dst, src);
}
8076 
// ADD m64, imm (0x81 /0); emit_arith_operand may pick the shorter
// sign-extended imm8 form when the immediate fits.
void Assembler::addq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rax, dst,imm32);
}
8082 
// ADD m64, r64 (0x01 /r).
void Assembler::addq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_int8(0x01);
  emit_operand(src, dst);
}
8089 
// ADD r64, imm32 (0x81 /0, base modrm 0xC0).
void Assembler::addq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xC0, dst, imm32);
}
8094 
// ADD r64, m64 (0x03 /r).
void Assembler::addq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x03);
  emit_operand(dst, src);
}
8101 
// ADD r64, r64 (0x03 /r).
void Assembler::addq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}
8106 
// ADCX r64, r64 (66 REX.W 0F 38 F6 /r): carry-only add using CF.
// The 0x66 operand-size prefix must precede the REX prefix.
void Assembler::adcxq(Register dst, Register src) {
  //assert(VM_Version::supports_adx(), "adx instructions not supported");
  emit_int8((unsigned char)0x66);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8(0x38);
  emit_int8((unsigned char)0xF6);
  emit_int8((unsigned char)(0xC0 | encode));
}
8116 
// ADOX r64, r64 (F3 REX.W 0F 38 F6 /r): carry-only add using OF.
// The 0xF3 prefix must precede the REX prefix.
void Assembler::adoxq(Register dst, Register src) {
  //assert(VM_Version::supports_adx(), "adx instructions not supported");
  emit_int8((unsigned char)0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8(0x38);
  emit_int8((unsigned char)0xF6);
  emit_int8((unsigned char)(0xC0 | encode));
}
8126 
8127 void Assembler::andq(Address dst, int32_t imm32) {
8128   InstructionMark im(this);
8129   prefixq(dst);
8130   emit_int8((unsigned char)0x81);
8131   emit_operand(rsp, dst, 4);
8132   emit_int32(imm32);
8133 }
8134 
// AND r64, imm32 (0x81 /4, base modrm 0xE0).
void Assembler::andq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xE0, dst, imm32);
}
8139 
// AND r64, m64 (0x23 /r).
void Assembler::andq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x23);
  emit_operand(dst, src);
}
8146 
// AND r64, r64 (0x23 /r).
void Assembler::andq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x23, 0xC0, dst, src);
}
8151 
// ANDN r64, r64, r64 (VEX.LZ.0F38.W1 F2 /r): dst = ~src1 & src2. BMI1.
void Assembler::andnq(Register dst, Register src1, Register src2) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF2);
  emit_int8((unsigned char)(0xC0 | encode));
}
8159 
// ANDN r64, r64, m64 (VEX.LZ.0F38.W1 F2 /r): dst = ~src1 & [src2]. BMI1.
void Assembler::andnq(Register dst, Register src1, Address src2) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF2);
  emit_operand(dst, src2);
}
8168 
// BSF r64, r64 (0x0F 0xBC /r): bit scan forward.
void Assembler::bsfq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBC);
  emit_int8((unsigned char)(0xC0 | encode));
}
8175 
// BSR r64, r64 (0x0F 0xBD /r): bit scan reverse.
void Assembler::bsrq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBD);
  emit_int8((unsigned char)(0xC0 | encode));
}
8182 
// BSWAP r64 (0x0F 0xC8+rd): byte-swap the 64-bit register.
void Assembler::bswapq(Register reg) {
  int encode = prefixq_and_encode(reg->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)(0xC8 | encode));
}
8188 
// BLSI r64, r64 (VEX.LZ.0F38.W1 F3 /3): isolate lowest set bit. BMI1.
// The /3 opcode extension is encoded via rbx in the reg field.
void Assembler::blsiq(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}
8196 
// BLSI r64, m64 (VEX.LZ.0F38.W1 F3 /3): isolate lowest set bit. BMI1.
void Assembler::blsiq(Register dst, Address src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_operand(rbx, src);  // rbx encodes the /3 extension
}
8205 
// BLSMSK r64, r64 (VEX.LZ.0F38.W1 F3 /2): mask up to lowest set bit. BMI1.
// The /2 opcode extension is encoded via rdx in the reg field.
void Assembler::blsmskq(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}
8213 
// BLSMSK r64, m64 (VEX.LZ.0F38.W1 F3 /2): mask up to lowest set bit. BMI1.
void Assembler::blsmskq(Register dst, Address src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_operand(rdx, src);  // rdx encodes the /2 extension
}
8222 
// BLSR r64, r64 (VEX.LZ.0F38.W1 F3 /1): reset lowest set bit. BMI1.
// The /1 opcode extension is encoded via rcx in the reg field.
void Assembler::blsrq(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}
8230 
// BLSR r64, m64 (VEX.LZ.0F38.W1 F3 /1): reset lowest set bit. BMI1.
void Assembler::blsrq(Register dst, Address src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_operand(rcx, src);  // rcx encodes the /1 extension
}
8239 
// CQO (REX.W 0x99): sign-extend rax into rdx:rax.
void Assembler::cdqq() {
  prefix(REX_W);
  emit_int8((unsigned char)0x99);
}
8244 
// CLFLUSH m8 (0x0F 0xAE /7): flush the cache line containing adr.
// The /7 opcode extension is encoded via rdi in the reg field.
void Assembler::clflush(Address adr) {
  prefix(adr);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAE);
  emit_operand(rdi, adr);
}
8251 
// CMOVcc r64, r64 (0x0F 0x40+cc /r): conditional move.
void Assembler::cmovq(Condition cc, Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8(0x40 | cc);
  emit_int8((unsigned char)(0xC0 | encode));
}
8258 
// CMOVcc r64, m64 (0x0F 0x40+cc /r): conditional move from memory.
void Assembler::cmovq(Condition cc, Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x0F);
  emit_int8(0x40 | cc);
  emit_operand(dst, src);
}
8266 
8267 void Assembler::cmpq(Address dst, int32_t imm32) {
8268   InstructionMark im(this);
8269   prefixq(dst);
8270   emit_int8((unsigned char)0x81);
8271   emit_operand(rdi, dst, 4);
8272   emit_int32(imm32);
8273 }
8274 
// CMP r64, imm32 (0x81 /7, base modrm 0xF8).
void Assembler::cmpq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding());
  emit_arith(0x81, 0xF8, dst, imm32);
}
8279 
8280 void Assembler::cmpq(Address dst, Register src) {
8281   InstructionMark im(this);
8282   prefixq(dst, src);
8283   emit_int8(0x3B);
8284   emit_operand(src, dst);
8285 }
8286 
// CMP r64, r64 (0x3B /r): compute dst - src and set flags.
void Assembler::cmpq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}
8291 
// CMP r64, m64 (0x3B /r): compute dst - [src] and set flags.
void Assembler::cmpq(Register dst, Address  src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x3B);
  emit_operand(dst, src);
}
8298 
// CMPXCHG m64, r64 (0x0F 0xB1 /r): compare rax with [adr]; if equal,
// store reg to [adr], else load [adr] into rax. LOCK prefix, if any,
// must be emitted by the caller before this.
void Assembler::cmpxchgq(Register reg, Address adr) {
  InstructionMark im(this);
  prefixq(adr, reg);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xB1);
  emit_operand(reg, adr);
}
8306 
// CVTSI2SD xmm, r64 (F2 REX.W 0x0F 0x2A /r): convert int64 to double.
void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x2A);
  emit_int8((unsigned char)(0xC0 | encode));
}
8314 
// CVTSI2SD xmm, m64 (F2 REX.W 0x0F 0x2A /r): convert int64 in memory to double.
void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x2A);
  emit_operand(dst, src);
}
8324 
// CVTSI2SS xmm, m64 (F3 REX.W 0x0F 0x2A /r): convert int64 in memory to float.
void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x2A);
  emit_operand(dst, src);
}
8334 
// CVTTSD2SI r64, xmm (F2 REX.W 0x0F 0x2C /r): truncating double-to-int64.
void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x2C);
  emit_int8((unsigned char)(0xC0 | encode));
}
8342 
// CVTTSS2SI r64, xmm (F3 REX.W 0x0F 0x2C /r): truncating float-to-int64.
void Assembler::cvttss2siq(Register dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x2C);
  emit_int8((unsigned char)(0xC0 | encode));
}
8350 
// DEC r32 via the two-byte form (0xFF /1, base modrm 0xC8).
void Assembler::decl(Register dst) {
  // Don't use it directly. Use MacroAssembler::decrementl() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xC8 | encode));
}
8358 
8359 void Assembler::decq(Register dst) {
8360   // Don't use it directly. Use MacroAssembler::decrementq() instead.
8361   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
8362   int encode = prefixq_and_encode(dst->encoding());
8363   emit_int8((unsigned char)0xFF);
8364   emit_int8(0xC8 | encode);
8365 }
8366 
// DEC m64 (0xFF /1); rcx in the reg field encodes the /1 extension.
void Assembler::decq(Address dst) {
  // Don't use it directly. Use MacroAssembler::decrementq() instead.
  InstructionMark im(this);
  prefixq(dst);
  emit_int8((unsigned char)0xFF);
  emit_operand(rcx, dst);
}
8374 
// FXRSTOR64 m512 (REX.W 0x0F 0xAE /1): restore x87/SSE state.
void Assembler::fxrstor(Address src) {
  prefixq(src);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAE);
  emit_operand(as_Register(1), src);  // /1 extension
}
8381 
// XRSTOR64 mem (REX.W 0x0F 0xAE /5): restore extended processor state.
void Assembler::xrstor(Address src) {
  prefixq(src);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAE);
  emit_operand(as_Register(5), src);  // /5 extension
}
8388 
// FXSAVE64 m512 (REX.W 0x0F 0xAE /0): save x87/SSE state.
void Assembler::fxsave(Address dst) {
  prefixq(dst);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAE);
  emit_operand(as_Register(0), dst);  // /0 extension
}
8395 
// XSAVE64 mem (REX.W 0x0F 0xAE /4): save extended processor state.
void Assembler::xsave(Address dst) {
  prefixq(dst);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAE);
  emit_operand(as_Register(4), dst);  // /4 extension
}
8402 
// IDIV r64 (0xF7 /7, base modrm 0xF8): signed divide rdx:rax by src.
void Assembler::idivq(Register src) {
  int encode = prefixq_and_encode(src->encoding());
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xF8 | encode));
}
8408 
// IMUL r64, r64 (0x0F 0xAF /r): two-operand signed multiply.
void Assembler::imulq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAF);
  emit_int8((unsigned char)(0xC0 | encode));
}
8415 
// IMUL r64, r64, imm: dst = src * value. Uses the short imm8 form
// (0x6B) when the value fits in 8 bits, else the imm32 form (0x69).
void Assembler::imulq(Register dst, Register src, int value) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  if (is8bit(value)) {
    emit_int8(0x6B);
    emit_int8((unsigned char)(0xC0 | encode));
    emit_int8(value & 0xFF);
  } else {
    emit_int8(0x69);
    emit_int8((unsigned char)(0xC0 | encode));
    emit_int32(value);
  }
}
8428 
// IMUL r64, m64 (0x0F 0xAF /r): two-operand signed multiply from memory.
void Assembler::imulq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x0F);
  emit_int8((unsigned char) 0xAF);
  emit_operand(dst, src);
}
8436 
// INC r32 via the two-byte form (0xFF /0, base modrm 0xC0).
void Assembler::incl(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementl() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xC0 | encode));
}
8444 
// INC r64 via the two-byte form (0xFF /0, base modrm 0xC0).
void Assembler::incq(Register dst) {
  // Don't use it directly. Use MacroAssembler::incrementq() instead.
  // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xC0 | encode));
}
8452 
// INC m64 (0xFF /0); rax in the reg field encodes the /0 extension.
void Assembler::incq(Address dst) {
  // Don't use it directly. Use MacroAssembler::incrementq() instead.
  InstructionMark im(this);
  prefixq(dst);
  emit_int8((unsigned char)0xFF);
  emit_operand(rax, dst);
}
8460 
// Pointer-sized LEA: on 64-bit this is the 64-bit form.
void Assembler::lea(Register dst, Address src) {
  leaq(dst, src);
}
8464 
// LEA r64, m (0x8D /r): load effective address.
void Assembler::leaq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8((unsigned char)0x8D);
  emit_operand(dst, src);
}
8471 
// MOV r64, imm64 (REX.W 0xB8+rd): load a full 64-bit immediate.
void Assembler::mov64(Register dst, int64_t imm64) {
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)(0xB8 | encode));
  emit_int64(imm64);
}
8478 
8479 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
8480   InstructionMark im(this);
8481   int encode = prefixq_and_encode(dst->encoding());
8482   emit_int8(0xB8 | encode);
8483   emit_data64(imm64, rspec);
8484 }
8485 
// MOV r32, imm32 (0xB8+rd) where the immediate is a compressed (narrow)
// oop with relocation info.
void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)(0xB8 | encode));
  emit_data((int)imm32, rspec, narrow_oop_operand);
}
8492 
// MOV m32, imm32 (0xC7 /0) where the immediate is a compressed (narrow)
// oop with relocation info.
void Assembler::mov_narrow_oop(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0xC7);
  emit_operand(rax, dst, 4);  // rax encodes the /0 extension; 4 = trailing imm size
  emit_data((int)imm32, rspec, narrow_oop_operand);
}
8500 
// CMP r32, imm32 (0x81 /7) where the immediate is a compressed (narrow)
// oop with relocation info.
void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  int encode = prefix_and_encode(src1->encoding());
  emit_int8((unsigned char)0x81);
  emit_int8((unsigned char)(0xF8 | encode));  // modrm: /7 extension, register direct
  emit_data((int)imm32, rspec, narrow_oop_operand);
}
8508 
// CMP m32, imm32 (0x81 /7) where the immediate is a compressed (narrow)
// oop with relocation info.
void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
  InstructionMark im(this);
  prefix(src1);
  emit_int8((unsigned char)0x81);
  emit_operand(rax, src1, 4);  // NOTE(review): reg field should encode /7 (rdi) for CMP; /0 (rax) encodes ADD — verify against callers
  emit_data((int)imm32, rspec, narrow_oop_operand);
}
8516 
// LZCNT r64, r64 (F3 REX.W 0x0F 0xBD /r): count leading zeros.
// On CPUs without LZCNT the F3 prefix is ignored and this decodes as BSR,
// which is why the capability assert is required.
void Assembler::lzcntq(Register dst, Register src) {
  assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
  emit_int8((unsigned char)0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBD);
  emit_int8((unsigned char)(0xC0 | encode));
}
8525 
// MOVQ xmm, r64 (66 REX.W 0x0F 0x6E /r): move GPR into XMM.
void Assembler::movdq(XMMRegister dst, Register src) {
  // table D-1 says MMX/SSE2
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x6E);
  emit_int8((unsigned char)(0xC0 | encode));
}
8534 
// MOVQ r64, xmm (66 REX.W 0x0F 0x7E /r): move XMM into GPR.
void Assembler::movdq(Register dst, XMMRegister src) {
  // table D-1 says MMX/SSE2
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  // swap src/dst to get correct prefix
  int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
  emit_int8(0x7E);
  emit_int8((unsigned char)(0xC0 | encode));
}
8544 
// MOV r64, r64 (0x8B /r).
void Assembler::movq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8((unsigned char)0x8B);
  emit_int8((unsigned char)(0xC0 | encode));
}
8550 
// MOV r64, m64 (0x8B /r).
void Assembler::movq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8((unsigned char)0x8B);
  emit_operand(dst, src);
}
8557 
// MOV m64, r64 (0x89 /r).
void Assembler::movq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_int8((unsigned char)0x89);
  emit_operand(src, dst);
}
8564 
// MOVSX r64, m8 (0x0F 0xBE /r): sign-extend byte from memory.
void Assembler::movsbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBE);
  emit_operand(dst, src);
}
8572 
// MOVSX r64, r8 (0x0F 0xBE /r): sign-extend byte register.
void Assembler::movsbq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBE);
  emit_int8((unsigned char)(0xC0 | encode));
}
8579 
// Intentionally disabled: the encoding below is suspect (the immediate form of
// a sign-extended move needs a proper ModRM byte, but 0xC7 is OR'ed with the
// register encoding directly). Guarded by ShouldNotReachHere() until verified.
void Assembler::movslq(Register dst, int32_t imm32) {
  // dbx shows movslq(rcx, 3) as movq     $0x0000000049000000,(%rbx)
  // and movslq(r8, 3); as movl     $0x0000000048000000,(%rbx)
  // as a result we shouldn't use until tested at runtime...
  ShouldNotReachHere();
  InstructionMark im(this);
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)(0xC7 | encode));
  emit_int32(imm32);
}
8590 
// Store a sign-extended 32-bit immediate to a 64-bit memory slot
// (REX.W C7 /0 id — MOV m64, imm32 sign-extended).
void Assembler::movslq(Address dst, int32_t imm32) {
  assert(is_simm32(imm32), "lost bits");
  InstructionMark im(this);
  prefixq(dst);
  emit_int8((unsigned char)0xC7);
  emit_operand(rax, dst, 4); // rax encodes /0; 4 = trailing immediate size
  emit_int32(imm32);
}
8599 
// Sign-extending 32-bit load (REX.W 63 /r — MOVSXD r64, m32).
void Assembler::movslq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x63);
  emit_operand(dst, src);
}
8606 
// Sign-extend the low 32 bits of src into dst (REX.W 63 /r — MOVSXD r64, r/m32).
void Assembler::movslq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x63);
  emit_int8((unsigned char)(0xC0 | encode)); // ModRM: register-direct form
}
8612 
// Sign-extending word load (REX.W 0F BF /r — MOVSX r64, m16).
void Assembler::movswq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBF);
  emit_operand(dst, src);
}
8620 
// Sign-extend the low word of src into dst (REX.W 0F BF /r — MOVSX r64, r/m16).
void Assembler::movswq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)0xBF);
  emit_int8((unsigned char)(0xC0 | encode)); // ModRM: register-direct form
}
8627 
// Zero-extending byte load (REX.W 0F B6 /r — MOVZX r64, m8).
void Assembler::movzbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)0xB6);
  emit_operand(dst, src);
}
8635 
8636 void Assembler::movzbq(Register dst, Register src) {
8637   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8638   emit_int8(0x0F);
8639   emit_int8((unsigned char)0xB6);
8640   emit_int8(0xC0 | encode);
8641 }
8642 
// Zero-extending word load (REX.W 0F B7 /r — MOVZX r64, m16).
void Assembler::movzwq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)0xB7);
  emit_operand(dst, src);
}
8650 
// Zero-extend the low word of src into dst (REX.W 0F B7 /r — MOVZX r64, r/m16).
void Assembler::movzwq(Register dst, Register src) {
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)0xB7);
  emit_int8((unsigned char)(0xC0 | encode)); // ModRM: register-direct form
}
8657 
// Unsigned 64-bit multiply of RAX by a memory operand; result in RDX:RAX
// (REX.W F7 /4 — MUL m64).
void Assembler::mulq(Address src) {
  InstructionMark im(this);
  prefixq(src);
  emit_int8((unsigned char)0xF7);
  emit_operand(rsp, src); // rsp encodes the /4 opcode extension, not a register operand
}
8664 
// Unsigned 64-bit multiply of RAX by src; result in RDX:RAX (REX.W F7 /4 — MUL r64).
void Assembler::mulq(Register src) {
  int encode = prefixq_and_encode(src->encoding());
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xE0 | encode)); // ModRM: mod=11, reg=/4, rm=src
}
8670 
// BMI2 flagless unsigned multiply: dst1:dst2 = RDX * src
// (VEX.LZ.F2.0F38.W1 F6 /r — MULX r64a, r64b, r/m64).
void Assembler::mulxq(Register dst1, Register dst2, Register src) {
  assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF6);
  emit_int8((unsigned char)(0xC0 | encode)); // ModRM: register-direct form
}
8678 
// Two's-complement negation of dst (REX.W F7 /3 — NEG r64).
void Assembler::negq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xD8 | encode)); // ModRM: mod=11, reg=/3, rm=dst
}
8684 
// One's-complement negation of dst (REX.W F7 /2 — NOT r64).
void Assembler::notq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xF7);
  emit_int8((unsigned char)(0xD0 | encode)); // ModRM: mod=11, reg=/2, rm=dst
}
8690 
// OR a sign-extended 32-bit immediate into a 64-bit memory operand
// (REX.W 81 /1 id — OR m64, imm32).
void Assembler::orq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_int8((unsigned char)0x81);
  emit_operand(rcx, dst, 4); // rcx encodes /1; 4 = trailing immediate size
  emit_int32(imm32);
}
8698 
// OR a sign-extended 32-bit immediate into dst (REX.W 81/83 /1 via emit_arith).
void Assembler::orq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding()); // emit REX only; emit_arith re-derives the encoding
  emit_arith(0x81, 0xC8, dst, imm32);
}
8703 
// OR a 64-bit memory operand into dst (REX.W 0B /r — OR r64, m64).
void Assembler::orq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x0B);
  emit_operand(dst, src);
}
8710 
// OR src into dst (REX.W 0B /r — OR r64, r/m64).
void Assembler::orq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding()); // emit REX only
  emit_arith(0x0B, 0xC0, dst, src);
}
8715 
// Restore all 15 GP registers from the 16-word frame laid out by pusha().
// Slot 11 (the saved rsp) is deliberately skipped; rsp is instead restored
// by the final addq that releases the whole frame.
void Assembler::popa() { // 64bit
  movq(r15, Address(rsp, 0));
  movq(r14, Address(rsp, wordSize));
  movq(r13, Address(rsp, 2 * wordSize));
  movq(r12, Address(rsp, 3 * wordSize));
  movq(r11, Address(rsp, 4 * wordSize));
  movq(r10, Address(rsp, 5 * wordSize));
  movq(r9,  Address(rsp, 6 * wordSize));
  movq(r8,  Address(rsp, 7 * wordSize));
  movq(rdi, Address(rsp, 8 * wordSize));
  movq(rsi, Address(rsp, 9 * wordSize));
  movq(rbp, Address(rsp, 10 * wordSize));
  // skip rsp
  movq(rbx, Address(rsp, 12 * wordSize));
  movq(rdx, Address(rsp, 13 * wordSize));
  movq(rcx, Address(rsp, 14 * wordSize));
  movq(rax, Address(rsp, 15 * wordSize));

  addq(rsp, 16 * wordSize); // pop the whole frame in one step
}
8736 
// Population count of a 64-bit memory operand (F3 REX.W 0F B8 /r — POPCNT r64, m64).
void Assembler::popcntq(Register dst, Address src) {
  assert(VM_Version::supports_popcnt(), "must support");
  InstructionMark im(this);
  // The F3 mandatory prefix must precede the REX prefix emitted by prefixq.
  emit_int8((unsigned char)0xF3);
  prefixq(src, dst);
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)0xB8);
  emit_operand(dst, src);
}
8746 
// Population count of src into dst (F3 REX.W 0F B8 /r — POPCNT r64, r64).
void Assembler::popcntq(Register dst, Register src) {
  assert(VM_Version::supports_popcnt(), "must support");
  // The F3 mandatory prefix must precede the REX prefix.
  emit_int8((unsigned char)0xF3);
  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
  emit_int8((unsigned char)0x0F);
  emit_int8((unsigned char)0xB8);
  emit_int8((unsigned char)(0xC0 | encode)); // ModRM: register-direct form
}
8755 
// Pop the top of stack into a 64-bit memory operand (8F /0 — POP m64).
void Assembler::popq(Address dst) {
  InstructionMark im(this);
  prefixq(dst);
  emit_int8((unsigned char)0x8F);
  emit_operand(rax, dst); // rax encodes the /0 opcode extension
}
8762 
// Save all 15 GP registers (plus the pre-call rsp) into a 16-word frame.
// Counterpart of popa().
void Assembler::pusha() { // 64bit
  // we have to store original rsp.  ABI says that 128 bytes
  // below rsp are local scratch.
  // -5 * wordSize lands in slot 11 of the frame once rsp has been
  // dropped by 16 words below (11 = 16 - 5) — the slot popa() skips.
  movq(Address(rsp, -5 * wordSize), rsp);

  subq(rsp, 16 * wordSize); // allocate the whole frame up front

  movq(Address(rsp, 15 * wordSize), rax);
  movq(Address(rsp, 14 * wordSize), rcx);
  movq(Address(rsp, 13 * wordSize), rdx);
  movq(Address(rsp, 12 * wordSize), rbx);
  // skip rsp
  movq(Address(rsp, 10 * wordSize), rbp);
  movq(Address(rsp, 9 * wordSize), rsi);
  movq(Address(rsp, 8 * wordSize), rdi);
  movq(Address(rsp, 7 * wordSize), r8);
  movq(Address(rsp, 6 * wordSize), r9);
  movq(Address(rsp, 5 * wordSize), r10);
  movq(Address(rsp, 4 * wordSize), r11);
  movq(Address(rsp, 3 * wordSize), r12);
  movq(Address(rsp, 2 * wordSize), r13);
  movq(Address(rsp, wordSize), r14);
  movq(Address(rsp, 0), r15);
}
8787 
// Push a 64-bit memory operand onto the stack (FF /6 — PUSH m64).
void Assembler::pushq(Address src) {
  InstructionMark im(this);
  prefixq(src);
  emit_int8((unsigned char)0xFF);
  emit_operand(rsi, src); // rsi encodes the /6 opcode extension
}
8794 
// Rotate dst left through carry by imm8 (REX.W D1/C1 /2 — RCL r64, 1/imm8).
// The shift-by-1 form uses the shorter D1 encoding with no immediate byte.
void Assembler::rclq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count"); // allow up to 63 for 64-bit operands
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_int8((unsigned char)0xD1);
    emit_int8((unsigned char)(0xD0 | encode)); // ModRM: mod=11, reg=/2, rm=dst
  } else {
    emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xD0 | encode));
    emit_int8(imm8);
  }
}
8807 
// Rotate dst right through carry by imm8 (REX.W D1/C1 /3 — RCR r64, 1/imm8).
// The shift-by-1 form uses the shorter D1 encoding with no immediate byte.
void Assembler::rcrq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count"); // allow up to 63 for 64-bit operands
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_int8((unsigned char)0xD1);
    emit_int8((unsigned char)(0xD8 | encode)); // ModRM: mod=11, reg=/3, rm=dst
  } else {
    emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xD8 | encode));
    emit_int8(imm8);
  }
}
8820 
8821 void Assembler::rorq(Register dst, int imm8) {
8822   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8823   int encode = prefixq_and_encode(dst->encoding());
8824   if (imm8 == 1) {
8825     emit_int8((unsigned char)0xD1);
8826     emit_int8((unsigned char)(0xC8 | encode));
8827   } else {
8828     emit_int8((unsigned char)0xC1);
8829     emit_int8((unsigned char)(0xc8 | encode));
8830     emit_int8(imm8);
8831   }
8832 }
8833 
// BMI2 flagless rotate right: dst = src ror imm8
// (VEX.LZ.F2.0F3A.W1 F0 /r ib — RORX r64, r/m64, imm8).
void Assembler::rorxq(Register dst, Register src, int imm8) {
  assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes);
  emit_int8((unsigned char)0xF0);
  emit_int8((unsigned char)(0xC0 | encode)); // ModRM: register-direct form
  emit_int8(imm8);
}
8842 
// BMI2 flagless 32-bit rotate right: dst = src ror imm8
// (VEX.LZ.F2.0F3A.W0 F0 /r ib — RORX r32, r/m32, imm8).
void Assembler::rorxd(Register dst, Register src, int imm8) {
  assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes);
  emit_int8((unsigned char)0xF0);
  emit_int8((unsigned char)(0xC0 | encode)); // ModRM: register-direct form
  emit_int8(imm8);
}
8851 
// Arithmetic right shift of dst by imm8 (REX.W D1/C1 /7 — SAR r64, 1/imm8).
// The shift-by-1 form uses the shorter D1 encoding with no immediate byte.
void Assembler::sarq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count"); // allow up to 63 for 64-bit operands
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_int8((unsigned char)0xD1);
    emit_int8((unsigned char)(0xF8 | encode)); // ModRM: mod=11, reg=/7, rm=dst
  } else {
    emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xF8 | encode));
    emit_int8(imm8);
  }
}
8864 
// Arithmetic right shift of dst by CL (REX.W D3 /7 — SAR r64, CL).
void Assembler::sarq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xF8 | encode)); // ModRM: mod=11, reg=/7, rm=dst
}
8870 
// Subtract-with-borrow an immediate from a 64-bit memory operand (81/83 /3 — SBB m64, imm).
void Assembler::sbbq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rbx, dst, imm32); // rbx encodes the /3 opcode extension
}
8876 
// Subtract-with-borrow an immediate from dst (REX.W 81/83 /3 via emit_arith).
void Assembler::sbbq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding()); // emit REX only; emit_arith re-derives the encoding
  emit_arith(0x81, 0xD8, dst, imm32);
}
8881 
// Subtract-with-borrow a 64-bit memory operand from dst (REX.W 1B /r — SBB r64, m64).
void Assembler::sbbq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x1B);
  emit_operand(dst, src);
}
8888 
// Subtract-with-borrow src from dst (REX.W 1B /r — SBB r64, r/m64).
void Assembler::sbbq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding()); // emit REX only
  emit_arith(0x1B, 0xC0, dst, src);
}
8893 
// Left shift of dst by imm8 (REX.W D1/C1 /4 — SHL r64, 1/imm8).
// The shift-by-1 form uses the shorter D1 encoding with no immediate byte.
void Assembler::shlq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count"); // allow up to 63 for 64-bit operands
  int encode = prefixq_and_encode(dst->encoding());
  if (imm8 == 1) {
    emit_int8((unsigned char)0xD1);
    emit_int8((unsigned char)(0xE0 | encode)); // ModRM: mod=11, reg=/4, rm=dst
  } else {
    emit_int8((unsigned char)0xC1);
    emit_int8((unsigned char)(0xE0 | encode));
    emit_int8(imm8);
  }
}
8906 
// Left shift of dst by CL (REX.W D3 /4 — SHL r64, CL).
void Assembler::shlq(Register dst) {
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xD3);
  emit_int8((unsigned char)(0xE0 | encode)); // ModRM: mod=11, reg=/4, rm=dst
}
8912 
// Logical right shift of dst by imm8 (REX.W C1 /5 ib — SHR r64, imm8).
// NOTE(review): unlike sarq/shlq/rorq, this always uses the C1+imm8 form and
// never the shorter D1 form for imm8 == 1; functionally equivalent, one byte
// longer. Kept as-is to preserve emitted code size.
void Assembler::shrq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count"); // allow up to 63 for 64-bit operands
  int encode = prefixq_and_encode(dst->encoding());
  emit_int8((unsigned char)0xC1);
  emit_int8((unsigned char)(0xE8 | encode)); // ModRM: mod=11, reg=/5, rm=dst
  emit_int8(imm8);
}
8920 
8921 void Assembler::shrq(Register dst) {
8922   int encode = prefixq_and_encode(dst->encoding());
8923   emit_int8((unsigned char)0xD3);
8924   emit_int8(0xE8 | encode);
8925 }
8926 
// Subtract an immediate from a 64-bit memory operand (81/83 /5 — SUB m64, imm).
void Assembler::subq(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefixq(dst);
  emit_arith_operand(0x81, rbp, dst, imm32); // rbp encodes the /5 opcode extension
}
8932 
// Subtract src from a 64-bit memory operand (REX.W 29 /r — SUB m64, r64).
void Assembler::subq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_int8(0x29);
  emit_operand(src, dst);
}
8939 
// Subtract a sign-extended 32-bit immediate from dst (REX.W 81/83 /5 via emit_arith).
void Assembler::subq(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding()); // emit REX only; emit_arith re-derives the encoding
  emit_arith(0x81, 0xE8, dst, imm32);
}
8944 
// Force generation of a 4 byte immediate value even if it fits into 8bit.
// Used where the emitted instruction must have a fixed length (e.g. so it
// can be patched later) — emit_arith_imm32 never selects the short 83 form.
void Assembler::subq_imm32(Register dst, int32_t imm32) {
  (void) prefixq_and_encode(dst->encoding()); // emit REX only
  emit_arith_imm32(0x81, 0xE8, dst, imm32);
}
8950 
// Subtract a 64-bit memory operand from dst (REX.W 2B /r — SUB r64, m64).
void Assembler::subq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x2B);
  emit_operand(dst, src);
}
8957 
// Subtract src from dst (REX.W 2B /r — SUB r64, r/m64).
void Assembler::subq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding()); // emit REX only
  emit_arith(0x2B, 0xC0, dst, src);
}
8962 
// AND dst with imm32 setting flags only (REX.W A9/F7 /0 — TEST r64, imm32).
// Uses the one-byte-shorter A9 accumulator form when dst is rax.
void Assembler::testq(Register dst, int32_t imm32) {
  // not using emit_arith because test
  // doesn't support sign-extension of
  // 8bit operands
  int encode = dst->encoding();
  if (encode == 0) {
    // rax: special accumulator encoding, no ModRM byte needed
    prefix(REX_W);
    emit_int8((unsigned char)0xA9);
  } else {
    encode = prefixq_and_encode(encode);
    emit_int8((unsigned char)0xF7);
    emit_int8((unsigned char)(0xC0 | encode)); // ModRM: mod=11, reg=/0, rm=dst
  }
  emit_int32(imm32);
}
8978 
// AND dst with src setting flags only (REX.W 85 /r — TEST r/m64, r64).
void Assembler::testq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding()); // emit REX only
  emit_arith(0x85, 0xC0, dst, src);
}
8983 
// Exchange-and-add: old *dst -> src, old src + old *dst -> *dst
// (REX.W 0F C1 /r — XADD m64, r64). Combine with lock() for atomicity.
void Assembler::xaddq(Address dst, Register src) {
  InstructionMark im(this);
  prefixq(dst, src);
  emit_int8(0x0F);
  emit_int8((unsigned char)0xC1);
  emit_operand(src, dst);
}
8991 
// Exchange dst with a 64-bit memory operand (REX.W 87 /r — XCHG r64, m64).
// Note: XCHG with a memory operand is implicitly locked by the processor.
void Assembler::xchgq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8((unsigned char)0x87);
  emit_operand(dst, src);
}
8998 
8999 void Assembler::xchgq(Register dst, Register src) {
9000   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9001   emit_int8((unsigned char)0x87);
9002   emit_int8((unsigned char)(0xc0 | encode));
9003 }
9004 
// XOR src into dst (REX.W 33 /r — XOR r64, r/m64).
void Assembler::xorq(Register dst, Register src) {
  (void) prefixq_and_encode(dst->encoding(), src->encoding()); // emit REX only
  emit_arith(0x33, 0xC0, dst, src);
}
9009 
// XOR a 64-bit memory operand into dst (REX.W 33 /r — XOR r64, m64).
void Assembler::xorq(Register dst, Address src) {
  InstructionMark im(this);
  prefixq(src, dst);
  emit_int8(0x33);
  emit_operand(dst, src);
}
9016 
9017 #endif // !LP64