/*
 * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "gc/shared/cardTableBarrierSet.hpp"
#include "gc/shared/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/resourceArea.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ALL_GCS
#include "gc/g1/g1BarrierSet.hpp"
#include "gc/g1/heapRegion.hpp"
#endif // INCLUDE_ALL_GCS

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#define STOP(error) stop(error)
#else
#define BLOCK_COMMENT(str) block_comment(str)
#define STOP(error) block_comment(error); stop(error)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

// Implementation of AddressLiteral

// A 2-D table for managing compressed displacement (disp8) on EVEX-enabled platforms.
unsigned char tuple_table[Assembler::EVEX_ETUP + 1][Assembler::AVX_512bit + 1] = {
  // -----------------Table 4.5 -------------------- //
  16, 32, 64,  // EVEX_FV(0)
  4,  4,  4,   // EVEX_FV(1) - with Evex.b
  16, 32, 64,  // EVEX_FV(2) - with Evex.w
  8,  8,  8,   // EVEX_FV(3) - with Evex.w and Evex.b
  8,  16, 32,  // EVEX_HV(0)
  4,  4,  4,   // EVEX_HV(1) - with Evex.b
  // -----------------Table 4.6 -------------------- //
  16, 32, 64,  // EVEX_FVM(0)
  1,  1,  1,   // EVEX_T1S(0)
  2,  2,  2,   // EVEX_T1S(1)
  4,  4,  4,   // EVEX_T1S(2)
  8,  8,  8,   // EVEX_T1S(3)
  4,  4,  4,   // EVEX_T1F(0)
  8,  8,  8,   // EVEX_T1F(1)
  8,  8,  8,   // EVEX_T2(0)
  0,  16, 16,  // EVEX_T2(1)
  0,  16, 16,  // EVEX_T4(0)
  0,  0,  32,  // EVEX_T4(1)
  0,  0,  32,  // EVEX_T8(0)
  8,  16, 32,  // EVEX_HVM(0)
  4,  8,  16,  // EVEX_QVM(0)
  2,  4,  8,   // EVEX_OVM(0)
  16, 16, 16,  // EVEX_M128(0)
  8,  32, 64,  // EVEX_DUP(0)
  0,  0,  0    // EVEX_NTUP
};
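
// Illustrative note (not part of the original table documentation): the table
// encodes the EVEX disp8*N compression rule. For example, a T1S tuple with a
// 64-bit input (row EVEX_T1S(3)) has N = 8, so a displacement of 256 can be
// emitted as the compressed disp8 value 256 / 8 = 32, while a displacement of
// 260 (not a multiple of 8) must fall back to a full disp32.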

AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
  _is_lval = false;
  _target = target;
  switch (rtype) {
  case relocInfo::oop_type:
  case relocInfo::metadata_type:
    // Oops are a special case. Normally they would be their own section
    // but in cases like icBuffer they are literals in the code stream that
    // we don't have a section for. We use none so that we get a literal address
    // which is always patchable.
    break;
  case relocInfo::external_word_type:
    _rspec = external_word_Relocation::spec(target);
    break;
  case relocInfo::internal_word_type:
    _rspec = internal_word_Relocation::spec(target);
    break;
  case relocInfo::opt_virtual_call_type:
    _rspec = opt_virtual_call_Relocation::spec();
    break;
  case relocInfo::static_call_type:
    _rspec = static_call_Relocation::spec();
    break;
  case relocInfo::runtime_call_type:
    _rspec = runtime_call_Relocation::spec();
    break;
  case relocInfo::poll_type:
  case relocInfo::poll_return_type:
    _rspec = Relocation::spec_simple(rtype);
    break;
  case relocInfo::none:
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}

// Implementation of Address

#ifdef _LP64

Address Address::make_array(ArrayAddress adr) {
  // Not implementable on 64bit machines
  // Should have been handled higher up the call chain.
  ShouldNotReachHere();
  return Address();
}

// exceedingly dangerous constructor
Address::Address(int disp, address loc, relocInfo::relocType rtype) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = disp;
  switch (rtype) {
    case relocInfo::external_word_type:
      _rspec = external_word_Relocation::spec(loc);
      break;
    case relocInfo::internal_word_type:
      _rspec = internal_word_Relocation::spec(loc);
      break;
    case relocInfo::runtime_call_type:
      // HMM
      _rspec = runtime_call_Relocation::spec();
      break;
    case relocInfo::poll_type:
    case relocInfo::poll_return_type:
      _rspec = Relocation::spec_simple(rtype);
      break;
    case relocInfo::none:
      break;
    default:
      ShouldNotReachHere();
  }
}
#else // LP64

Address Address::make_array(ArrayAddress adr) {
  AddressLiteral base = adr.base();
  Address index = adr.index();
  assert(index._disp == 0, "must not have disp"); // maybe it can?
  Address array(index._base, index._index, index._scale, (intptr_t) base.target());
  array._rspec = base._rspec;
  return array;
}

// exceedingly dangerous constructor
Address::Address(address loc, RelocationHolder spec) {
  _base  = noreg;
  _index = noreg;
  _scale = no_scale;
  _disp  = (intptr_t) loc;
  _rspec = spec;
}

#endif // _LP64



// Convert the raw encoding form into the form expected by the constructor for
// Address.  An index of 4 (rsp) corresponds to having no index, so convert
// that to noreg for the Address constructor.
Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
  RelocationHolder rspec;
  if (disp_reloc != relocInfo::none) {
    rspec = Relocation::spec_simple(disp_reloc);
  }
  bool valid_index = index != rsp->encoding();
  if (valid_index) {
    Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  } else {
    Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp));
    madr._rspec = rspec;
    return madr;
  }
}
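
// Illustrative sketch (not from the original source): decoding a raw operand
// whose index field holds 4 (rsp), meaning "no index". The numeric encodings
// used here are the standard x86 register numbers (rbp = 5, rsp = 4):
//
//   Address a = Address::make_raw(5 /* rbp */, 4 /* rsp => no index */,
//                                 0, 16, relocInfo::none);
//   // yields the same operand as Address(rbp, 16)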

// Implementation of Assembler

int AbstractAssembler::code_fill_byte() {
  return (u_char)'\xF4'; // hlt
}

// make this go away someday
void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
  if (rtype == relocInfo::none)
    emit_int32(data);
  else
    emit_data(data, Relocation::spec_simple(rtype), format);
}

void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
  assert(imm_operand == 0, "default format must be immediate in this file");
  assert(inst_mark() != NULL, "must be inside InstructionMark");
  if (rspec.type() != relocInfo::none) {
    #ifdef ASSERT
      check_relocation(rspec, format);
    #endif
    // Do not use AbstractAssembler::relocate, which is not intended for
    // embedded words.  Instead, relocate to the enclosing instruction.

    // hack. call32 is too wide for mask so use disp32
    if (format == call32_operand)
      code_section()->relocate(inst_mark(), rspec, disp32_operand);
    else
      code_section()->relocate(inst_mark(), rspec, format);
  }
  emit_int32(data);
}

static int encode(Register r) {
  int enc = r->encoding();
  if (enc >= 8) {
    enc -= 8;
  }
  return enc;
}
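
// Illustrative note: encode() keeps only the low three bits of the register
// number; the dropped high bit is expressed by a REX/VEX prefix bit instead.
// For example, encode(rax) == 0 and, on 64-bit, encode(r9) == 1, with the
// high bit carried as REX.B by the prefix-emitting code.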

void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
  assert(dst->has_byte_register(), "must have byte register");
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert(isByte(imm8), "not a byte");
  assert((op1 & 0x01) == 0, "should be 8bit operation");
  emit_int8(op1);
  emit_int8(op2 | encode(dst));
  emit_int8(imm8);
}


void Assembler::emit_arith(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_int8(op1 | 0x02); // set sign-extension bit
    emit_int8(op2 | encode(dst));
    emit_int8(imm32 & 0xFF);
  } else {
    emit_int8(op1);
    emit_int8(op2 | encode(dst));
    emit_int32(imm32);
  }
}
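
// Worked example (illustrative, not from the original source): addl(rcx, 8)
// calls emit_arith(0x81, 0xC0, rcx, 8). The immediate fits in 8 bits, so the
// bytes emitted are 0x83 (0x81 with the sign-extension bit), 0xC1
// (0xC0 | encode(rcx)), 0x08 -- i.e. "add ecx, 8".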

// Force generation of a 4 byte immediate value even if it fits into 8 bits
void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  emit_int8(op1);
  emit_int8(op2 | encode(dst));
  emit_int32(imm32);
}

// immediate-to-memory forms
void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
  assert((op1 & 0x01) == 1, "should be 32bit operation");
  assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
  if (is8bit(imm32)) {
    emit_int8(op1 | 0x02); // set sign-extension bit
    emit_operand(rm, adr, 1);
    emit_int8(imm32 & 0xFF);
  } else {
    emit_int8(op1);
    emit_operand(rm, adr, 4);
    emit_int32(imm32);
  }
}
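
// Worked example (illustrative): for an 8-bit immediate to memory, e.g.
// addl(Address(rax, 0), 8), emit_arith_operand(0x81, rax, adr, 8) emits
// 0x83, then the ModRM byte 0x00 ([rax] with reg field 0), then 0x08.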


void Assembler::emit_arith(int op1, int op2, Register dst, Register src) {
  assert(isByte(op1) && isByte(op2), "wrong opcode");
  emit_int8(op1);
  emit_int8(op2 | encode(dst) << 3 | encode(src));
}
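
// Worked example (illustrative): addl(rdx, rbx) calls
// emit_arith(0x03, 0xC0, rdx, rbx) and emits 0x03, then
// 0xC0 | encode(rdx) << 3 | encode(rbx) == 0xD3 -- i.e. "add edx, ebx".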


bool Assembler::query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len,
                                           int cur_tuple_type, int in_size_in_bits, int cur_encoding) {
  int mod_idx = 0;
  // We will test whether the displacement fits the compressed format, and if
  // so apply the compression to the displacement iff the result is 8-bit.
  if (VM_Version::supports_evex() && is_evex_inst) {
    switch (cur_tuple_type) {
    case EVEX_FV:
      if ((cur_encoding & VEX_W) == VEX_W) {
        mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2;
      } else {
        mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      }
      break;

    case EVEX_HV:
      mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      break;

    case EVEX_FVM:
      break;

    case EVEX_T1S:
      switch (in_size_in_bits) {
      case EVEX_8bit:
        break;

      case EVEX_16bit:
        mod_idx = 1;
        break;

      case EVEX_32bit:
        mod_idx = 2;
        break;

      case EVEX_64bit:
        mod_idx = 3;
        break;
      }
      break;

    case EVEX_T1F:
    case EVEX_T2:
    case EVEX_T4:
      mod_idx = (in_size_in_bits == EVEX_64bit) ? 1 : 0;
      break;

    case EVEX_T8:
      break;

    case EVEX_HVM:
      break;

    case EVEX_QVM:
      break;

    case EVEX_OVM:
      break;

    case EVEX_M128:
      break;

    case EVEX_DUP:
      break;

    default:
      assert(0, "no valid evex tuple_table entry");
      break;
    }

    if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) {
      int disp_factor = tuple_table[cur_tuple_type + mod_idx][vector_len];
      if ((disp % disp_factor) == 0) {
        int new_disp = disp / disp_factor;
        if ((-0x80 <= new_disp && new_disp < 0x80)) {
          disp = new_disp;
        }
      } else {
        return false;
      }
    }
  }
  return (-0x80 <= disp && disp < 0x80);
}


bool Assembler::emit_compressed_disp_byte(int &disp) {
  int mod_idx = 0;
  // We will test whether the displacement fits the compressed format, and if
  // so apply the compression to the displacement iff the result is 8-bit.
  if (VM_Version::supports_evex() && _attributes && _attributes->is_evex_instruction()) {
    int evex_encoding = _attributes->get_evex_encoding();
    int tuple_type = _attributes->get_tuple_type();
    switch (tuple_type) {
    case EVEX_FV:
      if ((evex_encoding & VEX_W) == VEX_W) {
        mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2;
      } else {
        mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      }
      break;

    case EVEX_HV:
      mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
      break;

    case EVEX_FVM:
      break;

    case EVEX_T1S:
      switch (_attributes->get_input_size()) {
      case EVEX_8bit:
        break;

      case EVEX_16bit:
        mod_idx = 1;
        break;

      case EVEX_32bit:
        mod_idx = 2;
        break;

      case EVEX_64bit:
        mod_idx = 3;
        break;
      }
      break;

    case EVEX_T1F:
    case EVEX_T2:
    case EVEX_T4:
      mod_idx = (_attributes->get_input_size() == EVEX_64bit) ? 1 : 0;
      break;

    case EVEX_T8:
      break;

    case EVEX_HVM:
      break;

    case EVEX_QVM:
      break;

    case EVEX_OVM:
      break;

    case EVEX_M128:
      break;

    case EVEX_DUP:
      break;

    default:
      assert(0, "no valid evex tuple_table entry");
      break;
    }

    int vector_len = _attributes->get_vector_len();
    if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) {
      int disp_factor = tuple_table[tuple_type + mod_idx][vector_len];
      if ((disp % disp_factor) == 0) {
        int new_disp = disp / disp_factor;
        if (is8bit(new_disp)) {
          disp = new_disp;
        }
      } else {
        return false;
      }
    }
  }
  return is8bit(disp);
}


void Assembler::emit_operand(Register reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec,
                             int rip_relative_correction) {
  relocInfo::relocType rtype = (relocInfo::relocType) rspec.type();

  // Encode the registers as needed in the fields they are used in

  int regenc = encode(reg) << 3;
  int indexenc = index->is_valid() ? encode(index) << 3 : 0;
  int baseenc = base->is_valid() ? encode(base) : 0;

  if (base->is_valid()) {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [base + index*scale + disp]
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base + index*scale]
        // [00 reg 100][ss index base]
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x04 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
        // [base + index*scale + imm8]
        // [01 reg 100][ss index base] imm8
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x44 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
        emit_int8(disp & 0xFF);
      } else {
        // [base + index*scale + disp32]
        // [10 reg 100][ss index base] disp32
        assert(index != rsp, "illegal addressing mode");
        emit_int8(0x84 | regenc);
        emit_int8(scale << 6 | indexenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    } else if (base == rsp LP64_ONLY(|| base == r12)) {
      // [rsp + disp]
      if (disp == 0 && rtype == relocInfo::none) {
        // [rsp]
        // [00 reg 100][00 100 100]
        emit_int8(0x04 | regenc);
        emit_int8(0x24);
      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
        // [rsp + imm8]
        // [01 reg 100][00 100 100] disp8
        emit_int8(0x44 | regenc);
        emit_int8(0x24);
        emit_int8(disp & 0xFF);
      } else {
        // [rsp + imm32]
        // [10 reg 100][00 100 100] disp32
        emit_int8(0x84 | regenc);
        emit_int8(0x24);
        emit_data(disp, rspec, disp32_operand);
      }
    } else {
      // [base + disp]
      assert(base != rsp LP64_ONLY(&& base != r12), "illegal addressing mode");
      if (disp == 0 && rtype == relocInfo::none &&
          base != rbp LP64_ONLY(&& base != r13)) {
        // [base]
        // [00 reg base]
        emit_int8(0x00 | regenc | baseenc);
      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
        // [base + disp8]
        // [01 reg base] disp8
        emit_int8(0x40 | regenc | baseenc);
        emit_int8(disp & 0xFF);
      } else {
        // [base + disp32]
        // [10 reg base] disp32
        emit_int8(0x80 | regenc | baseenc);
        emit_data(disp, rspec, disp32_operand);
      }
    }
  } else {
    if (index->is_valid()) {
      assert(scale != Address::no_scale, "inconsistent address");
      // [index*scale + disp]
      // [00 reg 100][ss index 101] disp32
      assert(index != rsp, "illegal addressing mode");
      emit_int8(0x04 | regenc);
      emit_int8(scale << 6 | indexenc | 0x05);
      emit_data(disp, rspec, disp32_operand);
    } else if (rtype != relocInfo::none) {
      // [disp] (64bit) RIP-RELATIVE (32bit) abs
      // [00 000 101] disp32

      emit_int8(0x05 | regenc);
      // Note that the RIP-rel. correction applies to the generated
      // disp field, but _not_ to the target address in the rspec.

      // disp was created by converting the target address minus the pc
      // at the start of the instruction. That needs more correction here.
      // intptr_t disp = target - next_ip;
      assert(inst_mark() != NULL, "must be inside InstructionMark");
      address next_ip = pc() + sizeof(int32_t) + rip_relative_correction;
      int64_t adjusted = disp;
      // Do rip-rel adjustment for 64bit
      LP64_ONLY(adjusted -= (next_ip - inst_mark()));
      assert(is_simm32(adjusted),
             "must be 32bit offset (RIP relative address)");
      emit_data((int32_t) adjusted, rspec, disp32_operand);

    } else {
      // 32bit never did this, did everything as the rip-rel/disp code above
      // [disp] ABSOLUTE
      // [00 reg 100][00 100 101] disp32
      emit_int8(0x04 | regenc);
      emit_int8(0x25);
      emit_data(disp, rspec, disp32_operand);
    }
  }
}
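
// Worked example (illustrative, not from the original source): emitting the
// operand bytes for "mov edx, [rax + rcx*4 + 8]". With reg = rdx, base = rax,
// index = rcx, scale = times_4 and disp = 8, the path taken above is
// [01 reg 100][ss index base] imm8, producing 0x54 (0x44 | encode(rdx) << 3),
// the SIB byte 0x88 (2 << 6 | encode(rcx) << 3 | encode(rax)), and 0x08.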

void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
                             Address::ScaleFactor scale, int disp,
                             RelocationHolder const& rspec) {
  if (UseAVX > 2) {
    int xreg_enc = reg->encoding();
    if (xreg_enc > 15) {
      XMMRegister new_reg = as_XMMRegister(xreg_enc & 0xf);
      emit_operand((Register)new_reg, base, index, scale, disp, rspec);
      return;
    }
  }
  emit_operand((Register)reg, base, index, scale, disp, rspec);
}

// Secret local extension to Assembler::WhichOperand:
#define end_pc_operand (_WhichOperand_limit)

address Assembler::locate_operand(address inst, WhichOperand which) {
  // Decode the given instruction, and return the address of
  // an embedded 32-bit operand word.

  // If "which" is disp32_operand, selects the displacement portion
  // of an effective address specifier.
  // If "which" is imm64_operand, selects the trailing immediate constant.
  // If "which" is call32_operand, selects the displacement of a call or jump.
  // Caller is responsible for ensuring that there is such an operand,
  // and that it is 32/64 bits wide.

  // If "which" is end_pc_operand, find the end of the instruction.

  address ip = inst;
  bool is_64bit = false;

  debug_only(bool has_disp32 = false);
  int tail_size = 0; // other random bytes (#32, #16, etc.) at end of insn

  again_after_prefix:
  switch (0xFF & *ip++) {

  // These convenience macros generate groups of "case" labels for the switch.
#define REP4(x) (x)+0: case (x)+1: case (x)+2: case (x)+3
#define REP8(x) (x)+0: case (x)+1: case (x)+2: case (x)+3: \
             case (x)+4: case (x)+5: case (x)+6: case (x)+7
#define REP16(x) REP8((x)+0): \
              case REP8((x)+8)

  case CS_segment:
  case SS_segment:
  case DS_segment:
  case ES_segment:
  case FS_segment:
  case GS_segment:
    // Seems dubious
    LP64_ONLY(assert(false, "shouldn't have that prefix"));
    assert(ip == inst+1, "only one prefix allowed");
    goto again_after_prefix;

  case 0x67:
  case REX:
  case REX_B:
  case REX_X:
  case REX_XB:
  case REX_R:
  case REX_RB:
  case REX_RX:
  case REX_RXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    goto again_after_prefix;

  case REX_W:
  case REX_WB:
  case REX_WX:
  case REX_WXB:
  case REX_WR:
  case REX_WRB:
  case REX_WRX:
  case REX_WRXB:
    NOT_LP64(assert(false, "64bit prefixes"));
    is_64bit = true;
    goto again_after_prefix;

  case 0xFF: // pushq a; decl a; incl a; call a; jmp a
  case 0x88: // movb a, r
  case 0x89: // movl a, r
  case 0x8A: // movb r, a
  case 0x8B: // movl r, a
  case 0x8F: // popl a
    debug_only(has_disp32 = true);
    break;

  case 0x68: // pushq #32
    if (which == end_pc_operand) {
      return ip + 4;
    }
    assert(which == imm_operand && !is_64bit, "pushl has no disp32 or 64bit immediate");
    return ip;                  // not produced by emit_operand

  case 0x66: // movw ... (size prefix)
    again_after_size_prefix2:
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "64bit prefix found"));
      goto again_after_size_prefix2;
    case 0x8B: // movw r, a
    case 0x89: // movw a, r
      debug_only(has_disp32 = true);
      break;
    case 0xC7: // movw a, #16
      debug_only(has_disp32 = true);
      tail_size = 2;  // the imm16
      break;
    case 0x0F: // several SSE/SSE2 variants
      ip--;    // reparse the 0x0F
      goto again_after_prefix;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP8(0xB8): // movl/q r, #32/#64(oop?)
    if (which == end_pc_operand)  return ip + (is_64bit ? 8 : 4);
    // these asserts are somewhat nonsensical
#ifndef _LP64
    assert(which == imm_operand || which == disp32_operand,
           "which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip));
#else
    assert((which == call32_operand || which == imm_operand) && is_64bit ||
           which == narrow_oop_operand && !is_64bit,
           "which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip));
#endif // _LP64
    return ip;

  case 0x69: // imul r, a, #32
  case 0xC7: // movl a, #32(oop?)
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x0F: // movx..., etc.
    switch (0xFF & *ip++) {
    case 0x3A: // pcmpestri
      tail_size = 1;
      // fall through
    case 0x38: // ptest, pmovzxbw
      ip++; // skip opcode
      debug_only(has_disp32 = true); // has both kinds of operands!
      break;

    case 0x70: // pshufd r, r/a, #8
      debug_only(has_disp32 = true); // has both kinds of operands!
      // fall through
    case 0x73: // psrldq r, #8
      tail_size = 1;
      break;

    case 0x12: // movlps
    case 0x28: // movaps
    case 0x2E: // ucomiss
    case 0x2F: // comiss
    case 0x54: // andps
    case 0x55: // andnps
    case 0x56: // orps
    case 0x57: // xorps
    case 0x58: // addpd
    case 0x59: // mulpd
    case 0x6E: // movd
    case 0x7E: // movd
    case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
    case 0xFE: // paddd
      debug_only(has_disp32 = true);
      break;

    case 0xAD: // shrd r, a, %cl
    case 0xAF: // imul r, a
    case 0xBE: // movsbl r, a (movsxb)
    case 0xBF: // movswl r, a (movsxw)
    case 0xB6: // movzbl r, a (movzxb)
    case 0xB7: // movzwl r, a (movzxw)
    case REP16(0x40): // cmovl cc, r, a
    case 0xB0: // cmpxchgb
    case 0xB1: // cmpxchg
    case 0xC1: // xaddl
    case 0xC7: // cmpxchg8
    case REP16(0x90): // setcc a
      debug_only(has_disp32 = true);
      // fall out of the switch to decode the address
      break;

    case 0xC4: // pinsrw r, a, #8
      debug_only(has_disp32 = true);
      // fall through
    case 0xC5: // pextrw r, r, #8
      tail_size = 1;  // the imm8
      break;

    case 0xAC: // shrd r, a, #8
      debug_only(has_disp32 = true);
      tail_size = 1;  // the imm8
      break;

    case REP16(0x80): // jcc rdisp32
      if (which == end_pc_operand)  return ip + 4;
      assert(which == call32_operand, "jcc has no disp32 or imm");
      return ip;
    default:
      ShouldNotReachHere();
    }
    break;

  case 0x81: // addl a, #32; addl r, #32
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    // on 32bit in the case of cmpl, the imm might be an oop
    tail_size = 4;
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x83: // addl a, #8; addl r, #8
    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1;
    break;

  case 0x9B:
    switch (0xFF & *ip++) {
    case 0xD9: // fnstcw a
      debug_only(has_disp32 = true);
      break;
    default:
      ShouldNotReachHere();
    }
    break;

  case REP4(0x00): // addb a, r; addl a, r; addb r, a; addl r, a
  case REP4(0x10): // adc...
  case REP4(0x20): // and...
  case REP4(0x30): // xor...
  case REP4(0x08): // or...
  case REP4(0x18): // sbb...
  case REP4(0x28): // sub...
  case 0xF7: // mull a
  case 0x8D: // lea r, a
  case 0x87: // xchg r, a
  case REP4(0x38): // cmp...
  case 0x85: // test r, a
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
  case 0xC6: // movb a, #8
  case 0x80: // cmpb a, #8
  case 0x6B: // imul r, a, #8
    debug_only(has_disp32 = true); // has both kinds of operands!
    tail_size = 1; // the imm8
    break;

  case 0xC4: // VEX_3bytes
  case 0xC5: // VEX_2bytes
    assert((UseAVX > 0), "shouldn't have VEX prefix");
    assert(ip == inst+1, "no prefixes allowed");
    // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions,
    // but those carry the 0x0F prefix and are handled when 0x0F is processed
    // above.
    //
    // In 32-bit mode the VEX first bytes C4 and C5 alias onto the LDS and LES
    // instructions (these instructions are not supported in 64-bit mode).
    // To distinguish them, bits [7:6] are set in the VEX second byte, since a
    // ModRM byte cannot be of the form 11xxxxxx in 32-bit mode. To keep those
    // bits set, the REX and vvvv bits are stored inverted.
    //
    // Fortunately C2 doesn't generate these instructions, so we don't need
    // to check for them in the product build.
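    //
    // Illustrative example (not from the original comment): the 2-byte VEX
    // form "vaddss xmm0, xmm1, xmm2" encodes as C5 F2 58 C2, where 0xF2 packs
    // R = 1, inverted vvvv = 1110 (xmm1), L = 0, and pp = 10 (the F3 prefix).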

    // Check second byte
    NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));

    int vex_opcode;
    // First byte
    if ((0xFF & *inst) == VEX_3bytes) {
      vex_opcode = VEX_OPCODE_MASK & *ip;
      ip++; // third byte
      is_64bit = ((VEX_W & *ip) == VEX_W);
    } else {
      vex_opcode = VEX_OPCODE_0F;
    }
    ip++; // opcode
    // To find the end of the instruction (which == end_pc_operand).
    switch (vex_opcode) {
      case VEX_OPCODE_0F:
        switch (0xFF & *ip) {
        case 0x70: // pshufd r, r/a, #8
        case 0x71: // ps[rl|ra|ll]w r, #8
        case 0x72: // ps[rl|ra|ll]d r, #8
        case 0x73: // ps[rl|ra|ll]q r, #8
        case 0xC2: // cmp[ps|pd|ss|sd] r, r, r/a, #8
        case 0xC4: // pinsrw r, r, r/a, #8
        case 0xC5: // pextrw r/a, r, #8
        case 0xC6: // shufp[s|d] r, r, r/a, #8
          tail_size = 1;  // the imm8
          break;
        }
        break;
      case VEX_OPCODE_0F_3A:
        tail_size = 1;
        break;
    }
    ip++; // skip opcode
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0x62: // EVEX_4bytes
    assert(VM_Version::supports_evex(), "shouldn't have EVEX prefix");
    assert(ip == inst+1, "no prefixes allowed");
    // no EVEX collisions, all instructions that have 0x62 opcodes
    // have EVEX versions and are subopcodes of 0x66
    ip++; // skip P0 and examine W in P1
    is_64bit = ((VEX_W & *ip) == VEX_W);
    ip++; // move to P2
    ip++; // skip P2, move to opcode
    // To find the end of the instruction (which == end_pc_operand).
    switch (0xFF & *ip) {
    case 0x22: // pinsrd r, r/a, #8
    case 0x61: // pcmpestri r, r/a, #8
    case 0x70: // pshufd r, r/a, #8
    case 0x73: // psrldq r, #8
      tail_size = 1;  // the imm8
      break;
    default:
      break;
    }
    ip++; // skip opcode
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
  case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
  case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
  case 0xDD: // fld_d a; fst_d a; fstp_d a
  case 0xDB: // fild_s a; fistp_s a; fld_x a; fstp_x a
  case 0xDF: // fild_d a; fistp_d a
  case 0xD8: // fadd_s a; fsubr_s a; fmul_s a; fdivr_s a; fcomp_s a
  case 0xDC: // fadd_d a; fsubr_d a; fmul_d a; fdivr_d a; fcomp_d a
  case 0xDE: // faddp_d a; fsubrp_d a; fmulp_d a; fdivrp_d a; fcompp_d a
    debug_only(has_disp32 = true);
    break;

  case 0xE8: // call rdisp32
  case 0xE9: // jmp  rdisp32
    if (which == end_pc_operand)  return ip + 4;
    assert(which == call32_operand, "call has no disp32 or imm");
    return ip;

  case 0xF0:                    // Lock
    assert(os::is_MP(), "only on MP");
    goto again_after_prefix;

  case 0xF3:                    // For SSE
  case 0xF2:                    // For SSE2
    switch (0xFF & *ip++) {
    case REX:
    case REX_B:
    case REX_X:
    case REX_XB:
    case REX_R:
    case REX_RB:
    case REX_RX:
    case REX_RXB:
    case REX_W:
    case REX_WB:
    case REX_WX:
    case REX_WXB:
    case REX_WR:
    case REX_WRB:
    case REX_WRX:
    case REX_WRXB:
      NOT_LP64(assert(false, "found 64bit prefix"));
      ip++;
    default:
      ip++;
    }
    debug_only(has_disp32 = true); // has both kinds of operands!
    break;

  default:
    ShouldNotReachHere();

#undef REP8
#undef REP16
  }

  assert(which != call32_operand, "instruction is not a call, jmp, or jcc");
#ifdef _LP64
  assert(which != imm_operand, "instruction is not a movq reg, imm64");
#else
  // assert(which != imm_operand || has_imm32, "instruction has no imm32 field");
  assert(which != imm_operand || has_disp32, "instruction has no imm32 field");
#endif // LP64
  assert(which != disp32_operand || has_disp32, "instruction has no disp32 field");

  // parse the output of emit_operand
  int op2 = 0xFF & *ip++;
  int base = op2 & 0x07;
  int op3 = -1;
  const int b100 = 4;
  const int b101 = 5;
  if (base == b100 && (op2 >> 6) != 3) {
    op3 = 0xFF & *ip++;
    base = op3 & 0x07;   // refetch the base
  }
  // now ip points at the disp (if any)

  switch (op2 >> 6) {
  case 0:
    // [00 reg  100][ss index base]
    // [00 reg  100][00   100  esp]
    // [00 reg base]
    // [00 reg  100][ss index  101][disp32]
    // [00 reg  101]               [disp32]

    if (base == b101) {
      if (which == disp32_operand)
        return ip;              // caller wants the disp32
      ip += 4;                  // skip the disp32
    }
    break;

  case 1:
    // [01 reg  100][ss index base][disp8]
    // [01 reg  100][00   100  esp][disp8]
    // [01 reg base]               [disp8]
    ip += 1;                    // skip the disp8
    break;

  case 2:
    // [10 reg  100][ss index base][disp32]
    // [10 reg  100][00   100  esp][disp32]
    // [10 reg base]               [disp32]
    if (which == disp32_operand)
      return ip;                // caller wants the disp32
    ip += 4;                    // skip the disp32
    break;

  case 3:
    // [11 reg base]  (not a memory addressing mode)
    break;
  }

  if (which == end_pc_operand) {
    return ip + tail_size;
  }

#ifdef _LP64
  assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
#else
  assert(which == imm_operand, "instruction has only an imm field");
#endif // LP64
  return ip;
}

address Assembler::locate_next_instruction(address inst) {
  // Secretly share code with locate_operand:
  return locate_operand(inst, end_pc_operand);
}


#ifdef ASSERT
void Assembler::check_relocation(RelocationHolder const& rspec, int format) {
  address inst = inst_mark();
  assert(inst != NULL && inst < pc(), "must point to beginning of instruction");
  address opnd;

  Relocation* r = rspec.reloc();
  if (r->type() == relocInfo::none) {
    return;
  } else if (r->is_call() || format == call32_operand) {
    // assert(format == imm32_operand, "cannot specify a nonzero format");
    opnd = locate_operand(inst, call32_operand);
  } else if (r->is_data()) {
    assert(format == imm_operand || format == disp32_operand
           LP64_ONLY(|| format == narrow_oop_operand), "format ok");
    opnd = locate_operand(inst, (WhichOperand)format);
  } else {
    assert(format == imm_operand, "cannot specify a format");
    return;
  }
  assert(opnd == pc(), "must put operand where relocs can find it");
}
#endif // ASSERT

void Assembler::emit_operand32(Register reg, Address adr) {
  assert(reg->encoding() < 8, "no extended registers");
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

void Assembler::emit_operand(Register reg, Address adr,
                             int rip_relative_correction) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec,
               rip_relative_correction);
}

void Assembler::emit_operand(XMMRegister reg, Address adr) {
  emit_operand(reg, adr._base, adr._index, adr._scale, adr._disp,
               adr._rspec);
}

// MMX operations
void Assembler::emit_operand(MMXRegister reg, Address adr) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}

// work around gcc (3.2.1-7a) bug
void Assembler::emit_operand(Address adr, MMXRegister reg) {
  assert(!adr.base_needs_rex() && !adr.index_needs_rex(), "no extended registers");
  emit_operand((Register)reg, adr._base, adr._index, adr._scale, adr._disp, adr._rspec);
}


void Assembler::emit_farith(int b1, int b2, int i) {
  assert(isByte(b1) && isByte(b2), "wrong opcode");
  assert(0 <= i && i < 8, "illegal stack offset");
  emit_int8(b1);
  emit_int8(b2 + i);
}
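
// Illustrative note: emit_farith encodes x87 stack-register forms by adding
// the stack offset to the base opcode. For example, emit_farith(0xD8, 0xC0, 2)
// emits D8 C2, i.e. "fadd st, st(2)".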


// Now the Assembler instructions (identical for 32/64 bits)

void Assembler::adcl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rdx, dst, imm32);
}

void Assembler::adcl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x11);
  emit_operand(src, dst);
}

void Assembler::adcl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xD0, dst, imm32);
}

void Assembler::adcl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x13);
  emit_operand(dst, src);
}

void Assembler::adcl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x13, 0xC0, dst, src);
}

void Assembler::addl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_arith_operand(0x81, rax, dst, imm32);
}

void Assembler::addb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x80);
  emit_operand(rax, dst, 1);
  emit_int8(imm8);
}

void Assembler::addw(Address dst, int imm16) {
  InstructionMark im(this);
  emit_int8(0x66);
  prefix(dst);
  emit_int8((unsigned char)0x81);
  emit_operand(rax, dst, 2);
  emit_int16(imm16);
}

void Assembler::addl(Address dst, Register src) {
  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(0x01);
  emit_operand(src, dst);
}

void Assembler::addl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xC0, dst, imm32);
}

void Assembler::addl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x03);
  emit_operand(dst, src);
}

void Assembler::addl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x03, 0xC0, dst, src);
}

void Assembler::addr_nop_4() {
  assert(UseAddressNop, "no CPU support");
  // 4 bytes: NOP DWORD PTR [EAX+0]
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
  emit_int8(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_5() {
  assert(UseAddressNop, "no CPU support");
  // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_int8(0);    // 8-bits offset (1 byte)
}

void Assembler::addr_nop_7() {
  assert(UseAddressNop, "no CPU support");
  // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8((unsigned char)0x80);
                   // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
  emit_int32(0);   // 32-bits offset (4 bytes)
}

void Assembler::addr_nop_8() {
  assert(UseAddressNop, "no CPU support");
  // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
  emit_int8(0x0F);
  emit_int8(0x1F);
  emit_int8((unsigned char)0x84);
                   // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
  emit_int8(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
  emit_int32(0);   // 32-bits offset (4 bytes)
}

void Assembler::addsd(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_rex_vex_w_reverted();
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::addsd(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
  attributes.set_rex_vex_w_reverted();
  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_operand(dst, src);
}

void Assembler::addss(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::addss(XMMRegister dst, Address src) {
  NOT_LP64(assert(VM_Version::supports_sse(), ""));
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
  emit_int8(0x58);
  emit_operand(dst, src);
}

void Assembler::aesdec(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDE);
  emit_operand(dst, src);
}

void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDE);
  emit_int8(0xC0 | encode);
}

void Assembler::aesdeclast(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDF);
  emit_operand(dst, src);
}

void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDF);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::aesenc(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDC);
  emit_operand(dst, src);
}

void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDC);
  emit_int8(0xC0 | encode);
}

void Assembler::aesenclast(XMMRegister dst, Address src) {
  assert(VM_Version::supports_aes(), "");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDD);
  emit_operand(dst, src);
}

void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_aes(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xDD);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::andl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x81);
  emit_operand(rsp, dst, 4);
  emit_int32(imm32);
}

void Assembler::andl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xE0, dst, imm32);
}

void Assembler::andl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8(0x23);
  emit_operand(dst, src);
}

void Assembler::andl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x23, 0xC0, dst, src);
}

void Assembler::andnl(Register dst, Register src1, Register src2) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF2);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::andnl(Register dst, Register src1, Address src2) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF2);
  emit_operand(dst, src2);
}

void Assembler::bsfl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBC);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::bsrl(Register dst, Register src) {
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xBD);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::bswapl(Register reg) { // bswap
  int encode = prefix_and_encode(reg->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)(0xC8 | encode));
}

void Assembler::blsil(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::blsil(Register dst, Address src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_operand(rbx, src);
}

void Assembler::blsmskl(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::blsmskl(Register dst, Address src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_operand(rdx, src);
}

void Assembler::blsrl(Register dst, Register src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_int8((unsigned char)(0xC0 | encode));
}

void Assembler::blsrl(Register dst, Address src) {
  assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
  InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
  emit_int8((unsigned char)0xF3);
  emit_operand(rcx, src);
}

void Assembler::call(Label& L, relocInfo::relocType rtype) {
  // suspect disp32 is always good
  int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand);

  if (L.is_bound()) {
    const int long_size = 5;
    int offs = (int)( target(L) - pc() );
    assert(offs <= 0, "assembler error");
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    emit_int8((unsigned char)0xE8);
    emit_data(offs - long_size, rtype, operand);
  } else {
    InstructionMark im(this);
    // 1110 1000 #32-bit disp
    L.add_patch_at(code(), locator());

    emit_int8((unsigned char)0xE8);
    emit_data(int(0), rtype, operand);
  }
}

void Assembler::call(Register dst) {
  int encode = prefix_and_encode(dst->encoding());
  emit_int8((unsigned char)0xFF);
  emit_int8((unsigned char)(0xD0 | encode));
}


void Assembler::call(Address adr) {
  InstructionMark im(this);
  prefix(adr);
  emit_int8((unsigned char)0xFF);
  emit_operand(rdx, adr);
}

void Assembler::call_literal(address entry, RelocationHolder const& rspec) {
  InstructionMark im(this);
  emit_int8((unsigned char)0xE8);
  intptr_t disp = entry - (pc() + sizeof(int32_t));
  // Entry is NULL in case of a scratch emit.
  assert(entry == NULL || is_simm32(disp), "disp=" INTPTR_FORMAT " must be 32bit offset (call2)", disp);
  // Technically, should use call32_operand, but this format is
  // implied by the fact that we're emitting a call instruction.

  int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand);
  emit_data((int) disp, rspec, operand);
}

void Assembler::cdql() {
  emit_int8((unsigned char)0x99);
}

void Assembler::cld() {
  emit_int8((unsigned char)0xFC);
}

void Assembler::cmovl(Condition cc, Register dst, Register src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  int encode = prefix_and_encode(dst->encoding(), src->encoding());
  emit_int8(0x0F);
  emit_int8(0x40 | cc);
  emit_int8((unsigned char)(0xC0 | encode));
}


void Assembler::cmovl(Condition cc, Register dst, Address src) {
  NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
  prefix(src, dst);
  emit_int8(0x0F);
  emit_int8(0x40 | cc);
  emit_operand(dst, src);
}

void Assembler::cmpb(Address dst, int imm8) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x80);
  emit_operand(rdi, dst, 1);
  emit_int8(imm8);
}

void Assembler::cmpl(Address dst, int32_t imm32) {
  InstructionMark im(this);
  prefix(dst);
  emit_int8((unsigned char)0x81);
  emit_operand(rdi, dst, 4);
  emit_int32(imm32);
}

void Assembler::cmpl(Register dst, int32_t imm32) {
  prefix(dst);
  emit_arith(0x81, 0xF8, dst, imm32);
}

void Assembler::cmpl(Register dst, Register src) {
  (void) prefix_and_encode(dst->encoding(), src->encoding());
  emit_arith(0x3B, 0xC0, dst, src);
}

void Assembler::cmpl(Register dst, Address src) {
  InstructionMark im(this);
  prefix(src, dst);
  emit_int8((unsigned char)0x3B);
  emit_operand(dst, src);
}

void Assembler::cmpw(Address dst, int imm16) {
  InstructionMark im(this);
  assert(!dst.base_needs_rex() && !dst.index_needs_rex(), "no extended registers");
  emit_int8(0x66);
  emit_int8((unsigned char)0x81);
  emit_operand(rdi, dst, 2);
  emit_int16(imm16);
}

// The 32-bit cmpxchg compares the value at adr with the contents of rax;
// if they are equal, reg is stored into adr, otherwise the value at adr is
// loaded into rax. The ZF is set if the compared values were equal, and
// cleared otherwise.
1593 void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
1594   InstructionMark im(this);
1595   prefix(adr, reg);
1596   emit_int8(0x0F);
1597   emit_int8((unsigned char)0xB1);
1598   emit_operand(reg, adr);
1599 }
1600 
// The 8-bit cmpxchg compares the value at adr with the low byte of rax;
// if they are equal, reg is stored into adr, otherwise the value at adr is
// loaded into the low byte of rax. The ZF is set if the compared values were
// equal, and cleared otherwise.
1604 void Assembler::cmpxchgb(Register reg, Address adr) { // cmpxchg
1605   InstructionMark im(this);
1606   prefix(adr, reg, true);
1607   emit_int8(0x0F);
1608   emit_int8((unsigned char)0xB0);
1609   emit_operand(reg, adr);
1610 }
1611 
1612 void Assembler::comisd(XMMRegister dst, Address src) {
  // NOTE: dbx seems to decode this as comiss even though the
  // 0x66 prefix is there. Strangely, ucomisd comes out correct.
1615   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1616   InstructionMark im(this);
  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1618   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1619   attributes.set_rex_vex_w_reverted();
1620   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
1621   emit_int8(0x2F);
1622   emit_operand(dst, src);
1623 }
1624 
1625 void Assembler::comisd(XMMRegister dst, XMMRegister src) {
1626   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1627   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1628   attributes.set_rex_vex_w_reverted();
1629   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
1630   emit_int8(0x2F);
1631   emit_int8((unsigned char)(0xC0 | encode));
1632 }
1633 
1634 void Assembler::comiss(XMMRegister dst, Address src) {
1635   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1636   InstructionMark im(this);
1637   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1638   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1639   simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1640   emit_int8(0x2F);
1641   emit_operand(dst, src);
1642 }
1643 
1644 void Assembler::comiss(XMMRegister dst, XMMRegister src) {
1645   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1646   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1647   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1648   emit_int8(0x2F);
1649   emit_int8((unsigned char)(0xC0 | encode));
1650 }
1651 
1652 void Assembler::cpuid() {
1653   emit_int8(0x0F);
1654   emit_int8((unsigned char)0xA2);
1655 }
1656 
// Opcode / Instruction                   Op/En  64-Bit Mode  Compat/Leg Mode  Description                  Implemented
// F2 0F 38 F0 /r       CRC32 r32, r/m8   RM     Valid        Valid            Accumulate CRC32 on r/m8.    v
// F2 REX 0F 38 F0 /r   CRC32 r32, r/m8*  RM     Valid        N.E.             Accumulate CRC32 on r/m8.    -
// F2 REX.W 0F 38 F0 /r CRC32 r64, r/m8   RM     Valid        N.E.             Accumulate CRC32 on r/m8.    -
//
// F2 0F 38 F1 /r       CRC32 r32, r/m16  RM     Valid        Valid            Accumulate CRC32 on r/m16.   v
//
// F2 0F 38 F1 /r       CRC32 r32, r/m32  RM     Valid        Valid            Accumulate CRC32 on r/m32.   v
//
// F2 REX.W 0F 38 F1 /r CRC32 r64, r/m64  RM     Valid        N.E.             Accumulate CRC32 on r/m64.   v
1667 void Assembler::crc32(Register crc, Register v, int8_t sizeInBytes) {
1668   assert(VM_Version::supports_sse4_2(), "");
1669   int8_t w = 0x01;
1670   Prefix p = Prefix_EMPTY;
1671 
1672   emit_int8((int8_t)0xF2);
1673   switch (sizeInBytes) {
1674   case 1:
1675     w = 0;
1676     break;
1677   case 2:
1678   case 4:
1679     break;
1680   LP64_ONLY(case 8:)
    // This instruction is not valid in 32-bit mode
    // Note:
    // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
    //
    // Page B-72, Vol. 2C says
    // qwreg2 to qwreg            1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : 11 qwreg1 qwreg2
    // mem64 to qwreg             1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : mod qwreg r/m
    //                                                                            F0!!!
    // while page 3-208, Vol. 2A has
    // F2 REX.W 0F 38 F1 /r       CRC32 r64, r/m64              RM         Valid      N.E.    Accumulate CRC32 on r/m64.
    //
    // i.e. the 0 in the last opcode bit is reserved for a different flavor of this instruction:
    // F2 REX.W 0F 38 F0 /r       CRC32 r64, r/m8               RM         Valid      N.E.    Accumulate CRC32 on r/m8.
1694     p = REX_W;
1695     break;
1696   default:
1697     assert(0, "Unsupported value for a sizeInBytes argument");
1698     break;
1699   }
1700   LP64_ONLY(prefix(crc, v, p);)
1701   emit_int8((int8_t)0x0F);
1702   emit_int8(0x38);
1703   emit_int8((int8_t)(0xF0 | w));
1704   emit_int8(0xC0 | ((crc->encoding() & 0x7) << 3) | (v->encoding() & 7));
1705 }
1706 
1707 void Assembler::crc32(Register crc, Address adr, int8_t sizeInBytes) {
1708   assert(VM_Version::supports_sse4_2(), "");
1709   InstructionMark im(this);
1710   int8_t w = 0x01;
1711   Prefix p = Prefix_EMPTY;
1712 
1713   emit_int8((int8_t)0xF2);
1714   switch (sizeInBytes) {
1715   case 1:
1716     w = 0;
1717     break;
1718   case 2:
1719   case 4:
1720     break;
1721   LP64_ONLY(case 8:)
    // This instruction is not valid in 32-bit mode
1723     p = REX_W;
1724     break;
1725   default:
1726     assert(0, "Unsupported value for a sizeInBytes argument");
1727     break;
1728   }
1729   LP64_ONLY(prefix(crc, adr, p);)
1730   emit_int8((int8_t)0x0F);
1731   emit_int8(0x38);
1732   emit_int8((int8_t)(0xF0 | w));
1733   emit_operand(crc, adr);
1734 }
1735 
1736 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
1737   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1738   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
1739   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1740   emit_int8((unsigned char)0xE6);
1741   emit_int8((unsigned char)(0xC0 | encode));
1742 }
1743 
1744 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
1745   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1746   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
1747   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
1748   emit_int8(0x5B);
1749   emit_int8((unsigned char)(0xC0 | encode));
1750 }
1751 
1752 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
1753   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1754   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1755   attributes.set_rex_vex_w_reverted();
1756   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1757   emit_int8(0x5A);
1758   emit_int8((unsigned char)(0xC0 | encode));
1759 }
1760 
1761 void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
1762   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1763   InstructionMark im(this);
1764   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1765   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1766   attributes.set_rex_vex_w_reverted();
1767   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1768   emit_int8(0x5A);
1769   emit_operand(dst, src);
1770 }
1771 
1772 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
1773   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1774   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1775   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1776   emit_int8(0x2A);
1777   emit_int8((unsigned char)(0xC0 | encode));
1778 }
1779 
1780 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
1781   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1782   InstructionMark im(this);
1783   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1784   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1785   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1786   emit_int8(0x2A);
1787   emit_operand(dst, src);
1788 }
1789 
1790 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
1791   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1792   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1793   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1794   emit_int8(0x2A);
1795   emit_int8((unsigned char)(0xC0 | encode));
1796 }
1797 
1798 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
1799   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1800   InstructionMark im(this);
1801   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1802   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1803   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1804   emit_int8(0x2A);
1805   emit_operand(dst, src);
1806 }
1807 
1808 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
1809   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1810   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1811   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1812   emit_int8(0x2A);
1813   emit_int8((unsigned char)(0xC0 | encode));
1814 }
1815 
1816 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
1817   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1818   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1819   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1820   emit_int8(0x5A);
1821   emit_int8((unsigned char)(0xC0 | encode));
1822 }
1823 
1824 void Assembler::cvtss2sd(XMMRegister dst, Address src) {
1825   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1826   InstructionMark im(this);
1827   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1828   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1829   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1830   emit_int8(0x5A);
1831   emit_operand(dst, src);
1832 }
1833 
1834 
1835 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
1836   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1837   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1838   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1839   emit_int8(0x2C);
1840   emit_int8((unsigned char)(0xC0 | encode));
1841 }
1842 
1843 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
1844   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1845   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1846   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1847   emit_int8(0x2C);
1848   emit_int8((unsigned char)(0xC0 | encode));
1849 }
1850 
1851 void Assembler::cvttpd2dq(XMMRegister dst, XMMRegister src) {
1852   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1853   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
1854   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
1855   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
1856   emit_int8((unsigned char)0xE6);
1857   emit_int8((unsigned char)(0xC0 | encode));
1858 }
1859 
1860 void Assembler::decl(Address dst) {
1861   // Don't use it directly. Use MacroAssembler::decrement() instead.
1862   InstructionMark im(this);
1863   prefix(dst);
1864   emit_int8((unsigned char)0xFF);
1865   emit_operand(rcx, dst);
1866 }
1867 
1868 void Assembler::divsd(XMMRegister dst, Address src) {
1869   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1870   InstructionMark im(this);
1871   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1872   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
1873   attributes.set_rex_vex_w_reverted();
1874   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1875   emit_int8(0x5E);
1876   emit_operand(dst, src);
1877 }
1878 
1879 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
1880   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
1881   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1882   attributes.set_rex_vex_w_reverted();
1883   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
1884   emit_int8(0x5E);
1885   emit_int8((unsigned char)(0xC0 | encode));
1886 }
1887 
1888 void Assembler::divss(XMMRegister dst, Address src) {
1889   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1890   InstructionMark im(this);
1891   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1892   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
1893   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1894   emit_int8(0x5E);
1895   emit_operand(dst, src);
1896 }
1897 
1898 void Assembler::divss(XMMRegister dst, XMMRegister src) {
1899   NOT_LP64(assert(VM_Version::supports_sse(), ""));
1900   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
1901   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
1902   emit_int8(0x5E);
1903   emit_int8((unsigned char)(0xC0 | encode));
1904 }
1905 
1906 void Assembler::emms() {
1907   NOT_LP64(assert(VM_Version::supports_mmx(), ""));
1908   emit_int8(0x0F);
1909   emit_int8(0x77);
1910 }
1911 
1912 void Assembler::hlt() {
1913   emit_int8((unsigned char)0xF4);
1914 }
1915 
1916 void Assembler::idivl(Register src) {
1917   int encode = prefix_and_encode(src->encoding());
1918   emit_int8((unsigned char)0xF7);
1919   emit_int8((unsigned char)(0xF8 | encode));
1920 }
1921 
1922 void Assembler::divl(Register src) { // Unsigned
1923   int encode = prefix_and_encode(src->encoding());
1924   emit_int8((unsigned char)0xF7);
1925   emit_int8((unsigned char)(0xF0 | encode));
1926 }
1927 
1928 void Assembler::imull(Register src) {
1929   int encode = prefix_and_encode(src->encoding());
1930   emit_int8((unsigned char)0xF7);
1931   emit_int8((unsigned char)(0xE8 | encode));
1932 }
1933 
1934 void Assembler::imull(Register dst, Register src) {
1935   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1936   emit_int8(0x0F);
1937   emit_int8((unsigned char)0xAF);
1938   emit_int8((unsigned char)(0xC0 | encode));
1939 }
1940 
1941 
1942 void Assembler::imull(Register dst, Register src, int value) {
1943   int encode = prefix_and_encode(dst->encoding(), src->encoding());
1944   if (is8bit(value)) {
1945     emit_int8(0x6B);
1946     emit_int8((unsigned char)(0xC0 | encode));
1947     emit_int8(value & 0xFF);
1948   } else {
1949     emit_int8(0x69);
1950     emit_int8((unsigned char)(0xC0 | encode));
1951     emit_int32(value);
1952   }
1953 }
1954 
1955 void Assembler::imull(Register dst, Address src) {
1956   InstructionMark im(this);
1957   prefix(src, dst);
1958   emit_int8(0x0F);
  emit_int8((unsigned char)0xAF);
1960   emit_operand(dst, src);
1961 }
1962 
1963 
1964 void Assembler::incl(Address dst) {
1965   // Don't use it directly. Use MacroAssembler::increment() instead.
1966   InstructionMark im(this);
1967   prefix(dst);
1968   emit_int8((unsigned char)0xFF);
1969   emit_operand(rax, dst);
1970 }
1971 
1972 void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {
1973   InstructionMark im(this);
1974   assert((0 <= cc) && (cc < 16), "illegal cc");
1975   if (L.is_bound()) {
1976     address dst = target(L);
1977     assert(dst != NULL, "jcc most probably wrong");
1978 
1979     const int short_size = 2;
1980     const int long_size = 6;
1981     intptr_t offs = (intptr_t)dst - (intptr_t)pc();
1982     if (maybe_short && is8bit(offs - short_size)) {
1983       // 0111 tttn #8-bit disp
1984       emit_int8(0x70 | cc);
1985       emit_int8((offs - short_size) & 0xFF);
1986     } else {
1987       // 0000 1111 1000 tttn #32-bit disp
1988       assert(is_simm32(offs - long_size),
1989              "must be 32bit offset (call4)");
1990       emit_int8(0x0F);
1991       emit_int8((unsigned char)(0x80 | cc));
1992       emit_int32(offs - long_size);
1993     }
1994   } else {
    // Note: we could eliminate conditional jumps to this jump if the condition
    //       is the same; however, that seems to be a rather unlikely case.
    // Note: use jccb() if the label to be bound is very close, to get
    //       an 8-bit displacement.
1999     L.add_patch_at(code(), locator());
2000     emit_int8(0x0F);
2001     emit_int8((unsigned char)(0x80 | cc));
2002     emit_int32(0);
2003   }
2004 }
2005 
2006 void Assembler::jccb(Condition cc, Label& L) {
2007   if (L.is_bound()) {
2008     const int short_size = 2;
2009     address entry = target(L);
2010 #ifdef ASSERT
2011     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
2012     intptr_t delta = short_branch_delta();
2013     if (delta != 0) {
2014       dist += (dist < 0 ? (-delta) :delta);
2015     }
    assert(is8bit(dist), "Displacement too large for a short jmp");
2017 #endif
2018     intptr_t offs = (intptr_t)entry - (intptr_t)pc();
2019     // 0111 tttn #8-bit disp
2020     emit_int8(0x70 | cc);
2021     emit_int8((offs - short_size) & 0xFF);
2022   } else {
2023     InstructionMark im(this);
2024     L.add_patch_at(code(), locator());
2025     emit_int8(0x70 | cc);
2026     emit_int8(0);
2027   }
2028 }
2029 
2030 void Assembler::jmp(Address adr) {
2031   InstructionMark im(this);
2032   prefix(adr);
2033   emit_int8((unsigned char)0xFF);
2034   emit_operand(rsp, adr);
2035 }
2036 
2037 void Assembler::jmp(Label& L, bool maybe_short) {
2038   if (L.is_bound()) {
2039     address entry = target(L);
2040     assert(entry != NULL, "jmp most probably wrong");
2041     InstructionMark im(this);
2042     const int short_size = 2;
2043     const int long_size = 5;
2044     intptr_t offs = entry - pc();
2045     if (maybe_short && is8bit(offs - short_size)) {
2046       emit_int8((unsigned char)0xEB);
2047       emit_int8((offs - short_size) & 0xFF);
2048     } else {
2049       emit_int8((unsigned char)0xE9);
2050       emit_int32(offs - long_size);
2051     }
2052   } else {
2053     // By default, forward jumps are always 32-bit displacements, since
2054     // we can't yet know where the label will be bound.  If you're sure that
2055     // the forward jump will not run beyond 256 bytes, use jmpb to
2056     // force an 8-bit displacement.
2057     InstructionMark im(this);
2058     L.add_patch_at(code(), locator());
2059     emit_int8((unsigned char)0xE9);
2060     emit_int32(0);
2061   }
2062 }
2063 
2064 void Assembler::jmp(Register entry) {
2065   int encode = prefix_and_encode(entry->encoding());
2066   emit_int8((unsigned char)0xFF);
2067   emit_int8((unsigned char)(0xE0 | encode));
2068 }
2069 
2070 void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) {
2071   InstructionMark im(this);
2072   emit_int8((unsigned char)0xE9);
2073   assert(dest != NULL, "must have a target");
2074   intptr_t disp = dest - (pc() + sizeof(int32_t));
2075   assert(is_simm32(disp), "must be 32bit offset (jmp)");
2076   emit_data(disp, rspec.reloc(), call32_operand);
2077 }
2078 
2079 void Assembler::jmpb(Label& L) {
2080   if (L.is_bound()) {
2081     const int short_size = 2;
2082     address entry = target(L);
2083     assert(entry != NULL, "jmp most probably wrong");
2084 #ifdef ASSERT
2085     intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size);
2086     intptr_t delta = short_branch_delta();
2087     if (delta != 0) {
2088       dist += (dist < 0 ? (-delta) :delta);
2089     }
    assert(is8bit(dist), "Displacement too large for a short jmp");
2091 #endif
2092     intptr_t offs = entry - pc();
2093     emit_int8((unsigned char)0xEB);
2094     emit_int8((offs - short_size) & 0xFF);
2095   } else {
2096     InstructionMark im(this);
2097     L.add_patch_at(code(), locator());
2098     emit_int8((unsigned char)0xEB);
2099     emit_int8(0);
2100   }
2101 }
2102 
2103 void Assembler::ldmxcsr( Address src) {
  if (UseAVX > 0) {
2105     InstructionMark im(this);
2106     InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2107     vex_prefix(src, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2108     emit_int8((unsigned char)0xAE);
2109     emit_operand(as_Register(2), src);
2110   } else {
2111     NOT_LP64(assert(VM_Version::supports_sse(), ""));
2112     InstructionMark im(this);
2113     prefix(src);
2114     emit_int8(0x0F);
2115     emit_int8((unsigned char)0xAE);
2116     emit_operand(as_Register(2), src);
2117   }
2118 }
2119 
2120 void Assembler::leal(Register dst, Address src) {
2121   InstructionMark im(this);
#ifdef _LP64
  emit_int8(0x67); // addr32 prefix: compute the effective address in 32 bits
  prefix(src, dst);
#endif // LP64
2126   emit_int8((unsigned char)0x8D);
2127   emit_operand(dst, src);
2128 }
2129 
2130 void Assembler::lfence() {
2131   emit_int8(0x0F);
2132   emit_int8((unsigned char)0xAE);
2133   emit_int8((unsigned char)0xE8);
2134 }
2135 
2136 void Assembler::lock() {
2137   emit_int8((unsigned char)0xF0);
2138 }
2139 
2140 void Assembler::lzcntl(Register dst, Register src) {
2141   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
2142   emit_int8((unsigned char)0xF3);
2143   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2144   emit_int8(0x0F);
2145   emit_int8((unsigned char)0xBD);
2146   emit_int8((unsigned char)(0xC0 | encode));
2147 }
2148 
2149 // Emit mfence instruction
2150 void Assembler::mfence() {
2151   NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");)
2152   emit_int8(0x0F);
2153   emit_int8((unsigned char)0xAE);
2154   emit_int8((unsigned char)0xF0);
2155 }
2156 
2157 void Assembler::mov(Register dst, Register src) {
2158   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
2159 }
2160 
2161 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
2162   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2163   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2164   InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2165   attributes.set_rex_vex_w_reverted();
2166   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2167   emit_int8(0x28);
2168   emit_int8((unsigned char)(0xC0 | encode));
2169 }
2170 
2171 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
2172   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2173   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2174   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
2175   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2176   emit_int8(0x28);
2177   emit_int8((unsigned char)(0xC0 | encode));
2178 }
2179 
2180 void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
2181   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2182   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2183   int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2184   emit_int8(0x16);
2185   emit_int8((unsigned char)(0xC0 | encode));
2186 }
2187 
2188 void Assembler::movb(Register dst, Address src) {
2189   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
2190   InstructionMark im(this);
2191   prefix(src, dst, true);
2192   emit_int8((unsigned char)0x8A);
2193   emit_operand(dst, src);
2194 }
2195 
2196 void Assembler::movddup(XMMRegister dst, XMMRegister src) {
2197   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
2198   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2199   InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2200   attributes.set_rex_vex_w_reverted();
2201   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2202   emit_int8(0x12);
  emit_int8((unsigned char)(0xC0 | encode));
2204 }
2205 
2206 void Assembler::kmovbl(KRegister dst, Register src) {
2207   assert(VM_Version::supports_avx512dq(), "");
2208   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2209   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2210   emit_int8((unsigned char)0x92);
2211   emit_int8((unsigned char)(0xC0 | encode));
2212 }
2213 
2214 void Assembler::kmovbl(Register dst, KRegister src) {
2215   assert(VM_Version::supports_avx512dq(), "");
2216   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2217   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2218   emit_int8((unsigned char)0x93);
2219   emit_int8((unsigned char)(0xC0 | encode));
2220 }
2221 
2222 void Assembler::kmovwl(KRegister dst, Register src) {
2223   assert(VM_Version::supports_evex(), "");
2224   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2225   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2226   emit_int8((unsigned char)0x92);
2227   emit_int8((unsigned char)(0xC0 | encode));
2228 }
2229 
2230 void Assembler::kmovwl(Register dst, KRegister src) {
2231   assert(VM_Version::supports_evex(), "");
2232   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2233   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2234   emit_int8((unsigned char)0x93);
2235   emit_int8((unsigned char)(0xC0 | encode));
2236 }
2237 
2238 void Assembler::kmovwl(KRegister dst, Address src) {
2239   assert(VM_Version::supports_evex(), "");
2240   InstructionMark im(this);
2241   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2242   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2243   emit_int8((unsigned char)0x90);
2244   emit_operand((Register)dst, src);
2245 }
2246 
2247 void Assembler::kmovdl(KRegister dst, Register src) {
2248   assert(VM_Version::supports_avx512bw(), "");
2249   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2250   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2251   emit_int8((unsigned char)0x92);
2252   emit_int8((unsigned char)(0xC0 | encode));
2253 }
2254 
2255 void Assembler::kmovdl(Register dst, KRegister src) {
2256   assert(VM_Version::supports_avx512bw(), "");
2257   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2258   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2259   emit_int8((unsigned char)0x93);
2260   emit_int8((unsigned char)(0xC0 | encode));
2261 }
2262 
2263 void Assembler::kmovql(KRegister dst, KRegister src) {
2264   assert(VM_Version::supports_avx512bw(), "");
2265   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2266   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2267   emit_int8((unsigned char)0x90);
2268   emit_int8((unsigned char)(0xC0 | encode));
2269 }
2270 
2271 void Assembler::kmovql(KRegister dst, Address src) {
2272   assert(VM_Version::supports_avx512bw(), "");
2273   InstructionMark im(this);
2274   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2275   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2276   emit_int8((unsigned char)0x90);
2277   emit_operand((Register)dst, src);
2278 }
2279 
2280 void Assembler::kmovql(Address dst, KRegister src) {
2281   assert(VM_Version::supports_avx512bw(), "");
2282   InstructionMark im(this);
2283   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2284   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2285   emit_int8((unsigned char)0x90);
2286   emit_operand((Register)src, dst);
2287 }
2288 
2289 void Assembler::kmovql(KRegister dst, Register src) {
2290   assert(VM_Version::supports_avx512bw(), "");
2291   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2292   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2293   emit_int8((unsigned char)0x92);
2294   emit_int8((unsigned char)(0xC0 | encode));
2295 }
2296 
2297 void Assembler::kmovql(Register dst, KRegister src) {
2298   assert(VM_Version::supports_avx512bw(), "");
2299   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2300   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2301   emit_int8((unsigned char)0x93);
2302   emit_int8((unsigned char)(0xC0 | encode));
2303 }
2304 
2305 void Assembler::knotwl(KRegister dst, KRegister src) {
2306   assert(VM_Version::supports_evex(), "");
2307   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2308   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2309   emit_int8((unsigned char)0x44);
2310   emit_int8((unsigned char)(0xC0 | encode));
2311 }
2312 
2313 // This instruction produces ZF or CF flags
2314 void Assembler::kortestbl(KRegister src1, KRegister src2) {
2315   assert(VM_Version::supports_avx512dq(), "");
2316   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2317   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2318   emit_int8((unsigned char)0x98);
2319   emit_int8((unsigned char)(0xC0 | encode));
2320 }
2321 
2322 // This instruction produces ZF or CF flags
2323 void Assembler::kortestwl(KRegister src1, KRegister src2) {
2324   assert(VM_Version::supports_evex(), "");
2325   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2326   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2327   emit_int8((unsigned char)0x98);
2328   emit_int8((unsigned char)(0xC0 | encode));
2329 }
2330 
2331 // This instruction produces ZF or CF flags
2332 void Assembler::kortestdl(KRegister src1, KRegister src2) {
2333   assert(VM_Version::supports_avx512bw(), "");
2334   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2335   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2336   emit_int8((unsigned char)0x98);
2337   emit_int8((unsigned char)(0xC0 | encode));
2338 }
2339 
2340 // This instruction produces ZF or CF flags
2341 void Assembler::kortestql(KRegister src1, KRegister src2) {
2342   assert(VM_Version::supports_avx512bw(), "");
2343   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2344   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2345   emit_int8((unsigned char)0x98);
2346   emit_int8((unsigned char)(0xC0 | encode));
2347 }
2348 
2349 // This instruction produces ZF or CF flags
2350 void Assembler::ktestql(KRegister src1, KRegister src2) {
2351   assert(VM_Version::supports_avx512bw(), "");
2352   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2353   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2354   emit_int8((unsigned char)0x99);
2355   emit_int8((unsigned char)(0xC0 | encode));
2356 }
2357 
2358 void Assembler::ktestq(KRegister src1, KRegister src2) {
2359   assert(VM_Version::supports_avx512bw(), "");
2360   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2361   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
2362   emit_int8((unsigned char)0x99);
2363   emit_int8((unsigned char)(0xC0 | encode));
2364 }
2365 
2366 void Assembler::ktestd(KRegister src1, KRegister src2) {
2367   assert(VM_Version::supports_avx512bw(), "");
2368   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
2369   int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2370   emit_int8((unsigned char)0x99);
2371   emit_int8((unsigned char)(0xC0 | encode));
2372 }
2373 
2374 void Assembler::movb(Address dst, int imm8) {
2375   InstructionMark im(this);
  prefix(dst);
2377   emit_int8((unsigned char)0xC6);
2378   emit_operand(rax, dst, 1);
2379   emit_int8(imm8);
2380 }
2381 
2382 
2383 void Assembler::movb(Address dst, Register src) {
2384   assert(src->has_byte_register(), "must have byte register");
2385   InstructionMark im(this);
2386   prefix(dst, src, true);
2387   emit_int8((unsigned char)0x88);
2388   emit_operand(src, dst);
2389 }
2390 
2391 void Assembler::movdl(XMMRegister dst, Register src) {
2392   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2393   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2394   int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2395   emit_int8(0x6E);
2396   emit_int8((unsigned char)(0xC0 | encode));
2397 }
2398 
2399 void Assembler::movdl(Register dst, XMMRegister src) {
2400   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2401   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2402   // swap src/dst to get correct prefix
2403   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2404   emit_int8(0x7E);
2405   emit_int8((unsigned char)(0xC0 | encode));
2406 }
2407 
2408 void Assembler::movdl(XMMRegister dst, Address src) {
2409   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2410   InstructionMark im(this);
2411   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2412   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2413   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2414   emit_int8(0x6E);
2415   emit_operand(dst, src);
2416 }
2417 
2418 void Assembler::movdl(Address dst, XMMRegister src) {
2419   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2420   InstructionMark im(this);
2421   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2422   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2423   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2424   emit_int8(0x7E);
2425   emit_operand(src, dst);
2426 }
2427 
2428 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
2429   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2430   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
2431   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2432   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2433   emit_int8(0x6F);
2434   emit_int8((unsigned char)(0xC0 | encode));
2435 }
2436 
2437 void Assembler::movdqa(XMMRegister dst, Address src) {
2438   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2439   InstructionMark im(this);
2440   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2441   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2442   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2443   emit_int8(0x6F);
2444   emit_operand(dst, src);
2445 }
2446 
2447 void Assembler::movdqu(XMMRegister dst, Address src) {
2448   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2449   InstructionMark im(this);
2450   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2451   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2452   simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2453   emit_int8(0x6F);
2454   emit_operand(dst, src);
2455 }
2456 
2457 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
2458   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2459   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2460   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2461   emit_int8(0x6F);
2462   emit_int8((unsigned char)(0xC0 | encode));
2463 }
2464 
2465 void Assembler::movdqu(Address dst, XMMRegister src) {
2466   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2467   InstructionMark im(this);
2468   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2469   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2470   attributes.reset_is_clear_context();
2471   simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2472   emit_int8(0x7F);
2473   emit_operand(src, dst);
2474 }
2475 
2476 // Move Unaligned 256bit Vector
2477 void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
2478   assert(UseAVX > 0, "");
2479   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2480   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2481   emit_int8(0x6F);
2482   emit_int8((unsigned char)(0xC0 | encode));
2483 }
2484 
2485 void Assembler::vmovdqu(XMMRegister dst, Address src) {
2486   assert(UseAVX > 0, "");
2487   InstructionMark im(this);
2488   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2489   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2490   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2491   emit_int8(0x6F);
2492   emit_operand(dst, src);
2493 }
2494 
2495 void Assembler::vmovdqu(Address dst, XMMRegister src) {
2496   assert(UseAVX > 0, "");
2497   InstructionMark im(this);
2498   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2499   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2500   attributes.reset_is_clear_context();
2501   // swap src<->dst for encoding
2502   assert(src != xnoreg, "sanity");
2503   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2504   emit_int8(0x7F);
2505   emit_operand(src, dst);
2506 }
2507 
// Move Unaligned EVEX-enabled Vector (programmable element size: 8, 16, 32, or 64 bits)
2509 void Assembler::evmovdqub(XMMRegister dst, XMMRegister src, int vector_len) {
2510   assert(VM_Version::supports_evex(), "");
2511   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2512   attributes.set_is_evex_instruction();
2513   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2514   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2515   emit_int8(0x6F);
2516   emit_int8((unsigned char)(0xC0 | encode));
2517 }
2518 
2519 void Assembler::evmovdqub(XMMRegister dst, Address src, int vector_len) {
2520   assert(VM_Version::supports_evex(), "");
2521   InstructionMark im(this);
2522   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2523   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2524   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2525   attributes.set_is_evex_instruction();
2526   vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2527   emit_int8(0x6F);
2528   emit_operand(dst, src);
2529 }
2530 
2531 void Assembler::evmovdqub(Address dst, XMMRegister src, int vector_len) {
2532   assert(VM_Version::supports_evex(), "");
2533   assert(src != xnoreg, "sanity");
2534   InstructionMark im(this);
2535   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2536   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2537   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2538   attributes.set_is_evex_instruction();
2539   vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2540   emit_int8(0x7F);
2541   emit_operand(src, dst);
2542 }
2543 
2544 void Assembler::evmovdqub(XMMRegister dst, KRegister mask, Address src, int vector_len) {
2545   assert(VM_Version::supports_avx512vlbw(), "");
2546   assert(is_vector_masking(), "");    // For stub code use only
2547   InstructionMark im(this);
2548   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
2549   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2550   attributes.set_embedded_opmask_register_specifier(mask);
2551   attributes.set_is_evex_instruction();
2552   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2553   emit_int8(0x6F);
2554   emit_operand(dst, src);
2555 }
2556 
2557 void Assembler::evmovdquw(XMMRegister dst, Address src, int vector_len) {
2558   assert(VM_Version::supports_evex(), "");
2559   InstructionMark im(this);
2560   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2561   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2562   attributes.set_is_evex_instruction();
2563   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2564   vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2565   emit_int8(0x6F);
2566   emit_operand(dst, src);
2567 }
2568 
2569 void Assembler::evmovdquw(XMMRegister dst, KRegister mask, Address src, int vector_len) {
2570   assert(is_vector_masking(), "");
2571   assert(VM_Version::supports_avx512vlbw(), "");
2572   InstructionMark im(this);
2573   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
2574   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2575   attributes.set_embedded_opmask_register_specifier(mask);
2576   attributes.set_is_evex_instruction();
2577   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2578   emit_int8(0x6F);
2579   emit_operand(dst, src);
2580 }
2581 
2582 void Assembler::evmovdquw(Address dst, XMMRegister src, int vector_len) {
2583   assert(VM_Version::supports_evex(), "");
2584   assert(src != xnoreg, "sanity");
2585   InstructionMark im(this);
2586   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
2587   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2588   attributes.set_is_evex_instruction();
2589   int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
2590   vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
2591   emit_int8(0x7F);
2592   emit_operand(src, dst);
2593 }
2594 
2595 void Assembler::evmovdquw(Address dst, KRegister mask, XMMRegister src, int vector_len) {
2596   assert(VM_Version::supports_avx512vlbw(), "");
2597   assert(src != xnoreg, "sanity");
2598   InstructionMark im(this);
2599   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2600   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2601   attributes.reset_is_clear_context();
2602   attributes.set_embedded_opmask_register_specifier(mask);
2603   attributes.set_is_evex_instruction();
2604   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2605   emit_int8(0x7F);
2606   emit_operand(src, dst);
2607 }
2608 
2609 void Assembler::evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
2610   assert(VM_Version::supports_evex(), "");
2611   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2612   attributes.set_is_evex_instruction();
2613   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2614   emit_int8(0x6F);
2615   emit_int8((unsigned char)(0xC0 | encode));
2616 }
2617 
2618 void Assembler::evmovdqul(XMMRegister dst, Address src, int vector_len) {
2619   assert(VM_Version::supports_evex(), "");
2620   InstructionMark im(this);
2621   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false , /* uses_vl */ true);
2622   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2623   attributes.set_is_evex_instruction();
2624   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2625   emit_int8(0x6F);
2626   emit_operand(dst, src);
2627 }
2628 
2629 void Assembler::evmovdqul(Address dst, XMMRegister src, int vector_len) {
2630   assert(VM_Version::supports_evex(), "");
2631   assert(src != xnoreg, "sanity");
2632   InstructionMark im(this);
2633   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2634   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2635   attributes.reset_is_clear_context();
2636   attributes.set_is_evex_instruction();
2637   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2638   emit_int8(0x7F);
2639   emit_operand(src, dst);
2640 }
2641 
2642 void Assembler::evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
2643   assert(VM_Version::supports_evex(), "");
2644   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2645   attributes.set_is_evex_instruction();
2646   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2647   emit_int8(0x6F);
2648   emit_int8((unsigned char)(0xC0 | encode));
2649 }
2650 
2651 void Assembler::evmovdquq(XMMRegister dst, Address src, int vector_len) {
2652   assert(VM_Version::supports_evex(), "");
2653   InstructionMark im(this);
2654   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2655   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2656   attributes.set_is_evex_instruction();
2657   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2658   emit_int8(0x6F);
2659   emit_operand(dst, src);
2660 }
2661 
2662 void Assembler::evmovdquq(Address dst, XMMRegister src, int vector_len) {
2663   assert(VM_Version::supports_evex(), "");
2664   assert(src != xnoreg, "sanity");
2665   InstructionMark im(this);
2666   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
2667   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
2668   attributes.reset_is_clear_context();
2669   attributes.set_is_evex_instruction();
2670   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2671   emit_int8(0x7F);
2672   emit_operand(src, dst);
2673 }
2674 
// Uses zero extension on 64-bit: writing a 32-bit register implicitly clears
// the upper 32 bits.
2676 
2677 void Assembler::movl(Register dst, int32_t imm32) {
2678   int encode = prefix_and_encode(dst->encoding());
2679   emit_int8((unsigned char)(0xB8 | encode));
2680   emit_int32(imm32);
2681 }
2682 
2683 void Assembler::movl(Register dst, Register src) {
2684   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2685   emit_int8((unsigned char)0x8B);
2686   emit_int8((unsigned char)(0xC0 | encode));
2687 }
2688 
2689 void Assembler::movl(Register dst, Address src) {
2690   InstructionMark im(this);
2691   prefix(src, dst);
2692   emit_int8((unsigned char)0x8B);
2693   emit_operand(dst, src);
2694 }
2695 
2696 void Assembler::movl(Address dst, int32_t imm32) {
2697   InstructionMark im(this);
2698   prefix(dst);
2699   emit_int8((unsigned char)0xC7);
2700   emit_operand(rax, dst, 4);
2701   emit_int32(imm32);
2702 }
2703 
2704 void Assembler::movl(Address dst, Register src) {
2705   InstructionMark im(this);
2706   prefix(dst, src);
2707   emit_int8((unsigned char)0x89);
2708   emit_operand(src, dst);
2709 }
2710 
// Newer CPUs require the use of movsd and movss to avoid a partial register stall
// when loading from memory. But for old Opterons, use movlpd instead of movsd.
2713 // The selection is done in MacroAssembler::movdbl() and movflt().
2714 void Assembler::movlpd(XMMRegister dst, Address src) {
2715   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2716   InstructionMark im(this);
2717   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2718   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2719   attributes.set_rex_vex_w_reverted();
2720   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2721   emit_int8(0x12);
2722   emit_operand(dst, src);
2723 }
2724 
void Assembler::movq(MMXRegister dst, Address src) {
  assert(VM_Version::supports_mmx(), "");
2727   emit_int8(0x0F);
2728   emit_int8(0x6F);
2729   emit_operand(dst, src);
2730 }
2731 
2732 void Assembler::movq(Address dst, MMXRegister src) {
2733   assert(VM_Version::supports_mmx(), "");
2734   emit_int8(0x0F);
2735   emit_int8(0x7F);
2736   // workaround gcc (3.2.1-7a) bug
2737   // In that version of gcc, with only an emit_operand(MMX, Address),
2738   // gcc will tail-jump and try to reverse the parameters, completely
2739   // obliterating dst in the process. By having a version available
2740   // that doesn't need to swap the args at the tail jump, the bug is
2741   // avoided.
2742   emit_operand(dst, src);
2743 }
2744 
2745 void Assembler::movq(XMMRegister dst, Address src) {
2746   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2747   InstructionMark im(this);
2748   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2749   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2750   attributes.set_rex_vex_w_reverted();
2751   simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2752   emit_int8(0x7E);
2753   emit_operand(dst, src);
2754 }
2755 
2756 void Assembler::movq(Address dst, XMMRegister src) {
2757   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2758   InstructionMark im(this);
2759   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2760   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2761   attributes.set_rex_vex_w_reverted();
2762   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
2763   emit_int8((unsigned char)0xD6);
2764   emit_operand(src, dst);
2765 }
2766 
2767 void Assembler::movsbl(Register dst, Address src) { // movsxb
2768   InstructionMark im(this);
2769   prefix(src, dst);
2770   emit_int8(0x0F);
2771   emit_int8((unsigned char)0xBE);
2772   emit_operand(dst, src);
2773 }
2774 
2775 void Assembler::movsbl(Register dst, Register src) { // movsxb
2776   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
2777   int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
2778   emit_int8(0x0F);
2779   emit_int8((unsigned char)0xBE);
2780   emit_int8((unsigned char)(0xC0 | encode));
2781 }
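
// Encoding sketch (illustrative; dst = rax, src = rbx, so the byte source
// is bl):
//   movsbl(rax, rbx)  =>  0F BE C3   (movsx eax, bl)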
2782 
2783 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
2784   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2785   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2786   attributes.set_rex_vex_w_reverted();
2787   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2788   emit_int8(0x10);
2789   emit_int8((unsigned char)(0xC0 | encode));
2790 }
2791 
2792 void Assembler::movsd(XMMRegister dst, Address src) {
2793   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2794   InstructionMark im(this);
2795   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2796   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2797   attributes.set_rex_vex_w_reverted();
2798   simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2799   emit_int8(0x10);
2800   emit_operand(dst, src);
2801 }
2802 
2803 void Assembler::movsd(Address dst, XMMRegister src) {
2804   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2805   InstructionMark im(this);
2806   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2807   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2808   attributes.reset_is_clear_context();
2809   attributes.set_rex_vex_w_reverted();
2810   simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2811   emit_int8(0x11);
2812   emit_operand(src, dst);
2813 }
2814 
2815 void Assembler::movss(XMMRegister dst, XMMRegister src) {
2816   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2817   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2818   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2819   emit_int8(0x10);
2820   emit_int8((unsigned char)(0xC0 | encode));
2821 }
2822 
2823 void Assembler::movss(XMMRegister dst, Address src) {
2824   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2825   InstructionMark im(this);
2826   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2827   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2828   simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2829   emit_int8(0x10);
2830   emit_operand(dst, src);
2831 }
2832 
2833 void Assembler::movss(Address dst, XMMRegister src) {
2834   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2835   InstructionMark im(this);
2836   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2837   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2838   attributes.reset_is_clear_context();
2839   simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2840   emit_int8(0x11);
2841   emit_operand(src, dst);
2842 }
2843 
2844 void Assembler::movswl(Register dst, Address src) { // movsxw
2845   InstructionMark im(this);
2846   prefix(src, dst);
2847   emit_int8(0x0F);
2848   emit_int8((unsigned char)0xBF);
2849   emit_operand(dst, src);
2850 }
2851 
2852 void Assembler::movswl(Register dst, Register src) { // movsxw
2853   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2854   emit_int8(0x0F);
2855   emit_int8((unsigned char)0xBF);
2856   emit_int8((unsigned char)(0xC0 | encode));
2857 }
2858 
2859 void Assembler::movw(Address dst, int imm16) {
2860   InstructionMark im(this);
2861 
2862   emit_int8(0x66); // operand-size override: makes this a 16-bit store
2863   prefix(dst);
2864   emit_int8((unsigned char)0xC7);
2865   emit_operand(rax, dst, 2);
2866   emit_int16(imm16);
2867 }
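
// The 0x66 prefix is what makes 0xC7 /0 consume a 16-bit immediate here.
// A sketch, assuming dst is [rax]:
//   movw(Address(rax, 0), 0x1234)  =>  66 C7 00 34 12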
2868 
2869 void Assembler::movw(Register dst, Address src) {
2870   InstructionMark im(this);
2871   emit_int8(0x66);
2872   prefix(src, dst);
2873   emit_int8((unsigned char)0x8B);
2874   emit_operand(dst, src);
2875 }
2876 
2877 void Assembler::movw(Address dst, Register src) {
2878   InstructionMark im(this);
2879   emit_int8(0x66);
2880   prefix(dst, src);
2881   emit_int8((unsigned char)0x89);
2882   emit_operand(src, dst);
2883 }
2884 
2885 void Assembler::movzbl(Register dst, Address src) { // movzxb
2886   InstructionMark im(this);
2887   prefix(src, dst);
2888   emit_int8(0x0F);
2889   emit_int8((unsigned char)0xB6);
2890   emit_operand(dst, src);
2891 }
2892 
2893 void Assembler::movzbl(Register dst, Register src) { // movzxb
2894   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
2895   int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
2896   emit_int8(0x0F);
2897   emit_int8((unsigned char)0xB6);
2898   emit_int8((unsigned char)(0xC0 | encode));
2899 }
2900 
2901 void Assembler::movzwl(Register dst, Address src) { // movzxw
2902   InstructionMark im(this);
2903   prefix(src, dst);
2904   emit_int8(0x0F);
2905   emit_int8((unsigned char)0xB7);
2906   emit_operand(dst, src);
2907 }
2908 
2909 void Assembler::movzwl(Register dst, Register src) { // movzxw
2910   int encode = prefix_and_encode(dst->encoding(), src->encoding());
2911   emit_int8(0x0F);
2912   emit_int8((unsigned char)0xB7);
2913   emit_int8((unsigned char)(0xC0 | encode));
2914 }
2915 
2916 void Assembler::mull(Address src) {
2917   InstructionMark im(this);
2918   prefix(src);
2919   emit_int8((unsigned char)0xF7);
2920   emit_operand(rsp, src);
2921 }
2922 
2923 void Assembler::mull(Register src) {
2924   int encode = prefix_and_encode(src->encoding());
2925   emit_int8((unsigned char)0xF7);
2926   emit_int8((unsigned char)(0xE0 | encode));
2927 }
2928 
2929 void Assembler::mulsd(XMMRegister dst, Address src) {
2930   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2931   InstructionMark im(this);
2932   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2933   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
2934   attributes.set_rex_vex_w_reverted();
2935   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2936   emit_int8(0x59);
2937   emit_operand(dst, src);
2938 }
2939 
2940 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
2941   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
2942   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2943   attributes.set_rex_vex_w_reverted();
2944   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
2945   emit_int8(0x59);
2946   emit_int8((unsigned char)(0xC0 | encode));
2947 }
2948 
2949 void Assembler::mulss(XMMRegister dst, Address src) {
2950   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2951   InstructionMark im(this);
2952   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2953   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
2954   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2955   emit_int8(0x59);
2956   emit_operand(dst, src);
2957 }
2958 
2959 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
2960   NOT_LP64(assert(VM_Version::supports_sse(), ""));
2961   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
2962   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
2963   emit_int8(0x59);
2964   emit_int8((unsigned char)(0xC0 | encode));
2965 }
2966 
2967 void Assembler::negl(Register dst) {
2968   int encode = prefix_and_encode(dst->encoding());
2969   emit_int8((unsigned char)0xF7);
2970   emit_int8((unsigned char)(0xD8 | encode));
2971 }
2972 
2973 void Assembler::nop(int i) {
2974 #ifdef ASSERT
2975   assert(i > 0, " ");
2976   // The fancy nops aren't currently recognized by debuggers, making it a
2977   // pain to disassemble code while debugging. If asserts are on, speed is
2978   // clearly not an issue, so simply use the traditional single-byte nop
2979   // for alignment.
2980 
2981   for (; i > 0 ; i--) emit_int8((unsigned char)0x90);
2982   return;
2983 
2984 #endif // ASSERT
2985 
2986   if (UseAddressNop && VM_Version::is_intel()) {
2987     //
2988     // Using multi-byte nops "0x0F 0x1F [address]" for Intel
2989     //  1: 0x90
2990     //  2: 0x66 0x90
2991     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
2992     //  4: 0x0F 0x1F 0x40 0x00
2993     //  5: 0x0F 0x1F 0x44 0x00 0x00
2994     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
2995     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
2996     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2997     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2998     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
2999     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3000 
3001     // The remaining encoding is Intel-specific - don't use consecutive address nops
3002 
3003     // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3004     // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3005     // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3006     // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3007 
3008     while(i >= 15) {
3009       // For Intel don't generate consecutive address nops (mix with regular nops)
3010       i -= 15;
3011       emit_int8(0x66);   // size prefix
3012       emit_int8(0x66);   // size prefix
3013       emit_int8(0x66);   // size prefix
3014       addr_nop_8();
3015       emit_int8(0x66);   // size prefix
3016       emit_int8(0x66);   // size prefix
3017       emit_int8(0x66);   // size prefix
3018       emit_int8((unsigned char)0x90);
3019                          // nop
3020     }
3021     switch (i) {
3022       case 14:
3023         emit_int8(0x66); // size prefix
3024       case 13:
3025         emit_int8(0x66); // size prefix
3026       case 12:
3027         addr_nop_8();
3028         emit_int8(0x66); // size prefix
3029         emit_int8(0x66); // size prefix
3030         emit_int8(0x66); // size prefix
3031         emit_int8((unsigned char)0x90);
3032                          // nop
3033         break;
3034       case 11:
3035         emit_int8(0x66); // size prefix
3036       case 10:
3037         emit_int8(0x66); // size prefix
3038       case 9:
3039         emit_int8(0x66); // size prefix
3040       case 8:
3041         addr_nop_8();
3042         break;
3043       case 7:
3044         addr_nop_7();
3045         break;
3046       case 6:
3047         emit_int8(0x66); // size prefix
3048       case 5:
3049         addr_nop_5();
3050         break;
3051       case 4:
3052         addr_nop_4();
3053         break;
3054       case 3:
3055         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
3056         emit_int8(0x66); // size prefix
3057       case 2:
3058         emit_int8(0x66); // size prefix
3059       case 1:
3060         emit_int8((unsigned char)0x90);
3061                          // nop
3062         break;
3063       default:
3064         assert(i == 0, " ");
3065     }
3066     return;
3067   }
3068   if (UseAddressNop && VM_Version::is_amd()) {
3069     //
3070     // Using multi-byte nops "0x0F 0x1F [address]" for AMD.
3071     //  1: 0x90
3072     //  2: 0x66 0x90
3073     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
3074     //  4: 0x0F 0x1F 0x40 0x00
3075     //  5: 0x0F 0x1F 0x44 0x00 0x00
3076     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
3077     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3078     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3079     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3080     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3081     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3082 
3083     // The remaining encoding is AMD-specific - use consecutive address nops
3084 
3085     // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
3086     // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
3087     // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3088     // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3089     // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3090     //     Size prefixes (0x66) are added for larger sizes
3091 
3092     while(i >= 22) {
3093       i -= 11;
3094       emit_int8(0x66); // size prefix
3095       emit_int8(0x66); // size prefix
3096       emit_int8(0x66); // size prefix
3097       addr_nop_8();
3098     }
3099     // Generate the first nop for sizes 12 through 21
3100     switch (i) {
3101       case 21:
3102         i -= 1;
3103         emit_int8(0x66); // size prefix
3104       case 20:
3105       case 19:
3106         i -= 1;
3107         emit_int8(0x66); // size prefix
3108       case 18:
3109       case 17:
3110         i -= 1;
3111         emit_int8(0x66); // size prefix
3112       case 16:
3113       case 15:
3114         i -= 8;
3115         addr_nop_8();
3116         break;
3117       case 14:
3118       case 13:
3119         i -= 7;
3120         addr_nop_7();
3121         break;
3122       case 12:
3123         i -= 6;
3124         emit_int8(0x66); // size prefix
3125         addr_nop_5();
3126         break;
3127       default:
3128         assert(i < 12, " ");
3129     }
3130 
3131     // Generate the second nop for sizes 1 through 11
3132     switch (i) {
3133       case 11:
3134         emit_int8(0x66); // size prefix
3135       case 10:
3136         emit_int8(0x66); // size prefix
3137       case 9:
3138         emit_int8(0x66); // size prefix
3139       case 8:
3140         addr_nop_8();
3141         break;
3142       case 7:
3143         addr_nop_7();
3144         break;
3145       case 6:
3146         emit_int8(0x66); // size prefix
3147       case 5:
3148         addr_nop_5();
3149         break;
3150       case 4:
3151         addr_nop_4();
3152         break;
3153       case 3:
3154         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
3155         emit_int8(0x66); // size prefix
3156       case 2:
3157         emit_int8(0x66); // size prefix
3158       case 1:
3159         emit_int8((unsigned char)0x90);
3160                          // nop
3161         break;
3162       default:
3163         assert(i == 0, " ");
3164     }
3165     return;
3166   }
3167 
3168   if (UseAddressNop && VM_Version::is_zx()) {
3169     //
3170     // Using multi-byte nops "0x0F 0x1F [address]" for ZX
3171     //  1: 0x90
3172     //  2: 0x66 0x90
3173     //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
3174     //  4: 0x0F 0x1F 0x40 0x00
3175     //  5: 0x0F 0x1F 0x44 0x00 0x00
3176     //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
3177     //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
3178     //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3179     //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3180     // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3181     // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
3182 
3183     // The remaining encoding is ZX-specific - don't use consecutive address nops
3184 
3185     // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3186     // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3187     // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3188     // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
3189 
3190     while (i >= 15) {
3191       // For ZX don't generate consecutive address nops (mix with regular nops)
3192       i -= 15;
3193       emit_int8(0x66);   // size prefix
3194       emit_int8(0x66);   // size prefix
3195       emit_int8(0x66);   // size prefix
3196       addr_nop_8();
3197       emit_int8(0x66);   // size prefix
3198       emit_int8(0x66);   // size prefix
3199       emit_int8(0x66);   // size prefix
3200       emit_int8((unsigned char)0x90);
3201                          // nop
3202     }
3203     switch (i) {
3204       case 14:
3205         emit_int8(0x66); // size prefix
3206       case 13:
3207         emit_int8(0x66); // size prefix
3208       case 12:
3209         addr_nop_8();
3210         emit_int8(0x66); // size prefix
3211         emit_int8(0x66); // size prefix
3212         emit_int8(0x66); // size prefix
3213         emit_int8((unsigned char)0x90);
3214                          // nop
3215         break;
3216       case 11:
3217         emit_int8(0x66); // size prefix
3218       case 10:
3219         emit_int8(0x66); // size prefix
3220       case 9:
3221         emit_int8(0x66); // size prefix
3222       case 8:
3223         addr_nop_8();
3224         break;
3225       case 7:
3226         addr_nop_7();
3227         break;
3228       case 6:
3229         emit_int8(0x66); // size prefix
3230       case 5:
3231         addr_nop_5();
3232         break;
3233       case 4:
3234         addr_nop_4();
3235         break;
3236       case 3:
3237         // Don't use "0x0F 0x1F 0x00" - need patching safe padding
3238         emit_int8(0x66); // size prefix
3239       case 2:
3240         emit_int8(0x66); // size prefix
3241       case 1:
3242         emit_int8((unsigned char)0x90);
3243                          // nop
3244         break;
3245       default:
3246         assert(i == 0, " ");
3247     }
3248     return;
3249   }
3250 
3251   // Using nops with size prefixes "0x66 0x90".
3252   // From the AMD Optimization Guide:
3253   //  1: 0x90
3254   //  2: 0x66 0x90
3255   //  3: 0x66 0x66 0x90
3256   //  4: 0x66 0x66 0x66 0x90
3257   //  5: 0x66 0x66 0x90 0x66 0x90
3258   //  6: 0x66 0x66 0x90 0x66 0x66 0x90
3259   //  7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
3260   //  8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
3261   //  9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
3262   // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
3263   //
3264   while(i > 12) {
3265     i -= 4;
3266     emit_int8(0x66); // size prefix
3267     emit_int8(0x66);
3268     emit_int8(0x66);
3269     emit_int8((unsigned char)0x90);
3270                      // nop
3271   }
3272   // 1 - 12 nops
3273   if(i > 8) {
3274     if(i > 9) {
3275       i -= 1;
3276       emit_int8(0x66);
3277     }
3278     i -= 3;
3279     emit_int8(0x66);
3280     emit_int8(0x66);
3281     emit_int8((unsigned char)0x90);
3282   }
3283   // 1 - 8 nops
3284   if(i > 4) {
3285     if(i > 6) {
3286       i -= 1;
3287       emit_int8(0x66);
3288     }
3289     i -= 3;
3290     emit_int8(0x66);
3291     emit_int8(0x66);
3292     emit_int8((unsigned char)0x90);
3293   }
3294   switch (i) {
3295     case 4:
3296       emit_int8(0x66);
3297     case 3:
3298       emit_int8(0x66);
3299     case 2:
3300       emit_int8(0x66);
3301     case 1:
3302       emit_int8((unsigned char)0x90);
3303       break;
3304     default:
3305       assert(i == 0, " ");
3306   }
3307 }
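
// What nop(i) emits on the paths above, taking nop(5) as a worked example
// (bytes follow the tables in the code):
//   with asserts on:                        90 90 90 90 90
//   with UseAddressNop (Intel/AMD/ZX):      0F 1F 44 00 00
//   on the prefix-only fallback path:       66 66 90 66 90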
3308 
3309 void Assembler::notl(Register dst) {
3310   int encode = prefix_and_encode(dst->encoding());
3311   emit_int8((unsigned char)0xF7);
3312   emit_int8((unsigned char)(0xD0 | encode));
3313 }
3314 
3315 void Assembler::orl(Address dst, int32_t imm32) {
3316   InstructionMark im(this);
3317   prefix(dst);
3318   emit_arith_operand(0x81, rcx, dst, imm32);
3319 }
3320 
3321 void Assembler::orl(Register dst, int32_t imm32) {
3322   prefix(dst);
3323   emit_arith(0x81, 0xC8, dst, imm32);
3324 }
3325 
3326 void Assembler::orl(Register dst, Address src) {
3327   InstructionMark im(this);
3328   prefix(src, dst);
3329   emit_int8(0x0B);
3330   emit_operand(dst, src);
3331 }
3332 
3333 void Assembler::orl(Register dst, Register src) {
3334   (void) prefix_and_encode(dst->encoding(), src->encoding());
3335   emit_arith(0x0B, 0xC0, dst, src);
3336 }
3337 
3338 void Assembler::orl(Address dst, Register src) {
3339   InstructionMark im(this);
3340   prefix(dst, src);
3341   emit_int8(0x09);
3342   emit_operand(src, dst);
3343 }
3344 
3345 void Assembler::packuswb(XMMRegister dst, Address src) {
3346   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3347   assert((UseAVX > 0), "SSE mode requires 16-byte address alignment");
3348   InstructionMark im(this);
3349   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3350   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
3351   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3352   emit_int8(0x67);
3353   emit_operand(dst, src);
3354 }
3355 
3356 void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
3357   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
3358   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3359   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3360   emit_int8(0x67);
3361   emit_int8((unsigned char)(0xC0 | encode));
3362 }
3363 
3364 void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3365   assert(UseAVX > 0, "some form of AVX must be enabled");
3366   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
3367   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3368   emit_int8(0x67);
3369   emit_int8((unsigned char)(0xC0 | encode));
3370 }
3371 
3372 void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
3373   assert(VM_Version::supports_avx2(), "");
3374   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3375   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3376   emit_int8(0x00);
3377   emit_int8((unsigned char)(0xC0 | encode));
3378   emit_int8(imm8);
3379 }
3380 
3381 void Assembler::vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) {
3382   assert(VM_Version::supports_avx2(), "");
3383   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3384   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3385   emit_int8(0x46);
3386   emit_int8((unsigned char)(0xC0 | encode));
3387   emit_int8(imm8);
3388 }
3389 
3390 void Assembler::vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) {
3391   assert(VM_Version::supports_avx(), "");
3392   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3393   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3394   emit_int8(0x06);
3395   emit_int8((unsigned char)(0xC0 | encode));
3396   emit_int8(imm8);
3397 }
3398 
3399 
3400 void Assembler::pause() {
3401   emit_int8((unsigned char)0xF3);
3402   emit_int8((unsigned char)0x90);
3403 }
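
// F3 90 is PAUSE: the REP prefix applied to NOP. It is a spin-wait hint for
// the pipeline and executes as a plain NOP on processors that predate it.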
3404 
3405 void Assembler::ud2() {
3406   emit_int8(0x0F);
3407   emit_int8(0x0B);
3408 }
3409 
3410 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
3411   assert(VM_Version::supports_sse4_2(), "");
3412   InstructionMark im(this);
3413   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3414   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3415   emit_int8(0x61);
3416   emit_operand(dst, src);
3417   emit_int8(imm8);
3418 }
3419 
3420 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
3421   assert(VM_Version::supports_sse4_2(), "");
3422   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3423   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3424   emit_int8(0x61);
3425   emit_int8((unsigned char)(0xC0 | encode));
3426   emit_int8(imm8);
3427 }
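
// pcmpestri (66 0F 3A 61 /r ib) compares explicit-length strings: the
// operand lengths are passed implicitly in EAX and EDX, imm8 selects the
// element width, aggregation operation and polarity, and the resulting
// index comes back in ECX with EFLAGS updated accordingly.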
3428 
3429 // In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
3430 void Assembler::pcmpeqb(XMMRegister dst, XMMRegister src) {
3431   assert(VM_Version::supports_sse2(), "");
3432   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3433   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3434   emit_int8(0x74);
3435   emit_int8((unsigned char)(0xC0 | encode));
3436 }
3437 
3438 // In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
3439 void Assembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3440   assert(VM_Version::supports_avx(), "");
3441   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3442   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3443   emit_int8(0x74);
3444   emit_int8((unsigned char)(0xC0 | encode));
3445 }
3446 
3447 // In this context, kdst is written with the mask used to process the equal components
3448 void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
3449   assert(VM_Version::supports_avx512bw(), "");
3450   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3451   attributes.set_is_evex_instruction();
3452   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3453   emit_int8(0x74);
3454   emit_int8((unsigned char)(0xC0 | encode));
3455 }
3456 
3457 void Assembler::evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3458   assert(VM_Version::supports_avx512vlbw(), "");
3459   InstructionMark im(this);
3460   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3461   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3462   attributes.set_is_evex_instruction();
3463   int dst_enc = kdst->encoding();
3464   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3465   emit_int8(0x64);
3466   emit_operand(as_Register(dst_enc), src);
3467 }
3468 
3469 void Assembler::evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) {
3470   assert(is_vector_masking(), "");
3471   assert(VM_Version::supports_avx512vlbw(), "");
3472   InstructionMark im(this);
3473   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
3474   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3475   attributes.reset_is_clear_context();
3476   attributes.set_embedded_opmask_register_specifier(mask);
3477   attributes.set_is_evex_instruction();
3478   int dst_enc = kdst->encoding();
3479   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3480   emit_int8(0x64);
3481   emit_operand(as_Register(dst_enc), src);
3482 }
3483 
3484 void Assembler::evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len) {
3485   assert(VM_Version::supports_avx512vlbw(), "");
3486   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3487   attributes.set_is_evex_instruction();
3488   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3489   emit_int8(0x3E);
3490   emit_int8((unsigned char)(0xC0 | encode));
3491   emit_int8(vcc);
3492 }
3493 
3494 void Assembler::evpcmpuw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len) {
3495   assert(is_vector_masking(), "");
3496   assert(VM_Version::supports_avx512vlbw(), "");
3497   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
3498   attributes.reset_is_clear_context();
3499   attributes.set_embedded_opmask_register_specifier(mask);
3500   attributes.set_is_evex_instruction();
3501   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3502   emit_int8(0x3E);
3503   emit_int8((unsigned char)(0xC0 | encode));
3504   emit_int8(vcc);
3505 }
3506 
3507 void Assembler::evpcmpuw(KRegister kdst, XMMRegister nds, Address src, ComparisonPredicate vcc, int vector_len) {
3508   assert(VM_Version::supports_avx512vlbw(), "");
3509   InstructionMark im(this);
3510   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3511   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3512   attributes.set_is_evex_instruction();
3513   int dst_enc = kdst->encoding();
3514   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3515   emit_int8(0x3E);
3516   emit_operand(as_Register(dst_enc), src);
3517   emit_int8(vcc);
3518 }
3519 
3520 void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3521   assert(VM_Version::supports_avx512bw(), "");
3522   InstructionMark im(this);
3523   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3524   attributes.set_is_evex_instruction();
3525   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3526   int dst_enc = kdst->encoding();
3527   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3528   emit_int8(0x74);
3529   emit_operand(as_Register(dst_enc), src);
3530 }
3531 
3532 void Assembler::evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) {
3533   assert(VM_Version::supports_avx512vlbw(), "");
3534   assert(is_vector_masking(), "");    // For stub code use only
3535   InstructionMark im(this);
3536   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
3537   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3538   attributes.reset_is_clear_context();
3539   attributes.set_embedded_opmask_register_specifier(mask);
3540   attributes.set_is_evex_instruction();
3541   vex_prefix(src, nds->encoding(), kdst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3542   emit_int8(0x74);
3543   emit_operand(as_Register(kdst->encoding()), src);
3544 }
3545 
3546 // In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
3547 void Assembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
3548   assert(VM_Version::supports_sse2(), "");
3549   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3550   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3551   emit_int8(0x75);
3552   emit_int8((unsigned char)(0xC0 | encode));
3553 }
3554 
3555 // In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
3556 void Assembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3557   assert(VM_Version::supports_avx(), "");
3558   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3559   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3560   emit_int8(0x75);
3561   emit_int8((unsigned char)(0xC0 | encode));
3562 }
3563 
3564 // In this context, kdst is written with the mask used to process the equal components
3565 void Assembler::evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
3566   assert(VM_Version::supports_avx512bw(), "");
3567   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3568   attributes.set_is_evex_instruction();
3569   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3570   emit_int8(0x75);
3571   emit_int8((unsigned char)(0xC0 | encode));
3572 }
3573 
3574 void Assembler::evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3575   assert(VM_Version::supports_avx512bw(), "");
3576   InstructionMark im(this);
3577   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
3578   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
3579   attributes.set_is_evex_instruction();
3580   int dst_enc = kdst->encoding();
3581   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3582   emit_int8(0x75);
3583   emit_operand(as_Register(dst_enc), src);
3584 }
3585 
3586 // In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
3587 void Assembler::pcmpeqd(XMMRegister dst, XMMRegister src) {
3588   assert(VM_Version::supports_sse2(), "");
3589   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3590   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3591   emit_int8(0x76);
3592   emit_int8((unsigned char)(0xC0 | encode));
3593 }
3594 
3595 // In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
3596 void Assembler::vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3597   assert(VM_Version::supports_avx(), "");
3598   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3599   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3600   emit_int8(0x76);
3601   emit_int8((unsigned char)(0xC0 | encode));
3602 }
3603 
3604 // In this context, kdst is written with the mask used to process the equal components
3605 void Assembler::evpcmpeqd(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
3606   assert(VM_Version::supports_evex(), "");
3607   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3608   attributes.set_is_evex_instruction();
3609   attributes.reset_is_clear_context();
3610   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3611   emit_int8(0x76);
3612   emit_int8((unsigned char)(0xC0 | encode));
3613 }
3614 
3615 void Assembler::evpcmpeqd(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3616   assert(VM_Version::supports_evex(), "");
3617   InstructionMark im(this);
3618   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3619   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
3620   attributes.reset_is_clear_context();
3621   attributes.set_is_evex_instruction();
3622   int dst_enc = kdst->encoding();
3623   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3624   emit_int8(0x76);
3625   emit_operand(as_Register(dst_enc), src);
3626 }
3627 
3628 // In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
3629 void Assembler::pcmpeqq(XMMRegister dst, XMMRegister src) {
3630   assert(VM_Version::supports_sse4_1(), "");
3631   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3632   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3633   emit_int8(0x29);
3634   emit_int8((unsigned char)(0xC0 | encode));
3635 }
3636 
3637 // In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
3638 void Assembler::vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
3639   assert(VM_Version::supports_avx(), "");
3640   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3641   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3642   emit_int8(0x29);
3643   emit_int8((unsigned char)(0xC0 | encode));
3644 }
3645 
3646 // In this context, kdst is written with the mask used to process the equal components
3647 void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
3648   assert(VM_Version::supports_evex(), "");
3649   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3650   attributes.reset_is_clear_context();
3651   attributes.set_is_evex_instruction();
3652   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3653   emit_int8(0x29);
3654   emit_int8((unsigned char)(0xC0 | encode));
3655 }
3656 
3657 // In this context, kdst is written with the mask used to process the equal components
3658 void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
3659   assert(VM_Version::supports_evex(), "");
3660   InstructionMark im(this);
3661   InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3662   attributes.reset_is_clear_context();
3663   attributes.set_is_evex_instruction();
3664   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
3665   int dst_enc = kdst->encoding();
3666   vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3667   emit_int8(0x29);
3668   emit_operand(as_Register(dst_enc), src);
3669 }
3670 
3671 void Assembler::pmovmskb(Register dst, XMMRegister src) {
3672   assert(VM_Version::supports_sse2(), "");
3673   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3674   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3675   emit_int8((unsigned char)0xD7);
3676   emit_int8((unsigned char)(0xC0 | encode));
3677 }
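
// pmovmskb collects the most-significant bit of every byte of src into the
// low bits of the destination GPR (16 bits here, 32 bits for the 256-bit
// vpmovmskb below), which callers can then test or scan.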
3678 
3679 void Assembler::vpmovmskb(Register dst, XMMRegister src) {
3680   assert(VM_Version::supports_avx2(), "");
3681   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
3682   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3683   emit_int8((unsigned char)0xD7);
3684   emit_int8((unsigned char)(0xC0 | encode));
3685 }
3686 
3687 void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
3688   assert(VM_Version::supports_sse4_1(), "");
3689   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3690   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3691   emit_int8(0x16);
3692   emit_int8((unsigned char)(0xC0 | encode));
3693   emit_int8(imm8);
3694 }
3695 
3696 void Assembler::pextrd(Address dst, XMMRegister src, int imm8) {
3697   assert(VM_Version::supports_sse4_1(), "");
3698   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3699   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
3700   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3701   emit_int8(0x16);
3702   emit_operand(src, dst);
3703   emit_int8(imm8);
3704 }
3705 
3706 void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
3707   assert(VM_Version::supports_sse4_1(), "");
3708   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3709   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3710   emit_int8(0x16);
3711   emit_int8((unsigned char)(0xC0 | encode));
3712   emit_int8(imm8);
3713 }
3714 
3715 void Assembler::pextrq(Address dst, XMMRegister src, int imm8) {
3716   assert(VM_Version::supports_sse4_1(), "");
3717   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3718   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
3719   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3720   emit_int8(0x16);
3721   emit_operand(src, dst);
3722   emit_int8(imm8);
3723 }
3724 
3725 void Assembler::pextrw(Register dst, XMMRegister src, int imm8) {
3726   assert(VM_Version::supports_sse2(), "");
3727   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3728   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3729   emit_int8((unsigned char)0xC5);
3730   emit_int8((unsigned char)(0xC0 | encode));
3731   emit_int8(imm8);
3732 }
3733 
3734 void Assembler::pextrw(Address dst, XMMRegister src, int imm8) {
3735   assert(VM_Version::supports_sse4_1(), "");
3736   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3737   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
3738   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3739   emit_int8((unsigned char)0x15);
3740   emit_operand(src, dst);
3741   emit_int8(imm8);
3742 }
3743 
3744 void Assembler::pextrb(Address dst, XMMRegister src, int imm8) {
3745   assert(VM_Version::supports_sse4_1(), "");
3746   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3747   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
3748   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3749   emit_int8(0x14);
3750   emit_operand(src, dst);
3751   emit_int8(imm8);
3752 }
3753 
3754 void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
3755   assert(VM_Version::supports_sse4_1(), "");
3756   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3757   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3758   emit_int8(0x22);
3759   emit_int8((unsigned char)(0xC0 | encode));
3760   emit_int8(imm8);
3761 }
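
// Encoding sketch (illustrative; dst = xmm0, src = rax):
//   pinsrd(xmm0, rax, 1)  =>  66 0F 3A 22 C0 01
// imm8 selects the 32-bit lane of dst that receives the GPR value.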
3762 
3763 void Assembler::pinsrd(XMMRegister dst, Address src, int imm8) {
3764   assert(VM_Version::supports_sse4_1(), "");
3765   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3766   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
3767   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3768   emit_int8(0x22);
3769   emit_operand(dst, src);
3770   emit_int8(imm8);
3771 }
3772 
3773 void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
3774   assert(VM_Version::supports_sse4_1(), "");
3775   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3776   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3777   emit_int8(0x22);
3778   emit_int8((unsigned char)(0xC0 | encode));
3779   emit_int8(imm8);
3780 }
3781 
3782 void Assembler::pinsrq(XMMRegister dst, Address src, int imm8) {
3783   assert(VM_Version::supports_sse4_1(), "");
3784   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
3785   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
3786   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3787   emit_int8(0x22);
3788   emit_operand(dst, src);
3789   emit_int8(imm8);
3790 }
3791 
3792 void Assembler::pinsrw(XMMRegister dst, Register src, int imm8) {
3793   assert(VM_Version::supports_sse2(), "");
3794   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3795   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3796   emit_int8((unsigned char)0xC4);
3797   emit_int8((unsigned char)(0xC0 | encode));
3798   emit_int8(imm8);
3799 }
3800 
3801 void Assembler::pinsrw(XMMRegister dst, Address src, int imm8) {
3802   assert(VM_Version::supports_sse2(), "");
3803   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3804   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
3805   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
3806   emit_int8((unsigned char)0xC4);
3807   emit_operand(dst, src);
3808   emit_int8(imm8);
3809 }
3810 
3811 void Assembler::pinsrb(XMMRegister dst, Address src, int imm8) {
3812   assert(VM_Version::supports_sse4_1(), "");
3813   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3814   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
3815   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
3816   emit_int8(0x20);
3817   emit_operand(dst, src);
3818   emit_int8(imm8);
3819 }
3820 
3821 void Assembler::pmovzxbw(XMMRegister dst, Address src) {
3822   assert(VM_Version::supports_sse4_1(), "");
3823   InstructionMark im(this);
3824   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3825   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
3826   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3827   emit_int8(0x30);
3828   emit_operand(dst, src);
3829 }
3830 
3831 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
3832   assert(VM_Version::supports_sse4_1(), "");
3833   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3834   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3835   emit_int8(0x30);
3836   emit_int8((unsigned char)(0xC0 | encode));
3837 }
3838 
3839 void Assembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
3840   assert(VM_Version::supports_avx(), "");
3841   InstructionMark im(this);
3842   assert(dst != xnoreg, "sanity");
3843   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3844   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
3845   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3846   emit_int8(0x30);
3847   emit_operand(dst, src);
3848 }
3849 
3850 void Assembler::evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len) {
3851   assert(is_vector_masking(), "");
3852   assert(VM_Version::supports_avx512vlbw(), "");
3853   assert(dst != xnoreg, "sanity");
3854   InstructionMark im(this);
3855   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
3856   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
3857   attributes.set_embedded_opmask_register_specifier(mask);
3858   attributes.set_is_evex_instruction();
3859   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3860   emit_int8(0x30);
3861   emit_operand(dst, src);
3862 }
3863 
3864 void Assembler::evpmovwb(Address dst, XMMRegister src, int vector_len) {
3865   assert(VM_Version::supports_avx512vlbw(), "");
3866   assert(src != xnoreg, "sanity");
3867   InstructionMark im(this);
3868   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
3869   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
3870   attributes.set_is_evex_instruction();
3871   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
3872   emit_int8(0x30);
3873   emit_operand(src, dst);
3874 }
3875 
3876 void Assembler::evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len) {
3877   assert(is_vector_masking(), "");
3878   assert(VM_Version::supports_avx512vlbw(), "");
3879   assert(src != xnoreg, "sanity");
3880   InstructionMark im(this);
3881   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
3882   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
3883   attributes.reset_is_clear_context();
3884   attributes.set_embedded_opmask_register_specifier(mask);
3885   attributes.set_is_evex_instruction();
3886   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
3887   emit_int8(0x30);
3888   emit_operand(src, dst);
3889 }
3890 
3891 // generic
3892 void Assembler::pop(Register dst) {
3893   int encode = prefix_and_encode(dst->encoding());
3894   emit_int8(0x58 | encode);
3895 }
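
// Encoding sketch: pop(rax) => 58, while pop(r8) first gains a REX.B prefix
// from prefix_and_encode() and becomes 41 58.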
3896 
3897 void Assembler::popcntl(Register dst, Address src) {
3898   assert(VM_Version::supports_popcnt(), "must support");
3899   InstructionMark im(this);
3900   emit_int8((unsigned char)0xF3);
3901   prefix(src, dst);
3902   emit_int8(0x0F);
3903   emit_int8((unsigned char)0xB8);
3904   emit_operand(dst, src);
3905 }
3906 
3907 void Assembler::popcntl(Register dst, Register src) {
3908   assert(VM_Version::supports_popcnt(), "must support");
3909   emit_int8((unsigned char)0xF3);
3910   int encode = prefix_and_encode(dst->encoding(), src->encoding());
3911   emit_int8(0x0F);
3912   emit_int8((unsigned char)0xB8);
3913   emit_int8((unsigned char)(0xC0 | encode));
3914 }
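
// Encoding sketch (illustrative; dst = rax, src = rbx):
//   popcntl(rax, rbx)  =>  F3 0F B8 C3   (popcnt eax, ebx)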
3915 
3916 void Assembler::vpopcntd(XMMRegister dst, XMMRegister src, int vector_len) {
3917   assert(VM_Version::supports_vpopcntdq(), "must support vpopcntdq feature");
3918   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
3919   attributes.set_is_evex_instruction();
3920   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
3921   emit_int8(0x55);
3922   emit_int8((unsigned char)(0xC0 | encode));
3923 }
3924 
3925 void Assembler::popf() {
3926   emit_int8((unsigned char)0x9D);
3927 }
3928 
3929 #ifndef _LP64 // no 32bit push/pop on amd64
3930 void Assembler::popl(Address dst) {
3931   // NOTE: this would adjust the stack by 8 bytes on 64-bit
3932   InstructionMark im(this);
3933   prefix(dst);
3934   emit_int8((unsigned char)0x8F);
3935   emit_operand(rax, dst);
3936 }
3937 #endif
3938 
3939 void Assembler::prefetch_prefix(Address src) {
3940   prefix(src);
3941   emit_int8(0x0F);
3942 }
3943 
3944 void Assembler::prefetchnta(Address src) {
3945   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3946   InstructionMark im(this);
3947   prefetch_prefix(src);
3948   emit_int8(0x18);
3949   emit_operand(rax, src); // 0, src
3950 }
3951 
3952 void Assembler::prefetchr(Address src) {
3953   assert(VM_Version::supports_3dnow_prefetch(), "must support");
3954   InstructionMark im(this);
3955   prefetch_prefix(src);
3956   emit_int8(0x0D);
3957   emit_operand(rax, src); // 0, src
3958 }
3959 
3960 void Assembler::prefetcht0(Address src) {
3961   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3962   InstructionMark im(this);
3963   prefetch_prefix(src);
3964   emit_int8(0x18);
3965   emit_operand(rcx, src); // 1, src
3966 }
3967 
3968 void Assembler::prefetcht1(Address src) {
3969   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3970   InstructionMark im(this);
3971   prefetch_prefix(src);
3972   emit_int8(0x18);
3973   emit_operand(rdx, src); // 2, src
3974 }
3975 
3976 void Assembler::prefetcht2(Address src) {
3977   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
3978   InstructionMark im(this);
3979   prefetch_prefix(src);
3980   emit_int8(0x18);
3981   emit_operand(rbx, src); // 3, src
3982 }
3983 
3984 void Assembler::prefetchw(Address src) {
3985   assert(VM_Version::supports_3dnow_prefetch(), "must support");
3986   InstructionMark im(this);
3987   prefetch_prefix(src);
3988   emit_int8(0x0D);
3989   emit_operand(rcx, src); // 1, src
3990 }
3991 
3992 void Assembler::prefix(Prefix p) {
3993   emit_int8(p);
3994 }
3995 
3996 void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
3997   assert(VM_Version::supports_ssse3(), "");
3998   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
3999   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4000   emit_int8(0x00);
4001   emit_int8((unsigned char)(0xC0 | encode));
4002 }
4003 
4004 void Assembler::vpshufb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
4005   assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
4006          vector_len == AVX_256bit? VM_Version::supports_avx2() :
4007          0, "");
4008   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
4009   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4010   emit_int8(0x00);
4011   emit_int8((unsigned char)(0xC0 | encode));
4012 }
4013 
4014 void Assembler::pshufb(XMMRegister dst, Address src) {
4015   assert(VM_Version::supports_ssse3(), "");
4016   InstructionMark im(this);
4017   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4018   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
4019   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4020   emit_int8(0x00);
4021   emit_operand(dst, src);
4022 }
4023 
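     // PSHUFD: each 2-bit field of imm8 selects which source dword supplies the
     // corresponding destination dword, e.g. mode 0x1B reverses the four dwords.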
4024 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
4025   assert(isByte(mode), "invalid value");
4026   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4027   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
4028   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4029   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4030   emit_int8(0x70);
4031   emit_int8((unsigned char)(0xC0 | encode));
4032   emit_int8(mode & 0xFF);
4033 }
4034 
4035 void Assembler::vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_len) {
4036   assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
4037          vector_len == AVX_256bit? VM_Version::supports_avx2() :
4038          0, "");
4039   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4040   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4041   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4042   emit_int8(0x70);
4043   emit_int8((unsigned char)(0xC0 | encode));
4044   emit_int8(mode & 0xFF);
4045 }
4046 
4047 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
4048   assert(isByte(mode), "invalid value");
4049   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4050   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4051   InstructionMark im(this);
4052   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4053   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
4054   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4055   emit_int8(0x70);
4056   emit_operand(dst, src);
4057   emit_int8(mode & 0xFF);
4058 }
4059 
4060 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
4061   assert(isByte(mode), "invalid value");
4062   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4063   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4064   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4065   emit_int8(0x70);
4066   emit_int8((unsigned char)(0xC0 | encode));
4067   emit_int8(mode & 0xFF);
4068 }
4069 
4070 void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
4071   assert(isByte(mode), "invalid value");
4072   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4073   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4074   InstructionMark im(this);
4075   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4076   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
4077   simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4078   emit_int8(0x70);
4079   emit_operand(dst, src);
4080   emit_int8(mode & 0xFF);
4081 }

4082 void Assembler::evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
4083   assert(VM_Version::supports_evex(), "requires EVEX support");
4084   assert(vector_len == Assembler::AVX_256bit || vector_len == Assembler::AVX_512bit, "");
4085   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
4086   attributes.set_is_evex_instruction();
4087   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4088   emit_int8(0x43);
4089   emit_int8((unsigned char)(0xC0 | encode));
4090   emit_int8(imm8 & 0xFF);
4091 }
4092 
4093 void Assembler::psrldq(XMMRegister dst, int shift) {
4094   // Shift right 128-bit value in dst XMMRegister by shift number of bytes.
4095   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4096   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
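       // XMM3 is for /3 encoding: 66 0F 73 /3 ib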
4097   int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4098   emit_int8(0x73);
4099   emit_int8((unsigned char)(0xC0 | encode));
4100   emit_int8(shift);
4101 }
4102 
4103 void Assembler::pslldq(XMMRegister dst, int shift) {
4104   // Shift left 128-bit value in dst XMMRegister by shift number of bytes.
4105   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4106   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
4107   // XMM7 is for /7 encoding: 66 0F 73 /7 ib
4108   int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4109   emit_int8(0x73);
4110   emit_int8((unsigned char)(0xC0 | encode));
4111   emit_int8(shift);
4112 }
4113 
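     // PTEST writes only flags: ZF is set if (dst AND src) == 0 and CF is set
     // if (dst AND NOT src) == 0.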
4114 void Assembler::ptest(XMMRegister dst, Address src) {
4115   assert(VM_Version::supports_sse4_1(), "");
4116   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4117   InstructionMark im(this);
4118   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4119   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4120   emit_int8(0x17);
4121   emit_operand(dst, src);
4122 }
4123 
4124 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
4125   assert(VM_Version::supports_sse4_1(), "");
4126   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4127   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4128   emit_int8(0x17);
4129   emit_int8((unsigned char)(0xC0 | encode));
4130 }
4131 
4132 void Assembler::vptest(XMMRegister dst, Address src) {
4133   assert(VM_Version::supports_avx(), "");
4134   InstructionMark im(this);
4135   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4136   assert(dst != xnoreg, "sanity");
4137   // swap src<->dst for encoding
4138   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4139   emit_int8(0x17);
4140   emit_operand(dst, src);
4141 }
4142 
4143 void Assembler::vptest(XMMRegister dst, XMMRegister src) {
4144   assert(VM_Version::supports_avx(), "");
4145   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4146   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4147   emit_int8(0x17);
4148   emit_int8((unsigned char)(0xC0 | encode));
4149 }
4150 
4151 void Assembler::punpcklbw(XMMRegister dst, Address src) {
4152   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4153   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4154   InstructionMark im(this);
4155   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ false, /* uses_vl */ true);
4156   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
4157   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4158   emit_int8(0x60);
4159   emit_operand(dst, src);
4160 }
4161 
4162 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
4163   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4164   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ false, /* uses_vl */ true);
4165   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4166   emit_int8(0x60);
4167   emit_int8((unsigned char)(0xC0 | encode));
4168 }
4169 
4170 void Assembler::punpckldq(XMMRegister dst, Address src) {
4171   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4172   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
4173   InstructionMark im(this);
4174   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4175   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
4176   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4177   emit_int8(0x62);
4178   emit_operand(dst, src);
4179 }
4180 
4181 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
4182   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4183   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4184   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4185   emit_int8(0x62);
4186   emit_int8((unsigned char)(0xC0 | encode));
4187 }
4188 
4189 void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
4190   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4191   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
4192   attributes.set_rex_vex_w_reverted();
4193   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4194   emit_int8(0x6C);
4195   emit_int8((unsigned char)(0xC0 | encode));
4196 }
4197 
4198 void Assembler::push(int32_t imm32) {
4199   // In 64-bit mode this pushes 64 bits onto the stack but takes
4200   // only a 32-bit, sign-extended immediate
4201   emit_int8(0x68);
4202   emit_int32(imm32);
4203 }
4204 
4205 void Assembler::push(Register src) {
4206   int encode = prefix_and_encode(src->encoding());
4207 
4208   emit_int8(0x50 | encode);
4209 }
4210 
4211 void Assembler::pushf() {
4212   emit_int8((unsigned char)0x9C);
4213 }
4214 
4215 #ifndef _LP64 // no 32bit push/pop on amd64
4216 void Assembler::pushl(Address src) {
4217   // 32-bit only; on 64-bit, pushing from memory would push 64 bits
4218   InstructionMark im(this);
4219   prefix(src);
4220   emit_int8((unsigned char)0xFF);
4221   emit_operand(rsi, src);
4222 }
4223 #endif
4224 
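     // RCL rotates through the carry flag: D1 /2 is the rotate-by-1 form,
     // C1 /2 ib takes an immediate count.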
4225 void Assembler::rcll(Register dst, int imm8) {
4226   assert(isShiftCount(imm8), "illegal shift count");
4227   int encode = prefix_and_encode(dst->encoding());
4228   if (imm8 == 1) {
4229     emit_int8((unsigned char)0xD1);
4230     emit_int8((unsigned char)(0xD0 | encode));
4231   } else {
4232     emit_int8((unsigned char)0xC1);
4233     emit_int8((unsigned char)(0xD0 | encode));
4234     emit_int8(imm8);
4235   }
4236 }
4237 
4238 void Assembler::rcpps(XMMRegister dst, XMMRegister src) {
4239   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4240   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4241   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4242   emit_int8(0x53);
4243   emit_int8((unsigned char)(0xC0 | encode));
4244 }
4245 
4246 void Assembler::rcpss(XMMRegister dst, XMMRegister src) {
4247   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4248   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4249   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4250   emit_int8(0x53);
4251   emit_int8((unsigned char)(0xC0 | encode));
4252 }
4253 
4254 void Assembler::rdtsc() {
4255   emit_int8((unsigned char)0x0F);
4256   emit_int8((unsigned char)0x31);
4257 }
4258 
4259 // copies data from [esi] to [edi] using rcx pointer-sized words
4260 // generic
4261 void Assembler::rep_mov() {
4262   emit_int8((unsigned char)0xF3);
4263   // MOVSQ
4264   LP64_ONLY(prefix(REX_W));
4265   emit_int8((unsigned char)0xA5);
4266 }
4267 
4268 // sets rcx bytes at [edi] to the value in al
4269 void Assembler::rep_stosb() {
4270   emit_int8((unsigned char)0xF3); // REP
4271   LP64_ONLY(prefix(REX_W));
4272   emit_int8((unsigned char)0xAA); // STOSB
4273 }
4274 
4275 // sets rcx pointer-sized words at [edi] to the value in rax
4276 // generic
4277 void Assembler::rep_stos() {
4278   emit_int8((unsigned char)0xF3); // REP
4279   LP64_ONLY(prefix(REX_W));       // LP64:STOSQ, LP32:STOSD
4280   emit_int8((unsigned char)0xAB);
4281 }
4282 
4283 // scans rcx pointer-sized words at [edi] for an occurrence of rax
4284 // generic
4285 void Assembler::repne_scan() {
4286   emit_int8((unsigned char)0xF2);
4287   // SCASQ
4288   LP64_ONLY(prefix(REX_W));
4289   emit_int8((unsigned char)0xAF);
4290 }
4291 
4292 #ifdef _LP64
4293 // scans rcx 4-byte words at [edi] for an occurrence of eax
4294 // generic
4295 void Assembler::repne_scanl() {
4296   emit_int8((unsigned char)0xF2);
4297   // SCASL
4298   emit_int8((unsigned char)0xAF);
4299 }
4300 #endif
4301 
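     // Near return: C3, or C2 iw to additionally pop imm16 bytes of arguments;
     // e.g. ret(16) emits C2 10 00.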
4302 void Assembler::ret(int imm16) {
4303   if (imm16 == 0) {
4304     emit_int8((unsigned char)0xC3);
4305   } else {
4306     emit_int8((unsigned char)0xC2);
4307     emit_int16(imm16);
4308   }
4309 }
4310 
4311 void Assembler::sahf() {
4312 #ifdef _LP64
4313   // Not supported in 64-bit mode
4314   ShouldNotReachHere();
4315 #endif
4316   emit_int8((unsigned char)0x9E);
4317 }
4318 
4319 void Assembler::sarl(Register dst, int imm8) {
4320   int encode = prefix_and_encode(dst->encoding());
4321   assert(isShiftCount(imm8), "illegal shift count");
4322   if (imm8 == 1) {
4323     emit_int8((unsigned char)0xD1);
4324     emit_int8((unsigned char)(0xF8 | encode));
4325   } else {
4326     emit_int8((unsigned char)0xC1);
4327     emit_int8((unsigned char)(0xF8 | encode));
4328     emit_int8(imm8);
4329   }
4330 }
4331 
4332 void Assembler::sarl(Register dst) {
4333   int encode = prefix_and_encode(dst->encoding());
4334   emit_int8((unsigned char)0xD3);
4335   emit_int8((unsigned char)(0xF8 | encode));
4336 }
4337 
4338 void Assembler::sbbl(Address dst, int32_t imm32) {
4339   InstructionMark im(this);
4340   prefix(dst);
4341   emit_arith_operand(0x81, rbx, dst, imm32);
4342 }
4343 
4344 void Assembler::sbbl(Register dst, int32_t imm32) {
4345   prefix(dst);
4346   emit_arith(0x81, 0xD8, dst, imm32);
4347 }
4348 
4349 
4350 void Assembler::sbbl(Register dst, Address src) {
4351   InstructionMark im(this);
4352   prefix(src, dst);
4353   emit_int8(0x1B);
4354   emit_operand(dst, src);
4355 }
4356 
4357 void Assembler::sbbl(Register dst, Register src) {
4358   (void) prefix_and_encode(dst->encoding(), src->encoding());
4359   emit_arith(0x1B, 0xC0, dst, src);
4360 }
4361 
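     // SETcc (0F 90+cc /r) stores 0 or 1 into a byte register. Passing 'true'
     // to prefix_and_encode marks this as a byte instruction so a REX prefix is
     // emitted where needed to reach the low byte of rsp/rbp/rsi/rdi in 64-bit mode.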
4362 void Assembler::setb(Condition cc, Register dst) {
4363   assert(0 <= cc && cc < 16, "illegal cc");
4364   int encode = prefix_and_encode(dst->encoding(), true);
4365   emit_int8(0x0F);
4366   emit_int8((unsigned char)(0x90 | cc));
4367   emit_int8((unsigned char)(0xC0 | encode));
4368 }
4369 
4370 void Assembler::palignr(XMMRegister dst, XMMRegister src, int imm8) {
4371   assert(VM_Version::supports_ssse3(), "");
4372   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
4373   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4374   emit_int8((unsigned char)0x0F);
4375   emit_int8((unsigned char)(0xC0 | encode));
4376   emit_int8(imm8);
4377 }
4378 
4379 void Assembler::vpalignr(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
4380   assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
4381          vector_len == AVX_256bit? VM_Version::supports_avx2() :
4382          0, "");
4383   InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
4384   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4385   emit_int8((unsigned char)0x0F);
4386   emit_int8((unsigned char)(0xC0 | encode));
4387   emit_int8(imm8);
4388 }
4389 
4390 void Assembler::pblendw(XMMRegister dst, XMMRegister src, int imm8) {
4391   assert(VM_Version::supports_sse4_1(), "");
4392   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
4393   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
4394   emit_int8((unsigned char)0x0E);
4395   emit_int8((unsigned char)(0xC0 | encode));
4396   emit_int8(imm8);
4397 }
4398 
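     // The SHA extension instructions below use legacy (non-VEX) encodings in
     // the 0F 38 and 0F 3A opcode spaces, hence rex_prefix_and_encode rather
     // than the VEX/EVEX helpers.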
4399 void Assembler::sha1rnds4(XMMRegister dst, XMMRegister src, int imm8) {
4400   assert(VM_Version::supports_sha(), "");
4401   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3A, /* rex_w */ false);
4402   emit_int8((unsigned char)0xCC);
4403   emit_int8((unsigned char)(0xC0 | encode));
4404   emit_int8((unsigned char)imm8);
4405 }
4406 
4407 void Assembler::sha1nexte(XMMRegister dst, XMMRegister src) {
4408   assert(VM_Version::supports_sha(), "");
4409   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4410   emit_int8((unsigned char)0xC8);
4411   emit_int8((unsigned char)(0xC0 | encode));
4412 }
4413 
4414 void Assembler::sha1msg1(XMMRegister dst, XMMRegister src) {
4415   assert(VM_Version::supports_sha(), "");
4416   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4417   emit_int8((unsigned char)0xC9);
4418   emit_int8((unsigned char)(0xC0 | encode));
4419 }
4420 
4421 void Assembler::sha1msg2(XMMRegister dst, XMMRegister src) {
4422   assert(VM_Version::supports_sha(), "");
4423   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4424   emit_int8((unsigned char)0xCA);
4425   emit_int8((unsigned char)(0xC0 | encode));
4426 }
4427 
4428 // xmm0 is an implicit additional source operand of this instruction.
4429 void Assembler::sha256rnds2(XMMRegister dst, XMMRegister src) {
4430   assert(VM_Version::supports_sha(), "");
4431   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4432   emit_int8((unsigned char)0xCB);
4433   emit_int8((unsigned char)(0xC0 | encode));
4434 }
4435 
4436 void Assembler::sha256msg1(XMMRegister dst, XMMRegister src) {
4437   assert(VM_Version::supports_sha(), "");
4438   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4439   emit_int8((unsigned char)0xCC);
4440   emit_int8((unsigned char)(0xC0 | encode));
4441 }
4442 
4443 void Assembler::sha256msg2(XMMRegister dst, XMMRegister src) {
4444   assert(VM_Version::supports_sha(), "");
4445   int encode = rex_prefix_and_encode(dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, /* rex_w */ false);
4446   emit_int8((unsigned char)0xCD);
4447   emit_int8((unsigned char)(0xC0 | encode));
4448 }
4449 
4450 
4451 void Assembler::shll(Register dst, int imm8) {
4452   assert(isShiftCount(imm8), "illegal shift count");
4453   int encode = prefix_and_encode(dst->encoding());
4454   if (imm8 == 1) {
4455     emit_int8((unsigned char)0xD1);
4456     emit_int8((unsigned char)(0xE0 | encode));
4457   } else {
4458     emit_int8((unsigned char)0xC1);
4459     emit_int8((unsigned char)(0xE0 | encode));
4460     emit_int8(imm8);
4461   }
4462 }
4463 
4464 void Assembler::shll(Register dst) {
4465   int encode = prefix_and_encode(dst->encoding());
4466   emit_int8((unsigned char)0xD3);
4467   emit_int8((unsigned char)(0xE0 | encode));
4468 }
4469 
4470 void Assembler::shrl(Register dst, int imm8) {
4471   assert(isShiftCount(imm8), "illegal shift count");
4472   int encode = prefix_and_encode(dst->encoding());
4473   emit_int8((unsigned char)0xC1);
4474   emit_int8((unsigned char)(0xE8 | encode));
4475   emit_int8(imm8);
4476 }
4477 
4478 void Assembler::shrl(Register dst) {
4479   int encode = prefix_and_encode(dst->encoding());
4480   emit_int8((unsigned char)0xD3);
4481   emit_int8((unsigned char)(0xE8 | encode));
4482 }
4483 
4484 // copies a single 32-bit word from [esi] to [edi]
4485 void Assembler::smovl() {
4486   emit_int8((unsigned char)0xA5);
4487 }
4488 
4489 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
4490   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4491   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4492   attributes.set_rex_vex_w_reverted();
4493   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4494   emit_int8(0x51);
4495   emit_int8((unsigned char)(0xC0 | encode));
4496 }
4497 
4498 void Assembler::sqrtsd(XMMRegister dst, Address src) {
4499   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4500   InstructionMark im(this);
4501   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4502   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4503   attributes.set_rex_vex_w_reverted();
4504   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4505   emit_int8(0x51);
4506   emit_operand(dst, src);
4507 }
4508 
4509 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
4510   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4511   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4512   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4513   emit_int8(0x51);
4514   emit_int8((unsigned char)(0xC0 | encode));
4515 }
4516 
4517 void Assembler::std() {
4518   emit_int8((unsigned char)0xFD);
4519 }
4520 
4521 void Assembler::sqrtss(XMMRegister dst, Address src) {
4522   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4523   InstructionMark im(this);
4524   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4525   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4526   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4527   emit_int8(0x51);
4528   emit_operand(dst, src);
4529 }
4530 
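     // STMXCSR (0F AE /3) stores the 32-bit MXCSR control/status register;
     // as_Register(3) supplies only the ModRM /digit.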
4531 void Assembler::stmxcsr(Address dst) {
4532   if (UseAVX > 0) {
4533     assert(VM_Version::supports_avx(), "");
4534     InstructionMark im(this);
4535     InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
4536     vex_prefix(dst, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4537     emit_int8((unsigned char)0xAE);
4538     emit_operand(as_Register(3), dst);
4539   } else {
4540     NOT_LP64(assert(VM_Version::supports_sse(), ""));
4541     InstructionMark im(this);
4542     prefix(dst);
4543     emit_int8(0x0F);
4544     emit_int8((unsigned char)0xAE);
4545     emit_operand(as_Register(3), dst);
4546   }
4547 }
4548 
4549 void Assembler::subl(Address dst, int32_t imm32) {
4550   InstructionMark im(this);
4551   prefix(dst);
4552   emit_arith_operand(0x81, rbp, dst, imm32);
4553 }
4554 
4555 void Assembler::subl(Address dst, Register src) {
4556   InstructionMark im(this);
4557   prefix(dst, src);
4558   emit_int8(0x29);
4559   emit_operand(src, dst);
4560 }
4561 
4562 void Assembler::subl(Register dst, int32_t imm32) {
4563   prefix(dst);
4564   emit_arith(0x81, 0xE8, dst, imm32);
4565 }
4566 
4567 // Force generation of a 4 byte immediate value even if it fits into 8bit
4568 void Assembler::subl_imm32(Register dst, int32_t imm32) {
4569   prefix(dst);
4570   emit_arith_imm32(0x81, 0xE8, dst, imm32);
4571 }
4572 
4573 void Assembler::subl(Register dst, Address src) {
4574   InstructionMark im(this);
4575   prefix(src, dst);
4576   emit_int8(0x2B);
4577   emit_operand(dst, src);
4578 }
4579 
4580 void Assembler::subl(Register dst, Register src) {
4581   (void) prefix_and_encode(dst->encoding(), src->encoding());
4582   emit_arith(0x2B, 0xC0, dst, src);
4583 }
4584 
4585 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
4586   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4587   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4588   attributes.set_rex_vex_w_reverted();
4589   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4590   emit_int8(0x5C);
4591   emit_int8((unsigned char)(0xC0 | encode));
4592 }
4593 
4594 void Assembler::subsd(XMMRegister dst, Address src) {
4595   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4596   InstructionMark im(this);
4597   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4598   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4599   attributes.set_rex_vex_w_reverted();
4600   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4601   emit_int8(0x5C);
4602   emit_operand(dst, src);
4603 }
4604 
4605 void Assembler::subss(XMMRegister dst, XMMRegister src) {
4606   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4607   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4608   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4609   emit_int8(0x5C);
4610   emit_int8((unsigned char)(0xC0 | encode));
4611 }
4612 
4613 void Assembler::subss(XMMRegister dst, Address src) {
4614   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4615   InstructionMark im(this);
4616   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4617   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4618   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4619   emit_int8(0x5C);
4620   emit_operand(dst, src);
4621 }
4622 
4623 void Assembler::testb(Register dst, int imm8) {
4624   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
4625   (void) prefix_and_encode(dst->encoding(), true);
4626   emit_arith_b(0xF6, 0xC0, dst, imm8);
4627 }
4628 
4629 void Assembler::testb(Address dst, int imm8) {
4630   InstructionMark im(this);
4631   prefix(dst);
4632   emit_int8((unsigned char)0xF6);
4633   emit_operand(rax, dst, 1);
4634   emit_int8(imm8);
4635 }
4636 
4637 void Assembler::testl(Register dst, int32_t imm32) {
4638   // not using emit_arith because test
4639   // doesn't support sign-extension of
4640   // 8bit operands
4641   int encode = dst->encoding();
4642   if (encode == 0) {
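         // eax has a short form, A9 id, with no ModRM byte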
4643     emit_int8((unsigned char)0xA9);
4644   } else {
4645     encode = prefix_and_encode(encode);
4646     emit_int8((unsigned char)0xF7);
4647     emit_int8((unsigned char)(0xC0 | encode));
4648   }
4649   emit_int32(imm32);
4650 }
4651 
4652 void Assembler::testl(Register dst, Register src) {
4653   (void) prefix_and_encode(dst->encoding(), src->encoding());
4654   emit_arith(0x85, 0xC0, dst, src);
4655 }
4656 
4657 void Assembler::testl(Register dst, Address src) {
4658   InstructionMark im(this);
4659   prefix(src, dst);
4660   emit_int8((unsigned char)0x85);
4661   emit_operand(dst, src);
4662 }
4663 
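     // TZCNT (F3 0F BC /r) counts trailing zero bits. On CPUs without BMI1 the
     // F3 prefix is ignored and the same bytes decode as BSF, hence the explicit
     // feature assert.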
4664 void Assembler::tzcntl(Register dst, Register src) {
4665   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
4666   emit_int8((unsigned char)0xF3);
4667   int encode = prefix_and_encode(dst->encoding(), src->encoding());
4668   emit_int8(0x0F);
4669   emit_int8((unsigned char)0xBC);
4670   emit_int8((unsigned char)(0xC0 | encode));
4671 }
4672 
4673 void Assembler::tzcntq(Register dst, Register src) {
4674   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
4675   emit_int8((unsigned char)0xF3);
4676   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
4677   emit_int8(0x0F);
4678   emit_int8((unsigned char)0xBC);
4679   emit_int8((unsigned char)(0xC0 | encode));
4680 }
4681 
4682 void Assembler::ucomisd(XMMRegister dst, Address src) {
4683   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4684   InstructionMark im(this);
4685   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4686   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4687   attributes.set_rex_vex_w_reverted();
4688   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4689   emit_int8(0x2E);
4690   emit_operand(dst, src);
4691 }
4692 
4693 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
4694   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
4695   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4696   attributes.set_rex_vex_w_reverted();
4697   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
4698   emit_int8(0x2E);
4699   emit_int8((unsigned char)(0xC0 | encode));
4700 }
4701 
4702 void Assembler::ucomiss(XMMRegister dst, Address src) {
4703   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4704   InstructionMark im(this);
4705   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4706   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4707   simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4708   emit_int8(0x2E);
4709   emit_operand(dst, src);
4710 }
4711 
4712 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
4713   NOT_LP64(assert(VM_Version::supports_sse(), ""));
4714   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4715   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
4716   emit_int8(0x2E);
4717   emit_int8((unsigned char)(0xC0 | encode));
4718 }
4719 
4720 void Assembler::xabort(int8_t imm8) {
4721   emit_int8((unsigned char)0xC6);
4722   emit_int8((unsigned char)0xF8);
4723   emit_int8((unsigned char)(imm8 & 0xFF));
4724 }
4725 
4726 void Assembler::xaddb(Address dst, Register src) {
4727   InstructionMark im(this);
4728   prefix(dst, src, true);
4729   emit_int8(0x0F);
4730   emit_int8((unsigned char)0xC0);
4731   emit_operand(src, dst);
4732 }
4733 
4734 void Assembler::xaddw(Address dst, Register src) {
4735   InstructionMark im(this);
4736   emit_int8(0x66);
4737   prefix(dst, src);
4738   emit_int8(0x0F);
4739   emit_int8((unsigned char)0xC1);
4740   emit_operand(src, dst);
4741 }
4742 
4743 void Assembler::xaddl(Address dst, Register src) {
4744   InstructionMark im(this);
4745   prefix(dst, src);
4746   emit_int8(0x0F);
4747   emit_int8((unsigned char)0xC1);
4748   emit_operand(src, dst);
4749 }
4750 
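     // XBEGIN (C7 F8 rel32) starts a transactional region; rel32 is the
     // displacement to the abort handler. A bound label gets its displacement
     // computed immediately (minus the 6-byte instruction length); otherwise a
     // zero placeholder is emitted and patched when the label is bound.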
4751 void Assembler::xbegin(Label& abort, relocInfo::relocType rtype) {
4752   InstructionMark im(this);
4753   relocate(rtype);
4754   if (abort.is_bound()) {
4755     address entry = target(abort);
4756     assert(entry != NULL, "abort entry NULL");
4757     intptr_t offset = entry - pc();
4758     emit_int8((unsigned char)0xC7);
4759     emit_int8((unsigned char)0xF8);
4760     emit_int32(offset - 6); // 2 opcode bytes + 4-byte relative address
4761   } else {
4762     abort.add_patch_at(code(), locator());
4763     emit_int8((unsigned char)0xC7);
4764     emit_int8((unsigned char)0xF8);
4765     emit_int32(0);
4766   }
4767 }
4768 
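     // XCHG with a memory operand is implicitly locked; no LOCK prefix is
     // needed for the forms below.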
4769 void Assembler::xchgb(Register dst, Address src) { // xchg
4770   InstructionMark im(this);
4771   prefix(src, dst, true);
4772   emit_int8((unsigned char)0x86);
4773   emit_operand(dst, src);
4774 }
4775 
4776 void Assembler::xchgw(Register dst, Address src) { // xchg
4777   InstructionMark im(this);
4778   emit_int8(0x66);
4779   prefix(src, dst);
4780   emit_int8((unsigned char)0x87);
4781   emit_operand(dst, src);
4782 }
4783 
4784 void Assembler::xchgl(Register dst, Address src) { // xchg
4785   InstructionMark im(this);
4786   prefix(src, dst);
4787   emit_int8((unsigned char)0x87);
4788   emit_operand(dst, src);
4789 }
4790 
4791 void Assembler::xchgl(Register dst, Register src) {
4792   int encode = prefix_and_encode(dst->encoding(), src->encoding());
4793   emit_int8((unsigned char)0x87);
4794   emit_int8((unsigned char)(0xC0 | encode));
4795 }
4796 
4797 void Assembler::xend() {
4798   emit_int8((unsigned char)0x0F);
4799   emit_int8((unsigned char)0x01);
4800   emit_int8((unsigned char)0xD5);
4801 }
4802 
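     // XGETBV (0F 01 D0) reads the extended control register selected by ecx
     // into edx:eax; with ecx == 0 this returns XCR0, the XSAVE enabled-state mask.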
4803 void Assembler::xgetbv() {
4804   emit_int8(0x0F);
4805   emit_int8(0x01);
4806   emit_int8((unsigned char)0xD0);
4807 }
4808 
4809 void Assembler::xorl(Register dst, int32_t imm32) {
4810   prefix(dst);
4811   emit_arith(0x81, 0xF0, dst, imm32);
4812 }
4813 
4814 void Assembler::xorl(Register dst, Address src) {
4815   InstructionMark im(this);
4816   prefix(src, dst);
4817   emit_int8(0x33);
4818   emit_operand(dst, src);
4819 }
4820 
4821 void Assembler::xorl(Register dst, Register src) {
4822   (void) prefix_and_encode(dst->encoding(), src->encoding());
4823   emit_arith(0x33, 0xC0, dst, src);
4824 }
4825 
4826 void Assembler::xorb(Register dst, Address src) {
4827   InstructionMark im(this);
4828   prefix(src, dst);
4829   emit_int8(0x32);
4830   emit_operand(dst, src);
4831 }
4832 
4833 // AVX 3-operand scalar floating-point arithmetic instructions
4834 
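     // These non-destructive forms compute dst = nds OP src; for the scalar
     // operations the upper element bits of the result are copied from nds and
     // the rest of the destination vector register is zeroed.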
4835 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
4836   assert(VM_Version::supports_avx(), "");
4837   InstructionMark im(this);
4838   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4839   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4840   attributes.set_rex_vex_w_reverted();
4841   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4842   emit_int8(0x58);
4843   emit_operand(dst, src);
4844 }
4845 
4846 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4847   assert(VM_Version::supports_avx(), "");
4848   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4849   attributes.set_rex_vex_w_reverted();
4850   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4851   emit_int8(0x58);
4852   emit_int8((unsigned char)(0xC0 | encode));
4853 }
4854 
4855 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
4856   assert(VM_Version::supports_avx(), "");
4857   InstructionMark im(this);
4858   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4859   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4860   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4861   emit_int8(0x58);
4862   emit_operand(dst, src);
4863 }
4864 
4865 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4866   assert(VM_Version::supports_avx(), "");
4867   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4868   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4869   emit_int8(0x58);
4870   emit_int8((unsigned char)(0xC0 | encode));
4871 }
4872 
4873 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
4874   assert(VM_Version::supports_avx(), "");
4875   InstructionMark im(this);
4876   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4877   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4878   attributes.set_rex_vex_w_reverted();
4879   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4880   emit_int8(0x5E);
4881   emit_operand(dst, src);
4882 }
4883 
4884 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4885   assert(VM_Version::supports_avx(), "");
4886   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4887   attributes.set_rex_vex_w_reverted();
4888   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4889   emit_int8(0x5E);
4890   emit_int8((unsigned char)(0xC0 | encode));
4891 }
4892 
4893 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
4894   assert(VM_Version::supports_avx(), "");
4895   InstructionMark im(this);
4896   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4897   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4898   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4899   emit_int8(0x5E);
4900   emit_operand(dst, src);
4901 }
4902 
4903 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4904   assert(VM_Version::supports_avx(), "");
4905   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4906   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4907   emit_int8(0x5E);
4908   emit_int8((unsigned char)(0xC0 | encode));
4909 }
4910 
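     // The 231 variant computes dst = src1 * src2 + dst; the digits name which
     // operands are multiplied and which one is added.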
4911 void Assembler::vfmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
4912   assert(VM_Version::supports_fma(), "");
4913   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4914   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4915   emit_int8((unsigned char)0xB9);
4916   emit_int8((unsigned char)(0xC0 | encode));
4917 }
4918 
4919 void Assembler::vfmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
4920   assert(VM_Version::supports_fma(), "");
4921   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4922   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
4923   emit_int8((unsigned char)0xB9);
4924   emit_int8((unsigned char)(0xC0 | encode));
4925 }
4926 
4927 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
4928   assert(VM_Version::supports_avx(), "");
4929   InstructionMark im(this);
4930   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4931   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4932   attributes.set_rex_vex_w_reverted();
4933   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4934   emit_int8(0x59);
4935   emit_operand(dst, src);
4936 }
4937 
4938 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4939   assert(VM_Version::supports_avx(), "");
4940   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4941   attributes.set_rex_vex_w_reverted();
4942   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4943   emit_int8(0x59);
4944   emit_int8((unsigned char)(0xC0 | encode));
4945 }
4946 
4947 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
4948   assert(VM_Version::supports_avx(), "");
4949   InstructionMark im(this);
4950   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4951   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4952   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4953   emit_int8(0x59);
4954   emit_operand(dst, src);
4955 }
4956 
4957 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4958   assert(VM_Version::supports_avx(), "");
4959   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4960   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4961   emit_int8(0x59);
4962   emit_int8((unsigned char)(0xC0 | encode));
4963 }
4964 
4965 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
4966   assert(VM_Version::supports_avx(), "");
4967   InstructionMark im(this);
4968   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4969   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
4970   attributes.set_rex_vex_w_reverted();
4971   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4972   emit_int8(0x5C);
4973   emit_operand(dst, src);
4974 }
4975 
4976 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4977   assert(VM_Version::supports_avx(), "");
4978   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4979   attributes.set_rex_vex_w_reverted();
4980   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
4981   emit_int8(0x5C);
4982   emit_int8((unsigned char)(0xC0 | encode));
4983 }
4984 
4985 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
4986   assert(VM_Version::supports_avx(), "");
4987   InstructionMark im(this);
4988   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4989   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
4990   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4991   emit_int8(0x5C);
4992   emit_operand(dst, src);
4993 }
4994 
4995 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
4996   assert(VM_Version::supports_avx(), "");
4997   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
4998   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
4999   emit_int8(0x5C);
5000   emit_int8((unsigned char)(0xC0 | encode));
5001 }
5002 
5003 //====================VECTOR ARITHMETIC=====================================
5004 
5005 // Floating-point vector arithmetic
5006 
5007 void Assembler::addpd(XMMRegister dst, XMMRegister src) {
5008   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5009   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5010   attributes.set_rex_vex_w_reverted();
5011   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5012   emit_int8(0x58);
5013   emit_int8((unsigned char)(0xC0 | encode));
5014 }
5015 
5016 void Assembler::addpd(XMMRegister dst, Address src) {
5017   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5018   InstructionMark im(this);
5019   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5020   attributes.set_rex_vex_w_reverted();
5021   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5022   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5023   emit_int8(0x58);
5024   emit_operand(dst, src);
5025 }
5026 
5027 
5028 void Assembler::addps(XMMRegister dst, XMMRegister src) {
5029   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5030   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5031   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5032   emit_int8(0x58);
5033   emit_int8((unsigned char)(0xC0 | encode));
5034 }
5035 
5036 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5037   assert(VM_Version::supports_avx(), "");
5038   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5039   attributes.set_rex_vex_w_reverted();
5040   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5041   emit_int8(0x58);
5042   emit_int8((unsigned char)(0xC0 | encode));
5043 }
5044 
5045 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5046   assert(VM_Version::supports_avx(), "");
5047   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5048   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5049   emit_int8(0x58);
5050   emit_int8((unsigned char)(0xC0 | encode));
5051 }
5052 
5053 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5054   assert(VM_Version::supports_avx(), "");
5055   InstructionMark im(this);
5056   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5057   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5058   attributes.set_rex_vex_w_reverted();
5059   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5060   emit_int8(0x58);
5061   emit_operand(dst, src);
5062 }
5063 
5064 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5065   assert(VM_Version::supports_avx(), "");
5066   InstructionMark im(this);
5067   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5068   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5069   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5070   emit_int8(0x58);
5071   emit_operand(dst, src);
5072 }
5073 
5074 void Assembler::subpd(XMMRegister dst, XMMRegister src) {
5075   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5076   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5077   attributes.set_rex_vex_w_reverted();
5078   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5079   emit_int8(0x5C);
5080   emit_int8((unsigned char)(0xC0 | encode));
5081 }
5082 
5083 void Assembler::subps(XMMRegister dst, XMMRegister src) {
5084   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5085   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5086   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5087   emit_int8(0x5C);
5088   emit_int8((unsigned char)(0xC0 | encode));
5089 }
5090 
5091 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5092   assert(VM_Version::supports_avx(), "");
5093   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5094   attributes.set_rex_vex_w_reverted();
5095   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5096   emit_int8(0x5C);
5097   emit_int8((unsigned char)(0xC0 | encode));
5098 }
5099 
5100 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5101   assert(VM_Version::supports_avx(), "");
5102   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5103   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5104   emit_int8(0x5C);
5105   emit_int8((unsigned char)(0xC0 | encode));
5106 }
5107 
5108 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5109   assert(VM_Version::supports_avx(), "");
5110   InstructionMark im(this);
5111   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5112   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5113   attributes.set_rex_vex_w_reverted();
5114   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5115   emit_int8(0x5C);
5116   emit_operand(dst, src);
5117 }
5118 
5119 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5120   assert(VM_Version::supports_avx(), "");
5121   InstructionMark im(this);
5122   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5123   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5124   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5125   emit_int8(0x5C);
5126   emit_operand(dst, src);
5127 }
5128 
5129 void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
5130   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5131   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5132   attributes.set_rex_vex_w_reverted();
5133   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5134   emit_int8(0x59);
5135   emit_int8((unsigned char)(0xC0 | encode));
5136 }
5137 
5138 void Assembler::mulpd(XMMRegister dst, Address src) {
5139   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5140   InstructionMark im(this);
5141   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5142   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5143   attributes.set_rex_vex_w_reverted();
5144   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5145   emit_int8(0x59);
5146   emit_operand(dst, src);
5147 }
5148 
5149 void Assembler::mulps(XMMRegister dst, XMMRegister src) {
5150   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5151   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5152   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5153   emit_int8(0x59);
5154   emit_int8((unsigned char)(0xC0 | encode));
5155 }
5156 
5157 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5158   assert(VM_Version::supports_avx(), "");
5159   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5160   attributes.set_rex_vex_w_reverted();
5161   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5162   emit_int8(0x59);
5163   emit_int8((unsigned char)(0xC0 | encode));
5164 }
5165 
5166 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5167   assert(VM_Version::supports_avx(), "");
5168   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5169   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5170   emit_int8(0x59);
5171   emit_int8((unsigned char)(0xC0 | encode));
5172 }
5173 
5174 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5175   assert(VM_Version::supports_avx(), "");
5176   InstructionMark im(this);
5177   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5178   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5179   attributes.set_rex_vex_w_reverted();
5180   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5181   emit_int8(0x59);
5182   emit_operand(dst, src);
5183 }
5184 
5185 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5186   assert(VM_Version::supports_avx(), "");
5187   InstructionMark im(this);
5188   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5189   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5190   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5191   emit_int8(0x59);
5192   emit_operand(dst, src);
5193 }
5194 
5195 void Assembler::vfmadd231pd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len) {
5196   assert(VM_Version::supports_fma(), "");
5197   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5198   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5199   emit_int8((unsigned char)0xB8);
5200   emit_int8((unsigned char)(0xC0 | encode));
5201 }
5202 
5203 void Assembler::vfmadd231ps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len) {
5204   assert(VM_Version::supports_fma(), "");
5205   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5206   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5207   emit_int8((unsigned char)0xB8);
5208   emit_int8((unsigned char)(0xC0 | encode));
5209 }
5210 
5211 void Assembler::vfmadd231pd(XMMRegister dst, XMMRegister src1, Address src2, int vector_len) {
5212   assert(VM_Version::supports_fma(), "");
5213   InstructionMark im(this);
5214   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5215   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5216   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5217   emit_int8((unsigned char)0xB8);
5218   emit_operand(dst, src2);
5219 }
5220 
5221 void Assembler::vfmadd231ps(XMMRegister dst, XMMRegister src1, Address src2, int vector_len) {
5222   assert(VM_Version::supports_fma(), "");
5223   InstructionMark im(this);
5224   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5225   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5226   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5227   emit_int8((unsigned char)0xB8);
5228   emit_operand(dst, src2);
5229 }
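
// For reference, the "231" FMA forms compute, per element and with a single
// rounding,
//   dst[i] = src1[i] * src2[i] + dst[i]
// so a hypothetical MacroAssembler use such as
//   __ vfmadd231pd(xmm0, xmm1, xmm2, Assembler::AVX_256bit);
// accumulates four double-precision products into xmm0.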
5230 
5231 void Assembler::divpd(XMMRegister dst, XMMRegister src) {
5232   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5233   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5234   attributes.set_rex_vex_w_reverted();
5235   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5236   emit_int8(0x5E);
5237   emit_int8((unsigned char)(0xC0 | encode));
5238 }
5239 
5240 void Assembler::divps(XMMRegister dst, XMMRegister src) {
5241   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5242   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5243   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5244   emit_int8(0x5E);
5245   emit_int8((unsigned char)(0xC0 | encode));
5246 }
5247 
5248 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5249   assert(VM_Version::supports_avx(), "");
5250   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5251   attributes.set_rex_vex_w_reverted();
5252   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5253   emit_int8(0x5E);
5254   emit_int8((unsigned char)(0xC0 | encode));
5255 }
5256 
5257 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5258   assert(VM_Version::supports_avx(), "");
5259   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5260   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5261   emit_int8(0x5E);
5262   emit_int8((unsigned char)(0xC0 | encode));
5263 }
5264 
5265 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5266   assert(VM_Version::supports_avx(), "");
5267   InstructionMark im(this);
5268   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5269   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5270   attributes.set_rex_vex_w_reverted();
5271   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5272   emit_int8(0x5E);
5273   emit_operand(dst, src);
5274 }
5275 
5276 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5277   assert(VM_Version::supports_avx(), "");
5278   InstructionMark im(this);
5279   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5280   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5281   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5282   emit_int8(0x5E);
5283   emit_operand(dst, src);
5284 }
5285 
5286 void Assembler::vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len) {
5287   assert(VM_Version::supports_avx(), "");
5288   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5289   attributes.set_rex_vex_w_reverted();
5290   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5291   emit_int8(0x51);
5292   emit_int8((unsigned char)(0xC0 | encode));
5293 }
5294 
5295 void Assembler::vsqrtpd(XMMRegister dst, Address src, int vector_len) {
5296   assert(VM_Version::supports_avx(), "");
5297   InstructionMark im(this);
5298   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5299   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5300   attributes.set_rex_vex_w_reverted();
5301   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5302   emit_int8(0x51);
5303   emit_operand(dst, src);
5304 }
5305 
5306 void Assembler::vsqrtps(XMMRegister dst, XMMRegister src, int vector_len) {
5307   assert(VM_Version::supports_avx(), "");
5308   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5309   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5310   emit_int8(0x51);
5311   emit_int8((unsigned char)(0xC0 | encode));
5312 }
5313 
5314 void Assembler::vsqrtps(XMMRegister dst, Address src, int vector_len) {
5315   assert(VM_Version::supports_avx(), "");
5316   InstructionMark im(this);
5317   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5318   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5319   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5320   emit_int8(0x51);
5321   emit_operand(dst, src);
5322 }
5323 
5324 void Assembler::andpd(XMMRegister dst, XMMRegister src) {
5325   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5326   InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5327   attributes.set_rex_vex_w_reverted();
5328   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5329   emit_int8(0x54);
5330   emit_int8((unsigned char)(0xC0 | encode));
5331 }
5332 
5333 void Assembler::andps(XMMRegister dst, XMMRegister src) {
5334   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5335   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5336   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5337   emit_int8(0x54);
5338   emit_int8((unsigned char)(0xC0 | encode));
5339 }
5340 
5341 void Assembler::andps(XMMRegister dst, Address src) {
5342   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5343   InstructionMark im(this);
5344   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5345   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5346   simd_prefix(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5347   emit_int8(0x54);
5348   emit_operand(dst, src);
5349 }
5350 
5351 void Assembler::andpd(XMMRegister dst, Address src) {
5352   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5353   InstructionMark im(this);
5354   InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5355   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5356   attributes.set_rex_vex_w_reverted();
5357   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5358   emit_int8(0x54);
5359   emit_operand(dst, src);
5360 }
5361 
5362 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5363   assert(VM_Version::supports_avx(), "");
5364   InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5365   attributes.set_rex_vex_w_reverted();
5366   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5367   emit_int8(0x54);
5368   emit_int8((unsigned char)(0xC0 | encode));
5369 }
5370 
5371 void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5372   assert(VM_Version::supports_avx(), "");
5373   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5374   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5375   emit_int8(0x54);
5376   emit_int8((unsigned char)(0xC0 | encode));
5377 }
5378 
5379 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5380   assert(VM_Version::supports_avx(), "");
5381   InstructionMark im(this);
5382   InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5383   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5384   attributes.set_rex_vex_w_reverted();
5385   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5386   emit_int8(0x54);
5387   emit_operand(dst, src);
5388 }
5389 
5390 void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5391   assert(VM_Version::supports_avx(), "");
5392   InstructionMark im(this);
5393   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5394   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5395   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5396   emit_int8(0x54);
5397   emit_operand(dst, src);
5398 }
5399 
5400 void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) {
5401   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5402   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5403   attributes.set_rex_vex_w_reverted();
5404   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5405   emit_int8(0x15);
5406   emit_int8((unsigned char)(0xC0 | encode));
5407 }
5408 
5409 void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) {
5410   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5411   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5412   attributes.set_rex_vex_w_reverted();
5413   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5414   emit_int8(0x14);
5415   emit_int8((unsigned char)(0xC0 | encode));
5416 }
5417 
5418 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
5419   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5420   InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5421   attributes.set_rex_vex_w_reverted();
5422   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5423   emit_int8(0x57);
5424   emit_int8((unsigned char)(0xC0 | encode));
5425 }
5426 
5427 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
5428   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5429   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5430   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5431   emit_int8(0x57);
5432   emit_int8((unsigned char)(0xC0 | encode));
5433 }
5434 
5435 void Assembler::xorpd(XMMRegister dst, Address src) {
5436   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5437   InstructionMark im(this);
5438   InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5439   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5440   attributes.set_rex_vex_w_reverted();
5441   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5442   emit_int8(0x57);
5443   emit_operand(dst, src);
5444 }
5445 
5446 void Assembler::xorps(XMMRegister dst, Address src) {
5447   NOT_LP64(assert(VM_Version::supports_sse(), ""));
5448   InstructionMark im(this);
5449   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5450   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5451   simd_prefix(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5452   emit_int8(0x57);
5453   emit_operand(dst, src);
5454 }
5455 
5456 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5457   assert(VM_Version::supports_avx(), "");
5458   InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5459   attributes.set_rex_vex_w_reverted();
5460   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5461   emit_int8(0x57);
5462   emit_int8((unsigned char)(0xC0 | encode));
5463 }
5464 
5465 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5466   assert(VM_Version::supports_avx(), "");
5467   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5468   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5469   emit_int8(0x57);
5470   emit_int8((unsigned char)(0xC0 | encode));
5471 }
5472 
5473 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5474   assert(VM_Version::supports_avx(), "");
5475   InstructionMark im(this);
5476   InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5477   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5478   attributes.set_rex_vex_w_reverted();
5479   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5480   emit_int8(0x57);
5481   emit_operand(dst, src);
5482 }
5483 
5484 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5485   assert(VM_Version::supports_avx(), "");
5486   InstructionMark im(this);
5487   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5488   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5489   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
5490   emit_int8(0x57);
5491   emit_operand(dst, src);
5492 }
5493 
5494 // Integer vector arithmetic
5495 void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5496   assert((VM_Version::supports_avx() && (vector_len == 0)) ||
5497          VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
5498   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
5499   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5500   emit_int8(0x01);
5501   emit_int8((unsigned char)(0xC0 | encode));
5502 }
5503 
5504 void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5505   assert((VM_Version::supports_avx() && (vector_len == 0)) ||
5506          VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
5507   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
5508   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5509   emit_int8(0x02);
5510   emit_int8((unsigned char)(0xC0 | encode));
5511 }
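
// The horizontal adds pair adjacent elements; sketching the 128-bit form:
//   phaddd dst{d3,d2,d1,d0}, src{s3,s2,s1,s0} -> dst{s3+s2, s1+s0, d3+d2, d1+d0}
// Note that the 256-bit AVX2 forms apply this within each 128-bit lane
// independently; they never add across the lane boundary.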
5512 
5513 void Assembler::paddb(XMMRegister dst, XMMRegister src) {
5514   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5515   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5516   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5517   emit_int8((unsigned char)0xFC);
5518   emit_int8((unsigned char)(0xC0 | encode));
5519 }
5520 
5521 void Assembler::paddw(XMMRegister dst, XMMRegister src) {
5522   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5523   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5524   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5525   emit_int8((unsigned char)0xFD);
5526   emit_int8((unsigned char)(0xC0 | encode));
5527 }
5528 
5529 void Assembler::paddd(XMMRegister dst, XMMRegister src) {
5530   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5531   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5532   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5533   emit_int8((unsigned char)0xFE);
5534   emit_int8((unsigned char)(0xC0 | encode));
5535 }
5536 
5537 void Assembler::paddd(XMMRegister dst, Address src) {
5538   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5539   InstructionMark im(this);
5540   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5541   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5542   emit_int8((unsigned char)0xFE);
5543   emit_operand(dst, src);
5544 }
5545 
5546 void Assembler::paddq(XMMRegister dst, XMMRegister src) {
5547   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5548   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5549   attributes.set_rex_vex_w_reverted();
5550   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5551   emit_int8((unsigned char)0xD4);
5552   emit_int8((unsigned char)(0xC0 | encode));
5553 }
5554 
5555 void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
5556   assert(VM_Version::supports_sse3(), "");
5557   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
5558   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5559   emit_int8(0x01);
5560   emit_int8((unsigned char)(0xC0 | encode));
5561 }
5562 
5563 void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
5564   assert(VM_Version::supports_sse3(), "");
5565   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
5566   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5567   emit_int8(0x02);
5568   emit_int8((unsigned char)(0xC0 | encode));
5569 }
5570 
5571 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5572   assert(UseAVX > 0, "requires some form of AVX");
5573   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5574   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5575   emit_int8((unsigned char)0xFC);
5576   emit_int8((unsigned char)(0xC0 | encode));
5577 }
5578 
5579 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5580   assert(UseAVX > 0, "requires some form of AVX");
5581   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5582   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5583   emit_int8((unsigned char)0xFD);
5584   emit_int8((unsigned char)(0xC0 | encode));
5585 }
5586 
5587 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5588   assert(UseAVX > 0, "requires some form of AVX");
5589   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5590   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5591   emit_int8((unsigned char)0xFE);
5592   emit_int8((unsigned char)(0xC0 | encode));
5593 }
5594 
5595 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5596   assert(UseAVX > 0, "requires some form of AVX");
5597   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5598   attributes.set_rex_vex_w_reverted();
5599   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5600   emit_int8((unsigned char)0xD4);
5601   emit_int8((unsigned char)(0xC0 | encode));
5602 }
5603 
5604 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5605   assert(UseAVX > 0, "requires some form of AVX");
5606   InstructionMark im(this);
5607   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5608   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
5609   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5610   emit_int8((unsigned char)0xFC);
5611   emit_operand(dst, src);
5612 }
5613 
5614 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5615   assert(UseAVX > 0, "requires some form of AVX");
5616   InstructionMark im(this);
5617   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5618   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
5619   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5620   emit_int8((unsigned char)0xFD);
5621   emit_operand(dst, src);
5622 }
5623 
5624 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5625   assert(UseAVX > 0, "requires some form of AVX");
5626   InstructionMark im(this);
5627   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5628   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5629   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5630   emit_int8((unsigned char)0xFE);
5631   emit_operand(dst, src);
5632 }
5633 
5634 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5635   assert(UseAVX > 0, "requires some form of AVX");
5636   InstructionMark im(this);
5637   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5638   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5639   attributes.set_rex_vex_w_reverted();
5640   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5641   emit_int8((unsigned char)0xD4);
5642   emit_operand(dst, src);
5643 }
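
// The byte/word memory forms above use the EVEX_FVM (full-vector memory)
// tuple with no element input size: byte/word instructions have no
// embedded-broadcast form.  The dword/qword forms use EVEX_FV with a 32/64-bit
// input size, the tuple category that supports a broadcast memory operand
// under EVEX.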
5644 
5645 void Assembler::psubb(XMMRegister dst, XMMRegister src) {
5646   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5647   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5648   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5649   emit_int8((unsigned char)0xF8);
5650   emit_int8((unsigned char)(0xC0 | encode));
5651 }
5652 
5653 void Assembler::psubw(XMMRegister dst, XMMRegister src) {
5654   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5655   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5656   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5657   emit_int8((unsigned char)0xF9);
5658   emit_int8((unsigned char)(0xC0 | encode));
5659 }
5660 
5661 void Assembler::psubd(XMMRegister dst, XMMRegister src) {
       NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5662   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5663   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5664   emit_int8((unsigned char)0xFA);
5665   emit_int8((unsigned char)(0xC0 | encode));
5666 }
5667 
5668 void Assembler::psubq(XMMRegister dst, XMMRegister src) {
5669   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5670   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5671   attributes.set_rex_vex_w_reverted();
5672   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5673   emit_int8((unsigned char)0xFB);
5674   emit_int8((unsigned char)(0xC0 | encode));
5675 }
5676 
5677 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5678   assert(UseAVX > 0, "requires some form of AVX");
5679   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5680   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5681   emit_int8((unsigned char)0xF8);
5682   emit_int8((unsigned char)(0xC0 | encode));
5683 }
5684 
5685 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5686   assert(UseAVX > 0, "requires some form of AVX");
5687   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5688   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5689   emit_int8((unsigned char)0xF9);
5690   emit_int8((unsigned char)(0xC0 | encode));
5691 }
5692 
5693 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5694   assert(UseAVX > 0, "requires some form of AVX");
5695   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5696   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5697   emit_int8((unsigned char)0xFA);
5698   emit_int8((unsigned char)(0xC0 | encode));
5699 }
5700 
5701 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5702   assert(UseAVX > 0, "requires some form of AVX");
5703   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5704   attributes.set_rex_vex_w_reverted();
5705   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5706   emit_int8((unsigned char)0xFB);
5707   emit_int8((unsigned char)(0xC0 | encode));
5708 }
5709 
5710 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5711   assert(UseAVX > 0, "requires some form of AVX");
5712   InstructionMark im(this);
5713   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5714   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
5715   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5716   emit_int8((unsigned char)0xF8);
5717   emit_operand(dst, src);
5718 }
5719 
5720 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5721   assert(UseAVX > 0, "requires some form of AVX");
5722   InstructionMark im(this);
5723   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5724   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
5725   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5726   emit_int8((unsigned char)0xF9);
5727   emit_operand(dst, src);
5728 }
5729 
5730 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5731   assert(UseAVX > 0, "requires some form of AVX");
5732   InstructionMark im(this);
5733   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5734   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5735   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5736   emit_int8((unsigned char)0xFA);
5737   emit_operand(dst, src);
5738 }
5739 
5740 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5741   assert(UseAVX > 0, "requires some form of AVX");
5742   InstructionMark im(this);
5743   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5744   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5745   attributes.set_rex_vex_w_reverted();
5746   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5747   emit_int8((unsigned char)0xFB);
5748   emit_operand(dst, src);
5749 }
5750 
5751 void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
5752   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5753   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5754   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5755   emit_int8((unsigned char)0xD5);
5756   emit_int8((unsigned char)(0xC0 | encode));
5757 }
5758 
5759 void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
5760   assert(VM_Version::supports_sse4_1(), "");
5761   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5762   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5763   emit_int8(0x40);
5764   emit_int8((unsigned char)(0xC0 | encode));
5765 }
5766 
5767 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5768   assert(UseAVX > 0, "requires some form of AVX");
5769   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5770   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5771   emit_int8((unsigned char)0xD5);
5772   emit_int8((unsigned char)(0xC0 | encode));
5773 }
5774 
5775 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5776   assert(UseAVX > 0, "requires some form of AVX");
5777   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5778   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5779   emit_int8(0x40);
5780   emit_int8((unsigned char)(0xC0 | encode));
5781 }
5782 
5783 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
5784   assert(UseAVX > 2, "requires some form of EVEX");
5785   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5786   attributes.set_is_evex_instruction();
5787   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5788   emit_int8(0x40);
5789   emit_int8((unsigned char)(0xC0 | encode));
5790 }
5791 
5792 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5793   assert(UseAVX > 0, "requires some form of AVX");
5794   InstructionMark im(this);
5795   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5796   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
5797   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5798   emit_int8((unsigned char)0xD5);
5799   emit_operand(dst, src);
5800 }
5801 
5802 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5803   assert(UseAVX > 0, "requires some form of AVX");
5804   InstructionMark im(this);
5805   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5806   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
5807   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5808   emit_int8(0x40);
5809   emit_operand(dst, src);
5810 }
5811 
5812 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
5813   assert(UseAVX > 2, "requires some form of EVEX");
5814   InstructionMark im(this);
5815   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
5816   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
5817   attributes.set_is_evex_instruction();
5818   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
5819   emit_int8(0x40);
5820   emit_operand(dst, src);
5821 }
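
// vpmullq (low 64 bits of a 64x64 multiply) exists only as an AVX-512DQ
// instruction; there is no VEX-encoded form, hence the UseAVX > 2 guard and
// the forced EVEX encoding via set_is_evex_instruction() above.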
5822 
5823 // Shift packed integers left by the specified number of bits.
5824 void Assembler::psllw(XMMRegister dst, int shift) {
5825   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5826   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5827   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
5828   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5829   emit_int8(0x71);
5830   emit_int8((unsigned char)(0xC0 | encode));
5831   emit_int8(shift & 0xFF);
5832 }
5833 
5834 void Assembler::pslld(XMMRegister dst, int shift) {
5835   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5836   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5837   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
5838   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5839   emit_int8(0x72);
5840   emit_int8((unsigned char)(0xC0 | encode));
5841   emit_int8(shift & 0xFF);
5842 }
5843 
5844 void Assembler::psllq(XMMRegister dst, int shift) {
5845   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5846   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5847   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
5848   int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5849   emit_int8(0x73);
5850   emit_int8((unsigned char)(0xC0 | encode));
5851   emit_int8(shift & 0xFF);
5852 }
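
// In the immediate forms above, the ModRM reg field carries an opcode
// extension rather than a register, so a fixed XMM register is passed in the
// "dst" position of simd_prefix_and_encode() purely to synthesize that field
// (xmm6 -> /6).  For example, psllw(xmm1, 3) emits 66 0F 71 F1 03, where
// ModRM 0xF1 = mod 11, reg /6, rm xmm1.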
5853 
5854 void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
5855   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5856   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5857   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5858   emit_int8((unsigned char)0xF1);
5859   emit_int8((unsigned char)(0xC0 | encode));
5860 }
5861 
5862 void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
5863   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5864   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5865   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5866   emit_int8((unsigned char)0xF2);
5867   emit_int8((unsigned char)(0xC0 | encode));
5868 }
5869 
5870 void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
5871   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5872   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5873   attributes.set_rex_vex_w_reverted();
5874   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5875   emit_int8((unsigned char)0xF3);
5876   emit_int8((unsigned char)(0xC0 | encode));
5877 }
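
// In the register-count forms above, hardware takes the shift count from the
// low 64 bits of the count register (it is not applied per element), and a
// count at or above the element width zeroes the destination.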
5878 
5879 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
5880   assert(UseAVX > 0, "requires some form of AVX");
5881   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5882   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
5883   int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5884   emit_int8(0x71);
5885   emit_int8((unsigned char)(0xC0 | encode));
5886   emit_int8(shift & 0xFF);
5887 }
5888 
5889 void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
5890   assert(UseAVX > 0, "requires some form of AVX");
5892   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5893   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
5894   int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5895   emit_int8(0x72);
5896   emit_int8((unsigned char)(0xC0 | encode));
5897   emit_int8(shift & 0xFF);
5898 }
5899 
5900 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
5901   assert(UseAVX > 0, "requires some form of AVX");
5902   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5903   attributes.set_rex_vex_w_reverted();
5904   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
5905   int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5906   emit_int8(0x73);
5907   emit_int8((unsigned char)(0xC0 | encode));
5908   emit_int8(shift & 0xFF);
5909 }
5910 
5911 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
5912   assert(UseAVX > 0, "requires some form of AVX");
5913   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5914   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5915   emit_int8((unsigned char)0xF1);
5916   emit_int8((unsigned char)(0xC0 | encode));
5917 }
5918 
5919 void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
5920   assert(UseAVX > 0, "requires some form of AVX");
5921   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5922   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5923   emit_int8((unsigned char)0xF2);
5924   emit_int8((unsigned char)(0xC0 | encode));
5925 }
5926 
5927 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
5928   assert(UseAVX > 0, "requires some form of AVX");
5929   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5930   attributes.set_rex_vex_w_reverted();
5931   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5932   emit_int8((unsigned char)0xF3);
5933   emit_int8((unsigned char)(0xC0 | encode));
5934 }
5935 
5936 // Shift packed integers logically right by the specified number of bits.
5937 void Assembler::psrlw(XMMRegister dst, int shift) {
5938   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5939   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5940   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
5941   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5942   emit_int8(0x71);
5943   emit_int8((unsigned char)(0xC0 | encode));
5944   emit_int8(shift & 0xFF);
5945 }
5946 
5947 void Assembler::psrld(XMMRegister dst, int shift) {
5948   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5949   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5950   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
5951   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5952   emit_int8(0x72);
5953   emit_int8((unsigned char)(0xC0 | encode));
5954   emit_int8(shift & 0xFF);
5955 }
5956 
5957 void Assembler::psrlq(XMMRegister dst, int shift) {
5958   // Do not confuse with the SSE2 instruction psrldq, which shifts the whole
5959   // 128-bit value in an xmm register right by a number of *bytes*.
5960   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5961   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5962   attributes.set_rex_vex_w_reverted();
5963   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
5964   int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5965   emit_int8(0x73);
5966   emit_int8((unsigned char)(0xC0 | encode));
5967   emit_int8(shift & 0xFF);
5968 }
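
// To illustrate the distinction: psrlq(xmm0, 8) shifts each of the two 64-bit
// lanes right by 8 bits, whereas psrldq(xmm0, 8) shifts the whole register
// right by 8 bytes (the high quadword moves into the low one and the high is
// zeroed).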
5969 
5970 void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
5971   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5972   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5973   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5974   emit_int8((unsigned char)0xD1);
5975   emit_int8((unsigned char)(0xC0 | encode));
5976 }
5977 
5978 void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
5979   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5980   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5981   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5982   emit_int8((unsigned char)0xD2);
5983   emit_int8((unsigned char)(0xC0 | encode));
5984 }
5985 
5986 void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
5987   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
5988   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
5989   attributes.set_rex_vex_w_reverted();
5990   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
5991   emit_int8((unsigned char)0xD3);
5992   emit_int8((unsigned char)(0xC0 | encode));
5993 }
5994 
5995 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
5996   assert(UseAVX > 0, "requires some form of AVX");
5997   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
5998   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
5999   int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6000   emit_int8(0x71);
6001   emit_int8((unsigned char)(0xC0 | encode));
6002   emit_int8(shift & 0xFF);
6003 }
6004 
6005 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
6006   assert(UseAVX > 0, "requires some form of AVX");
6007   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6008   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
6009   int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6010   emit_int8(0x72);
6011   emit_int8((unsigned char)(0xC0 | encode));
6012   emit_int8(shift & 0xFF);
6013 }
6014 
6015 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
6016   assert(UseAVX > 0, "requires some form of AVX");
6017   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6018   attributes.set_rex_vex_w_reverted();
6019   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
6020   int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6021   emit_int8(0x73);
6022   emit_int8((unsigned char)(0xC0 | encode));
6023   emit_int8(shift & 0xFF);
6024 }
6025 
6026 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6027   assert(UseAVX > 0, "requires some form of AVX");
6028   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6029   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6030   emit_int8((unsigned char)0xD1);
6031   emit_int8((unsigned char)(0xC0 | encode));
6032 }
6033 
6034 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6035   assert(UseAVX > 0, "requires some form of AVX");
6036   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6037   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6038   emit_int8((unsigned char)0xD2);
6039   emit_int8((unsigned char)(0xC0 | encode));
6040 }
6041 
6042 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6043   assert(UseAVX > 0, "requires some form of AVX");
6044   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6045   attributes.set_rex_vex_w_reverted();
6046   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6047   emit_int8((unsigned char)0xD3);
6048   emit_int8((unsigned char)(0xC0 | encode));
6049 }
6050 
6051 // Shift packed integers arithmetically right by the specified number of bits.
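// Unlike the logical shifts these replicate the sign bit: for a dword element
// 0x80000000, psrad by 1 yields 0xC0000000 while psrld yields 0x40000000.
// Only word and dword forms appear here; a packed 64-bit arithmetic shift
// (vpsraq) arrived only with AVX-512.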
6052 void Assembler::psraw(XMMRegister dst, int shift) {
6053   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6054   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6055   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
6056   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6057   emit_int8(0x71);
6058   emit_int8((unsigned char)(0xC0 | encode));
6059   emit_int8(shift & 0xFF);
6060 }
6061 
6062 void Assembler::psrad(XMMRegister dst, int shift) {
6063   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6064   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6065   // XMM4 is for /4 encoding: 66 0F 72 /4 ib
6066   int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6067   emit_int8(0x72);
6068   emit_int8((unsigned char)(0xC0 | encode));
6069   emit_int8(shift & 0xFF);
6070 }
6071 
6072 void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
6073   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6074   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6075   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6076   emit_int8((unsigned char)0xE1);
6077   emit_int8((unsigned char)(0xC0 | encode));
6078 }
6079 
6080 void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
6081   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6082   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6083   int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6084   emit_int8((unsigned char)0xE2);
6085   emit_int8((unsigned char)(0xC0 | encode));
6086 }
6087 
6088 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
6089   assert(UseAVX > 0, "requires some form of AVX");
6090   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6091   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
6092   int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6093   emit_int8(0x71);
6094   emit_int8((unsigned char)(0xC0 | encode));
6095   emit_int8(shift & 0xFF);
6096 }
6097 
6098 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
6099   assert(UseAVX > 0, "requires some form of AVX");
6100   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6101   // XMM4 is for /4 encoding: 66 0F 72 /4 ib
6102   int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6103   emit_int8(0x72);
6104   emit_int8((unsigned char)(0xC0 | encode));
6105   emit_int8(shift & 0xFF);
6106 }
6107 
6108 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6109   assert(UseAVX > 0, "requires some form of AVX");
6110   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6111   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6112   emit_int8((unsigned char)0xE1);
6113   emit_int8((unsigned char)(0xC0 | encode));
6114 }
6115 
6116 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
6117   assert(UseAVX > 0, "requires some form of AVX");
6118   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6119   int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6120   emit_int8((unsigned char)0xE2);
6121   emit_int8((unsigned char)(0xC0 | encode));
6122 }
6123 
6124 
6125 // Logical operations on packed integers
6126 void Assembler::pand(XMMRegister dst, XMMRegister src) {
6127   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6128   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6129   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6130   emit_int8((unsigned char)0xDB);
6131   emit_int8((unsigned char)(0xC0 | encode));
6132 }
6133 
6134 void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6135   assert(UseAVX > 0, "requires some form of AVX");
6136   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6137   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6138   emit_int8((unsigned char)0xDB);
6139   emit_int8((unsigned char)(0xC0 | encode));
6140 }
6141 
6142 void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6143   assert(UseAVX > 0, "requires some form of AVX");
6144   InstructionMark im(this);
6145   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6146   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
6147   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6148   emit_int8((unsigned char)0xDB);
6149   emit_operand(dst, src);
6150 }
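
     // When the EVEX path is taken, set_address_attributes above feeds the disp8
     // compression logic: with tuple EVEX_FV and a 512-bit vector the scale factor
     // N is 64 (see tuple_table), so, as a sketch, a displacement of 128 can be
     // encoded as the single byte disp8 = 2 instead of a full 32-bit displacement.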
6151 
6152 void Assembler::pandn(XMMRegister dst, XMMRegister src) {
6153   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6154   InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6155   attributes.set_rex_vex_w_reverted();
6156   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6157   emit_int8((unsigned char)0xDF);
6158   emit_int8((unsigned char)(0xC0 | encode));
6159 }
6160 
6161 void Assembler::por(XMMRegister dst, XMMRegister src) {
6162   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6163   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6164   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6165   emit_int8((unsigned char)0xEB);
6166   emit_int8((unsigned char)(0xC0 | encode));
6167 }
6168 
6169 void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6170   assert(UseAVX > 0, "requires some form of AVX");
6171   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6172   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6173   emit_int8((unsigned char)0xEB);
6174   emit_int8((unsigned char)(0xC0 | encode));
6175 }
6176 
6177 void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6178   assert(UseAVX > 0, "requires some form of AVX");
6179   InstructionMark im(this);
6180   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6181   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
6182   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6183   emit_int8((unsigned char)0xEB);
6184   emit_operand(dst, src);
6185 }
6186 
6187 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
6188   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
6189   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6190   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6191   emit_int8((unsigned char)0xEF);
6192   emit_int8((unsigned char)(0xC0 | encode));
6193 }
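
     // Encoding sketch (pre-AVX SSE2 path): pxor(xmm1, xmm2) emits
     //   66 0F EF CA    (66 0F EF /r, ModRM = 0xC0 | 1 << 3 | 2)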
6194 
6195 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6196   assert(UseAVX > 0, "requires some form of AVX");
6197   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6198   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6199   emit_int8((unsigned char)0xEF);
6200   emit_int8((unsigned char)(0xC0 | encode));
6201 }
6202 
6203 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6204   assert(UseAVX > 0, "requires some form of AVX");
6205   InstructionMark im(this);
6206   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6207   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
6208   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6209   emit_int8((unsigned char)0xEF);
6210   emit_operand(dst, src);
6211 }
6212 
6213 void Assembler::evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
6214   assert(VM_Version::supports_evex(), "requires EVEX support");
6215   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6216   attributes.set_is_evex_instruction();
6217   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6218   emit_int8((unsigned char)0xEF);
6219   emit_int8((unsigned char)(0xC0 | encode));
6220 }
6221 
6222 void Assembler::evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
6223   assert(VM_Version::supports_evex(), "requires EVEX support");
6224   assert(dst != xnoreg, "sanity");
6225   InstructionMark im(this);
6226   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6227   attributes.set_is_evex_instruction();
6228   attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
6229   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
6230   emit_int8((unsigned char)0xEF);
6231   emit_operand(dst, src);
6232 }
6233 
6234 
6235 // vinserti forms
6236 
6237 void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6238   assert(VM_Version::supports_avx2(), "");
6239   assert(imm8 <= 0x01, "imm8: %u", imm8);
6240   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6241   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6242   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6243   emit_int8(0x38);
6244   emit_int8((unsigned char)(0xC0 | encode));
6245   // 0x00 - insert into lower 128 bits
6246   // 0x01 - insert into upper 128 bits
6247   emit_int8(imm8 & 0x01);
6248 }
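
     // Encoding sketch (assuming an AVX2-only machine, so AVX_256bit is used):
     // vinserti128(ymm0, ymm1, xmm2, 1) emits
     //   C4 E3 75 38 C2 01    (VEX.256.66.0F3A.W0 38 /r ib)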
6249 
6250 void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6251   assert(VM_Version::supports_avx2(), "");
6252   assert(dst != xnoreg, "sanity");
6253   assert(imm8 <= 0x01, "imm8: %u", imm8);
6254   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6255   InstructionMark im(this);
6256   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6257   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6258   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6259   emit_int8(0x38);
6260   emit_operand(dst, src);
6261   // 0x00 - insert into lower 128 bits
6262   // 0x01 - insert into upper 128 bits
6263   emit_int8(imm8 & 0x01);
6264 }
6265 
6266 void Assembler::vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6267   assert(VM_Version::supports_evex(), "");
6268   assert(imm8 <= 0x03, "imm8: %u", imm8);
6269   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6270   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6271   emit_int8(0x38);
6272   emit_int8((unsigned char)(0xC0 | encode));
6273   // 0x00 - insert into q0 128 bits (0..127)
6274   // 0x01 - insert into q1 128 bits (128..255)
6275   // 0x02 - insert into q2 128 bits (256..383)
6276   // 0x03 - insert into q3 128 bits (384..511)
6277   emit_int8(imm8 & 0x03);
6278 }
6279 
6280 void Assembler::vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6281   assert(VM_Version::supports_avx(), "");
6282   assert(dst != xnoreg, "sanity");
6283   assert(imm8 <= 0x03, "imm8: %u", imm8);
6284   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
6285   InstructionMark im(this);
6286   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6287   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6288   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6289   emit_int8(0x38);
6290   emit_operand(dst, src);
6291   // 0x00 - insert into q0 128 bits (0..127)
6292   // 0x01 - insert into q1 128 bits (128..255)
6293   // 0x02 - insert into q2 128 bits (256..383)
6294   // 0x03 - insert into q3 128 bits (384..511)
6295   emit_int8(imm8 & 0x03);
6296 }
6297 
6298 void Assembler::vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6299   assert(VM_Version::supports_evex(), "");
6300   assert(imm8 <= 0x01, "imm8: %u", imm8);
6301   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6302   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6303   emit_int8(0x3A);
6304   emit_int8((unsigned char)(0xC0 | encode));
6305   // 0x00 - insert into lower 256 bits
6306   // 0x01 - insert into upper 256 bits
6307   emit_int8(imm8 & 0x01);
6308 }
6309 
6310 
6311 // vinsertf forms
6312 
6313 void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6314   assert(VM_Version::supports_avx(), "");
6315   assert(imm8 <= 0x01, "imm8: %u", imm8);
6316   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6317   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6318   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6319   emit_int8(0x18);
6320   emit_int8((unsigned char)(0xC0 | encode));
6321   // 0x00 - insert into lower 128 bits
6322   // 0x01 - insert into upper 128 bits
6323   emit_int8(imm8 & 0x01);
6324 }
6325 
6326 void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6327   assert(VM_Version::supports_avx(), "");
6328   assert(dst != xnoreg, "sanity");
6329   assert(imm8 <= 0x01, "imm8: %u", imm8);
6330   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6331   InstructionMark im(this);
6332   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6333   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6334   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6335   emit_int8(0x18);
6336   emit_operand(dst, src);
6337   // 0x00 - insert into lower 128 bits
6338   // 0x01 - insert into upper 128 bits
6339   emit_int8(imm8 & 0x01);
6340 }
6341 
6342 void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6343   assert(VM_Version::supports_evex(), "");
6344   assert(imm8 <= 0x03, "imm8: %u", imm8);
6345   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6346   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6347   emit_int8(0x18);
6348   emit_int8((unsigned char)(0xC0 | encode));
6349   // 0x00 - insert into q0 128 bits (0..127)
6350   // 0x01 - insert into q1 128 bits (128..255)
6351   // 0x02 - insert into q2 128 bits (256..383)
6352   // 0x03 - insert into q3 128 bits (384..511)
6353   emit_int8(imm8 & 0x03);
6354 }
6355 
6356 void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6357   assert(VM_Version::supports_avx(), "");
6358   assert(dst != xnoreg, "sanity");
6359   assert(imm8 <= 0x03, "imm8: %u", imm8);
6360   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
6361   InstructionMark im(this);
6362   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6363   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6364   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6365   emit_int8(0x18);
6366   emit_operand(dst, src);
6367   // 0x00 - insert into q0 128 bits (0..127)
6368   // 0x01 - insert into q1 128 bits (128..255)
6369   // 0x02 - insert into q2 128 bits (256..383)
6370   // 0x03 - insert into q3 128 bits (384..511)
6371   emit_int8(imm8 & 0x03);
6372 }
6373 
6374 void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
6375   assert(VM_Version::supports_evex(), "");
6376   assert(imm8 <= 0x01, "imm8: %u", imm8);
6377   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6378   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6379   emit_int8(0x1A);
6380   emit_int8((unsigned char)(0xC0 | encode));
6381   // 0x00 - insert into lower 256 bits
6382   // 0x01 - insert into upper 256 bits
6383   emit_int8(imm8 & 0x01);
6384 }
6385 
6386 void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
6387   assert(VM_Version::supports_evex(), "");
6388   assert(dst != xnoreg, "sanity");
6389   assert(imm8 <= 0x01, "imm8: %u", imm8);
6390   InstructionMark im(this);
6391   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6392   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_64bit);
6393   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6394   emit_int8(0x1A);
6395   emit_operand(dst, src);
6396   // 0x00 - insert into lower 256 bits
6397   // 0x01 - insert into upper 256 bits
6398   emit_int8(imm8 & 0x01);
6399 }
6400 
6401 
6402 // vextracti forms
6403 
6404 void Assembler::vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6405   assert(VM_Version::supports_avx2(), "");
6406   assert(imm8 <= 0x01, "imm8: %u", imm8);
6407   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6408   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6409   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6410   emit_int8(0x39);
6411   emit_int8((unsigned char)(0xC0 | encode));
6412   // 0x00 - extract from lower 128 bits
6413   // 0x01 - extract from upper 128 bits
6414   emit_int8(imm8 & 0x01);
6415 }
6416 
6417 void Assembler::vextracti128(Address dst, XMMRegister src, uint8_t imm8) {
6418   assert(VM_Version::supports_avx2(), "");
6419   assert(src != xnoreg, "sanity");
6420   assert(imm8 <= 0x01, "imm8: %u", imm8);
6421   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6422   InstructionMark im(this);
6423   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6424   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6425   attributes.reset_is_clear_context();
6426   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6427   emit_int8(0x39);
6428   emit_operand(src, dst);
6429   // 0x00 - extract from lower 128 bits
6430   // 0x01 - extract from upper 128 bits
6431   emit_int8(imm8 & 0x01);
6432 }
6433 
6434 void Assembler::vextracti32x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6435   assert(VM_Version::supports_avx(), "");
6436   assert(imm8 <= 0x03, "imm8: %u", imm8);
6437   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
6438   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6439   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6440   emit_int8(0x39);
6441   emit_int8((unsigned char)(0xC0 | encode));
6442   // 0x00 - extract from bits 127:0
6443   // 0x01 - extract from bits 255:128
6444   // 0x02 - extract from bits 383:256
6445   // 0x03 - extract from bits 511:384
6446   emit_int8(imm8 & 0x03);
6447 }
6448 
6449 void Assembler::vextracti32x4(Address dst, XMMRegister src, uint8_t imm8) {
6450   assert(VM_Version::supports_evex(), "");
6451   assert(src != xnoreg, "sanity");
6452   assert(imm8 <= 0x03, "imm8: %u", imm8);
6453   InstructionMark im(this);
6454   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6455   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6456   attributes.reset_is_clear_context();
6457   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6458   emit_int8(0x39);
6459   emit_operand(src, dst);
6460   // 0x00 - extract from bits 127:0
6461   // 0x01 - extract from bits 255:128
6462   // 0x02 - extract from bits 383:256
6463   // 0x03 - extract from bits 511:384
6464   emit_int8(imm8 & 0x03);
6465 }
6466 
6467 void Assembler::vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6468   assert(VM_Version::supports_avx512dq(), "");
6469   assert(imm8 <= 0x03, "imm8: %u", imm8);
6470   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6471   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6472   emit_int8(0x39);
6473   emit_int8((unsigned char)(0xC0 | encode));
6474   // 0x00 - extract from bits 127:0
6475   // 0x01 - extract from bits 255:128
6476   // 0x02 - extract from bits 383:256
6477   // 0x03 - extract from bits 511:384
6478   emit_int8(imm8 & 0x03);
6479 }
6480 
6481 void Assembler::vextracti64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6482   assert(VM_Version::supports_evex(), "");
6483   assert(imm8 <= 0x01, "imm8: %u", imm8);
6484   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6485   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6486   emit_int8(0x3B);
6487   emit_int8((unsigned char)(0xC0 | encode));
6488   // 0x00 - extract from lower 256 bits
6489   // 0x01 - extract from upper 256 bits
6490   emit_int8(imm8 & 0x01);
6491 }
6492 
6493 
6494 // vextractf forms
6495 
6496 void Assembler::vextractf128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6497   assert(VM_Version::supports_avx(), "");
6498   assert(imm8 <= 0x01, "imm8: %u", imm8);
6499   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6500   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6501   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6502   emit_int8(0x19);
6503   emit_int8((unsigned char)(0xC0 | encode));
6504   // 0x00 - extract from lower 128 bits
6505   // 0x01 - extract from upper 128 bits
6506   emit_int8(imm8 & 0x01);
6507 }
6508 
6509 void Assembler::vextractf128(Address dst, XMMRegister src, uint8_t imm8) {
6510   assert(VM_Version::supports_avx(), "");
6511   assert(src != xnoreg, "sanity");
6512   assert(imm8 <= 0x01, "imm8: %u", imm8);
6513   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
6514   InstructionMark im(this);
6515   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6516   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6517   attributes.reset_is_clear_context();
6518   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6519   emit_int8(0x19);
6520   emit_operand(src, dst);
6521   // 0x00 - extract from lower 128 bits
6522   // 0x01 - extract from upper 128 bits
6523   emit_int8(imm8 & 0x01);
6524 }
6525 
6526 void Assembler::vextractf32x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6527   assert(VM_Version::supports_avx(), "");
6528   assert(imm8 <= 0x03, "imm8: %u", imm8);
6529   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
6530   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6531   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6532   emit_int8(0x19);
6533   emit_int8((unsigned char)(0xC0 | encode));
6534   // 0x00 - extract from bits 127:0
6535   // 0x01 - extract from bits 255:128
6536   // 0x02 - extract from bits 383:256
6537   // 0x03 - extract from bits 511:384
6538   emit_int8(imm8 & 0x03);
6539 }
6540 
6541 void Assembler::vextractf32x4(Address dst, XMMRegister src, uint8_t imm8) {
6542   assert(VM_Version::supports_evex(), "");
6543   assert(src != xnoreg, "sanity");
6544   assert(imm8 <= 0x03, "imm8: %u", imm8);
6545   InstructionMark im(this);
6546   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6547   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
6548   attributes.reset_is_clear_context();
6549   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6550   emit_int8(0x19);
6551   emit_operand(src, dst);
6552   // 0x00 - extract from bits 127:0
6553   // 0x01 - extract from bits 255:128
6554   // 0x02 - extract from bits 383:256
6555   // 0x03 - extract from bits 511:384
6556   emit_int8(imm8 & 0x03);
6557 }
6558 
6559 void Assembler::vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6560   assert(VM_Version::supports_avx512dq(), "");
6561   assert(imm8 <= 0x03, "imm8: %u", imm8);
6562   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6563   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6564   emit_int8(0x19);
6565   emit_int8((unsigned char)(0xC0 | encode));
6566   // 0x00 - extract from bits 127:0
6567   // 0x01 - extract from bits 255:128
6568   // 0x02 - extract from bits 383:256
6569   // 0x03 - extract from bits 511:384
6570   emit_int8(imm8 & 0x03);
6571 }
6572 
6573 void Assembler::vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
6574   assert(VM_Version::supports_evex(), "");
6575   assert(imm8 <= 0x01, "imm8: %u", imm8);
6576   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6577   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6578   emit_int8(0x1B);
6579   emit_int8((unsigned char)(0xC0 | encode));
6580   // 0x00 - extract from lower 256 bits
6581   // 0x01 - extract from upper 256 bits
6582   emit_int8(imm8 & 0x01);
6583 }
6584 
6585 void Assembler::vextractf64x4(Address dst, XMMRegister src, uint8_t imm8) {
6586   assert(VM_Version::supports_evex(), "");
6587   assert(src != xnoreg, "sanity");
6588   assert(imm8 <= 0x01, "imm8: %u", imm8);
6589   InstructionMark im(this);
6590   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
6591   attributes.set_address_attributes(/* tuple_type */ EVEX_T4,/* input_size_in_bits */  EVEX_64bit);
6592   attributes.reset_is_clear_context();
6593   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6594   emit_int8(0x1B);
6595   emit_operand(src, dst);
6596   // 0x00 - extract from lower 256 bits
6597   // 0x01 - extract from upper 256 bits
6598   emit_int8(imm8 & 0x01);
6599 }
6600 
6601 
6602 // legacy word/dword replicate
6603 void Assembler::vpbroadcastw(XMMRegister dst, XMMRegister src) {
6604   assert(VM_Version::supports_avx2(), "");
6605   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6606   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6607   emit_int8(0x79);
6608   emit_int8((unsigned char)(0xC0 | encode));
6609 }
6610 
6611 void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
6612   assert(VM_Version::supports_avx2(), "");
6613   InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6614   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6615   emit_int8(0x58);
6616   emit_int8((unsigned char)(0xC0 | encode));
6617 }
6618 
6619 
6620 // xmm/mem sourced byte/word/dword/qword replicate
6621 
6622 // duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
6623 void Assembler::evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len) {
6624   assert(VM_Version::supports_evex(), "");
6625   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6626   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6627   emit_int8(0x78);
6628   emit_int8((unsigned char)(0xC0 | encode));
6629 }
6630 
6631 void Assembler::evpbroadcastb(XMMRegister dst, Address src, int vector_len) {
6632   assert(VM_Version::supports_evex(), "");
6633   assert(dst != xnoreg, "sanity");
6634   InstructionMark im(this);
6635   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6636   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
6637   // swap src<->dst for encoding
6638   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6639   emit_int8(0x78);
6640   emit_operand(dst, src);
6641 }
6642 
6643 // duplicate 2-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
6644 void Assembler::evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
6645   assert(VM_Version::supports_evex(), "");
6646   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6647   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6648   emit_int8(0x79);
6649   emit_int8((unsigned char)(0xC0 | encode));
6650 }
6651 
6652 void Assembler::evpbroadcastw(XMMRegister dst, Address src, int vector_len) {
6653   assert(VM_Version::supports_evex(), "");
6654   assert(dst != xnoreg, "sanity");
6655   InstructionMark im(this);
6656   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6657   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
6658   // swap src<->dst for encoding
6659   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6660   emit_int8(0x79);
6661   emit_operand(dst, src);
6662 }
6663 
6664 // duplicate 4-byte integer data from src into programmed locations in dest : requires AVX512VL
6665 void Assembler::evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) {
6666   assert(VM_Version::supports_evex(), "");
6667   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6668   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6669   emit_int8(0x58);
6670   emit_int8((unsigned char)(0xC0 | encode));
6671 }
6672 
6673 void Assembler::evpbroadcastd(XMMRegister dst, Address src, int vector_len) {
6674   assert(VM_Version::supports_evex(), "");
6675   assert(dst != xnoreg, "sanity");
6676   InstructionMark im(this);
6677   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6678   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
6679   // swap src<->dst for encoding
6680   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6681   emit_int8(0x58);
6682   emit_operand(dst, src);
6683 }
6684 
6685 // duplicate 8-byte integer data from src into programmed locations in dest : requires AVX512VL
6686 void Assembler::evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) {
6687   assert(VM_Version::supports_evex(), "");
6688   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6689   attributes.set_rex_vex_w_reverted();
6690   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6691   emit_int8(0x59);
6692   emit_int8((unsigned char)(0xC0 | encode));
6693 }
6694 
6695 void Assembler::evpbroadcastq(XMMRegister dst, Address src, int vector_len) {
6696   assert(VM_Version::supports_evex(), "");
6697   assert(dst != xnoreg, "sanity");
6698   InstructionMark im(this);
6699   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6700   attributes.set_rex_vex_w_reverted();
6701   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
6702   // swap src<->dst for encoding
6703   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6704   emit_int8(0x59);
6705   emit_operand(dst, src);
6706 }
6707 
6708 
6709 // scalar single/double precision replicate
6710 
6711 // duplicate single precision data from src into programmed locations in dest : requires AVX512VL
6712 void Assembler::evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len) {
6713   assert(VM_Version::supports_evex(), "");
6714   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6715   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6716   emit_int8(0x18);
6717   emit_int8((unsigned char)(0xC0 | encode));
6718 }
6719 
6720 void Assembler::evpbroadcastss(XMMRegister dst, Address src, int vector_len) {
6721   assert(VM_Version::supports_evex(), "");
6722   assert(dst != xnoreg, "sanity");
6723   InstructionMark im(this);
6724   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6725   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
6726   // swap src<->dst for encoding
6727   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6728   emit_int8(0x18);
6729   emit_operand(dst, src);
6730 }
6731 
6732 // duplicate double precision data from src into programmed locations in dest : requires AVX512VL
6733 void Assembler::evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) {
6734   assert(VM_Version::supports_evex(), "");
6735   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6736   attributes.set_rex_vex_w_reverted();
6737   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6738   emit_int8(0x19);
6739   emit_int8((unsigned char)(0xC0 | encode));
6740 }
6741 
6742 void Assembler::evpbroadcastsd(XMMRegister dst, Address src, int vector_len) {
6743   assert(VM_Version::supports_evex(), "");
6744   assert(dst != xnoreg, "sanity");
6745   InstructionMark im(this);
6746   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6747   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
6748   attributes.set_rex_vex_w_reverted();
6749   // swap src<->dst for encoding
6750   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6751   emit_int8(0x19);
6752   emit_operand(dst, src);
6753 }
6754 
6755 
6756 // gpr source broadcast forms
6757 
6758 // duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
6759 void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) {
6760   assert(VM_Version::supports_evex(), "");
6761   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6762   attributes.set_is_evex_instruction();
6763   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6764   emit_int8(0x7A);
6765   emit_int8((unsigned char)(0xC0 | encode));
6766 }
6767 
6768 // duplicate 2-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
6769 void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) {
6770   assert(VM_Version::supports_evex(), "");
6771   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
6772   attributes.set_is_evex_instruction();
6773   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6774   emit_int8(0x7B);
6775   emit_int8((unsigned char)(0xC0 | encode));
6776 }
6777 
6778 // duplicate 4-byte integer data from src into programmed locations in dest : requires AVX512VL
6779 void Assembler::evpbroadcastd(XMMRegister dst, Register src, int vector_len) {
6780   assert(VM_Version::supports_evex(), "");
6781   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6782   attributes.set_is_evex_instruction();
6783   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6784   emit_int8(0x7C);
6785   emit_int8((unsigned char)(0xC0 | encode));
6786 }
6787 
6788 // duplicate 8-byte integer data from src into programmed locations in dest : requires AVX512VL
6789 void Assembler::evpbroadcastq(XMMRegister dst, Register src, int vector_len) {
6790   assert(VM_Version::supports_evex(), "");
6791   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
6792   attributes.set_is_evex_instruction();
6793   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
6794   emit_int8(0x7C);
6795   emit_int8((unsigned char)(0xC0 | encode));
6796 }
6797 
6798 
6799 // Carry-Less Multiplication Quadword
6800 void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
6801   assert(VM_Version::supports_clmul(), "");
6802   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
6803   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6804   emit_int8(0x44);
6805   emit_int8((unsigned char)(0xC0 | encode));
6806   emit_int8((unsigned char)mask);
6807 }
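
     // In the mask byte, bit 0 selects the low (0) or high (1) qword of the first
     // source (the destination register here) and bit 4 selects the qword of the
     // second source, so the useful mask values are 0x00, 0x01, 0x10 and 0x11.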
6808 
6809 // Carry-Less Multiplication Quadword
6810 void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
6811   assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
6812   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
6813   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6814   emit_int8(0x44);
6815   emit_int8((unsigned char)(0xC0 | encode));
6816   emit_int8((unsigned char)mask);
6817 }
6818 
6819 void Assembler::evpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask, int vector_len) {
6820   assert(VM_Version::supports_vpclmulqdq(), "Requires vector carryless multiplication support");
6821   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
6822   attributes.set_is_evex_instruction();
6823   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
6824   emit_int8(0x44);
6825   emit_int8((unsigned char)(0xC0 | encode));
6826   emit_int8((unsigned char)mask);
6827 }
6828 
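     // Zeroes the upper 128 bits of all ymm (and zmm) registers, avoiding the
     // penalty of an AVX-to-legacy-SSE transition; nothing is emitted on CPUs
     // that do not support it.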
6829 void Assembler::vzeroupper() {
6830   if (VM_Version::supports_vzeroupper()) {
6831     InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
6832     (void)vex_prefix_and_encode(0, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
6833     emit_int8(0x77);
6834   }
6835 }
6836 
6837 #ifndef _LP64
6838 // 32bit only pieces of the assembler
6839 
6840 void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) {
6841   // NO PREFIX AS NEVER 64BIT
6842   InstructionMark im(this);
6843   emit_int8((unsigned char)0x81);
6844   emit_int8((unsigned char)(0xF8 | src1->encoding()));
6845   emit_data(imm32, rspec, 0);
6846 }
6847 
6848 void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) {
6849   // NO PREFIX AS NEVER 64BIT (not even for 32-bit views of 64-bit registers)
6850   InstructionMark im(this);
6851   emit_int8((unsigned char)0x81);
6852   emit_operand(rdi, src1);
6853   emit_data(imm32, rspec, 0);
6854 }
6855 
6856 // The 64-bit cmpxchg (cmpxchg8b, used on 32-bit platforms) compares the value at adr
6857 // with the contents of rdx:rax and, if they are equal, stores rcx:rbx into adr;
6858 // otherwise, the value at adr is loaded into rdx:rax.  ZF is set iff the compared values were equal.
6859 void Assembler::cmpxchg8(Address adr) {
6860   InstructionMark im(this);
6861   emit_int8(0x0F);
6862   emit_int8((unsigned char)0xC7);
6863   emit_operand(rcx, adr);
6864 }
6865 
6866 void Assembler::decl(Register dst) {
6867   // Don't use it directly. Use MacroAssembler::decrementl() instead.
6868   emit_int8(0x48 | dst->encoding());
6869 }
6870 
6871 #endif // _LP64
6872 
6873 // 64bit typically doesn't use the x87 but needs to for the trig funcs
6874 
6875 void Assembler::fabs() {
6876   emit_int8((unsigned char)0xD9);
6877   emit_int8((unsigned char)0xE1);
6878 }
6879 
6880 void Assembler::fadd(int i) {
6881   emit_farith(0xD8, 0xC0, i);
6882 }
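
     // The register forms of the x87 arithmetic ops share one pattern:
     // emit_farith(b1, b2, i) emits the two bytes b1 and b2 + i, so fadd(i)
     // above produces D8 C0+i, i.e. FADD ST(0), ST(i).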
6883 
6884 void Assembler::fadd_d(Address src) {
6885   InstructionMark im(this);
6886   emit_int8((unsigned char)0xDC);
6887   emit_operand32(rax, src);
6888 }
6889 
6890 void Assembler::fadd_s(Address src) {
6891   InstructionMark im(this);
6892   emit_int8((unsigned char)0xD8);
6893   emit_operand32(rax, src);
6894 }
6895 
6896 void Assembler::fadda(int i) {
6897   emit_farith(0xDC, 0xC0, i);
6898 }
6899 
6900 void Assembler::faddp(int i) {
6901   emit_farith(0xDE, 0xC0, i);
6902 }
6903 
6904 void Assembler::fchs() {
6905   emit_int8((unsigned char)0xD9);
6906   emit_int8((unsigned char)0xE0);
6907 }
6908 
6909 void Assembler::fcom(int i) {
6910   emit_farith(0xD8, 0xD0, i);
6911 }
6912 
6913 void Assembler::fcomp(int i) {
6914   emit_farith(0xD8, 0xD8, i);
6915 }
6916 
6917 void Assembler::fcomp_d(Address src) {
6918   InstructionMark im(this);
6919   emit_int8((unsigned char)0xDC);
6920   emit_operand32(rbx, src);
6921 }
6922 
6923 void Assembler::fcomp_s(Address src) {
6924   InstructionMark im(this);
6925   emit_int8((unsigned char)0xD8);
6926   emit_operand32(rbx, src);
6927 }
6928 
6929 void Assembler::fcompp() {
6930   emit_int8((unsigned char)0xDE);
6931   emit_int8((unsigned char)0xD9);
6932 }
6933 
6934 void Assembler::fcos() {
6935   emit_int8((unsigned char)0xD9);
6936   emit_int8((unsigned char)0xFF);
6937 }
6938 
6939 void Assembler::fdecstp() {
6940   emit_int8((unsigned char)0xD9);
6941   emit_int8((unsigned char)0xF6);
6942 }
6943 
6944 void Assembler::fdiv(int i) {
6945   emit_farith(0xD8, 0xF0, i);
6946 }
6947 
6948 void Assembler::fdiv_d(Address src) {
6949   InstructionMark im(this);
6950   emit_int8((unsigned char)0xDC);
6951   emit_operand32(rsi, src);
6952 }
6953 
6954 void Assembler::fdiv_s(Address src) {
6955   InstructionMark im(this);
6956   emit_int8((unsigned char)0xD8);
6957   emit_operand32(rsi, src);
6958 }
6959 
6960 void Assembler::fdiva(int i) {
6961   emit_farith(0xDC, 0xF8, i);
6962 }
6963 
6964 // Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994)
6965 //       is erroneous for some of the floating-point instructions below.
6966 
6967 void Assembler::fdivp(int i) {
6968   emit_farith(0xDE, 0xF8, i);                    // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong)
6969 }
6970 
6971 void Assembler::fdivr(int i) {
6972   emit_farith(0xD8, 0xF8, i);
6973 }
6974 
6975 void Assembler::fdivr_d(Address src) {
6976   InstructionMark im(this);
6977   emit_int8((unsigned char)0xDC);
6978   emit_operand32(rdi, src);
6979 }
6980 
6981 void Assembler::fdivr_s(Address src) {
6982   InstructionMark im(this);
6983   emit_int8((unsigned char)0xD8);
6984   emit_operand32(rdi, src);
6985 }
6986 
6987 void Assembler::fdivra(int i) {
6988   emit_farith(0xDC, 0xF0, i);
6989 }
6990 
6991 void Assembler::fdivrp(int i) {
6992   emit_farith(0xDE, 0xF0, i);                    // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong)
6993 }
6994 
6995 void Assembler::ffree(int i) {
6996   emit_farith(0xDD, 0xC0, i);
6997 }
6998 
6999 void Assembler::fild_d(Address adr) {
7000   InstructionMark im(this);
7001   emit_int8((unsigned char)0xDF);
7002   emit_operand32(rbp, adr);
7003 }
7004 
7005 void Assembler::fild_s(Address adr) {
7006   InstructionMark im(this);
7007   emit_int8((unsigned char)0xDB);
7008   emit_operand32(rax, adr);
7009 }
7010 
7011 void Assembler::fincstp() {
7012   emit_int8((unsigned char)0xD9);
7013   emit_int8((unsigned char)0xF7);
7014 }
7015 
7016 void Assembler::finit() {
7017   emit_int8((unsigned char)0x9B);
7018   emit_int8((unsigned char)0xDB);
7019   emit_int8((unsigned char)0xE3);
7020 }
7021 
7022 void Assembler::fist_s(Address adr) {
7023   InstructionMark im(this);
7024   emit_int8((unsigned char)0xDB);
7025   emit_operand32(rdx, adr);
7026 }
7027 
7028 void Assembler::fistp_d(Address adr) {
7029   InstructionMark im(this);
7030   emit_int8((unsigned char)0xDF);
7031   emit_operand32(rdi, adr);
7032 }
7033 
7034 void Assembler::fistp_s(Address adr) {
7035   InstructionMark im(this);
7036   emit_int8((unsigned char)0xDB);
7037   emit_operand32(rbx, adr);
7038 }
7039 
7040 void Assembler::fld1() {
7041   emit_int8((unsigned char)0xD9);
7042   emit_int8((unsigned char)0xE8);
7043 }
7044 
7045 void Assembler::fld_d(Address adr) {
7046   InstructionMark im(this);
7047   emit_int8((unsigned char)0xDD);
7048   emit_operand32(rax, adr);
7049 }
7050 
7051 void Assembler::fld_s(Address adr) {
7052   InstructionMark im(this);
7053   emit_int8((unsigned char)0xD9);
7054   emit_operand32(rax, adr);
7055 }
7056 
7057 
7058 void Assembler::fld_s(int index) {
7059   emit_farith(0xD9, 0xC0, index);
7060 }
7061 
7062 void Assembler::fld_x(Address adr) {
7063   InstructionMark im(this);
7064   emit_int8((unsigned char)0xDB);
7065   emit_operand32(rbp, adr);
7066 }
7067 
7068 void Assembler::fldcw(Address src) {
7069   InstructionMark im(this);
7070   emit_int8((unsigned char)0xD9);
7071   emit_operand32(rbp, src);
7072 }
7073 
7074 void Assembler::fldenv(Address src) {
7075   InstructionMark im(this);
7076   emit_int8((unsigned char)0xD9);
7077   emit_operand32(rsp, src);
7078 }
7079 
7080 void Assembler::fldlg2() {
7081   emit_int8((unsigned char)0xD9);
7082   emit_int8((unsigned char)0xEC);
7083 }
7084 
7085 void Assembler::fldln2() {
7086   emit_int8((unsigned char)0xD9);
7087   emit_int8((unsigned char)0xED);
7088 }
7089 
7090 void Assembler::fldz() {
7091   emit_int8((unsigned char)0xD9);
7092   emit_int8((unsigned char)0xEE);
7093 }
7094 
7095 void Assembler::flog() {
7096   fldln2();
7097   fxch();
7098   fyl2x();
7099 }
7100 
7101 void Assembler::flog10() {
7102   fldlg2();
7103   fxch();
7104   fyl2x();
7105 }
7106 
7107 void Assembler::fmul(int i) {
7108   emit_farith(0xD8, 0xC8, i);
7109 }
7110 
7111 void Assembler::fmul_d(Address src) {
7112   InstructionMark im(this);
7113   emit_int8((unsigned char)0xDC);
7114   emit_operand32(rcx, src);
7115 }
7116 
7117 void Assembler::fmul_s(Address src) {
7118   InstructionMark im(this);
7119   emit_int8((unsigned char)0xD8);
7120   emit_operand32(rcx, src);
7121 }
7122 
7123 void Assembler::fmula(int i) {
7124   emit_farith(0xDC, 0xC8, i);
7125 }
7126 
7127 void Assembler::fmulp(int i) {
7128   emit_farith(0xDE, 0xC8, i);
7129 }
7130 
7131 void Assembler::fnsave(Address dst) {
7132   InstructionMark im(this);
7133   emit_int8((unsigned char)0xDD);
7134   emit_operand32(rsi, dst);
7135 }
7136 
7137 void Assembler::fnstcw(Address src) {
7138   InstructionMark im(this);
7139   emit_int8((unsigned char)0x9B);
7140   emit_int8((unsigned char)0xD9);
7141   emit_operand32(rdi, src);
7142 }
7143 
7144 void Assembler::fnstsw_ax() {
7145   emit_int8((unsigned char)0xDF);
7146   emit_int8((unsigned char)0xE0);
7147 }
7148 
7149 void Assembler::fprem() {
7150   emit_int8((unsigned char)0xD9);
7151   emit_int8((unsigned char)0xF8);
7152 }
7153 
7154 void Assembler::fprem1() {
7155   emit_int8((unsigned char)0xD9);
7156   emit_int8((unsigned char)0xF5);
7157 }
7158 
7159 void Assembler::frstor(Address src) {
7160   InstructionMark im(this);
7161   emit_int8((unsigned char)0xDD);
7162   emit_operand32(rsp, src);
7163 }
7164 
7165 void Assembler::fsin() {
7166   emit_int8((unsigned char)0xD9);
7167   emit_int8((unsigned char)0xFE);
7168 }
7169 
7170 void Assembler::fsqrt() {
7171   emit_int8((unsigned char)0xD9);
7172   emit_int8((unsigned char)0xFA);
7173 }
7174 
7175 void Assembler::fst_d(Address adr) {
7176   InstructionMark im(this);
7177   emit_int8((unsigned char)0xDD);
7178   emit_operand32(rdx, adr);
7179 }
7180 
7181 void Assembler::fst_s(Address adr) {
7182   InstructionMark im(this);
7183   emit_int8((unsigned char)0xD9);
7184   emit_operand32(rdx, adr);
7185 }
7186 
7187 void Assembler::fstp_d(Address adr) {
7188   InstructionMark im(this);
7189   emit_int8((unsigned char)0xDD);
7190   emit_operand32(rbx, adr);
7191 }
7192 
7193 void Assembler::fstp_d(int index) {
7194   emit_farith(0xDD, 0xD8, index);
7195 }
7196 
7197 void Assembler::fstp_s(Address adr) {
7198   InstructionMark im(this);
7199   emit_int8((unsigned char)0xD9);
7200   emit_operand32(rbx, adr);
7201 }
7202 
7203 void Assembler::fstp_x(Address adr) {
7204   InstructionMark im(this);
7205   emit_int8((unsigned char)0xDB);
7206   emit_operand32(rdi, adr);
7207 }
7208 
7209 void Assembler::fsub(int i) {
7210   emit_farith(0xD8, 0xE0, i);
7211 }
7212 
7213 void Assembler::fsub_d(Address src) {
7214   InstructionMark im(this);
7215   emit_int8((unsigned char)0xDC);
7216   emit_operand32(rsp, src);
7217 }
7218 
7219 void Assembler::fsub_s(Address src) {
7220   InstructionMark im(this);
7221   emit_int8((unsigned char)0xD8);
7222   emit_operand32(rsp, src);
7223 }
7224 
7225 void Assembler::fsuba(int i) {
7226   emit_farith(0xDC, 0xE8, i);
7227 }
7228 
7229 void Assembler::fsubp(int i) {
7230   emit_farith(0xDE, 0xE8, i);                    // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong)
7231 }
7232 
7233 void Assembler::fsubr(int i) {
7234   emit_farith(0xD8, 0xE8, i);
7235 }
7236 
7237 void Assembler::fsubr_d(Address src) {
7238   InstructionMark im(this);
7239   emit_int8((unsigned char)0xDC);
7240   emit_operand32(rbp, src);
7241 }
7242 
7243 void Assembler::fsubr_s(Address src) {
7244   InstructionMark im(this);
7245   emit_int8((unsigned char)0xD8);
7246   emit_operand32(rbp, src);
7247 }
7248 
7249 void Assembler::fsubra(int i) {
7250   emit_farith(0xDC, 0xE0, i);
7251 }
7252 
7253 void Assembler::fsubrp(int i) {
7254   emit_farith(0xDE, 0xE0, i);                    // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong)
7255 }
7256 
7257 void Assembler::ftan() {
7258   emit_int8((unsigned char)0xD9);
7259   emit_int8((unsigned char)0xF2);
7260   emit_int8((unsigned char)0xDD);
7261   emit_int8((unsigned char)0xD8);
7262 }
7263 
7264 void Assembler::ftst() {
7265   emit_int8((unsigned char)0xD9);
7266   emit_int8((unsigned char)0xE4);
7267 }
7268 
7269 void Assembler::fucomi(int i) {
7270   // make sure the instruction is supported (introduced for P6, together with cmov)
7271   guarantee(VM_Version::supports_cmov(), "illegal instruction");
7272   emit_farith(0xDB, 0xE8, i);
7273 }
7274 
7275 void Assembler::fucomip(int i) {
7276   // make sure the instruction is supported (introduced for P6, together with cmov)
7277   guarantee(VM_Version::supports_cmov(), "illegal instruction");
7278   emit_farith(0xDF, 0xE8, i);
7279 }
7280 
7281 void Assembler::fwait() {
7282   emit_int8((unsigned char)0x9B);
7283 }
7284 
7285 void Assembler::fxch(int i) {
7286   emit_farith(0xD9, 0xC8, i);
7287 }
7288 
7289 void Assembler::fyl2x() {
7290   emit_int8((unsigned char)0xD9);
7291   emit_int8((unsigned char)0xF1);
7292 }
7293 
7294 void Assembler::frndint() {
7295   emit_int8((unsigned char)0xD9);
7296   emit_int8((unsigned char)0xFC);
7297 }
7298 
7299 void Assembler::f2xm1() {
7300   emit_int8((unsigned char)0xD9);
7301   emit_int8((unsigned char)0xF0);
7302 }
7303 
7304 void Assembler::fldl2e() {
7305   emit_int8((unsigned char)0xD9);
7306   emit_int8((unsigned char)0xEA);
7307 }
7308 
7309 // SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
7310 static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };
7311 // SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding.
7312 static int simd_opc[4] = { 0,    0, 0x38, 0x3A };
7313 
7314 // Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding.
7315 void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
7316   if (pre > 0) {
7317     emit_int8(simd_pre[pre]);
7318   }
7319   if (rex_w) {
7320     prefixq(adr, xreg);
7321   } else {
7322     prefix(adr, xreg);
7323   }
7324   if (opc > 0) {
7325     emit_int8(0x0F);
7326     int opc2 = simd_opc[opc];
7327     if (opc2 > 0) {
7328       emit_int8(opc2);
7329     }
7330   }
7331 }
7332 
7333 int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) {
7334   if (pre > 0) {
7335     emit_int8(simd_pre[pre]);
7336   }
7337   int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) : prefix_and_encode(dst_enc, src_enc);
7338   if (opc > 0) {
7339     emit_int8(0x0F);
7340     int opc2 = simd_opc[opc];
7341     if (opc2 > 0) {
7342       emit_int8(opc2);
7343     }
7344   }
7345   return encode;
7346 }
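
     // Both helpers above produce the legacy SSE shape of a VEX-described instruction:
     //   [66|F3|F2 SIMD prefix] [REX/REX.W] 0F [38|3A] opcode ...
     // The mandatory SIMD prefix must precede REX, and REX must immediately precede
     // the 0F escape byte(s), which is exactly the order emitted here.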
7347 
7348 
7349 void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc) {
7350   int vector_len = _attributes->get_vector_len();
7351   bool vex_w = _attributes->is_rex_vex_w();
7352   if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
7353     prefix(VEX_3bytes);
7354 
7355     int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0);
7356     byte1 = (~byte1) & 0xE0;
7357     byte1 |= opc;
7358     emit_int8(byte1);
7359 
7360     int byte2 = ((~nds_enc) & 0xf) << 3;
7361     byte2 |= (vex_w ? VEX_W : 0) | ((vector_len > 0) ? 4 : 0) | pre;
7362     emit_int8(byte2);
7363   } else {
7364     prefix(VEX_2bytes);
7365 
7366     int byte1 = vex_r ? VEX_R : 0;
7367     byte1 = (~byte1) & 0x80;
7368     byte1 |= ((~nds_enc) & 0xf) << 3;
7369     byte1 |= ((vector_len > 0 ) ? 4 : 0) | pre;
7370     emit_int8(byte1);
7371   }
7372 }
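
     // Encoding sketch: the two-byte form applies when B, X and W are all clear and
     // the opcode lives in the 0F map.  For example, vaddps xmm0, xmm1, xmm2 becomes
     //   C5 F0 58 C2    (byte1 = ~R | (~vvvv & 0xf) << 3 | L | pp = 0xF0)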
7373 
7374 // This is a 4 byte encoding
7375 void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v, int nds_enc, VexSimdPrefix pre, VexOpcode opc){
7376   // EVEX 0x62 prefix
7377   prefix(EVEX_4bytes);
7378   bool vex_w = _attributes->is_rex_vex_w();
7379   int evex_encoding = (vex_w ? VEX_W : 0);
7380   // EVEX.b is not currently used for broadcast of single element or data rounding modes
7381   _attributes->set_evex_encoding(evex_encoding);
7382 
7383   // P0: byte 2, laid out as RXBR`00mm
7384   // (the R, X, B and R` bits are stored complemented, hence the ~ below)
7385   int byte2 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0) | (evex_r ? EVEX_Rb : 0);
7386   byte2 = (~byte2) & 0xF0;
7387   // confine opc opcode extensions in mm bits to lower two bits
7388   // of form {0F, 0F_38, 0F_3A}
7389   byte2 |= opc;
7390   emit_int8(byte2);
7391 
7392   // P1: byte 3 as Wvvvv1pp
7393   int byte3 = ((~nds_enc) & 0xf) << 3;
7394   // p[10] is always 1
7395   byte3 |= EVEX_F;
7396   byte3 |= (vex_w & 1) << 7;
7397   // confine pre opcode extensions in pp bits to lower two bits
7398   // of form {66, F3, F2}
7399   byte3 |= pre;
7400   emit_int8(byte3);
7401 
7402   // P2: byte 4 as zL'Lbv'aaa
7403   // kregs are implemented in the low 3 bits as aaa (hard code k1, it will be initialized for now)
7404   int byte4 = (_attributes->is_no_reg_mask()) ?
7405               0 :
7406               _attributes->get_embedded_opmask_register_specifier();
7407   // EVEX.v` for extending EVEX.vvvv or VIDX
7408   byte4 |= (evex_v ? 0: EVEX_V);
7409   // third is EVEX.b for broadcast actions
7410   byte4 |= (_attributes->is_extended_context() ? EVEX_Rb : 0);
7411   // fourth EVEX.L'L for vector length : 0 is 128, 1 is 256, 2 is 512, currently we do not support 1024
7412   byte4 |= ((_attributes->get_vector_len())& 0x3) << 5;
7413   // last is EVEX.z for zero/merge actions
7414   if (_attributes->is_no_reg_mask() == false) {
7415     byte4 |= (_attributes->is_clear_context() ? EVEX_Z : 0);
7416   }
7417   emit_int8(byte4);
7418 }
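
     // Encoding sketch: for evpxorq(zmm0, zmm1, zmm2, AVX_512bit) with no mask
     // register the prefix comes out as
     //   62 F1 F5 48    (P0: R/X/B/R` complemented, mm = 01; P1: W = 1, vvvv = ~1,
     //                   pp = 01; P2: L'L = 10 for 512-bit, V` = 1, aaa = 000)
     // followed by EF C2 for the opcode and ModRM bytes.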
7419 
7420 void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) {
7421   bool vex_r = ((xreg_enc & 8) == 8) ? 1 : 0;
7422   bool vex_b = adr.base_needs_rex();
7423   bool vex_x = adr.index_needs_rex();
7424   set_attributes(attributes);
7425   attributes->set_current_assembler(this);
7426 
7427   // if vector length is turned off, revert to AVX for vectors smaller than 512-bit
7428   if (UseAVX > 2 && _legacy_mode_vl && attributes->uses_vl()) {
7429     switch (attributes->get_vector_len()) {
7430     case AVX_128bit:
7431     case AVX_256bit:
7432       attributes->set_is_legacy_mode();
7433       break;
7434     }
7435   }
7436 
7437   // For pure EVEX, check whether this instruction is allowed in legacy mode
7438   // and whether its register resources fit there.  Pure EVEX instructions
7439   // call set_is_evex_instruction in their definition; otherwise that field
7440   // is set when we encode to EVEX below.
7441   if (UseAVX > 2 && !attributes->is_legacy_mode() &&
7442       !_is_managed && !attributes->is_evex_instruction()) {
7443     if (!_legacy_mode_vl && attributes->get_vector_len() != AVX_512bit) {
7444       bool check_register_bank = NOT_IA32(true) IA32_ONLY(false);
7445       if (check_register_bank) {
7446         // check nds_enc and xreg_enc for upper bank usage
7447         if (nds_enc < 16 && xreg_enc < 16) {
7448           attributes->set_is_legacy_mode();
7449         }
7450       } else {
7451         attributes->set_is_legacy_mode();
7452       }
7453     }
7454   }
7455 
7456   _is_managed = false;
7457   if (UseAVX > 2 && !attributes->is_legacy_mode()) {
7459     bool evex_r = (xreg_enc >= 16);
7460     bool evex_v = (nds_enc >= 16);
7461     attributes->set_is_evex_instruction();
7462     evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
7463   } else {
7464     if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
7465       attributes->set_rex_vex_w(false);
7466     }
7467     vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
7468   }
7469 }
7470 
7471 int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) {
7472   bool vex_r = ((dst_enc & 8) == 8);
7473   bool vex_b = ((src_enc & 8) == 8);
7474   bool vex_x = false;
7475   set_attributes(attributes);
7476   attributes->set_current_assembler(this);
7477   bool check_register_bank = NOT_IA32(true) IA32_ONLY(false);
7478 
7479   // if AVX512VL is unavailable (vl turned off), revert to AVX for vectors smaller than 512-bit
7480   if (UseAVX > 2 && _legacy_mode_vl && attributes->uses_vl()) {
7481     switch (attributes->get_vector_len()) {
7482     case AVX_128bit:
7483     case AVX_256bit:
7484       if (check_register_bank) {
7485         if (dst_enc >= 16 || nds_enc >= 16 || src_enc >= 16) {
7486           // promote to 512-bit: an upper-bank register chosen by the register allocator requires EVEX
7487           attributes->set_vector_len(AVX_512bit);
7488         } else {
7489           attributes->set_is_legacy_mode();
7490         }
7491       } else {
7492         attributes->set_is_legacy_mode();
7493       }
7494       break;
7495     }
7496   }
7497 
7498   // For pure EVEX, check whether this instruction is allowed in legacy mode
7499   // and whether its resources will fit there.  Pure EVEX instructions call
7500   // set_is_evex_instruction() in their definition; otherwise that field is
7501   // set when we encode to EVEX below
7502   if (UseAVX > 2 && !attributes->is_legacy_mode() &&
7503       !_is_managed && !attributes->is_evex_instruction()) {
7504     if (!_legacy_mode_vl && attributes->get_vector_len() != AVX_512bit) {
7505       if (check_register_bank) {
7506         // check dst_enc, nds_enc and src_enc for upper bank usage
7507         if (dst_enc < 16 && nds_enc < 16 && src_enc < 16) {
7508           attributes->set_is_legacy_mode();
7509         }
7510       } else {
7511         attributes->set_is_legacy_mode();
7512       }
7513     }
7514   }
7515 
7516   _is_managed = false;
7517   if (UseAVX > 2 && !attributes->is_legacy_mode()) {
7519     bool evex_r = (dst_enc >= 16);
7520     bool evex_v = (nds_enc >= 16);
7521     // can use vex_x as bank extender on rm encoding
7522     vex_x = (src_enc >= 16);
7523     attributes->set_is_evex_instruction();
7524     evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
7525   } else {
7526     if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
7527       attributes->set_rex_vex_w(false);
7528     }
7529     vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
7530   }
7531 
7532   // return modrm byte components for operands
7533   return (((dst_enc & 7) << 3) | (src_enc & 7));
7534 }
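
     // The encode value returned above supplies the reg and r/m fields of a
     // ModRM byte; callers emitting register-direct forms OR it with 0xC0
     // (mod = 11), e.g. emit_int8((unsigned char)(0xC0 | encode)).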
7535 
7536 
7537 void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
7538                             VexOpcode opc, InstructionAttr *attributes) {
7539   if (UseAVX > 0) {
7540     int xreg_enc = xreg->encoding();
7541     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
7542     vex_prefix(adr, nds_enc, xreg_enc, pre, opc, attributes);
7543   } else {
7544     assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
7545     rex_prefix(adr, xreg, pre, opc, attributes->is_rex_vex_w());
7546   }
7547 }
7548 
7549 int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
7550                                       VexOpcode opc, InstructionAttr *attributes) {
7551   int dst_enc = dst->encoding();
7552   int src_enc = src->encoding();
7553   if (UseAVX > 0) {
7554     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
7555     return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, attributes);
7556   } else {
7557     assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
7558     return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, attributes->is_rex_vex_w());
7559   }
7560 }
7561 
7562 void Assembler::cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
7563   assert(VM_Version::supports_avx(), "");
7564   assert(!VM_Version::supports_evex(), "");
7565   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7566   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
7567   emit_int8((unsigned char)0xC2);
7568   emit_int8((unsigned char)(0xC0 | encode));
7569   emit_int8((unsigned char)(0xF & cop));
7570 }
7571 
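     // The AVX 4-operand blend forms below (blendvpd/blendvps) encode the
     // fourth register operand (src2) in bits 7:4 of a trailing immediate
     // byte -- the SDM's "/is4" encoding; the low nibble is left as zero.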
7572 void Assembler::blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
7573   assert(VM_Version::supports_avx(), "");
7574   assert(!VM_Version::supports_evex(), "");
7575   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7576   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7577   emit_int8((unsigned char)0x4B);
7578   emit_int8((unsigned char)(0xC0 | encode));
7579   int src2_enc = src2->encoding();
7580   emit_int8((unsigned char)(0xF0 & (src2_enc << 4)));
7581 }
7582 
7583 void Assembler::cmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
7584   assert(VM_Version::supports_avx(), "");
7585   assert(!VM_Version::supports_evex(), "");
7586   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7587   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
7588   emit_int8((unsigned char)0xC2);
7589   emit_int8((unsigned char)(0xC0 | encode));
7590   emit_int8((unsigned char)(0xF & cop));
7591 }
7592 
7593 void Assembler::blendvps(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
7594   assert(VM_Version::supports_avx(), "");
7595   assert(!VM_Version::supports_evex(), "");
7596   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7597   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7598   emit_int8((unsigned char)0x4A);
7599   emit_int8((unsigned char)(0xC0 | encode));
7600   int src2_enc = src2->encoding();
7601   emit_int8((unsigned char)(0xF0 & (src2_enc << 4)));
7602 }
7603 
7604 void Assembler::vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
7605   assert(VM_Version::supports_avx2(), "");
7606   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
7607   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
7608   emit_int8((unsigned char)0x02);
7609   emit_int8((unsigned char)(0xC0 | encode));
7610   emit_int8((unsigned char)imm8);
7611 }
7612 
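     // BMI2 SHLX below is VEX-encoded: the shift count (src2) travels in the
     // VEX.vvvv field, which is why it is passed as the nds operand here.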
7613 void Assembler::shlxl(Register dst, Register src1, Register src2) {
7614   assert(VM_Version::supports_bmi2(), "");
7615   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
7616   int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7617   emit_int8((unsigned char)0xF7);
7618   emit_int8((unsigned char)(0xC0 | encode));
7619 }
7620 
7621 void Assembler::shlxq(Register dst, Register src1, Register src2) {
7622   assert(VM_Version::supports_bmi2(), "");
7623   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
7624   int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
7625   emit_int8((unsigned char)0xF7);
7626   emit_int8((unsigned char)(0xC0 | encode));
7627 }
7628 
7629 #ifndef _LP64
7630 
7631 void Assembler::incl(Register dst) {
7632   // Don't use it directly. Use MacroAssembler::incrementl() instead.
7633   emit_int8(0x40 | dst->encoding());
7634 }
7635 
7636 void Assembler::lea(Register dst, Address src) {
7637   leal(dst, src);
7638 }
7639 
7640 void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) {
7641   InstructionMark im(this);
7642   emit_int8((unsigned char)0xC7);
7643   emit_operand(rax, dst);
7644   emit_data((int)imm32, rspec, 0);
7645 }
7646 
7647 void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
7648   InstructionMark im(this);
7649   int encode = prefix_and_encode(dst->encoding());
7650   emit_int8((unsigned char)(0xB8 | encode));
7651   emit_data((int)imm32, rspec, 0);
7652 }
7653 
7654 void Assembler::popa() { // 32bit
7655   emit_int8(0x61);
7656 }
7657 
7658 void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) {
7659   InstructionMark im(this);
7660   emit_int8(0x68);
7661   emit_data(imm32, rspec, 0);
7662 }
7663 
7664 void Assembler::pusha() { // 32bit
7665   emit_int8(0x60);
7666 }
7667 
7668 void Assembler::set_byte_if_not_zero(Register dst) {
7669   emit_int8(0x0F);
7670   emit_int8((unsigned char)0x95);
7671   emit_int8((unsigned char)(0xE0 | dst->encoding()));
7672 }
7673 
7674 void Assembler::shldl(Register dst, Register src) {
7675   emit_int8(0x0F);
7676   emit_int8((unsigned char)0xA5);
7677   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
7678 }
7679 
7680 // 0F A4 / r ib
7681 void Assembler::shldl(Register dst, Register src, int8_t imm8) {
7682   emit_int8(0x0F);
7683   emit_int8((unsigned char)0xA4);
7684   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
7685   emit_int8(imm8);
7686 }
7687 
7688 void Assembler::shrdl(Register dst, Register src) {
7689   emit_int8(0x0F);
7690   emit_int8((unsigned char)0xAD);
7691   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
7692 }
7693 
7694 #else // LP64
7695 
7696 void Assembler::set_byte_if_not_zero(Register dst) {
7697   int enc = prefix_and_encode(dst->encoding(), true);
7698   emit_int8(0x0F);
7699   emit_int8((unsigned char)0x95);
7700   emit_int8((unsigned char)(0xE0 | enc));
7701 }
7702 
7703 // 64bit only pieces of the assembler.
7704 // These should only be used by 64bit instructions that can use rip-relative
7705 // addressing; they cannot be used by instructions that want an immediate value.
7706 
7707 bool Assembler::reachable(AddressLiteral adr) {
7708   int64_t disp;
7709   // A reloc of none will force a 64bit literal to the code stream. It is likely a
7710   // placeholder for something that will be patched later, and we need to be certain
7711   // it will always be reachable.
7712   if (adr.reloc() == relocInfo::none) {
7713     return false;
7714   }
7715   if (adr.reloc() == relocInfo::internal_word_type) {
7716     // This should be rip relative and easily reachable.
7717     return true;
7718   }
7719   if (adr.reloc() == relocInfo::virtual_call_type ||
7720       adr.reloc() == relocInfo::opt_virtual_call_type ||
7721       adr.reloc() == relocInfo::static_call_type ||
7722       adr.reloc() == relocInfo::static_stub_type ) {
7723     // This should be rip relative within the code cache and easily
7724     // reachable until we get huge code caches. (At which point
7725     // ic code is going to have issues).
7726     return true;
7727   }
7728   if (adr.reloc() != relocInfo::external_word_type &&
7729       adr.reloc() != relocInfo::poll_return_type &&  // these are really external_word but need special
7730       adr.reloc() != relocInfo::poll_type &&         // relocs to identify them
7731       adr.reloc() != relocInfo::runtime_call_type ) {
7732     return false;
7733   }
7734 
7735   // Stress the correction code
7736   if (ForceUnreachable) {
7737     // Must be a runtime_call reloc; see if the target is in the code cache
7738     // Flipping stuff in the codecache to be unreachable causes issues
7739     // with things like inline caches where the additional instructions
7740     // are not handled.
7741     if (CodeCache::find_blob(adr._target) == NULL) {
7742       return false;
7743     }
7744   }
7745   // For external_word_type/runtime_call_type, if the target is reachable both from
7746   // where we are now (possibly a temp buffer) and from anywhere we might end up in
7747   // the codeCache, then we are always reachable.
7748   // This would have to become more pessimistic if we ever save/restore
7749   // shared code.
7750   disp = (int64_t)adr._target - ((int64_t)CodeCache::low_bound() + sizeof(int));
7751   if (!is_simm32(disp)) return false;
7752   disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int));
7753   if (!is_simm32(disp)) return false;
7754 
7755   disp = (int64_t)adr._target - ((int64_t)pc() + sizeof(int));
7756 
7757   // Because rip relative is a disp + address_of_next_instruction and we
7758   // don't know the value of address_of_next_instruction we apply a fudge factor
7759   // to make sure we will be ok no matter the size of the instruction we get placed into.
7760   // We don't have to fudge the checks above here because they are already worst case.
7761 
7762   // 12 == override/rex byte, opcode byte, rm byte, sib byte, a 4-byte disp, a 4-byte literal
7763   // + 4 because better safe than sorry.
7764   const int fudge = 12 + 4;
7765   if (disp < 0) {
7766     disp -= fudge;
7767   } else {
7768     disp += fudge;
7769   }
7770   return is_simm32(disp);
7771 }
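
     // Worked example of the fudge: if disp computes to 0x7FFFFFF0, adding
     // the 16-byte fudge yields 0x80000000, which no longer fits in a signed
     // 32-bit displacement, so the target is conservatively reported as
     // unreachable and callers fall back to a 64-bit literal.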
7772 
7773 // Returns true if the polling page is not reachable from the code cache
7774 // using rip-relative addressing.
7775 bool Assembler::is_polling_page_far() {
7776   intptr_t addr = (intptr_t)os::get_polling_page();
7777   return ForceUnreachable ||
7778          !is_simm32(addr - (intptr_t)CodeCache::low_bound()) ||
7779          !is_simm32(addr - (intptr_t)CodeCache::high_bound());
7780 }
7781 
7782 void Assembler::emit_data64(jlong data,
7783                             relocInfo::relocType rtype,
7784                             int format) {
7785   if (rtype == relocInfo::none) {
7786     emit_int64(data);
7787   } else {
7788     emit_data64(data, Relocation::spec_simple(rtype), format);
7789   }
7790 }
7791 
7792 void Assembler::emit_data64(jlong data,
7793                             RelocationHolder const& rspec,
7794                             int format) {
7795   assert(imm_operand == 0, "default format must be immediate in this file");
7796   assert(imm_operand == format, "must be immediate");
7797   assert(inst_mark() != NULL, "must be inside InstructionMark");
7798   // Do not use AbstractAssembler::relocate, which is not intended for
7799   // embedded words.  Instead, relocate to the enclosing instruction.
7800   code_section()->relocate(inst_mark(), rspec, format);
7801 #ifdef ASSERT
7802   check_relocation(rspec, format);
7803 #endif
7804   emit_int64(data);
7805 }
7806 
7807 int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
7808   if (reg_enc >= 8) {
7809     prefix(REX_B);
7810     reg_enc -= 8;
7811   } else if (byteinst && reg_enc >= 4) {
7812     prefix(REX);
7813   }
7814   return reg_enc;
7815 }
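
     // Note on the byteinst case above: without a REX prefix, register
     // encodings 4..7 in a byte instruction name AH/CH/DH/BH; emitting the
     // empty REX makes them address SPL/BPL/SIL/DIL instead.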
7816 
7817 int Assembler::prefixq_and_encode(int reg_enc) {
7818   if (reg_enc < 8) {
7819     prefix(REX_W);
7820   } else {
7821     prefix(REX_WB);
7822     reg_enc -= 8;
7823   }
7824   return reg_enc;
7825 }
7826 
7827 int Assembler::prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte) {
7828   if (dst_enc < 8) {
7829     if (src_enc >= 8) {
7830       prefix(REX_B);
7831       src_enc -= 8;
7832     } else if ((src_is_byte && src_enc >= 4) || (dst_is_byte && dst_enc >= 4)) {
7833       prefix(REX);
7834     }
7835   } else {
7836     if (src_enc < 8) {
7837       prefix(REX_R);
7838     } else {
7839       prefix(REX_RB);
7840       src_enc -= 8;
7841     }
7842     dst_enc -= 8;
7843   }
7844   return dst_enc << 3 | src_enc;
7845 }
7846 
7847 int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
7848   if (dst_enc < 8) {
7849     if (src_enc < 8) {
7850       prefix(REX_W);
7851     } else {
7852       prefix(REX_WB);
7853       src_enc -= 8;
7854     }
7855   } else {
7856     if (src_enc < 8) {
7857       prefix(REX_WR);
7858     } else {
7859       prefix(REX_WRB);
7860       src_enc -= 8;
7861     }
7862     dst_enc -= 8;
7863   }
7864   return dst_enc << 3 | src_enc;
7865 }
7866 
7867 void Assembler::prefix(Register reg) {
7868   if (reg->encoding() >= 8) {
7869     prefix(REX_B);
7870   }
7871 }
7872 
7873 void Assembler::prefix(Register dst, Register src, Prefix p) {
7874   if (src->encoding() >= 8) {
7875     p = (Prefix)(p | REX_B);
7876   }
7877   if (dst->encoding() >= 8) {
7878     p = (Prefix)( p | REX_R);
7879   }
7880   if (p != Prefix_EMPTY) {
7881     // do not generate an empty prefix
7882     prefix(p);
7883   }
7884 }
7885 
7886 void Assembler::prefix(Register dst, Address adr, Prefix p) {
7887   if (adr.base_needs_rex()) {
7888     if (adr.index_needs_rex()) {
7889       assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
7890     } else {
7891       prefix(REX_B);
7892     }
7893   } else {
7894     if (adr.index_needs_rex()) {
7895       assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
7896     }
7897   }
7898   if (dst->encoding() >= 8) {
7899     p = (Prefix)(p | REX_R);
7900   }
7901   if (p != Prefix_EMPTY) {
7902     // do not generate an empty prefix
7903     prefix(p);
7904   }
7905 }
7906 
7907 void Assembler::prefix(Address adr) {
7908   if (adr.base_needs_rex()) {
7909     if (adr.index_needs_rex()) {
7910       prefix(REX_XB);
7911     } else {
7912       prefix(REX_B);
7913     }
7914   } else {
7915     if (adr.index_needs_rex()) {
7916       prefix(REX_X);
7917     }
7918   }
7919 }
7920 
7921 void Assembler::prefixq(Address adr) {
7922   if (adr.base_needs_rex()) {
7923     if (adr.index_needs_rex()) {
7924       prefix(REX_WXB);
7925     } else {
7926       prefix(REX_WB);
7927     }
7928   } else {
7929     if (adr.index_needs_rex()) {
7930       prefix(REX_WX);
7931     } else {
7932       prefix(REX_W);
7933     }
7934   }
7935 }
7936 
7937 
7938 void Assembler::prefix(Address adr, Register reg, bool byteinst) {
7939   if (reg->encoding() < 8) {
7940     if (adr.base_needs_rex()) {
7941       if (adr.index_needs_rex()) {
7942         prefix(REX_XB);
7943       } else {
7944         prefix(REX_B);
7945       }
7946     } else {
7947       if (adr.index_needs_rex()) {
7948         prefix(REX_X);
7949       } else if (byteinst && reg->encoding() >= 4 ) {
7950         prefix(REX);
7951       }
7952     }
7953   } else {
7954     if (adr.base_needs_rex()) {
7955       if (adr.index_needs_rex()) {
7956         prefix(REX_RXB);
7957       } else {
7958         prefix(REX_RB);
7959       }
7960     } else {
7961       if (adr.index_needs_rex()) {
7962         prefix(REX_RX);
7963       } else {
7964         prefix(REX_R);
7965       }
7966     }
7967   }
7968 }
7969 
7970 void Assembler::prefixq(Address adr, Register src) {
7971   if (src->encoding() < 8) {
7972     if (adr.base_needs_rex()) {
7973       if (adr.index_needs_rex()) {
7974         prefix(REX_WXB);
7975       } else {
7976         prefix(REX_WB);
7977       }
7978     } else {
7979       if (adr.index_needs_rex()) {
7980         prefix(REX_WX);
7981       } else {
7982         prefix(REX_W);
7983       }
7984     }
7985   } else {
7986     if (adr.base_needs_rex()) {
7987       if (adr.index_needs_rex()) {
7988         prefix(REX_WRXB);
7989       } else {
7990         prefix(REX_WRB);
7991       }
7992     } else {
7993       if (adr.index_needs_rex()) {
7994         prefix(REX_WRX);
7995       } else {
7996         prefix(REX_WR);
7997       }
7998     }
7999   }
8000 }
8001 
8002 void Assembler::prefix(Address adr, XMMRegister reg) {
8003   if (reg->encoding() < 8) {
8004     if (adr.base_needs_rex()) {
8005       if (adr.index_needs_rex()) {
8006         prefix(REX_XB);
8007       } else {
8008         prefix(REX_B);
8009       }
8010     } else {
8011       if (adr.index_needs_rex()) {
8012         prefix(REX_X);
8013       }
8014     }
8015   } else {
8016     if (adr.base_needs_rex()) {
8017       if (adr.index_needs_rex()) {
8018         prefix(REX_RXB);
8019       } else {
8020         prefix(REX_RB);
8021       }
8022     } else {
8023       if (adr.index_needs_rex()) {
8024         prefix(REX_RX);
8025       } else {
8026         prefix(REX_R);
8027       }
8028     }
8029   }
8030 }
8031 
8032 void Assembler::prefixq(Address adr, XMMRegister src) {
8033   if (src->encoding() < 8) {
8034     if (adr.base_needs_rex()) {
8035       if (adr.index_needs_rex()) {
8036         prefix(REX_WXB);
8037       } else {
8038         prefix(REX_WB);
8039       }
8040     } else {
8041       if (adr.index_needs_rex()) {
8042         prefix(REX_WX);
8043       } else {
8044         prefix(REX_W);
8045       }
8046     }
8047   } else {
8048     if (adr.base_needs_rex()) {
8049       if (adr.index_needs_rex()) {
8050         prefix(REX_WRXB);
8051       } else {
8052         prefix(REX_WRB);
8053       }
8054     } else {
8055       if (adr.index_needs_rex()) {
8056         prefix(REX_WRX);
8057       } else {
8058         prefix(REX_WR);
8059       }
8060     }
8061   }
8062 }
8063 
8064 void Assembler::adcq(Register dst, int32_t imm32) {
8065   (void) prefixq_and_encode(dst->encoding());
8066   emit_arith(0x81, 0xD0, dst, imm32);
8067 }
8068 
8069 void Assembler::adcq(Register dst, Address src) {
8070   InstructionMark im(this);
8071   prefixq(src, dst);
8072   emit_int8(0x13);
8073   emit_operand(dst, src);
8074 }
8075 
8076 void Assembler::adcq(Register dst, Register src) {
8077   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8078   emit_arith(0x13, 0xC0, dst, src);
8079 }
8080 
8081 void Assembler::addq(Address dst, int32_t imm32) {
8082   InstructionMark im(this);
8083   prefixq(dst);
8084   emit_arith_operand(0x81, rax, dst,imm32);
8085 }
8086 
8087 void Assembler::addq(Address dst, Register src) {
8088   InstructionMark im(this);
8089   prefixq(dst, src);
8090   emit_int8(0x01);
8091   emit_operand(src, dst);
8092 }
8093 
8094 void Assembler::addq(Register dst, int32_t imm32) {
8095   (void) prefixq_and_encode(dst->encoding());
8096   emit_arith(0x81, 0xC0, dst, imm32);
8097 }
8098 
8099 void Assembler::addq(Register dst, Address src) {
8100   InstructionMark im(this);
8101   prefixq(src, dst);
8102   emit_int8(0x03);
8103   emit_operand(dst, src);
8104 }
8105 
8106 void Assembler::addq(Register dst, Register src) {
8107   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8108   emit_arith(0x03, 0xC0, dst, src);
8109 }
8110 
8111 void Assembler::adcxq(Register dst, Register src) {
8112   //assert(VM_Version::supports_adx(), "adx instructions not supported");
8113   emit_int8((unsigned char)0x66);
8114   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8115   emit_int8(0x0F);
8116   emit_int8(0x38);
8117   emit_int8((unsigned char)0xF6);
8118   emit_int8((unsigned char)(0xC0 | encode));
8119 }
8120 
8121 void Assembler::adoxq(Register dst, Register src) {
8122   //assert(VM_Version::supports_adx(), "adx instructions not supported");
8123   emit_int8((unsigned char)0xF3);
8124   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8125   emit_int8(0x0F);
8126   emit_int8(0x38);
8127   emit_int8((unsigned char)0xF6);
8128   emit_int8((unsigned char)(0xC0 | encode));
8129 }
8130 
8131 void Assembler::andq(Address dst, int32_t imm32) {
8132   InstructionMark im(this);
8133   prefixq(dst);
8134   emit_int8((unsigned char)0x81);
8135   emit_operand(rsp, dst, 4);
8136   emit_int32(imm32);
8137 }
8138 
8139 void Assembler::andq(Register dst, int32_t imm32) {
8140   (void) prefixq_and_encode(dst->encoding());
8141   emit_arith(0x81, 0xE0, dst, imm32);
8142 }
8143 
8144 void Assembler::andq(Register dst, Address src) {
8145   InstructionMark im(this);
8146   prefixq(src, dst);
8147   emit_int8(0x23);
8148   emit_operand(dst, src);
8149 }
8150 
8151 void Assembler::andq(Register dst, Register src) {
8152   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8153   emit_arith(0x23, 0xC0, dst, src);
8154 }
8155 
8156 void Assembler::andnq(Register dst, Register src1, Register src2) {
8157   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8158   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8159   int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8160   emit_int8((unsigned char)0xF2);
8161   emit_int8((unsigned char)(0xC0 | encode));
8162 }
8163 
8164 void Assembler::andnq(Register dst, Register src1, Address src2) {
8165   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8166   InstructionMark im(this);
8167   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8168   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8169   emit_int8((unsigned char)0xF2);
8170   emit_operand(dst, src2);
8171 }
8172 
8173 void Assembler::bsfq(Register dst, Register src) {
8174   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8175   emit_int8(0x0F);
8176   emit_int8((unsigned char)0xBC);
8177   emit_int8((unsigned char)(0xC0 | encode));
8178 }
8179 
8180 void Assembler::bsrq(Register dst, Register src) {
8181   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8182   emit_int8(0x0F);
8183   emit_int8((unsigned char)0xBD);
8184   emit_int8((unsigned char)(0xC0 | encode));
8185 }
8186 
8187 void Assembler::bswapq(Register reg) {
8188   int encode = prefixq_and_encode(reg->encoding());
8189   emit_int8(0x0F);
8190   emit_int8((unsigned char)(0xC8 | encode));
8191 }
8192 
8193 void Assembler::blsiq(Register dst, Register src) {
8194   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8195   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8196   int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8197   emit_int8((unsigned char)0xF3);
8198   emit_int8((unsigned char)(0xC0 | encode));
8199 }
8200 
8201 void Assembler::blsiq(Register dst, Address src) {
8202   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8203   InstructionMark im(this);
8204   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8205   vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8206   emit_int8((unsigned char)0xF3);
8207   emit_operand(rbx, src);
8208 }
8209 
8210 void Assembler::blsmskq(Register dst, Register src) {
8211   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8212   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8213   int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8214   emit_int8((unsigned char)0xF3);
8215   emit_int8((unsigned char)(0xC0 | encode));
8216 }
8217 
8218 void Assembler::blsmskq(Register dst, Address src) {
8219   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8220   InstructionMark im(this);
8221   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8222   vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8223   emit_int8((unsigned char)0xF3);
8224   emit_operand(rdx, src);
8225 }
8226 
8227 void Assembler::blsrq(Register dst, Register src) {
8228   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8229   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8230   int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8231   emit_int8((unsigned char)0xF3);
8232   emit_int8((unsigned char)(0xC0 | encode));
8233 }
8234 
8235 void Assembler::blsrq(Register dst, Address src) {
8236   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
8237   InstructionMark im(this);
8238   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8239   vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
8240   emit_int8((unsigned char)0xF3);
8241   emit_operand(rcx, src);
8242 }
8243 
8244 void Assembler::cdqq() {
8245   prefix(REX_W);
8246   emit_int8((unsigned char)0x99);
8247 }
8248 
8249 void Assembler::clflush(Address adr) {
8250   prefix(adr);
8251   emit_int8(0x0F);
8252   emit_int8((unsigned char)0xAE);
8253   emit_operand(rdi, adr);
8254 }
8255 
8256 void Assembler::cmovq(Condition cc, Register dst, Register src) {
8257   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8258   emit_int8(0x0F);
8259   emit_int8(0x40 | cc);
8260   emit_int8((unsigned char)(0xC0 | encode));
8261 }
8262 
8263 void Assembler::cmovq(Condition cc, Register dst, Address src) {
8264   InstructionMark im(this);
8265   prefixq(src, dst);
8266   emit_int8(0x0F);
8267   emit_int8(0x40 | cc);
8268   emit_operand(dst, src);
8269 }
8270 
8271 void Assembler::cmpq(Address dst, int32_t imm32) {
8272   InstructionMark im(this);
8273   prefixq(dst);
8274   emit_int8((unsigned char)0x81);
8275   emit_operand(rdi, dst, 4);
8276   emit_int32(imm32);
8277 }
8278 
8279 void Assembler::cmpq(Register dst, int32_t imm32) {
8280   (void) prefixq_and_encode(dst->encoding());
8281   emit_arith(0x81, 0xF8, dst, imm32);
8282 }
8283 
8284 void Assembler::cmpq(Address dst, Register src) {
8285   InstructionMark im(this);
8286   prefixq(dst, src);
8287   emit_int8(0x39); // CMP r/m64, r64: the memory-destination form is opcode 0x39, not 0x3B
8288   emit_operand(src, dst);
8289 }
8290 
8291 void Assembler::cmpq(Register dst, Register src) {
8292   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8293   emit_arith(0x3B, 0xC0, dst, src);
8294 }
8295 
8296 void Assembler::cmpq(Register dst, Address  src) {
8297   InstructionMark im(this);
8298   prefixq(src, dst);
8299   emit_int8(0x3B);
8300   emit_operand(dst, src);
8301 }
8302 
8303 void Assembler::cmpxchgq(Register reg, Address adr) {
8304   InstructionMark im(this);
8305   prefixq(adr, reg);
8306   emit_int8(0x0F);
8307   emit_int8((unsigned char)0xB1);
8308   emit_operand(reg, adr);
8309 }
8310 
8311 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
8312   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
8313   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8314   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
8315   emit_int8(0x2A);
8316   emit_int8((unsigned char)(0xC0 | encode));
8317 }
8318 
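     // For the EVEX-encoded memory forms below, set_address_attributes records
     // the operand tuple type and input size, which EVEX uses to scale an
     // 8-bit displacement (disp8*N compression) in place of a full 32-bit one.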
8319 void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
8320   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
8321   InstructionMark im(this);
8322   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8323   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
8324   simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
8325   emit_int8(0x2A);
8326   emit_operand(dst, src);
8327 }
8328 
8329 void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
8330   NOT_LP64(assert(VM_Version::supports_sse(), ""));
8331   InstructionMark im(this);
8332   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8333   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
8334   simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
8335   emit_int8(0x2A);
8336   emit_operand(dst, src);
8337 }
8338 
8339 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
8340   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
8341   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8342   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
8343   emit_int8(0x2C);
8344   emit_int8((unsigned char)(0xC0 | encode));
8345 }
8346 
8347 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
8348   NOT_LP64(assert(VM_Version::supports_sse(), ""));
8349   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8350   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
8351   emit_int8(0x2C);
8352   emit_int8((unsigned char)(0xC0 | encode));
8353 }
8354 
8355 void Assembler::decl(Register dst) {
8356   // Don't use it directly. Use MacroAssembler::decrementl() instead.
8357   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
8358   int encode = prefix_and_encode(dst->encoding());
8359   emit_int8((unsigned char)0xFF);
8360   emit_int8((unsigned char)(0xC8 | encode));
8361 }
8362 
8363 void Assembler::decq(Register dst) {
8364   // Don't use it directly. Use MacroAssembler::decrementq() instead.
8365   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
8366   int encode = prefixq_and_encode(dst->encoding());
8367   emit_int8((unsigned char)0xFF);
8368   emit_int8((unsigned char)(0xC8 | encode));
8369 }
8370 
8371 void Assembler::decq(Address dst) {
8372   // Don't use it directly. Use MacroAssembler::decrementq() instead.
8373   InstructionMark im(this);
8374   prefixq(dst);
8375   emit_int8((unsigned char)0xFF);
8376   emit_operand(rcx, dst);
8377 }
8378 
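     // The 0F AE group below selects its operation via the ModRM reg field
     // (/0 FXSAVE, /1 FXRSTOR, /4 XSAVE, /5 XRSTOR), passed as as_Register(n).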
8379 void Assembler::fxrstor(Address src) {
8380   prefixq(src);
8381   emit_int8(0x0F);
8382   emit_int8((unsigned char)0xAE);
8383   emit_operand(as_Register(1), src);
8384 }
8385 
8386 void Assembler::xrstor(Address src) {
8387   prefixq(src);
8388   emit_int8(0x0F);
8389   emit_int8((unsigned char)0xAE);
8390   emit_operand(as_Register(5), src);
8391 }
8392 
8393 void Assembler::fxsave(Address dst) {
8394   prefixq(dst);
8395   emit_int8(0x0F);
8396   emit_int8((unsigned char)0xAE);
8397   emit_operand(as_Register(0), dst);
8398 }
8399 
8400 void Assembler::xsave(Address dst) {
8401   prefixq(dst);
8402   emit_int8(0x0F);
8403   emit_int8((unsigned char)0xAE);
8404   emit_operand(as_Register(4), dst);
8405 }
8406 
8407 void Assembler::idivq(Register src) {
8408   int encode = prefixq_and_encode(src->encoding());
8409   emit_int8((unsigned char)0xF7);
8410   emit_int8((unsigned char)(0xF8 | encode));
8411 }
8412 
8413 void Assembler::imulq(Register dst, Register src) {
8414   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8415   emit_int8(0x0F);
8416   emit_int8((unsigned char)0xAF);
8417   emit_int8((unsigned char)(0xC0 | encode));
8418 }
8419 
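     // IMUL with an immediate has two encodings: 6B /r ib sign-extends an
     // 8-bit constant, while 69 /r id carries a full 32-bit immediate; the
     // shorter form is chosen below whenever the value fits in 8 bits.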
8420 void Assembler::imulq(Register dst, Register src, int value) {
8421   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8422   if (is8bit(value)) {
8423     emit_int8(0x6B);
8424     emit_int8((unsigned char)(0xC0 | encode));
8425     emit_int8(value & 0xFF);
8426   } else {
8427     emit_int8(0x69);
8428     emit_int8((unsigned char)(0xC0 | encode));
8429     emit_int32(value);
8430   }
8431 }
8432 
8433 void Assembler::imulq(Register dst, Address src) {
8434   InstructionMark im(this);
8435   prefixq(src, dst);
8436   emit_int8(0x0F);
8437   emit_int8((unsigned char) 0xAF);
8438   emit_operand(dst, src);
8439 }
8440 
8441 void Assembler::incl(Register dst) {
8442   // Don't use it directly. Use MacroAssembler::incrementl() instead.
8443   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
8444   int encode = prefix_and_encode(dst->encoding());
8445   emit_int8((unsigned char)0xFF);
8446   emit_int8((unsigned char)(0xC0 | encode));
8447 }
8448 
8449 void Assembler::incq(Register dst) {
8450   // Don't use it directly. Use MacroAssembler::incrementq() instead.
8451   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
8452   int encode = prefixq_and_encode(dst->encoding());
8453   emit_int8((unsigned char)0xFF);
8454   emit_int8((unsigned char)(0xC0 | encode));
8455 }
8456 
8457 void Assembler::incq(Address dst) {
8458   // Don't use it directly. Use MacroAssembler::incrementq() instead.
8459   InstructionMark im(this);
8460   prefixq(dst);
8461   emit_int8((unsigned char)0xFF);
8462   emit_operand(rax, dst);
8463 }
8464 
8465 void Assembler::lea(Register dst, Address src) {
8466   leaq(dst, src);
8467 }
8468 
8469 void Assembler::leaq(Register dst, Address src) {
8470   InstructionMark im(this);
8471   prefixq(src, dst);
8472   emit_int8((unsigned char)0x8D);
8473   emit_operand(dst, src);
8474 }
8475 
8476 void Assembler::mov64(Register dst, int64_t imm64) {
8477   InstructionMark im(this);
8478   int encode = prefixq_and_encode(dst->encoding());
8479   emit_int8((unsigned char)(0xB8 | encode));
8480   emit_int64(imm64);
8481 }
8482 
8483 void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) {
8484   InstructionMark im(this);
8485   int encode = prefixq_and_encode(dst->encoding());
8486   emit_int8((unsigned char)(0xB8 | encode));
8487   emit_data64(imm64, rspec);
8488 }
8489 
8490 void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
8491   InstructionMark im(this);
8492   int encode = prefix_and_encode(dst->encoding());
8493   emit_int8((unsigned char)(0xB8 | encode));
8494   emit_data((int)imm32, rspec, narrow_oop_operand);
8495 }
8496 
8497 void Assembler::mov_narrow_oop(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
8498   InstructionMark im(this);
8499   prefix(dst);
8500   emit_int8((unsigned char)0xC7);
8501   emit_operand(rax, dst, 4);
8502   emit_data((int)imm32, rspec, narrow_oop_operand);
8503 }
8504 
8505 void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
8506   InstructionMark im(this);
8507   int encode = prefix_and_encode(src1->encoding());
8508   emit_int8((unsigned char)0x81);
8509   emit_int8((unsigned char)(0xF8 | encode));
8510   emit_data((int)imm32, rspec, narrow_oop_operand);
8511 }
8512 
8513 void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
8514   InstructionMark im(this);
8515   prefix(src1);
8516   emit_int8((unsigned char)0x81);
8517   emit_operand(rax, src1, 4);
8518   emit_data((int)imm32, rspec, narrow_oop_operand);
8519 }
8520 
8521 void Assembler::lzcntq(Register dst, Register src) {
8522   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
8523   emit_int8((unsigned char)0xF3);
8524   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8525   emit_int8(0x0F);
8526   emit_int8((unsigned char)0xBD);
8527   emit_int8((unsigned char)(0xC0 | encode));
8528 }
8529 
8530 void Assembler::movdq(XMMRegister dst, Register src) {
8531   // table D-1 says MMX/SSE2
8532   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
8533   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8534   int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
8535   emit_int8(0x6E);
8536   emit_int8((unsigned char)(0xC0 | encode));
8537 }
8538 
8539 void Assembler::movdq(Register dst, XMMRegister src) {
8540   // table D-1 says MMX/SSE2
8541   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
8542   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8543   // swap src/dst to get correct prefix
8544   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
8545   emit_int8(0x7E);
8546   emit_int8((unsigned char)(0xC0 | encode));
8547 }
8548 
8549 void Assembler::movq(Register dst, Register src) {
8550   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8551   emit_int8((unsigned char)0x8B);
8552   emit_int8((unsigned char)(0xC0 | encode));
8553 }
8554 
8555 void Assembler::movq(Register dst, Address src) {
8556   InstructionMark im(this);
8557   prefixq(src, dst);
8558   emit_int8((unsigned char)0x8B);
8559   emit_operand(dst, src);
8560 }
8561 
8562 void Assembler::movq(Address dst, Register src) {
8563   InstructionMark im(this);
8564   prefixq(dst, src);
8565   emit_int8((unsigned char)0x89);
8566   emit_operand(src, dst);
8567 }
8568 
8569 void Assembler::movsbq(Register dst, Address src) {
8570   InstructionMark im(this);
8571   prefixq(src, dst);
8572   emit_int8(0x0F);
8573   emit_int8((unsigned char)0xBE);
8574   emit_operand(dst, src);
8575 }
8576 
8577 void Assembler::movsbq(Register dst, Register src) {
8578   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8579   emit_int8(0x0F);
8580   emit_int8((unsigned char)0xBE);
8581   emit_int8((unsigned char)(0xC0 | encode));
8582 }
8583 
8584 void Assembler::movslq(Register dst, int32_t imm32) {
8585   // dbx shows movslq(rcx, 3) as movq     $0x0000000049000000,(%rbx)
8586   // and movslq(r8, 3); as movl     $0x0000000048000000,(%rbx)
8587   // as a result we shouldn't use this form until it has been tested at runtime...
8588   ShouldNotReachHere();
8589   InstructionMark im(this);
8590   int encode = prefixq_and_encode(dst->encoding());
8591   emit_int8((unsigned char)(0xC7 | encode));
8592   emit_int32(imm32);
8593 }
8594 
8595 void Assembler::movslq(Address dst, int32_t imm32) {
8596   assert(is_simm32(imm32), "lost bits");
8597   InstructionMark im(this);
8598   prefixq(dst);
8599   emit_int8((unsigned char)0xC7);
8600   emit_operand(rax, dst, 4);
8601   emit_int32(imm32);
8602 }
8603 
8604 void Assembler::movslq(Register dst, Address src) {
8605   InstructionMark im(this);
8606   prefixq(src, dst);
8607   emit_int8(0x63);
8608   emit_operand(dst, src);
8609 }
8610 
8611 void Assembler::movslq(Register dst, Register src) {
8612   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8613   emit_int8(0x63);
8614   emit_int8((unsigned char)(0xC0 | encode));
8615 }
8616 
8617 void Assembler::movswq(Register dst, Address src) {
8618   InstructionMark im(this);
8619   prefixq(src, dst);
8620   emit_int8(0x0F);
8621   emit_int8((unsigned char)0xBF);
8622   emit_operand(dst, src);
8623 }
8624 
8625 void Assembler::movswq(Register dst, Register src) {
8626   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8627   emit_int8((unsigned char)0x0F);
8628   emit_int8((unsigned char)0xBF);
8629   emit_int8((unsigned char)(0xC0 | encode));
8630 }
8631 
8632 void Assembler::movzbq(Register dst, Address src) {
8633   InstructionMark im(this);
8634   prefixq(src, dst);
8635   emit_int8((unsigned char)0x0F);
8636   emit_int8((unsigned char)0xB6);
8637   emit_operand(dst, src);
8638 }
8639 
8640 void Assembler::movzbq(Register dst, Register src) {
8641   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8642   emit_int8(0x0F);
8643   emit_int8((unsigned char)0xB6);
8644   emit_int8((unsigned char)(0xC0 | encode));
8645 }
8646 
8647 void Assembler::movzwq(Register dst, Address src) {
8648   InstructionMark im(this);
8649   prefixq(src, dst);
8650   emit_int8((unsigned char)0x0F);
8651   emit_int8((unsigned char)0xB7);
8652   emit_operand(dst, src);
8653 }
8654 
8655 void Assembler::movzwq(Register dst, Register src) {
8656   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8657   emit_int8((unsigned char)0x0F);
8658   emit_int8((unsigned char)0xB7);
8659   emit_int8((unsigned char)(0xC0 | encode));
8660 }
8661 
8662 void Assembler::mulq(Address src) {
8663   InstructionMark im(this);
8664   prefixq(src);
8665   emit_int8((unsigned char)0xF7);
8666   emit_operand(rsp, src);
8667 }
8668 
8669 void Assembler::mulq(Register src) {
8670   int encode = prefixq_and_encode(src->encoding());
8671   emit_int8((unsigned char)0xF7);
8672   emit_int8((unsigned char)(0xE0 | encode));
8673 }
8674 
8675 void Assembler::mulxq(Register dst1, Register dst2, Register src) {
8676   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
8677   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8678   int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes);
8679   emit_int8((unsigned char)0xF6);
8680   emit_int8((unsigned char)(0xC0 | encode));
8681 }
8682 
8683 void Assembler::negq(Register dst) {
8684   int encode = prefixq_and_encode(dst->encoding());
8685   emit_int8((unsigned char)0xF7);
8686   emit_int8((unsigned char)(0xD8 | encode));
8687 }
8688 
8689 void Assembler::notq(Register dst) {
8690   int encode = prefixq_and_encode(dst->encoding());
8691   emit_int8((unsigned char)0xF7);
8692   emit_int8((unsigned char)(0xD0 | encode));
8693 }
8694 
8695 void Assembler::orq(Address dst, int32_t imm32) {
8696   InstructionMark im(this);
8697   prefixq(dst);
8698   emit_int8((unsigned char)0x81);
8699   emit_operand(rcx, dst, 4);
8700   emit_int32(imm32);
8701 }
8702 
8703 void Assembler::orq(Register dst, int32_t imm32) {
8704   (void) prefixq_and_encode(dst->encoding());
8705   emit_arith(0x81, 0xC8, dst, imm32);
8706 }
8707 
8708 void Assembler::orq(Register dst, Address src) {
8709   InstructionMark im(this);
8710   prefixq(src, dst);
8711   emit_int8(0x0B);
8712   emit_operand(dst, src);
8713 }
8714 
8715 void Assembler::orq(Register dst, Register src) {
8716   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8717   emit_arith(0x0B, 0xC0, dst, src);
8718 }
8719 
8720 void Assembler::popa() { // 64bit
8721   movq(r15, Address(rsp, 0));
8722   movq(r14, Address(rsp, wordSize));
8723   movq(r13, Address(rsp, 2 * wordSize));
8724   movq(r12, Address(rsp, 3 * wordSize));
8725   movq(r11, Address(rsp, 4 * wordSize));
8726   movq(r10, Address(rsp, 5 * wordSize));
8727   movq(r9,  Address(rsp, 6 * wordSize));
8728   movq(r8,  Address(rsp, 7 * wordSize));
8729   movq(rdi, Address(rsp, 8 * wordSize));
8730   movq(rsi, Address(rsp, 9 * wordSize));
8731   movq(rbp, Address(rsp, 10 * wordSize));
8732   // skip rsp
8733   movq(rbx, Address(rsp, 12 * wordSize));
8734   movq(rdx, Address(rsp, 13 * wordSize));
8735   movq(rcx, Address(rsp, 14 * wordSize));
8736   movq(rax, Address(rsp, 15 * wordSize));
8737 
8738   addq(rsp, 16 * wordSize);
8739 }
8740 
8741 void Assembler::popcntq(Register dst, Address src) {
8742   assert(VM_Version::supports_popcnt(), "must support");
8743   InstructionMark im(this);
8744   emit_int8((unsigned char)0xF3);
8745   prefixq(src, dst);
8746   emit_int8((unsigned char)0x0F);
8747   emit_int8((unsigned char)0xB8);
8748   emit_operand(dst, src);
8749 }
8750 
8751 void Assembler::popcntq(Register dst, Register src) {
8752   assert(VM_Version::supports_popcnt(), "must support");
8753   emit_int8((unsigned char)0xF3);
8754   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
8755   emit_int8((unsigned char)0x0F);
8756   emit_int8((unsigned char)0xB8);
8757   emit_int8((unsigned char)(0xC0 | encode));
8758 }
8759 
8760 void Assembler::popq(Address dst) {
8761   InstructionMark im(this);
8762   prefixq(dst);
8763   emit_int8((unsigned char)0x8F);
8764   emit_operand(rax, dst);
8765 }
8766 
8767 void Assembler::pusha() { // 64bit
8768   // we have to store the original rsp.  The ABI says that the 128 bytes
8769   // below rsp (the red zone) are local scratch.
8770   movq(Address(rsp, -5 * wordSize), rsp);
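       // -5 * wordSize above lands on slot 11 once rsp drops by 16 * wordSize,
       // which is exactly the slot popa() skips when restoring.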
8771 
8772   subq(rsp, 16 * wordSize);
8773 
8774   movq(Address(rsp, 15 * wordSize), rax);
8775   movq(Address(rsp, 14 * wordSize), rcx);
8776   movq(Address(rsp, 13 * wordSize), rdx);
8777   movq(Address(rsp, 12 * wordSize), rbx);
8778   // skip rsp
8779   movq(Address(rsp, 10 * wordSize), rbp);
8780   movq(Address(rsp, 9 * wordSize), rsi);
8781   movq(Address(rsp, 8 * wordSize), rdi);
8782   movq(Address(rsp, 7 * wordSize), r8);
8783   movq(Address(rsp, 6 * wordSize), r9);
8784   movq(Address(rsp, 5 * wordSize), r10);
8785   movq(Address(rsp, 4 * wordSize), r11);
8786   movq(Address(rsp, 3 * wordSize), r12);
8787   movq(Address(rsp, 2 * wordSize), r13);
8788   movq(Address(rsp, wordSize), r14);
8789   movq(Address(rsp, 0), r15);
8790 }
8791 
8792 void Assembler::pushq(Address src) {
8793   InstructionMark im(this);
8794   prefixq(src);
8795   emit_int8((unsigned char)0xFF);
8796   emit_operand(rsi, src);
8797 }
8798 
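     // Rotates and shifts by an immediate have a one-byte-shorter encoding for
     // a count of 1 (D1 /r); C1 /r ib is used when an explicit imm8 follows.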
8799 void Assembler::rclq(Register dst, int imm8) {
8800   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8801   int encode = prefixq_and_encode(dst->encoding());
8802   if (imm8 == 1) {
8803     emit_int8((unsigned char)0xD1);
8804     emit_int8((unsigned char)(0xD0 | encode));
8805   } else {
8806     emit_int8((unsigned char)0xC1);
8807     emit_int8((unsigned char)(0xD0 | encode));
8808     emit_int8(imm8);
8809   }
8810 }
8811 
8812 void Assembler::rcrq(Register dst, int imm8) {
8813   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8814   int encode = prefixq_and_encode(dst->encoding());
8815   if (imm8 == 1) {
8816     emit_int8((unsigned char)0xD1);
8817     emit_int8((unsigned char)(0xD8 | encode));
8818   } else {
8819     emit_int8((unsigned char)0xC1);
8820     emit_int8((unsigned char)(0xD8 | encode));
8821     emit_int8(imm8);
8822   }
8823 }
8824 
8825 void Assembler::rorq(Register dst, int imm8) {
8826   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8827   int encode = prefixq_and_encode(dst->encoding());
8828   if (imm8 == 1) {
8829     emit_int8((unsigned char)0xD1);
8830     emit_int8((unsigned char)(0xC8 | encode));
8831   } else {
8832     emit_int8((unsigned char)0xC1);
8833     emit_int8((unsigned char)(0xC8 | encode));
8834     emit_int8(imm8);
8835   }
8836 }
8837 
8838 void Assembler::rorxq(Register dst, Register src, int imm8) {
8839   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
8840   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8841   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes);
8842   emit_int8((unsigned char)0xF0);
8843   emit_int8((unsigned char)(0xC0 | encode));
8844   emit_int8(imm8);
8845 }
8846 
8847 void Assembler::rorxd(Register dst, Register src, int imm8) {
8848   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
8849   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
8850   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes);
8851   emit_int8((unsigned char)0xF0);
8852   emit_int8((unsigned char)(0xC0 | encode));
8853   emit_int8(imm8);
8854 }
8855 
8856 void Assembler::sarq(Register dst, int imm8) {
8857   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8858   int encode = prefixq_and_encode(dst->encoding());
8859   if (imm8 == 1) {
8860     emit_int8((unsigned char)0xD1);
8861     emit_int8((unsigned char)(0xF8 | encode));
8862   } else {
8863     emit_int8((unsigned char)0xC1);
8864     emit_int8((unsigned char)(0xF8 | encode));
8865     emit_int8(imm8);
8866   }
8867 }
8868 
8869 void Assembler::sarq(Register dst) {
8870   int encode = prefixq_and_encode(dst->encoding());
8871   emit_int8((unsigned char)0xD3);
8872   emit_int8((unsigned char)(0xF8 | encode));
8873 }
8874 
8875 void Assembler::sbbq(Address dst, int32_t imm32) {
8876   InstructionMark im(this);
8877   prefixq(dst);
8878   emit_arith_operand(0x81, rbx, dst, imm32);
8879 }
8880 
8881 void Assembler::sbbq(Register dst, int32_t imm32) {
8882   (void) prefixq_and_encode(dst->encoding());
8883   emit_arith(0x81, 0xD8, dst, imm32);
8884 }
8885 
8886 void Assembler::sbbq(Register dst, Address src) {
8887   InstructionMark im(this);
8888   prefixq(src, dst);
8889   emit_int8(0x1B);
8890   emit_operand(dst, src);
8891 }
8892 
8893 void Assembler::sbbq(Register dst, Register src) {
8894   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8895   emit_arith(0x1B, 0xC0, dst, src);
8896 }
8897 
8898 void Assembler::shlq(Register dst, int imm8) {
8899   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8900   int encode = prefixq_and_encode(dst->encoding());
8901   if (imm8 == 1) {
8902     emit_int8((unsigned char)0xD1);
8903     emit_int8((unsigned char)(0xE0 | encode));
8904   } else {
8905     emit_int8((unsigned char)0xC1);
8906     emit_int8((unsigned char)(0xE0 | encode));
8907     emit_int8(imm8);
8908   }
8909 }
8910 
8911 void Assembler::shlq(Register dst) {
8912   int encode = prefixq_and_encode(dst->encoding());
8913   emit_int8((unsigned char)0xD3);
8914   emit_int8((unsigned char)(0xE0 | encode));
8915 }
8916 
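     // Note: unlike sarq/shlq above, this always emits the C1 /5 ib form,
     // even when the count is 1 and the shorter D1 form would do.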
8917 void Assembler::shrq(Register dst, int imm8) {
8918   assert(isShiftCount(imm8 >> 1), "illegal shift count");
8919   int encode = prefixq_and_encode(dst->encoding());
8920   emit_int8((unsigned char)0xC1);
8921   emit_int8((unsigned char)(0xE8 | encode));
8922   emit_int8(imm8);
8923 }
8924 
8925 void Assembler::shrq(Register dst) {
8926   int encode = prefixq_and_encode(dst->encoding());
8927   emit_int8((unsigned char)0xD3);
8928   emit_int8((unsigned char)(0xE8 | encode));
8929 }
8930 
8931 void Assembler::subq(Address dst, int32_t imm32) {
8932   InstructionMark im(this);
8933   prefixq(dst);
8934   emit_arith_operand(0x81, rbp, dst, imm32);
8935 }
8936 
8937 void Assembler::subq(Address dst, Register src) {
8938   InstructionMark im(this);
8939   prefixq(dst, src);
8940   emit_int8(0x29);
8941   emit_operand(src, dst);
8942 }
8943 
8944 void Assembler::subq(Register dst, int32_t imm32) {
8945   (void) prefixq_and_encode(dst->encoding());
8946   emit_arith(0x81, 0xE8, dst, imm32);
8947 }
8948 
8949 // Force generation of a 4-byte immediate value even if it fits into 8 bits
8950 void Assembler::subq_imm32(Register dst, int32_t imm32) {
8951   (void) prefixq_and_encode(dst->encoding());
8952   emit_arith_imm32(0x81, 0xE8, dst, imm32);
8953 }
8954 
8955 void Assembler::subq(Register dst, Address src) {
8956   InstructionMark im(this);
8957   prefixq(src, dst);
8958   emit_int8(0x2B);
8959   emit_operand(dst, src);
8960 }
8961 
8962 void Assembler::subq(Register dst, Register src) {
8963   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8964   emit_arith(0x2B, 0xC0, dst, src);
8965 }
8966 
8967 void Assembler::testq(Register dst, int32_t imm32) {
8968   // not using emit_arith because test
8969   // doesn't support sign-extension of
8970   // 8bit operands
8971   int encode = dst->encoding();
8972   if (encode == 0) {
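         // rax has a dedicated short form: REX.W A9 id (TEST RAX, imm32)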
8973     prefix(REX_W);
8974     emit_int8((unsigned char)0xA9);
8975   } else {
8976     encode = prefixq_and_encode(encode);
8977     emit_int8((unsigned char)0xF7);
8978     emit_int8((unsigned char)(0xC0 | encode));
8979   }
8980   emit_int32(imm32);
8981 }
8982 
8983 void Assembler::testq(Register dst, Register src) {
8984   (void) prefixq_and_encode(dst->encoding(), src->encoding());
8985   emit_arith(0x85, 0xC0, dst, src);
8986 }
8987 
8988 void Assembler::xaddq(Address dst, Register src) {
8989   InstructionMark im(this);
8990   prefixq(dst, src);
8991   emit_int8(0x0F);
8992   emit_int8((unsigned char)0xC1);
8993   emit_operand(src, dst);
8994 }
8995 
8996 void Assembler::xchgq(Register dst, Address src) {
8997   InstructionMark im(this);
8998   prefixq(src, dst);
8999   emit_int8((unsigned char)0x87);
9000   emit_operand(dst, src);
9001 }
9002 
9003 void Assembler::xchgq(Register dst, Register src) {
9004   int encode = prefixq_and_encode(dst->encoding(), src->encoding());
9005   emit_int8((unsigned char)0x87);
9006   emit_int8((unsigned char)(0xC0 | encode));
9007 }
9008 
9009 void Assembler::xorq(Register dst, Register src) {
9010   (void) prefixq_and_encode(dst->encoding(), src->encoding());
9011   emit_arith(0x33, 0xC0, dst, src);
9012 }
9013 
9014 void Assembler::xorq(Register dst, Address src) {
9015   InstructionMark im(this);
9016   prefixq(src, dst);
9017   emit_int8(0x33);
9018   emit_operand(dst, src);
9019 }
9020 
9021 #endif // !LP64