
src/hotspot/cpu/x86/assembler_x86.hpp

rev 58404 : 8241042: x86_64: Improve Assembler generation
Reviewed-by: vlivanov


 322 #endif // ASSERT
 323 
 324   // accessors
 325   bool        uses(Register reg) const { return _base == reg || _index == reg; }
 326   Register    base()             const { return _base;  }
 327   Register    index()            const { return _index; }
 328   XMMRegister xmmindex()         const { return _xmmindex; }
 329   ScaleFactor scale()            const { return _scale; }
 330   int         disp()             const { return _disp;  }
 331   bool        isxmmindex()       const { return _isxmmindex; }
 332 
 333   // Convert the raw encoding form into the form expected by the constructor for
 334   // Address.  An index of 4 (rsp) corresponds to having no index, so convert
 335   // that to noreg for the Address constructor.
 336   static Address make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc);
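For illustration, a minimal sketch of the conversion make_raw performs on the index field, assuming the usual as_Register() helper and the noreg sentinel (decode_raw_index itself is a hypothetical helper, not part of this file):

    // Sketch only: in the raw SIB encoding, an index field of 4 (rsp)
    // means "no index register", so map it back to noreg.
    Register decode_raw_index(int index) {
      return (index == 4) ? noreg : as_Register(index);
    }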
 337 
 338   static Address make_array(ArrayAddress);
 339 
 340  private:
 341   bool base_needs_rex() const {
 342     return _base != noreg && _base->encoding() >= 8;
 343   }
 344 
 345   bool index_needs_rex() const {
 346     return _index != noreg && _index->encoding() >= 8;
 347   }
 348 
 349   bool xmmindex_needs_rex() const {
 350     return _xmmindex != xnoreg && _xmmindex->encoding() >= 8;
 351   }
 352 
 353   relocInfo::relocType reloc() const { return _rspec.type(); }
 354 
 355   friend class Assembler;
 356   friend class MacroAssembler;
 357   friend class LIR_Assembler; // base/index/scale/disp
 358 };
 359 
 360 //
 361 // AddressLiteral has been split out from Address because operands of this type
 362 // need to be treated specially on 32bit vs. 64bit platforms. By splitting it out
 363 // the few instructions that need to deal with address literals are unique and the
 364 // MacroAssembler does not have to implement every instruction in the Assembler
 365 // in order to search for address literals that may need special handling depending
 366 // on the instruction and the platform. As a small step on the way to merging i486/amd64
 367 // directories.
 368 //
 369 class AddressLiteral {
 370   friend class ArrayAddress;


 642   // We could use a "safe enough" estimate (15), but just default to the
 643   // instruction length guess from above.
 644   static unsigned int instr_maxlen() { return 4; }
 645 
 646   // NOTE: The general philosophy of the declarations here is that 64bit versions
 647   // of instructions are freely declared without the need for wrapping them in an ifdef.
 648   // (Some dangerous instructions are ifdef'd out of inappropriate JVMs.)
 649   // In the .cpp file the implementations are wrapped so that they are dropped out
 650   // of the resulting jvm. This is done mostly to keep the footprint of MINIMAL
 651   // to the size it was prior to merging up the 32bit and 64bit assemblers.
 652   //
 653   // This does mean you'll get a linker/runtime error if you use a 64bit only instruction
 654   // in a 32bit vm. This is somewhat unfortunate but keeps the ifdef noise down.
 655 
 656 private:
 657 
 658   bool _legacy_mode_bw;
 659   bool _legacy_mode_dq;
 660   bool _legacy_mode_vl;
 661   bool _legacy_mode_vlbw;
 662   bool _is_managed;
 663 
 664   class InstructionAttr *_attributes;
 665 
 666   // 64bit prefixes
 667   int prefix_and_encode(int reg_enc, bool byteinst = false);
 668   int prefixq_and_encode(int reg_enc);
 669 
 670   int prefix_and_encode(int dst_enc, int src_enc) {
 671     return prefix_and_encode(dst_enc, false, src_enc, false);
 672   }
 673   int prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte);
 674   int prefixq_and_encode(int dst_enc, int src_enc);
 675 

 676   void prefix(Register reg);
 677   void prefix(Register dst, Register src, Prefix p);
 678   void prefix(Register dst, Address adr, Prefix p);
 679   void prefix(Address adr);
 680   void prefixq(Address adr);
 681 

 682   void prefix(Address adr, Register reg,  bool byteinst = false);
 683   void prefix(Address adr, XMMRegister reg);
 684   void prefixq(Address adr, Register reg);
 685   void prefixq(Address adr, XMMRegister reg);
 686 
 687   void prefetch_prefix(Address src);





 688 
 689   void rex_prefix(Address adr, XMMRegister xreg,
 690                   VexSimdPrefix pre, VexOpcode opc, bool rex_w);
 691   int  rex_prefix_and_encode(int dst_enc, int src_enc,
 692                              VexSimdPrefix pre, VexOpcode opc, bool rex_w);
 693 
 694   void vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc);
 695 
 696   void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v,
 697                    int nds_enc, VexSimdPrefix pre, VexOpcode opc);
 698 
 699   void vex_prefix(Address adr, int nds_enc, int xreg_enc,
 700                   VexSimdPrefix pre, VexOpcode opc,
 701                   InstructionAttr *attributes);
 702 
 703   int  vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
 704                              VexSimdPrefix pre, VexOpcode opc,
 705                              InstructionAttr *attributes);
 706 
 707   void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,


 853   }
 854 
 855   // Decoding
 856   static address locate_operand(address inst, WhichOperand which);
 857   static address locate_next_instruction(address inst);
 858 
 859   // Utilities
 860   static bool is_polling_page_far() NOT_LP64({ return false;});
 861   static bool query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len,
 862                                          int cur_tuple_type, int in_size_in_bits, int cur_encoding);
 863 
 864   // Generic instructions
 865   // Does 32bit or 64bit as needed for the platform. In some sense these
 866   // belong in the macro assembler but there is no need for both varieties to exist.
 867 
 868   void init_attributes(void) {
 869     _legacy_mode_bw = (VM_Version::supports_avx512bw() == false);
 870     _legacy_mode_dq = (VM_Version::supports_avx512dq() == false);
 871     _legacy_mode_vl = (VM_Version::supports_avx512vl() == false);
 872     _legacy_mode_vlbw = (VM_Version::supports_avx512vlbw() == false);
 873     _is_managed = false;
 874     _attributes = NULL;
 875   }
 876 
 877   void set_attributes(InstructionAttr *attributes) { _attributes = attributes; }
 878   void clear_attributes(void) { _attributes = NULL; }
 879 
 880   void set_managed(void) { _is_managed = true; }
 881   void clear_managed(void) { _is_managed = false; }
 882   bool is_managed(void) { return _is_managed; }


 883 
 884   void lea(Register dst, Address src);
 885 
 886   void mov(Register dst, Register src);
 887 
 888 #ifdef _LP64
 889   // support caching the result of some routines
 890 
 891   // must be called before pusha(), popa(), vzeroupper() - checked with asserts
 892   static void precompute_instructions();
 893 
 894   void pusha_uncached();
 895   void popa_uncached();
 896 #endif
 897   void vzeroupper_uncached();
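The _uncached variants re-encode their sequence on every call; the plain entry points below can replay bytes captured once by precompute_instructions(). One plausible shape of that caching, purely as a sketch (the buffer, its size, and the replay loop are assumptions, not the actual implementation):

    // Sketch: replay precomputed bytes instead of re-encoding each time.
    static unsigned char pusha_code[64];  // filled by precompute_instructions()
    static int           pusha_len = 0;

    void Assembler::pusha() {
      assert(pusha_len > 0, "precompute_instructions() must run first");
      for (int i = 0; i < pusha_len; i++) {
        emit_int8(pusha_code[i]);
      }
    }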
 898 
 899   void pusha();
 900   void popa();
 901 
 902   void pushf();


2263   // They should be called only from corresponding MacroAssembler instructions.
2264   void andpd(XMMRegister dst, Address src);
2265   void andps(XMMRegister dst, Address src);
2266   void xorpd(XMMRegister dst, Address src);
2267   void xorps(XMMRegister dst, Address src);
2268 
2269 };
2270 
2271 // The Intel x86/AMD64 Assembler attributes: all fields enclosed here guide encoding-level decisions.
2272 // The specific set functions are for specialized use; otherwise the defaults, or whatever was supplied
2273 // at object construction, are applied.
2274 class InstructionAttr {
2275 public:
2276   InstructionAttr(
2277     int vector_len,     // The vector length to be applied in encoding - for both AVX and EVEX
2278     bool rex_vex_w,     // Width of data: false for 32 bits or less, true for 64-bit or specially defined data
2279     bool legacy_mode,   // When true, the instruction is restricted to AVX or earlier encodings; otherwise EVEX may be chosen
2280     bool no_reg_mask,   // When true, k0 is used when EVEX encoding is chosen; otherwise embedded_opmask_register_specifier is used
2281     bool uses_vl)       // This instruction may have legacy constraints based on vector length for EVEX
2282     :
2283       _avx_vector_len(vector_len),
2284       _rex_vex_w(rex_vex_w),
2285       _rex_vex_w_reverted(false),
2286       _legacy_mode(legacy_mode),
2287       _no_reg_mask(no_reg_mask),
2288       _uses_vl(uses_vl),
2289       _tuple_type(Assembler::EVEX_ETUP),
2290       _input_size_in_bits(Assembler::EVEX_NObit),
2291       _is_evex_instruction(false),
2292       _evex_encoding(0),
2293       _is_clear_context(true),
2294       _is_extended_context(false),




2295       _embedded_opmask_register_specifier(0), // hard code k0
2296       _current_assembler(NULL) {
2297     if (UseAVX < 3) _legacy_mode = true;
2298   }
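A hedged sketch of a typical call site: an instruction encoder stack-allocates the attributes and passes them to a prefix helper such as vex_prefix_and_encode() declared earlier (the operands dst, nds, and src are illustrative):

    // Encode a 128-bit AVX operation with no opmask register and no
    // vector-length constraint.
    InstructionAttr attributes(AVX_128bit, /* rex_vex_w */ false,
                               /* legacy_mode */ false, /* no_reg_mask */ true,
                               /* uses_vl */ false);
    int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(),
                                       src->encoding(), VEX_SIMD_66,
                                       VEX_OPCODE_0F, &attributes);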
2299 
2300   ~InstructionAttr() {
2301     if (_current_assembler != NULL) {
2302       _current_assembler->clear_attributes();
2303     }
2304     _current_assembler = NULL;
2305   }
2306 
2307 private:
2308   int  _avx_vector_len;
2309   bool _rex_vex_w;
2310   bool _rex_vex_w_reverted;
2311   bool _legacy_mode;
2312   bool _no_reg_mask;
2313   bool _uses_vl;
2314   int  _tuple_type;
2315   int  _input_size_in_bits;
2316   bool _is_evex_instruction;
2317   int  _evex_encoding;
2318   bool _is_clear_context;
2319   bool _is_extended_context;




2320   int _embedded_opmask_register_specifier;
2321 
2322   Assembler *_current_assembler;
2323 
2324 public:
2325   // query functions for field accessors
2326   int  get_vector_len(void) const { return _avx_vector_len; }
2327   bool is_rex_vex_w(void) const { return _rex_vex_w; }
2328   bool is_rex_vex_w_reverted(void) { return _rex_vex_w_reverted; }
2329   bool is_legacy_mode(void) const { return _legacy_mode; }
2330   bool is_no_reg_mask(void) const { return _no_reg_mask; }
2331   bool uses_vl(void) const { return _uses_vl; }





2332   int  get_tuple_type(void) const { return _tuple_type; }
2333   int  get_input_size(void) const { return _input_size_in_bits; }
2334   int  is_evex_instruction(void) const { return _is_evex_instruction; }
2335   int  get_evex_encoding(void) const { return _evex_encoding; }
2336   bool is_clear_context(void) const { return _is_clear_context; }
2337   bool is_extended_context(void) const { return _is_extended_context; }
2338   int get_embedded_opmask_register_specifier(void) const { return _embedded_opmask_register_specifier; }
2339 
2340   // Set the vector len manually
2341   void set_vector_len(int vector_len) { _avx_vector_len = vector_len; }
2342 
2343   // Mark rex_vex_w as reverted for AVX encoding
2344   void set_rex_vex_w_reverted(void) { _rex_vex_w_reverted = true; }
2345 
2346   // Set rex_vex_w based on state
2347   void set_rex_vex_w(bool state) { _rex_vex_w = state; }
2348 
2349   // Set the instruction to be encoded in AVX mode
2350   void set_is_legacy_mode(void) { _legacy_mode = true; }
2351 
2352   // Set the current instruction to be encoded as an EVEX instruction
2353   void set_is_evex_instruction(void) { _is_evex_instruction = true; }
2354 
2355   // Internal encoding data used in compressed immediate offset programming
2356   void set_evex_encoding(int value) { _evex_encoding = value; }
2357 




 322 #endif // ASSERT
 323 
 324   // accessors
 325   bool        uses(Register reg) const { return _base == reg || _index == reg; }
 326   Register    base()             const { return _base;  }
 327   Register    index()            const { return _index; }
 328   XMMRegister xmmindex()         const { return _xmmindex; }
 329   ScaleFactor scale()            const { return _scale; }
 330   int         disp()             const { return _disp;  }
 331   bool        isxmmindex()       const { return _isxmmindex; }
 332 
 333   // Convert the raw encoding form into the form expected by the constructor for
 334   // Address.  An index of 4 (rsp) corresponds to having no index, so convert
 335   // that to noreg for the Address constructor.
 336   static Address make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc);
 337 
 338   static Address make_array(ArrayAddress);
 339 
 340  private:
 341   bool base_needs_rex() const {
 342     return _base->is_valid() && _base->encoding() >= 8;
 343   }
 344 
 345   bool index_needs_rex() const {
 346     return _index->is_valid() && _index->encoding() >= 8;
 347   }
 348 
 349   bool xmmindex_needs_rex() const {
 350     return _xmmindex->is_valid() && _xmmindex->encoding() >= 8;
 351   }
 352 
 353   relocInfo::relocType reloc() const { return _rspec.type(); }
 354 
 355   friend class Assembler;
 356   friend class MacroAssembler;
 357   friend class LIR_Assembler; // base/index/scale/disp
 358 };
 359 
 360 //
 361 // AddressLiteral has been split out from Address because operands of this type
 362 // need to be treated specially on 32bit vs. 64bit platforms. By splitting it out
 363 // the few instructions that need to deal with address literals are unique and the
 364 // MacroAssembler does not have to implement every instruction in the Assembler
 365 // in order to search for address literals that may need special handling depending
 366 // on the instruction and the platform. As a small step on the way to merging i486/amd64
 367 // directories.
 368 //
 369 class AddressLiteral {
 370   friend class ArrayAddress;


 642   // We could use a "safe enough" estimate (15), but just default to the
 643   // instruction length guess from above.
 644   static unsigned int instr_maxlen() { return 4; }
 645 
 646   // NOTE: The general philosophy of the declarations here is that 64bit versions
 647   // of instructions are freely declared without the need for wrapping them in an ifdef.
 648   // (Some dangerous instructions are ifdef'd out of inappropriate JVMs.)
 649   // In the .cpp file the implementations are wrapped so that they are dropped out
 650   // of the resulting jvm. This is done mostly to keep the footprint of MINIMAL
 651   // to the size it was prior to merging up the 32bit and 64bit assemblers.
 652   //
 653   // This does mean you'll get a linker/runtime error if you use a 64bit only instruction
 654   // in a 32bit vm. This is somewhat unfortunate but keeps the ifdef noise down.
 655 
 656 private:
 657 
 658   bool _legacy_mode_bw;
 659   bool _legacy_mode_dq;
 660   bool _legacy_mode_vl;
 661   bool _legacy_mode_vlbw;
 662   NOT_LP64(bool _is_managed;)
 663 
 664   class InstructionAttr *_attributes;
 665 
 666   // 64bit prefixes
 667   int prefix_and_encode(int reg_enc, bool byteinst = false);
 668   int prefixq_and_encode(int reg_enc);
 669 
 670   int prefix_and_encode(int dst_enc, int src_enc) {
 671     return prefix_and_encode(dst_enc, false, src_enc, false);
 672   }
 673   int prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte);
 674   int prefixq_and_encode(int dst_enc, int src_enc);
 675 
 676 
 677   void prefix(Register reg);
 678   void prefix(Register dst, Register src, Prefix p);
 679   void prefix(Register dst, Address adr, Prefix p);
 680   void prefix(Address adr);
 681   void prefixq(Address adr);
 682 
 683 
 684   void prefix(Address adr, Register reg,  bool byteinst = false);
 685   void prefix(Address adr, XMMRegister reg);
 686   void prefixq(Address adr, Register reg);
 687   void prefixq(Address adr, XMMRegister reg);
 688 
 689   // Some prefix variants have a total mapping - they always emit exactly one
 690   // prefix byte per input - so besides a prefix-emitting method we provide a
 691   // method that returns the prefix byte to emit. That byte can then be folded
 692   // into a larger byte stream, which can generate faster, more compact code.
 693   int8_t get_prefixq(Address adr);
 694   int8_t get_prefixq(Address adr, Register reg);
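As a hedged sketch of the folding this enables, a 64-bit load could combine its REX.W prefix byte and opcode into a single two-byte emit rather than two separate ones (this assumes an emit_int16 overload taking two byte operands; the body shown is illustrative, not the definitive implementation):

    void Assembler::movq(Register dst, Address src) {
      InstructionMark im(this);
      // Fold the REX.W prefix and the 0x8B (MOV r64, r/m64) opcode together.
      emit_int16(get_prefixq(src, dst), (int8_t) 0x8B);
      emit_operand(dst, src);
    }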
 695 
 696   void rex_prefix(Address adr, XMMRegister xreg,
 697                   VexSimdPrefix pre, VexOpcode opc, bool rex_w);
 698   int  rex_prefix_and_encode(int dst_enc, int src_enc,
 699                              VexSimdPrefix pre, VexOpcode opc, bool rex_w);
 700 
 701   void vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc);
 702 
 703   void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v,
 704                    int nds_enc, VexSimdPrefix pre, VexOpcode opc);
 705 
 706   void vex_prefix(Address adr, int nds_enc, int xreg_enc,
 707                   VexSimdPrefix pre, VexOpcode opc,
 708                   InstructionAttr *attributes);
 709 
 710   int  vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
 711                              VexSimdPrefix pre, VexOpcode opc,
 712                              InstructionAttr *attributes);
 713 
 714   void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,


 860   }
 861 
 862   // Decoding
 863   static address locate_operand(address inst, WhichOperand which);
 864   static address locate_next_instruction(address inst);
 865 
 866   // Utilities
 867   static bool is_polling_page_far() NOT_LP64({ return false;});
 868   static bool query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len,
 869                                          int cur_tuple_type, int in_size_in_bits, int cur_encoding);
 870 
 871   // Generic instructions
 872   // Does 32bit or 64bit as needed for the platform. In some sense these
 873   // belong in the macro assembler but there is no need for both varieties to exist.
 874 
 875   void init_attributes(void) {
 876     _legacy_mode_bw = (VM_Version::supports_avx512bw() == false);
 877     _legacy_mode_dq = (VM_Version::supports_avx512dq() == false);
 878     _legacy_mode_vl = (VM_Version::supports_avx512vl() == false);
 879     _legacy_mode_vlbw = (VM_Version::supports_avx512vlbw() == false);
 880     NOT_LP64(_is_managed = false;)
 881     _attributes = NULL;
 882   }
 883 
 884   void set_attributes(InstructionAttr *attributes) { _attributes = attributes; }
 885   void clear_attributes(void) { _attributes = NULL; }
 886 
 887   void set_managed(void) { NOT_LP64(_is_managed = true;) }
 888   void clear_managed(void) { NOT_LP64(_is_managed = false;) }
 889   bool is_managed(void) {
 890     NOT_LP64(return _is_managed;)
 891     LP64_ONLY(return false;) }
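The NOT_LP64/LP64_ONLY macros come from HotSpot's shared globalDefinitions header and expand to their argument on only one word size, so on 64-bit builds _is_managed and its accessors compile away entirely. Their shape, for reference:

    #ifdef _LP64
    #define LP64_ONLY(code) code
    #define NOT_LP64(code)
    #else
    #define LP64_ONLY(code)
    #define NOT_LP64(code) code
    #endif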
 892 
 893   void lea(Register dst, Address src);
 894 
 895   void mov(Register dst, Register src);
 896 
 897 #ifdef _LP64
 898   // support caching the result of some routines
 899 
 900   // must be called before pusha(), popa(), vzeroupper() - checked with asserts
 901   static void precompute_instructions();
 902 
 903   void pusha_uncached();
 904   void popa_uncached();
 905 #endif
 906   void vzeroupper_uncached();
 907 
 908   void pusha();
 909   void popa();
 910 
 911   void pushf();


2272   // They should be called only from corresponding MacroAssembler instructions.
2273   void andpd(XMMRegister dst, Address src);
2274   void andps(XMMRegister dst, Address src);
2275   void xorpd(XMMRegister dst, Address src);
2276   void xorps(XMMRegister dst, Address src);
2277 
2278 };
2279 
2280 // The Intel x86/AMD64 Assembler attributes: all fields enclosed here guide encoding-level decisions.
2281 // The specific set functions are for specialized use; otherwise the defaults, or whatever was supplied
2282 // at object construction, are applied.
2283 class InstructionAttr {
2284 public:
2285   InstructionAttr(
2286     int vector_len,     // The vector length to be applied in encoding - for both AVX and EVEX
2287     bool rex_vex_w,     // Width of data: false for 32 bits or less, true for 64-bit or specially defined data
2288     bool legacy_mode,   // When true, the instruction is restricted to AVX or earlier encodings; otherwise EVEX may be chosen
2289     bool no_reg_mask,   // When true, k0 is used when EVEX encoding is chosen; otherwise embedded_opmask_register_specifier is used
2290     bool uses_vl)       // This instruction may have legacy constraints based on vector length for EVEX
2291     :

2292       _rex_vex_w(rex_vex_w),
2293       _legacy_mode(legacy_mode || UseAVX < 3),

2294       _no_reg_mask(no_reg_mask),
2295       _uses_vl(uses_vl),
2296       _rex_vex_w_reverted(false),

2297       _is_evex_instruction(false),

2298       _is_clear_context(true),
2299       _is_extended_context(false),
2300       _avx_vector_len(vector_len),
2301       _tuple_type(Assembler::EVEX_ETUP),
2302       _input_size_in_bits(Assembler::EVEX_NObit),
2303       _evex_encoding(0),
2304       _embedded_opmask_register_specifier(0), // hard code k0
2305       _current_assembler(NULL) { }


2306 
2307   ~InstructionAttr() {
2308     if (_current_assembler != NULL) {
2309       _current_assembler->clear_attributes();
2310     }
2311     _current_assembler = NULL;
2312   }
2313 
2314 private:

2315   bool _rex_vex_w;

2316   bool _legacy_mode;
2317   bool _no_reg_mask;
2318   bool _uses_vl;
2319   bool _rex_vex_w_reverted;

2320   bool _is_evex_instruction;

2321   bool _is_clear_context;
2322   bool _is_extended_context;
2323   int  _avx_vector_len;
2324   int  _tuple_type;
2325   int  _input_size_in_bits;
2326   int  _evex_encoding;
2327   int _embedded_opmask_register_specifier;
2328 
2329   Assembler *_current_assembler;
2330 
2331 public:
2332   // query functions for field accessors

2333   bool is_rex_vex_w(void) const { return _rex_vex_w; }

2334   bool is_legacy_mode(void) const { return _legacy_mode; }
2335   bool is_no_reg_mask(void) const { return _no_reg_mask; }
2336   bool uses_vl(void) const { return _uses_vl; }
2337   bool is_rex_vex_w_reverted(void) { return _rex_vex_w_reverted; }
2338   bool is_evex_instruction(void) const { return _is_evex_instruction; }
2339   bool is_clear_context(void) const { return _is_clear_context; }
2340   bool is_extended_context(void) const { return _is_extended_context; }
2341   int  get_vector_len(void) const { return _avx_vector_len; }
2342   int  get_tuple_type(void) const { return _tuple_type; }
2343   int  get_input_size(void) const { return _input_size_in_bits; }

2344   int  get_evex_encoding(void) const { return _evex_encoding; }


2345   int  get_embedded_opmask_register_specifier(void) const { return _embedded_opmask_register_specifier; }
2346 
2347   // Set the vector len manually
2348   void set_vector_len(int vector_len) { _avx_vector_len = vector_len; }
2349 
2350   // Mark rex_vex_w as reverted for AVX encoding
2351   void set_rex_vex_w_reverted(void) { _rex_vex_w_reverted = true; }
2352 
2353   // Set rex_vex_w based on state
2354   void set_rex_vex_w(bool state) { _rex_vex_w = state; }
2355 
2356   // Set the instruction to be encoded in AVX mode
2357   void set_is_legacy_mode(void) { _legacy_mode = true; }
2358 
2359   // Set the current instruction to be encoded as an EVEX instruction
2360   void set_is_evex_instruction(void) { _is_evex_instruction = true; }
2361 
2362   // Internal encoding data used in compressed immediate offset programming
2363   void set_evex_encoding(int value) { _evex_encoding = value; }
2364 

