
src/hotspot/cpu/aarch64/aarch64.ad (before rev 61975)

rev 61975 : [vector] Address review comments for AArch64 backend changes
1. Separate newly added NEON instructions into a new ad file
   aarch64_neon.ad
2. Add assembler tests for NEON instructions. Trailing spaces
   in the Python script are also removed.


2058   if (!has_match_rule(opcode))
2059     return false;
2060 
2061   bool ret_value = true;
2062   switch (opcode) {
2063     case Op_CacheWB:
2064     case Op_CacheWBPreSync:
2065     case Op_CacheWBPostSync:
2066       if (!VM_Version::supports_data_cache_line_flush()) {
2067         ret_value = false;
2068       }
2069       break;
2070   }
2071 
2072   return ret_value; // By default match rules are supported.
2073 }
2074 
2075 // Identify extra cases where we might want to provide match rules for vector nodes
2076 // and other intrinsics guarded by vector length (vlen) and element type (bt).
2077 const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
2078   if (!match_rule_supported(opcode)) {
2079     return false;
2080   }
2081 
2082   // Special cases which require vector length
2083   switch (opcode) {
2084     case Op_MulAddVS2VI: {
2085       if (vlen != 4) {
2086         return false;
2087       }
2088       break;
2089     }
2090   }
2091 
2092   return true; // By default match rules are supported.
2093 }
2094 
2095 const bool Matcher::has_predicated_vectors(void) {
2096   return false;
2097 }
2098 
2099 const int Matcher::float_pressure(int default_pressure_threshold) {
2100   return default_pressure_threshold;
2101 }
2102 
2103 int Matcher::regnum_to_fpu_offset(int regnum)
2104 {
2105   Unimplemented();
2106   return 0;
2107 }
2108 
2109 // Is this branch offset short enough that a short branch can be used?
2110 //
2111 // NOTE: If the platform does not provide any short branch variants, then
2112 //       this method should return false for offset 0.
2113 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
2114   // The passed offset is relative to the address of the branch.
2115 
2116   return (-32768 <= offset && offset < 32768);
2117 }
2118 


2125 // true just means we have fast l2f conversion
2126 const bool Matcher::convL2FSupported(void) {
2127   return true;
2128 }
2129 
2130 // Vector width in bytes.
2131 const int Matcher::vector_width_in_bytes(BasicType bt) {
2132   int size = MIN2(16,(int)MaxVectorSize);
2133   // Minimum 2 values in vector
2134   if (size < 2*type2aelembytes(bt)) size = 0;
2135   // But never < 4
2136   if (size < 4) size = 0;
2137   return size;
2138 }
2139 
2140 // Limits on vector size (number of elements) loaded into vector.
2141 const int Matcher::max_vector_size(const BasicType bt) {
2142   return vector_width_in_bytes(bt)/type2aelembytes(bt);
2143 }
2144 const int Matcher::min_vector_size(const BasicType bt) {
2145 //  For the moment limit the vector size to 8 bytes
2146     int size = 8 / type2aelembytes(bt);
2147     if (size < 2) size = 2;
2148     return size;
2149 }
2150 
2151 // Vector ideal reg.
2152 const uint Matcher::vector_ideal_reg(int len) {
2153   switch(len) {
2154     case  8: return Op_VecD;
2155     case 16: return Op_VecX;
2156   }
2157   ShouldNotReachHere();
2158   return 0;
2159 }
2160 
2161 // AES support not yet implemented
2162 const bool Matcher::pass_original_key_for_aes() {
2163   return false;
2164 }
2165 
2166 // aarch64 supports misaligned vector stores/loads.
2167 const bool Matcher::misaligned_vectors_ok() {
2168   return true;
2169 }
2170 
2171 // false => size gets scaled to BytesPerLong, ok.
2172 const bool Matcher::init_array_count_is_in_bytes = false;
2173 


2776     // need to do this the hard way until we can manage relocs
2777     // for 32 bit constants
2778     __ movoop(rscratch2, (jobject)con);
2779     __ encode_klass_not_null(rscratch2);
2780     loadStore(_masm, &MacroAssembler::strw, rscratch2, $mem->opcode(),
2781                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
2782   %}
2783 
2784   // This encoding class is generated automatically from ad_encode.m4.
2785   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
2786   enc_class aarch64_enc_strb0_ordered(memory4 mem) %{
2787       C2_MacroAssembler _masm(&cbuf);
2788       __ membar(Assembler::StoreStore);
2789       loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
2790                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
2791   %}
2792 
2793   // END Non-volatile memory access
2794 
2795   // Vector loads and stores
2796   enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
2797     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
2798     loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
2799        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2800   %}
2801 
2802   enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
2803     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
2804     loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
2805        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2806   %}
2807 
2808   enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
2809     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
2810     loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
2811        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2812   %}
2813 
2814   enc_class aarch64_enc_strvS(vecD src, memory mem) %{
2815     FloatRegister src_reg = as_FloatRegister($src$$reg);
2816     loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
2817        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2818   %}
2819 
2820   enc_class aarch64_enc_strvD(vecD src, memory mem) %{
2821     FloatRegister src_reg = as_FloatRegister($src$$reg);
2822     loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
2823        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2824   %}
2825 
2826   enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
2827     FloatRegister src_reg = as_FloatRegister($src$$reg);
2828     loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
2829        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2830   %}
2831 
2832   // volatile loads and stores
2833 


3878 operand immI_le_4()
3879 %{
3880   predicate(n->get_int() <= 4);
3881   match(ConI);
3882 
3883   op_cost(0);
3884   format %{ %}
3885   interface(CONST_INTER);
3886 %}
3887 
3888 operand immI_31()
3889 %{
3890   predicate(n->get_int() == 31);
3891   match(ConI);
3892 
3893   op_cost(0);
3894   format %{ %}
3895   interface(CONST_INTER);
3896 %}
3897 
3898 operand immI_8()
3899 %{
3900   predicate(n->get_int() == 8);
3901   match(ConI);
3902 
3903   op_cost(0);
3904   format %{ %}
3905   interface(CONST_INTER);
3906 %}
3907 
3908 operand immI_16()
3909 %{
3910   predicate(n->get_int() == 16);
3911   match(ConI);
3912 
3913   op_cost(0);
3914   format %{ %}
3915   interface(CONST_INTER);
3916 %}
3917 


10780   ins_pipe(ialu_reg_reg_vshift);
10781 %}
10782 
10783 // Shift Right Arithmetic Immediate
10784 instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
10785   match(Set dst (RShiftL src1 src2));
10786 
10787   ins_cost(INSN_COST);
10788   format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}
10789 
10790   ins_encode %{
10791     __ asr(as_Register($dst$$reg),
10792            as_Register($src1$$reg),
10793            $src2$$constant & 0x3f);
10794   %}
10795 
10796   ins_pipe(ialu_reg_shift);
10797 %}
10798 
10799 // BEGIN This section of the file is automatically generated. Do not edit --------------
10800 
10801 instruct regL_not_reg(iRegLNoSp dst,
10802                          iRegL src1, immL_M1 m1,
10803                          rFlagsReg cr) %{
10804   match(Set dst (XorL src1 m1));
10805   ins_cost(INSN_COST);
10806   format %{ "eon  $dst, $src1, zr" %}
10807 
10808   ins_encode %{
10809     __ eon(as_Register($dst$$reg),
10810               as_Register($src1$$reg),
10811               zr,
10812               Assembler::LSL, 0);
10813   %}
10814 
10815   ins_pipe(ialu_reg);
10816 %}
10817 instruct regI_not_reg(iRegINoSp dst,
10818                          iRegIorL2I src1, immI_M1 m1,
10819                          rFlagsReg cr) %{


15970   ins_pipe(vdup_reg_freg128);
15971 %}
15972 
15973 instruct replicate2D(vecX dst, vRegD src)
15974 %{
15975   predicate(n->as_Vector()->length() == 2);
15976   match(Set dst (ReplicateD src));
15977   ins_cost(INSN_COST);
15978   format %{ "dup  $dst, $src\t# vector (2D)" %}
15979   ins_encode %{
15980     __ dup(as_FloatRegister($dst$$reg), __ T2D,
15981            as_FloatRegister($src$$reg));
15982   %}
15983   ins_pipe(vdup_reg_dreg128);
15984 %}
15985 
15986 // ====================REDUCTION ARITHMETIC====================================
15987 
15988 instruct reduce_add2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp, iRegINoSp tmp2)
15989 %{
15990   match(Set dst (AddReductionVI isrc vsrc));
15991   ins_cost(INSN_COST);
15992   effect(TEMP tmp, TEMP tmp2);
15993   format %{ "umov  $tmp, $vsrc, S, 0\n\t"
15994             "umov  $tmp2, $vsrc, S, 1\n\t"
15995             "addw  $tmp, $isrc, $tmp\n\t"
15996             "addw  $dst, $tmp, $tmp2\t# add reduction2I"
15997   %}
15998   ins_encode %{
15999     __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0);
16000     __ umov($tmp2$$Register, as_FloatRegister($vsrc$$reg), __ S, 1);
16001     __ addw($tmp$$Register, $isrc$$Register, $tmp$$Register);
16002     __ addw($dst$$Register, $tmp$$Register, $tmp2$$Register);
16003   %}
16004   ins_pipe(pipe_class_default);
16005 %}
16006 
16007 instruct reduce_add4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp, iRegINoSp itmp)
16008 %{
16009   match(Set dst (AddReductionVI isrc vsrc));
16010   ins_cost(INSN_COST);
16011   effect(TEMP vtmp, TEMP itmp);
16012   format %{ "addv  $vtmp, T4S, $vsrc\n\t"
16013             "umov  $itmp, $vtmp, S, 0\n\t"
16014             "addw  $dst, $itmp, $isrc\t# add reduction4I"
16015   %}
16016   ins_encode %{
16017     __ addv(as_FloatRegister($vtmp$$reg), __ T4S,
16018             as_FloatRegister($vsrc$$reg));
16019     __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ S, 0);
16020     __ addw($dst$$Register, $itmp$$Register, $isrc$$Register);
16021   %}
16022   ins_pipe(pipe_class_default);
16023 %}
16024 
16025 instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
16026 %{
16027   match(Set dst (MulReductionVI isrc vsrc));
16028   ins_cost(INSN_COST);
16029   effect(TEMP tmp, TEMP dst);
16030   format %{ "umov  $tmp, $vsrc, S, 0\n\t"
16031             "mul   $dst, $tmp, $isrc\n\t"
16032             "umov  $tmp, $vsrc, S, 1\n\t"
16033             "mul   $dst, $tmp, $dst\t# mul reduction2I"
16034   %}
16035   ins_encode %{
16036     __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0);
16037     __ mul($dst$$Register, $tmp$$Register, $isrc$$Register);
16038     __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 1);
16039     __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
16040   %}
16041   ins_pipe(pipe_class_default);
16042 %}
16043 
16044 instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp, iRegINoSp itmp)
16045 %{
16046   match(Set dst (MulReductionVI isrc vsrc));
16047   ins_cost(INSN_COST);
16048   effect(TEMP vtmp, TEMP itmp, TEMP dst);
16049   format %{ "ins   $vtmp, D, $vsrc, 0, 1\n\t"
16050             "mulv  $vtmp, T2S, $vtmp, $vsrc\n\t"
16051             "umov  $itmp, $vtmp, S, 0\n\t"
16052             "mul   $dst, $itmp, $isrc\n\t"
16053             "umov  $itmp, $vtmp, S, 1\n\t"
16054             "mul   $dst, $itmp, $dst\t# mul reduction4I"
16055   %}
16056   ins_encode %{
16057     __ ins(as_FloatRegister($vtmp$$reg), __ D,
16058            as_FloatRegister($vsrc$$reg), 0, 1);
16059     __ mulv(as_FloatRegister($vtmp$$reg), __ T2S,
16060             as_FloatRegister($vtmp$$reg), as_FloatRegister($vsrc$$reg));
16061     __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ S, 0);
16062     __ mul($dst$$Register, $itmp$$Register, $isrc$$Register);
16063     __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ S, 1);
16064     __ mul($dst$$Register, $itmp$$Register, $dst$$Register);
16065   %}


17107 
17108 instruct vabs2L(vecX dst, vecX src)
17109 %{
17110   predicate(n->as_Vector()->length() == 2);
17111   match(Set dst (AbsVL src));
17112   ins_cost(INSN_COST);
17113   format %{ "abs  $dst, $src\t# vector (2D)" %}
17114   ins_encode %{
17115     __ absr(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg));
17116   %}
17117   ins_pipe(vlogical128);
17118 %}
17119 
17120 instruct vabs2F(vecD dst, vecD src)
17121 %{
17122   predicate(n->as_Vector()->length() == 2);
17123   match(Set dst (AbsVF src));
17124   ins_cost(INSN_COST * 3);
17125   format %{ "fabs  $dst,$src\t# vector (2S)" %}
17126   ins_encode %{
17127     __ fabs(as_FloatRegister($dst$$reg), __ T2S,
17128             as_FloatRegister($src$$reg));
17129   %}
17130   ins_pipe(vunop_fp64);
17131 %}
17132 
17133 instruct vabs4F(vecX dst, vecX src)
17134 %{
17135   predicate(n->as_Vector()->length() == 4);
17136   match(Set dst (AbsVF src));
17137   ins_cost(INSN_COST * 3);
17138   format %{ "fabs  $dst,$src\t# vector (4S)" %}
17139   ins_encode %{
17140     __ fabs(as_FloatRegister($dst$$reg), __ T4S,
17141             as_FloatRegister($src$$reg));
17142   %}
17143   ins_pipe(vunop_fp128);
17144 %}
17145 
17146 instruct vabs2D(vecX dst, vecX src)
17147 %{
17148   predicate(n->as_Vector()->length() == 2);
17149   match(Set dst (AbsVD src));
17150   ins_cost(INSN_COST * 3);
17151   format %{ "fabs  $dst,$src\t# vector (2D)" %}
17152   ins_encode %{
17153     __ fabs(as_FloatRegister($dst$$reg), __ T2D,
17154             as_FloatRegister($src$$reg));
17155   %}
17156   ins_pipe(vunop_fp128);
17157 %}
17158 
17159 // --------------------------------- NEG --------------------------------------
17160 
17161 instruct vneg2F(vecD dst, vecD src)
17162 %{
17163   predicate(n->as_Vector()->length() == 2);
17164   match(Set dst (NegVF src));
17165   ins_cost(INSN_COST * 3);
17166   format %{ "fneg  $dst,$src\t# vector (2S)" %}
17167   ins_encode %{
17168     __ fneg(as_FloatRegister($dst$$reg), __ T2S,
17169             as_FloatRegister($src$$reg));
17170   %}
17171   ins_pipe(vunop_fp64);
17172 %}
17173 
17174 instruct vneg4F(vecX dst, vecX src)


17275   %}
17276   ins_pipe(vlogical64);
17277 %}
17278 
17279 instruct vxor16B(vecX dst, vecX src1, vecX src2)
17280 %{
17281   predicate(n->as_Vector()->length_in_bytes() == 16);
17282   match(Set dst (XorV src1 src2));
17283   ins_cost(INSN_COST);
17284   format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
17285   ins_encode %{
17286     __ eor(as_FloatRegister($dst$$reg), __ T16B,
17287             as_FloatRegister($src1$$reg),
17288             as_FloatRegister($src2$$reg));
17289   %}
17290   ins_pipe(vlogical128);
17291 %}
17292 
17293 // ------------------------------ Shift ---------------------------------------
17294 instruct vshiftcnt8B(vecD dst, iRegIorL2I cnt) %{
17295   predicate(n->as_Vector()->length_in_bytes() == 8);
17296   match(Set dst (LShiftCntV cnt));
17297   match(Set dst (RShiftCntV cnt));
17298   format %{ "dup  $dst, $cnt\t# shift count vector (8B)" %}
17299   ins_encode %{
17300     __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($cnt$$reg));
17301   %}
17302   ins_pipe(vdup_reg_reg64);
17303 %}
17304 
17305 instruct vshiftcnt16B(vecX dst, iRegIorL2I cnt) %{
17306   predicate(n->as_Vector()->length_in_bytes() == 16);
17307   match(Set dst (LShiftCntV cnt));
17308   match(Set dst (RShiftCntV cnt));
17309   format %{ "dup  $dst, $cnt\t# shift count vector (16B)" %}
17310   ins_encode %{
17311     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
17312   %}
17313   ins_pipe(vdup_reg_reg128);
17314 %}
17315 

src/hotspot/cpu/aarch64/aarch64.ad (after rev 61975)

2058   if (!has_match_rule(opcode))
2059     return false;
2060 
2061   bool ret_value = true;
2062   switch (opcode) {
2063     case Op_CacheWB:
2064     case Op_CacheWBPreSync:
2065     case Op_CacheWBPostSync:
2066       if (!VM_Version::supports_data_cache_line_flush()) {
2067         ret_value = false;
2068       }
2069       break;
2070   }
2071 
2072   return ret_value; // By default match rules are supported.
2073 }
2074 
2075 // Identify extra cases where we might want to provide match rules for vector nodes
2076 // and other intrinsics guarded by vector length (vlen) and element type (bt).
2077 const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
2078   if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) {
2079     return false;
2080   }
2081 
2082   // Special cases which require vector length
2083   switch (opcode) {
2084     case Op_MulAddVS2VI: {
2085       if (vlen != 4) {
2086         return false;
2087       }
2088       break;
2089     }
2090     case Op_VectorLoadShuffle:
2091     case Op_VectorRearrange:
2092       if (vlen < 4) {
2093         return false;
2094       }
2095       break;
2096   }
2097 
2098   return true; // By default match rules are supported.
2099 }
2100 
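Concretely, the switch above yields results like these (an illustrative
sketch; it assumes the opcode passes the match_rule_supported() and
vector_size_supported() checks first, so that only the vector-length cases
fire):

    Matcher::match_rule_supported_vector(Op_MulAddVS2VI,     4, T_SHORT)  // true
    Matcher::match_rule_supported_vector(Op_MulAddVS2VI,     8, T_SHORT)  // false: vlen != 4
    Matcher::match_rule_supported_vector(Op_VectorRearrange, 2, T_BYTE)   // false: vlen < 4
    Matcher::match_rule_supported_vector(Op_VectorRearrange, 8, T_BYTE)   // true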
2101 const bool Matcher::has_predicated_vectors(void) {
2102   return false;
2103 }
2104 
2105 bool Matcher::supports_vector_variable_shifts(void) {
2106   return true;
2107 }
2108 
2109 const int Matcher::float_pressure(int default_pressure_threshold) {
2110   return default_pressure_threshold;
2111 }
2112 
2113 int Matcher::regnum_to_fpu_offset(int regnum)
2114 {
2115   Unimplemented();
2116   return 0;
2117 }
2118 
2119 // Is this branch offset short enough that a short branch can be used?
2120 //
2121 // NOTE: If the platform does not provide any short branch variants, then
2122 //       this method should return false for offset 0.
2123 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
2124   // The passed offset is relative to the address of the branch.
2125 
2126   return (-32768 <= offset && offset < 32768);
2127 }
2128 


2135 // true just means we have fast l2f conversion
2136 const bool Matcher::convL2FSupported(void) {
2137   return true;
2138 }
2139 
2140 // Vector width in bytes.
2141 const int Matcher::vector_width_in_bytes(BasicType bt) {
2142   int size = MIN2(16,(int)MaxVectorSize);
2143   // Minimum 2 values in vector
2144   if (size < 2*type2aelembytes(bt)) size = 0;
2145   // But never < 4
2146   if (size < 4) size = 0;
2147   return size;
2148 }
2149 
2150 // Limits on vector size (number of elements) loaded into vector.
2151 const int Matcher::max_vector_size(const BasicType bt) {
2152   return vector_width_in_bytes(bt)/type2aelembytes(bt);
2153 }
2154 const int Matcher::min_vector_size(const BasicType bt) {
2155   int max_size = max_vector_size(bt);
2156   // Limit the vector size to 8 bytes
2157   int size = 8 / type2aelembytes(bt);
2158   if (bt == T_BYTE) {
2159     // To support Vector API shuffle/rearrange.
2160     size = 4;
2161   } else if (bt == T_BOOLEAN) {
2162     // To support Vector API load/store mask.
2163     size = 2;
2164   }
2165   if (size < 2) size = 2;
2166   return MIN2(size,max_size);
2167 }
2168 
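As a cross-check of the sizing arithmetic in vector_width_in_bytes(),
max_vector_size() and min_vector_size(), here is a minimal standalone C++
sketch. It is not HotSpot code: it assumes MaxVectorSize = 16 and the usual
type2aelembytes() values (1 for T_BOOLEAN/T_BYTE, 2 for T_SHORT, 4 for
T_INT/T_FLOAT, 8 for T_LONG/T_DOUBLE).

    #include <algorithm>
    #include <cstdio>

    static const int kMaxVectorSize = 16;  // assumed value of MaxVectorSize

    static int max_size(int elem_bytes) {
      // vector_width_in_bytes() / type2aelembytes(bt)
      return std::min(16, kMaxVectorSize) / elem_bytes;
    }

    static int min_size(int elem_bytes, bool is_byte, bool is_boolean) {
      int size = 8 / elem_bytes;           // limit the vector size to 8 bytes
      if (is_byte)         size = 4;       // Vector API shuffle/rearrange
      else if (is_boolean) size = 2;       // Vector API load/store mask
      if (size < 2) size = 2;
      return std::min(size, max_size(elem_bytes));
    }

    int main() {
      printf("T_BOOLEAN [%d, %d]\n", min_size(1, false, true),  max_size(1));  // [2, 16]
      printf("T_BYTE    [%d, %d]\n", min_size(1, true,  false), max_size(1));  // [4, 16]
      printf("T_SHORT   [%d, %d]\n", min_size(2, false, false), max_size(2));  // [4, 8]
      printf("T_INT     [%d, %d]\n", min_size(4, false, false), max_size(4));  // [2, 4]
      printf("T_LONG    [%d, %d]\n", min_size(8, false, false), max_size(8));  // [2, 2]
      return 0;
    }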
2169 // Vector ideal reg.
2170 const uint Matcher::vector_ideal_reg(int len) {
2171   switch(len) {
2172     // For 16-bit/32-bit mask vectors, reuse VecD.
2173     case  2:
2174     case  4:
2175     case  8: return Op_VecD;
2176     case 16: return Op_VecX;
2177   }
2178   ShouldNotReachHere();
2179   return 0;
2180 }
2181 
2182 // AES support not yet implemented
2183 const bool Matcher::pass_original_key_for_aes() {
2184   return false;
2185 }
2186 
2187 // aarch64 supports misaligned vector stores/loads.
2188 const bool Matcher::misaligned_vectors_ok() {
2189   return true;
2190 }
2191 
2192 // false => size gets scaled to BytesPerLong, ok.
2193 const bool Matcher::init_array_count_is_in_bytes = false;
2194 


2797     // need to do this the hard way until we can manage relocs
2798     // for 32 bit constants
2799     __ movoop(rscratch2, (jobject)con);
2800     __ encode_klass_not_null(rscratch2);
2801     loadStore(_masm, &MacroAssembler::strw, rscratch2, $mem->opcode(),
2802                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
2803   %}
2804 
2805   // This encoding class is generated automatically from ad_encode.m4.
2806   // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
2807   enc_class aarch64_enc_strb0_ordered(memory4 mem) %{
2808       C2_MacroAssembler _masm(&cbuf);
2809       __ membar(Assembler::StoreStore);
2810       loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
2811                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 1);
2812   %}
2813 
2814   // END Non-volatile memory access
2815 
2816   // Vector loads and stores
2817   enc_class aarch64_enc_ldrvH(vecD dst, memory mem) %{
2818     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
2819     loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::H,
2820        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2821   %}
2822 
2823   enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
2824     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
2825     loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
2826        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2827   %}
2828 
2829   enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
2830     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
2831     loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
2832        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2833   %}
2834 
2835   enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
2836     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
2837     loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
2838        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2839   %}
2840 
2841   enc_class aarch64_enc_strvH(vecD src, memory mem) %{
2842     FloatRegister src_reg = as_FloatRegister($src$$reg);
2843     loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::H,
2844        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2845   %}
2846 
2847   enc_class aarch64_enc_strvS(vecD src, memory mem) %{
2848     FloatRegister src_reg = as_FloatRegister($src$$reg);
2849     loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
2850        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2851   %}
2852 
2853   enc_class aarch64_enc_strvD(vecD src, memory mem) %{
2854     FloatRegister src_reg = as_FloatRegister($src$$reg);
2855     loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
2856        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2857   %}
2858 
2859   enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
2860     FloatRegister src_reg = as_FloatRegister($src$$reg);
2861     loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
2862        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2863   %}
2864 
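The H/S/D/Q suffixes select the SIMD register variant used for the access.
As a hedged summary (the widths follow the usual AArch64 h/s/d/q register
sizes; they are stated here as an assumption, not taken from this file):

    // Assumed access widths for the vector load/store encodings above:
    //   aarch64_enc_ldrvH / aarch64_enc_strvH  :  2 bytes (ldr/str of an h register)
    //   aarch64_enc_ldrvS / aarch64_enc_strvS  :  4 bytes (ldr/str of an s register)
    //   aarch64_enc_ldrvD / aarch64_enc_strvD  :  8 bytes (ldr/str of a d register)
    //   aarch64_enc_ldrvQ / aarch64_enc_strvQ  : 16 bytes (ldr/str of a q register)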
2865   // volatile loads and stores
2866 


3911 operand immI_le_4()
3912 %{
3913   predicate(n->get_int() <= 4);
3914   match(ConI);
3915 
3916   op_cost(0);
3917   format %{ %}
3918   interface(CONST_INTER);
3919 %}
3920 
3921 operand immI_31()
3922 %{
3923   predicate(n->get_int() == 31);
3924   match(ConI);
3925 
3926   op_cost(0);
3927   format %{ %}
3928   interface(CONST_INTER);
3929 %}
3930 
3931 operand immI_2()
3932 %{
3933   predicate(n->get_int() == 2);
3934   match(ConI);
3935 
3936   op_cost(0);
3937   format %{ %}
3938   interface(CONST_INTER);
3939 %}
3940 
3941 operand immI_4()
3942 %{
3943   predicate(n->get_int() == 4);
3944   match(ConI);
3945 
3946   op_cost(0);
3947   format %{ %}
3948   interface(CONST_INTER);
3949 %}
3950 
3951 operand immI_8()
3952 %{
3953   predicate(n->get_int() == 8);
3954   match(ConI);
3955 
3956   op_cost(0);
3957   format %{ %}
3958   interface(CONST_INTER);
3959 %}
3960 
3961 operand immI_16()
3962 %{
3963   predicate(n->get_int() == 16);
3964   match(ConI);
3965 
3966   op_cost(0);
3967   format %{ %}
3968   interface(CONST_INTER);
3969 %}
3970 


10833   ins_pipe(ialu_reg_reg_vshift);
10834 %}
10835 
10836 // Shift Right Arithmetic Immediate
10837 instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
10838   match(Set dst (RShiftL src1 src2));
10839 
10840   ins_cost(INSN_COST);
10841   format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}
10842 
10843   ins_encode %{
10844     __ asr(as_Register($dst$$reg),
10845            as_Register($src1$$reg),
10846            $src2$$constant & 0x3f);
10847   %}
10848 
10849   ins_pipe(ialu_reg_shift);
10850 %}
10851 
10852 // BEGIN This section of the file is automatically generated. Do not edit --------------
10853 // This section is generated from aarch64_ad.m4
10854 
10855 instruct regL_not_reg(iRegLNoSp dst,
10856                          iRegL src1, immL_M1 m1,
10857                          rFlagsReg cr) %{
10858   match(Set dst (XorL src1 m1));
10859   ins_cost(INSN_COST);
10860   format %{ "eon  $dst, $src1, zr" %}
10861 
10862   ins_encode %{
10863     __ eon(as_Register($dst$$reg),
10864               as_Register($src1$$reg),
10865               zr,
10866               Assembler::LSL, 0);
10867   %}
10868 
10869   ins_pipe(ialu_reg);
10870 %}
10871 instruct regI_not_reg(iRegINoSp dst,
10872                          iRegIorL2I src1, immI_M1 m1,
10873                          rFlagsReg cr) %{


16024   ins_pipe(vdup_reg_freg128);
16025 %}
16026 
16027 instruct replicate2D(vecX dst, vRegD src)
16028 %{
16029   predicate(n->as_Vector()->length() == 2);
16030   match(Set dst (ReplicateD src));
16031   ins_cost(INSN_COST);
16032   format %{ "dup  $dst, $src\t# vector (2D)" %}
16033   ins_encode %{
16034     __ dup(as_FloatRegister($dst$$reg), __ T2D,
16035            as_FloatRegister($src$$reg));
16036   %}
16037   ins_pipe(vdup_reg_dreg128);
16038 %}
16039 
16040 // ====================REDUCTION ARITHMETIC====================================
16041 
16042 instruct reduce_add2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp, iRegINoSp tmp2)
16043 %{
16044   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
16045   match(Set dst (AddReductionVI isrc vsrc));
16046   ins_cost(INSN_COST);
16047   effect(TEMP tmp, TEMP tmp2);
16048   format %{ "umov  $tmp, $vsrc, S, 0\n\t"
16049             "umov  $tmp2, $vsrc, S, 1\n\t"
16050             "addw  $tmp, $isrc, $tmp\n\t"
16051             "addw  $dst, $tmp, $tmp2\t# add reduction2I"
16052   %}
16053   ins_encode %{
16054     __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0);
16055     __ umov($tmp2$$Register, as_FloatRegister($vsrc$$reg), __ S, 1);
16056     __ addw($tmp$$Register, $isrc$$Register, $tmp$$Register);
16057     __ addw($dst$$Register, $tmp$$Register, $tmp2$$Register);
16058   %}
16059   ins_pipe(pipe_class_default);
16060 %}
16061 
16062 instruct reduce_add4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp, iRegINoSp itmp)
16063 %{
16064   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
16065   match(Set dst (AddReductionVI isrc vsrc));
16066   ins_cost(INSN_COST);
16067   effect(TEMP vtmp, TEMP itmp);
16068   format %{ "addv  $vtmp, T4S, $vsrc\n\t"
16069             "umov  $itmp, $vtmp, S, 0\n\t"
16070             "addw  $dst, $itmp, $isrc\t# add reduction4I"
16071   %}
16072   ins_encode %{
16073     __ addv(as_FloatRegister($vtmp$$reg), __ T4S,
16074             as_FloatRegister($vsrc$$reg));
16075     __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ S, 0);
16076     __ addw($dst$$Register, $itmp$$Register, $isrc$$Register);
16077   %}
16078   ins_pipe(pipe_class_default);
16079 %}
16080 
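For readers unfamiliar with the reduction nodes, here is an illustrative
scalar model of what these rules (and the Mul variants below) compute; this
is an explanatory sketch, not HotSpot code:

    // (AddReductionVI isrc vsrc): fold the vector lanes into the scalar input.
    int add_reduction_vi(int isrc, const int* vsrc, int vlen) {
      int r = isrc;
      for (int i = 0; i < vlen; i++) r += vsrc[i];
      return r;
    }

    // (MulReductionVI isrc vsrc): same shape, with multiplication.
    int mul_reduction_vi(int isrc, const int* vsrc, int vlen) {
      int r = isrc;
      for (int i = 0; i < vlen; i++) r *= vsrc[i];
      return r;
    }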
16081 instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
16082 %{
16083   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
16084   match(Set dst (MulReductionVI isrc vsrc));
16085   ins_cost(INSN_COST);
16086   effect(TEMP tmp, TEMP dst);
16087   format %{ "umov  $tmp, $vsrc, S, 0\n\t"
16088             "mul   $dst, $tmp, $isrc\n\t"
16089             "umov  $tmp, $vsrc, S, 1\n\t"
16090             "mul   $dst, $tmp, $dst\t# mul reduction2I"
16091   %}
16092   ins_encode %{
16093     __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0);
16094     __ mul($dst$$Register, $tmp$$Register, $isrc$$Register);
16095     __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 1);
16096     __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
16097   %}
16098   ins_pipe(pipe_class_default);
16099 %}
16100 
16101 instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp, iRegINoSp itmp)
16102 %{
16103   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
16104   match(Set dst (MulReductionVI isrc vsrc));
16105   ins_cost(INSN_COST);
16106   effect(TEMP vtmp, TEMP itmp, TEMP dst);
16107   format %{ "ins   $vtmp, D, $vsrc, 0, 1\n\t"
16108             "mulv  $vtmp, T2S, $vtmp, $vsrc\n\t"
16109             "umov  $itmp, $vtmp, S, 0\n\t"
16110             "mul   $dst, $itmp, $isrc\n\t"
16111             "umov  $itmp, $vtmp, S, 1\n\t"
16112             "mul   $dst, $itmp, $dst\t# mul reduction4I"
16113   %}
16114   ins_encode %{
16115     __ ins(as_FloatRegister($vtmp$$reg), __ D,
16116            as_FloatRegister($vsrc$$reg), 0, 1);
16117     __ mulv(as_FloatRegister($vtmp$$reg), __ T2S,
16118             as_FloatRegister($vtmp$$reg), as_FloatRegister($vsrc$$reg));
16119     __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ S, 0);
16120     __ mul($dst$$Register, $itmp$$Register, $isrc$$Register);
16121     __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ S, 1);
16122     __ mul($dst$$Register, $itmp$$Register, $dst$$Register);
16123   %}


17165 
17166 instruct vabs2L(vecX dst, vecX src)
17167 %{
17168   predicate(n->as_Vector()->length() == 2);
17169   match(Set dst (AbsVL src));
17170   ins_cost(INSN_COST);
17171   format %{ "abs  $dst, $src\t# vector (2D)" %}
17172   ins_encode %{
17173     __ absr(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg));
17174   %}
17175   ins_pipe(vlogical128);
17176 %}
17177 
17178 instruct vabs2F(vecD dst, vecD src)
17179 %{
17180   predicate(n->as_Vector()->length() == 2);
17181   match(Set dst (AbsVF src));
17182   ins_cost(INSN_COST * 3);
17183   format %{ "fabs  $dst,$src\t# vector (2S)" %}
17184   ins_encode %{
17185     __ fabs(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg));
17186   %}
17187   ins_pipe(vunop_fp64);
17188 %}
17189 
17190 instruct vabs4F(vecX dst, vecX src)
17191 %{
17192   predicate(n->as_Vector()->length() == 4);
17193   match(Set dst (AbsVF src));
17194   ins_cost(INSN_COST * 3);
17195   format %{ "fabs  $dst,$src\t# vector (4S)" %}
17196   ins_encode %{
17197     __ fabs(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg));
17198   %}
17199   ins_pipe(vunop_fp128);
17200 %}
17201 
17202 instruct vabs2D(vecX dst, vecX src)
17203 %{
17204   predicate(n->as_Vector()->length() == 2);
17205   match(Set dst (AbsVD src));
17206   ins_cost(INSN_COST * 3);
17207   format %{ "fabs  $dst,$src\t# vector (2D)" %}
17208   ins_encode %{
17209     __ fabs(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg));
17210   %}
17211   ins_pipe(vunop_fp128);
17212 %}
17213 
17214 // --------------------------------- NEG --------------------------------------
17215 
17216 instruct vneg2F(vecD dst, vecD src)
17217 %{
17218   predicate(n->as_Vector()->length() == 2);
17219   match(Set dst (NegVF src));
17220   ins_cost(INSN_COST * 3);
17221   format %{ "fneg  $dst,$src\t# vector (2S)" %}
17222   ins_encode %{
17223     __ fneg(as_FloatRegister($dst$$reg), __ T2S,
17224             as_FloatRegister($src$$reg));
17225   %}
17226   ins_pipe(vunop_fp64);
17227 %}
17228 
17229 instruct vneg4F(vecX dst, vecX src)


17330   %}
17331   ins_pipe(vlogical64);
17332 %}
17333 
17334 instruct vxor16B(vecX dst, vecX src1, vecX src2)
17335 %{
17336   predicate(n->as_Vector()->length_in_bytes() == 16);
17337   match(Set dst (XorV src1 src2));
17338   ins_cost(INSN_COST);
17339   format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
17340   ins_encode %{
17341     __ eor(as_FloatRegister($dst$$reg), __ T16B,
17342             as_FloatRegister($src1$$reg),
17343             as_FloatRegister($src2$$reg));
17344   %}
17345   ins_pipe(vlogical128);
17346 %}
17347 
17348 // ------------------------------ Shift ---------------------------------------
17349 instruct vshiftcnt8B(vecD dst, iRegIorL2I cnt) %{
17350   predicate(n->as_Vector()->length_in_bytes() == 4 ||
17351             n->as_Vector()->length_in_bytes() == 8);
17352   match(Set dst (LShiftCntV cnt));
17353   match(Set dst (RShiftCntV cnt));
17354   format %{ "dup  $dst, $cnt\t# shift count vector (8B)" %}
17355   ins_encode %{
17356     __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($cnt$$reg));
17357   %}
17358   ins_pipe(vdup_reg_reg64);
17359 %}
17360 
17361 instruct vshiftcnt16B(vecX dst, iRegIorL2I cnt) %{
17362   predicate(n->as_Vector()->length_in_bytes() == 16);
17363   match(Set dst (LShiftCntV cnt));
17364   match(Set dst (RShiftCntV cnt));
17365   format %{ "dup  $dst, $cnt\t# shift count vector (16B)" %}
17366   ins_encode %{
17367     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
17368   %}
17369   ins_pipe(vdup_reg_reg128);
17370 %}
17371 
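Note the widened predicate on vshiftcnt8B: 4-byte vectors now reuse the 8B
broadcast. That is safe under the assumption that consumers of the shift
count only read the lanes they need; the dup simply replicates the low byte
of cnt into every byte lane, as in this illustrative sketch:

    // Scalar model of "dup Vd.8B, Wn" as used above (illustrative only).
    void shift_cnt_8b(unsigned char dst[8], int cnt) {
      for (int i = 0; i < 8; i++) dst[i] = (unsigned char)cnt;
    }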

