1 // Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
   2 // Copyright (c) 2020, Arm Limited. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 dnl Generate the warning
  26 // This file is automatically generated by running "m4 aarch64_neon_ad.m4". Do not edit ----
  27 dnl
  28 
  29 // AArch64 NEON Architecture Description File
  30 
   31 dnl
      // Generator helper: produces the text orL2I when its argument is I and
      // produces nothing for any other argument. The vector insert rules use it
      // to build the name of the scalar operand class.
   32 define(`ORL2I', `ifelse($1,I,orL2I)')dnl
   33 dnl
      // Generator helper: expands to a message made of the program name, the file
      // name, the line number and the rejected argument, and then ends the m4
      // run with exit status 1.
   34 define(`error', `__program__:__file__:__line__: Invalid argument ``$1''m4exit(`1')')dnl
   35 dnl
      // Generator helper: maps an integral element letter to the SIMD lane size
      // letter used in arrangement names: B to B, S to H, I to S, L to D.
      // Any other argument aborts the generation.
   36 define(`iTYPE2SIMD',
   37 `ifelse($1, `B', `B',
   38         $1, `S', `H',
   39         $1, `I', `S',
   40         $1, `L', `D',
   41         `error($1)')')dnl
   42 dnl
      // Generator helper: maps a floating point element letter to the SIMD lane
      // size letter: F to S, D to D. Any other argument aborts the generation.
   43 define(`fTYPE2SIMD',
   44 `ifelse($1, `F', `S',
   45         $1, `D', `D',
   46         `error($1)')')dnl
   47 dnl
      // Generator helper: maps an element letter to the suffix of a basic type
      // constant: B to BYTE, S to SHORT, I to INT, L to LONG, F to FLOAT and
      // D to DOUBLE. The rule templates prepend T_ to the result inside their
      // predicates. Any other argument aborts the generation.
   48 define(`TYPE2DATATYPE',
   49 `ifelse($1, `B', `BYTE',
   50         $1, `S', `SHORT',
   51         $1, `I', `INT',
   52         $1, `L', `LONG',
   53         $1, `F', `FLOAT',
   54         $1, `D', `DOUBLE',
   55         `error($1)')')dnl
  56 dnl
  57 // ====================VECTOR INSTRUCTIONS==================================
  58 
  59 // ------------------------------ Load/store/reinterpret -----------------------
  60 
   61 // Load vector (16 bits)
      // Matches a LoadVector whose memory size is 2 bytes and loads it into a
      // vecD register through the aarch64_enc_ldrvH encoding class.
   62 instruct loadV2(vecD dst, memory mem)
   63 %{
   64   predicate(n->as_LoadVector()->memory_size() == 2);
   65   match(Set dst (LoadVector mem));
   66   ins_cost(4 * INSN_COST);
   67   format %{ "ldrh   $dst,$mem\t# vector (16 bits)" %}
   68   ins_encode( aarch64_enc_ldrvH(dst, mem) );
   69   ins_pipe(vload_reg_mem64);
   70 %}
  71 
   72 // Store Vector (16 bits)
      // Matches a StoreVector whose memory size is 2 bytes and stores the vecD
      // source register through the aarch64_enc_strvH encoding class.
   73 instruct storeV2(vecD src, memory mem)
   74 %{
   75   predicate(n->as_StoreVector()->memory_size() == 2);
   76   match(Set mem (StoreVector mem src));
   77   ins_cost(4 * INSN_COST);
   78   format %{ "strh   $mem,$src\t# vector (16 bits)" %}
   79   ins_encode( aarch64_enc_strvH(src, mem) );
   80   ins_pipe(vstore_reg_mem64);
   81 %}
   82 dnl
      // Reinterpret between vector types of equal size. The match rule uses dst
      // both as input and as result, so no instruction is emitted and the cost
      // is zero. Generated for 8 byte vecD and 16 byte vecX vectors.
   83 define(`REINTERPRET', `
   84 instruct reinterpret$1`'(vec$1 dst)
   85 %{
   86   predicate(n->bottom_type()->is_vect()->length_in_bytes() == $2 &&
   87             n->in(1)->bottom_type()->is_vect()->length_in_bytes() == $2);
   88   match(Set dst (VectorReinterpret dst));
   89   ins_cost(0);
   90   format %{ " # reinterpret $dst" %}
   91   ins_encode %{
   92     // empty
   93   %}
   94   ins_pipe(pipe_class_empty);
   95 %}')dnl
   96 dnl         $1 $2
   97 REINTERPRET(D, 8)
   98 REINTERPRET(X, 16)
   99 dnl
      // Reinterpret between vectors of different size, 8 bytes and 16 bytes.
      // The predicate checks the result size first and the input size second.
      // When source and destination are different registers the source is
      // copied with an orr of the source with itself using the T8B arrangement;
      // when they are the same register nothing is emitted.
      // NOTE(review): the same 8 byte copy is used in both directions. For the
      // 8 to 16 byte case this presumably relies on the 64-bit write clearing
      // the upper half of the destination register - confirm.
  100 define(`REINTERPRET_X', `
  101 instruct reinterpret$1`'2$2`'(vec$2 dst, vec$1 src)
  102 %{
  103   predicate(n->bottom_type()->is_vect()->length_in_bytes() == $3 &&
  104             n->in(1)->bottom_type()->is_vect()->length_in_bytes() == $4);
  105   match(Set dst (VectorReinterpret src));
  106   ins_cost(INSN_COST);
  107   format %{ " # reinterpret $dst,$src" %}
  108   ins_encode %{
  109     // If register is the same, then move is not needed.
  110     if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) {
  111       __ orr(as_FloatRegister($dst$$reg), __ T8B,
  112              as_FloatRegister($src$$reg),
  113              as_FloatRegister($src$$reg));
  114     }
  115   %}
  116   ins_pipe(vlogical64);
  117 %}')dnl
  118 dnl           $1 $2 $3  $4
  119 REINTERPRET_X(D, X, 16, 8)
  120 REINTERPRET_X(X, D, 8,  16)
 121 dnl
 122 
 123 // ------------------------------ Vector cast -------------------------------
  124 dnl
      // Casts between integral vectors whose element sizes differ by one step.
      // Widening casts use a single sxtl and narrowing casts use a single xtn.
      // Each predicate checks the lane count and the element type of the result.
  125 define(`VECTOR_CAST_I2I', `
  126 instruct vcvt$1$2to$1$3`'(vec$4 dst, vec$5 src)
  127 %{
  128   predicate(n->as_Vector()->length() == $1 && n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3));
  129   match(Set dst (VectorCast$2`'2X src));
  130   format %{ "$6  $dst, T$8, $src, T$7\t# convert $1$2 to $1$3 vector" %}
  131   ins_encode %{
  132     __ $6(as_FloatRegister($dst$$reg), __ T$8, as_FloatRegister($src$$reg), __ T$7);
  133   %}
  134   ins_pipe(pipe_class_default);
  135 %}')dnl
  136 dnl             $1 $2 $3 $4 $5 $6    $7  $8
  137 VECTOR_CAST_I2I(4, B, S, D, D, sxtl, 8B, 8H)
  138 VECTOR_CAST_I2I(8, B, S, X, D, sxtl, 8B, 8H)
  139 VECTOR_CAST_I2I(4, S, B, D, D, xtn,  8H, 8B)
  140 VECTOR_CAST_I2I(8, S, B, D, X, xtn,  8H, 8B)
  141 VECTOR_CAST_I2I(4, S, I, X, D, sxtl, 4H, 4S)
  142 VECTOR_CAST_I2I(4, I, S, D, X, xtn,  4S, 4H)
  143 VECTOR_CAST_I2I(2, I, L, X, D, sxtl, 2S, 2D)
  144 VECTOR_CAST_I2I(2, L, I, D, X, xtn,  2D, 2S)
  145 dnl
      // Casts between 4 byte lanes and 4 int lanes. The element size changes by
      // two steps, so the same instruction is issued twice: sxtl to widen or xtn
      // to narrow. The second step works in place on the destination register.
  146 define(`VECTOR_CAST_B2I', `
  147 instruct vcvt4$1to4$2`'(vec$3 dst, vec$4 src)
  148 %{
  149   predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
  150   match(Set dst (VectorCast$1`'2X src));
  151   format %{ "$5  $dst, T$7, $src, T$6\n\t"
  152             "$5  $dst, T$9, $dst, T$8\t# convert 4$1 to 4$2 vector"
  153   %}
  154   ins_encode %{
  155     __ $5(as_FloatRegister($dst$$reg), __ T$7, as_FloatRegister($src$$reg), __ T$6);
  156     __ $5(as_FloatRegister($dst$$reg), __ T$9, as_FloatRegister($dst$$reg), __ T$8);
  157   %}
  158   ins_pipe(pipe_slow);
  159 %}')dnl
  160 dnl             $1 $2 $3 $4 $5    $6  $7  $8  $9
  161 VECTOR_CAST_B2I(B, I, X, D, sxtl, 8B, 8H, 4H, 4S)
  162 VECTOR_CAST_B2I(I, B, D, X, xtn,  4S, 4H, 8H, 8B)
 163 
  164 instruct vcvt4Bto4F(vecX dst, vecD src)
  165 %{
        // Convert 4 bytes to 4 floats. The bytes are widened to halfwords and then
        // to words with two sxtl steps, and the 4 words are converted in place
        // with scvtfv. Selected for a VectorCastB2X whose result has 4 lanes of
        // type T_FLOAT.
  166   predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  167   match(Set dst (VectorCastB2X src));
  168   format %{ "sxtl  $dst, T8H, $src, T8B\n\t"
  169             "sxtl  $dst, T4S, $dst, T4H\n\t"
  170             "scvtfv  T4S, $dst, $dst\t# convert 4B to 4F vector"
  171   %}
  172   ins_encode %{
  173     __ sxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
  174     __ sxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H);
  175     __ scvtfv(__ T4S, as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg));
  176   %}
  177   ins_pipe(pipe_slow);
  178 %}
  179 dnl
      // Casts from an integral vector to a floating point vector whose lanes are
      // twice as wide: 4 shorts to 4 floats and 2 ints to 2 doubles. The source
      // is widened with sxtl and then converted in place with scvtfv.
  180 define(`VECTOR_CAST_I2F_L', `
  181 instruct vcvt$1$2to$1$3`'(vecX dst, vecD src)
  182 %{
  183   predicate(n->as_Vector()->length() == $1 && n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3));
  184   match(Set dst (VectorCast$2`'2X src));
  185   format %{ "sxtl    $dst, T$5, $src, T$4\n\t"
  186             "scvtfv  T$5, $dst, $dst\t# convert $1$2 to $1$3 vector"
  187   %}
  188   ins_encode %{
  189     __ sxtl(as_FloatRegister($dst$$reg), __ T$5, as_FloatRegister($src$$reg), __ T$4);
  190     __ scvtfv(__ T$5, as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg));
  191   %}
  192   ins_pipe(pipe_slow);
  193 %}')dnl
  194 dnl               $1 $2 $3 $4  $5
  195 VECTOR_CAST_I2F_L(4, S, F, 4H, 4S)
  196 VECTOR_CAST_I2F_L(2, I, D, 2S, 2D)
  197 dnl
      // Casts from an integral vector to a floating point vector with lanes of
      // the same width: int to float for 2 and 4 lanes, long to double for
      // 2 lanes. A single scvtfv does the conversion; source and destination
      // use the same vector register class.
  198 define(`VECTOR_CAST_I2F', `
  199 instruct vcvt$1$2to$1$3`'(vec$4 dst, vec$4 src)
  200 %{
  201   predicate(n->as_Vector()->length() == $1 && n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3));
  202   match(Set dst (VectorCast$2`'2X src));
  203   format %{ "scvtfv  T$5, $dst, $src\t# convert $1$2 to $1$3 vector" %}
  204   ins_encode %{
  205     __ scvtfv(__ T$5, as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  206   %}
  207   ins_pipe(pipe_class_default);
  208 %}')dnl
  209 dnl             $1 $2 $3 $4 $5
  210 VECTOR_CAST_I2F(2, I, F, D, 2S)
  211 VECTOR_CAST_I2F(4, I, F, X, 4S)
  212 VECTOR_CAST_I2F(2, L, D, X, 2D)
  213 dnl
      // Casts between 2 floats and 2 doubles: fcvtl widens 2 floats to 2 doubles
      // and fcvtn narrows 2 doubles to 2 floats.
  214 define(`VECTOR_CAST_F2F', `
  215 instruct vcvt2$1to2$2`'(vec$3 dst, vec$4 src)
  216 %{
  217   predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
  218   match(Set dst (VectorCast$1`'2X src));
  219   format %{ "$5  $dst, T$7, $src, T$6\t# convert 2$1 to 2$2 vector" %}
  220   ins_encode %{
  221     __ $5(as_FloatRegister($dst$$reg), __ T$7, as_FloatRegister($src$$reg), __ T$6);
  222   %}
  223   ins_pipe(pipe_class_default);
  224 %}')dnl
  225 dnl             $1 $2 $3 $4 $5     $6  $7
  226 VECTOR_CAST_F2F(F, D, X, D, fcvtl, 2S, 2D)
  227 VECTOR_CAST_F2F(D, F, D, X, fcvtn, 2D, 2S)
 228 dnl
 229 
 230 instruct vcvt2Lto2F(vecD dst, vecX src)
 231 %{
 232   predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
 233   match(Set dst (VectorCastL2X src));
 234   format %{ "scvtfv  T2D, $dst, $src\n\t"
 235             "fcvtn   $dst, T2S, $dst, T2D\t# convert 2L to 2F vector"
 236   %}
 237   ins_encode %{
 238     __ scvtfv(__ T2D, as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
 239     __ fcvtn(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($dst$$reg), __ T2D);
 240   %}
 241   ins_pipe(pipe_slow);
 242 %}
 243 
 244 // ------------------------------ Reduction -------------------------------
  245 dnl
      // Add reduction of byte and short vectors. addv sums all lanes into tmp,
      // smov moves lane 0 of tmp to dst, addw adds the scalar input isrc, and
      // the closing sxtb or sxth narrows the sum back to the element width.
      // dst is TEMP_DEF because it is written before isrc is read.
  246 define(`REDUCE_ADD_BORS', `
  247 instruct reduce_add$1$2`'(iRegINoSp dst, iRegIorL2I isrc, vec$3 vsrc, vec$3 tmp)
  248 %{
  249   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
  250   match(Set dst (AddReductionVI isrc vsrc));
  251   ins_cost(INSN_COST);
  252   effect(TEMP_DEF dst, TEMP tmp);
  253   format %{ "addv  $tmp, T$1`'iTYPE2SIMD($2), $vsrc\n\t"
  254             "smov  $dst, $tmp, iTYPE2SIMD($2), 0\n\t"
  255             "addw  $dst, $dst, $isrc\n\t"
  256             "sxt$4  $dst, $dst\t# add reduction$1$2"
  257   %}
  258   ins_encode %{
  259     __ addv(as_FloatRegister($tmp$$reg), __ T$1`'iTYPE2SIMD($2), as_FloatRegister($vsrc$$reg));
  260     __ smov($dst$$Register, as_FloatRegister($tmp$$reg), __ iTYPE2SIMD($2), 0);
  261     __ addw($dst$$Register, $dst$$Register, $isrc$$Register);
  262     __ sxt$4($dst$$Register, $dst$$Register);
  263   %}
  264   ins_pipe(pipe_slow);
  265 %}')dnl
  266 dnl             $1  $2 $3 $4
  267 REDUCE_ADD_BORS(8,  B, D, b)
  268 REDUCE_ADD_BORS(16, B, X, b)
  269 REDUCE_ADD_BORS(4,  S, D, h)
  270 REDUCE_ADD_BORS(8,  S, X, h)
 271 dnl
 272 
  273 instruct reduce_add2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, vecX tmp)
  274 %{
        // Add reduction of 2 longs: addpd adds the two lanes of vsrc into tmp,
        // umov moves the sum to dst, and add folds in the scalar input isrc.
        // There is no predicate; the rule applies to every AddReductionVL.
  275   match(Set dst (AddReductionVL isrc vsrc));
  276   ins_cost(INSN_COST);
  277   effect(TEMP_DEF dst, TEMP tmp);
  278   format %{ "addpd $tmp, $vsrc\n\t"
  279             "umov  $dst, $tmp, D, 0\n\t"
  280             "add   $dst, $isrc, $dst\t# add reduction2L"
  281   %}
  282   ins_encode %{
  283     __ addpd(as_FloatRegister($tmp$$reg), as_FloatRegister($vsrc$$reg));
  284     __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ D, 0);
  285     __ add($dst$$Register, $isrc$$Register, $dst$$Register);
  286   %}
  287   ins_pipe(pipe_slow);
  288 %}
 289 
  290 instruct reduce_mul8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD vtmp1, vecD vtmp2, iRegINoSp itmp)
  291 %{
        // Multiply reduction of 8 bytes. The vector is folded in halves:
        //  1. word lane 1 of vsrc is copied to word lane 0 of vtmp1 and vtmp1 is
        //     multiplied lane-wise with vsrc, leaving 4 partial products in byte
        //     lanes 0 to 3;
        //  2. halfword lane 1 of vtmp1 is copied to halfword lane 0 of vtmp2 and
        //     vtmp2 is multiplied with vtmp1, leaving 2 partial products in byte
        //     lanes 0 and 1.
        // Only byte lanes 0 and 1 of vtmp2 are read afterwards. Each is moved to
        // itmp, multiplied into the running scalar product that starts from
        // isrc, and the product is narrowed with sxtb after every multiply.
  292   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  293   match(Set dst (MulReductionVI isrc vsrc));
  294   ins_cost(INSN_COST);
  295   effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP itmp);
  296   format %{ "ins   $vtmp1, S, $vsrc, 0, 1\n\t"
  297             "mulv  $vtmp1, T8B, $vtmp1, $vsrc\n\t"
  298             "ins   $vtmp2, H, $vtmp1, 0, 1\n\t"
  299             "mulv  $vtmp2, T8B, $vtmp2, $vtmp1\n\t"
  300             "umov  $itmp, $vtmp2, B, 0\n\t"
  301             "mulw  $dst, $itmp, $isrc\n\t"
  302             "sxtb  $dst, $dst\n\t"
  303             "umov  $itmp, $vtmp2, B, 1\n\t"
  304             "mulw  $dst, $itmp, $dst\n\t"
  305             "sxtb  $dst, $dst\t# mul reduction8B"
  306   %}
  307   ins_encode %{
  308     __ ins(as_FloatRegister($vtmp1$$reg), __ S,
  309            as_FloatRegister($vsrc$$reg), 0, 1);
  310     __ mulv(as_FloatRegister($vtmp1$$reg), __ T8B,
  311             as_FloatRegister($vtmp1$$reg), as_FloatRegister($vsrc$$reg));
  312     __ ins(as_FloatRegister($vtmp2$$reg), __ H,
  313            as_FloatRegister($vtmp1$$reg), 0, 1);
  314     __ mulv(as_FloatRegister($vtmp2$$reg), __ T8B,
  315             as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg));
  316     __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 0);
  317     __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register);
  318     __ sxtb($dst$$Register, $dst$$Register);
  319     __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 1);
  320     __ mulw($dst$$Register, $itmp$$Register, $dst$$Register);
  321     __ sxtb($dst$$Register, $dst$$Register);
  322   %}
  323   ins_pipe(pipe_slow);
  324 %}
 325 
  326 instruct reduce_mul16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp1, vecX vtmp2, iRegINoSp itmp)
  327 %{
        // Multiply reduction of 16 bytes. The vector is folded in halves three
        // times, always with 8 byte wide multiplies:
        //  1. doubleword lane 1 of vsrc is copied to doubleword lane 0 of vtmp1
        //     and vtmp1 is multiplied with vsrc;
        //  2. word lane 1 of vtmp1 is copied to word lane 0 of vtmp2 and the
        //     product of vtmp2 and vtmp1 is written back to vtmp1;
        //  3. halfword lane 1 of vtmp1 is copied to halfword lane 0 of vtmp2 and
        //     vtmp2 is multiplied with vtmp1.
        // Only byte lanes 0 and 1 of vtmp2 are read afterwards. Each is moved to
        // itmp, multiplied into the running scalar product that starts from
        // isrc, and the product is narrowed with sxtb after every multiply.
  328   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  329   match(Set dst (MulReductionVI isrc vsrc));
  330   ins_cost(INSN_COST);
  331   effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP itmp);
  332   format %{ "ins   $vtmp1, D, $vsrc, 0, 1\n\t"
  333             "mulv  $vtmp1, T8B, $vtmp1, $vsrc\n\t"
  334             "ins   $vtmp2, S, $vtmp1, 0, 1\n\t"
  335             "mulv  $vtmp1, T8B, $vtmp2, $vtmp1\n\t"
  336             "ins   $vtmp2, H, $vtmp1, 0, 1\n\t"
  337             "mulv  $vtmp2, T8B, $vtmp2, $vtmp1\n\t"
  338             "umov  $itmp, $vtmp2, B, 0\n\t"
  339             "mulw  $dst, $itmp, $isrc\n\t"
  340             "sxtb  $dst, $dst\n\t"
  341             "umov  $itmp, $vtmp2, B, 1\n\t"
  342             "mulw  $dst, $itmp, $dst\n\t"
  343             "sxtb  $dst, $dst\t# mul reduction16B"
  344   %}
  345   ins_encode %{
  346     __ ins(as_FloatRegister($vtmp1$$reg), __ D,
  347            as_FloatRegister($vsrc$$reg), 0, 1);
  348     __ mulv(as_FloatRegister($vtmp1$$reg), __ T8B,
  349             as_FloatRegister($vtmp1$$reg), as_FloatRegister($vsrc$$reg));
  350     __ ins(as_FloatRegister($vtmp2$$reg), __ S,
  351            as_FloatRegister($vtmp1$$reg), 0, 1);
  352     __ mulv(as_FloatRegister($vtmp1$$reg), __ T8B,
  353             as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg));
  354     __ ins(as_FloatRegister($vtmp2$$reg), __ H,
  355            as_FloatRegister($vtmp1$$reg), 0, 1);
  356     __ mulv(as_FloatRegister($vtmp2$$reg), __ T8B,
  357             as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg));
  358     __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 0);
  359     __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register);
  360     __ sxtb($dst$$Register, $dst$$Register);
  361     __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 1);
  362     __ mulw($dst$$Register, $itmp$$Register, $dst$$Register);
  363     __ sxtb($dst$$Register, $dst$$Register);
  364   %}
  365   ins_pipe(pipe_slow);
  366 %}
 367 
  368 instruct reduce_mul4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD vtmp, iRegINoSp itmp)
  369 %{
        // Multiply reduction of 4 shorts. Word lane 1 of vsrc is copied to word
        // lane 0 of vtmp and vtmp is multiplied lane-wise with vsrc, leaving
        // 2 partial products in halfword lanes 0 and 1. Each of these is moved
        // to itmp, multiplied into the running scalar product that starts from
        // isrc, and the product is narrowed with sxth after every multiply.
  370   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  371   match(Set dst (MulReductionVI isrc vsrc));
  372   ins_cost(INSN_COST);
  373   effect(TEMP_DEF dst, TEMP vtmp, TEMP itmp);
  374   format %{ "ins   $vtmp, S, $vsrc, 0, 1\n\t"
  375             "mulv  $vtmp, T4H, $vtmp, $vsrc\n\t"
  376             "umov  $itmp, $vtmp, H, 0\n\t"
  377             "mulw  $dst, $itmp, $isrc\n\t"
  378             "sxth  $dst, $dst\n\t"
  379             "umov  $itmp, $vtmp, H, 1\n\t"
  380             "mulw  $dst, $itmp, $dst\n\t"
  381             "sxth  $dst, $dst\t# mul reduction4S"
  382   %}
  383   ins_encode %{
  384     __ ins(as_FloatRegister($vtmp$$reg), __ S,
  385            as_FloatRegister($vsrc$$reg), 0, 1);
  386     __ mulv(as_FloatRegister($vtmp$$reg), __ T4H,
  387             as_FloatRegister($vtmp$$reg), as_FloatRegister($vsrc$$reg));
  388     __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ H, 0);
  389     __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register);
  390     __ sxth($dst$$Register, $dst$$Register);
  391     __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ H, 1);
  392     __ mulw($dst$$Register, $itmp$$Register, $dst$$Register);
  393     __ sxth($dst$$Register, $dst$$Register);
  394   %}
  395   ins_pipe(pipe_slow);
  396 %}
 397 
  398 instruct reduce_mul8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp1, vecX vtmp2, iRegINoSp itmp)
  399 %{
        // Multiply reduction of 8 shorts. The vector is folded in halves twice,
        // always with 4 halfword wide multiplies:
        //  1. doubleword lane 1 of vsrc is copied to doubleword lane 0 of vtmp1
        //     and vtmp1 is multiplied with vsrc;
        //  2. word lane 1 of vtmp1 is copied to word lane 0 of vtmp2 and vtmp2
        //     is multiplied with vtmp1.
        // Only halfword lanes 0 and 1 of vtmp2 are read afterwards. Each is
        // moved to itmp, multiplied into the running scalar product that starts
        // from isrc, and the product is narrowed with sxth after every multiply.
  400   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  401   match(Set dst (MulReductionVI isrc vsrc));
  402   ins_cost(INSN_COST);
  403   effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP itmp);
  404   format %{ "ins   $vtmp1, D, $vsrc, 0, 1\n\t"
  405             "mulv  $vtmp1, T4H, $vtmp1, $vsrc\n\t"
  406             "ins   $vtmp2, S, $vtmp1, 0, 1\n\t"
  407             "mulv  $vtmp2, T4H, $vtmp2, $vtmp1\n\t"
  408             "umov  $itmp, $vtmp2, H, 0\n\t"
  409             "mulw  $dst, $itmp, $isrc\n\t"
  410             "sxth  $dst, $dst\n\t"
  411             "umov  $itmp, $vtmp2, H, 1\n\t"
  412             "mulw  $dst, $itmp, $dst\n\t"
  413             "sxth  $dst, $dst\t# mul reduction8S"
  414   %}
  415   ins_encode %{
  416     __ ins(as_FloatRegister($vtmp1$$reg), __ D,
  417            as_FloatRegister($vsrc$$reg), 0, 1);
  418     __ mulv(as_FloatRegister($vtmp1$$reg), __ T4H,
  419             as_FloatRegister($vtmp1$$reg), as_FloatRegister($vsrc$$reg));
  420     __ ins(as_FloatRegister($vtmp2$$reg), __ S,
  421            as_FloatRegister($vtmp1$$reg), 0, 1);
  422     __ mulv(as_FloatRegister($vtmp2$$reg), __ T4H,
  423             as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg));
  424     __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ H, 0);
  425     __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register);
  426     __ sxth($dst$$Register, $dst$$Register);
  427     __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ H, 1);
  428     __ mulw($dst$$Register, $itmp$$Register, $dst$$Register);
  429     __ sxth($dst$$Register, $dst$$Register);
  430   %}
  431   ins_pipe(pipe_slow);
  432 %}
 433 
  434 instruct reduce_mul2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp)
  435 %{
        // Multiply reduction of 2 longs, done entirely in general registers:
        // lane 0 is moved to tmp and multiplied with the scalar input isrc, then
        // lane 1 is moved to tmp and multiplied into the result.
        // There is no predicate; the rule applies to every MulReductionVL.
  436   match(Set dst (MulReductionVL isrc vsrc));
  437   ins_cost(INSN_COST);
  438   effect(TEMP_DEF dst, TEMP tmp);
  439   format %{ "umov  $tmp, $vsrc, D, 0\n\t"
  440             "mul   $dst, $isrc, $tmp\n\t"
  441             "umov  $tmp, $vsrc, D, 1\n\t"
  442             "mul   $dst, $dst, $tmp\t# mul reduction2L"
  443   %}
  444   ins_encode %{
  445     __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
  446     __ mul($dst$$Register, $isrc$$Register, $tmp$$Register);
  447     __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
  448     __ mul($dst$$Register, $dst$$Register, $tmp$$Register);
  449   %}
  450   ins_pipe(pipe_slow);
  451 %}
 452 dnl
 453 define(`REDUCE_MAX_MIN_INT', `
 454 instruct reduce_$1$2$3`'(iRegINoSp dst, iRegIorL2I isrc, vec$4 vsrc, vec$4 tmp, rFlagsReg cr)
 455 %{
 456   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3));
 457   match(Set dst ($5ReductionV isrc vsrc));
 458   ins_cost(INSN_COST);
 459   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
 460   format %{ "s$1v $tmp, T$2`'iTYPE2SIMD($3), $vsrc\n\t"
 461             "$6mov  $dst, $tmp, iTYPE2SIMD($3), 0\n\t"
 462             "cmpw  $dst, $isrc\n\t"
 463             "cselw $dst, $dst, $isrc $7\t# $1 reduction$2$3"
 464   %}
 465   ins_encode %{
 466     __ s$1v(as_FloatRegister($tmp$$reg), __ T$2`'iTYPE2SIMD($3), as_FloatRegister($vsrc$$reg));
 467     __ $6mov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ iTYPE2SIMD($3), 0);
 468     __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg));
 469     __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::$7);
 470   %}
 471   ins_pipe(pipe_slow);
 472 %}')dnl
 473 dnl                $1   $2  $3 $4 $5   $6 $7
 474 REDUCE_MAX_MIN_INT(max, 8,  B, D, Max, s, GT)
 475 REDUCE_MAX_MIN_INT(max, 16, B, X, Max, s, GT)
 476 REDUCE_MAX_MIN_INT(max, 4,  S, D, Max, s, GT)
 477 REDUCE_MAX_MIN_INT(max, 8,  S, X, Max, s, GT)
 478 REDUCE_MAX_MIN_INT(max, 4,  I, X, Max, u, GT)
 479 REDUCE_MAX_MIN_INT(min, 8,  B, D, Min, s, LT)
 480 REDUCE_MAX_MIN_INT(min, 16, B, X, Min, s, LT)
 481 REDUCE_MAX_MIN_INT(min, 4,  S, D, Min, s, LT)
 482 REDUCE_MAX_MIN_INT(min, 8,  S, X, Min, s, LT)
 483 REDUCE_MAX_MIN_INT(min, 4,  I, X, Min, u, LT)
 484 dnl
 485 define(`REDUCE_MAX_MIN_2I', `
 486 instruct reduce_$1`'2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecX tmp, rFlagsReg cr)
 487 %{
 488   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
 489   match(Set dst ($2ReductionV isrc vsrc));
 490   ins_cost(INSN_COST);
 491   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
 492   format %{ "dup   $tmp, T2D, $vsrc\n\t"
 493             "s$1v $tmp, T4S, $tmp\n\t"
 494             "umov  $dst, $tmp, S, 0\n\t"
 495             "cmpw  $dst, $isrc\n\t"
 496             "cselw $dst, $dst, $isrc $3\t# $1 reduction2I"
 497   %}
 498   ins_encode %{
 499     __ dup(as_FloatRegister($tmp$$reg), __ T2D, as_FloatRegister($vsrc$$reg));
 500     __ s$1v(as_FloatRegister($tmp$$reg), __ T4S, as_FloatRegister($tmp$$reg));
 501     __ umov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ S, 0);
 502     __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg));
 503     __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::$3);
 504   %}
 505   ins_pipe(pipe_slow);
 506 %}')dnl
 507 dnl               $1   $2   $3
 508 REDUCE_MAX_MIN_2I(max, Max, GT)
 509 REDUCE_MAX_MIN_2I(min, Min, LT)
 510 dnl
 511 define(`REDUCE_MAX_MIN_2L', `
 512 instruct reduce_$1`'2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp, rFlagsReg cr)
 513 %{
 514   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
 515   match(Set dst ($2ReductionV isrc vsrc));
 516   ins_cost(INSN_COST);
 517   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
 518   format %{ "umov  $tmp, $vsrc, D, 0\n\t"
 519             "cmp   $isrc,$tmp\n\t"
 520             "csel  $dst, $isrc, $tmp $3\n\t"
 521             "umov  $tmp, $vsrc, D, 1\n\t"
 522             "cmp   $dst, $tmp\n\t"
 523             "csel  $dst, $dst, $tmp $3\t# $1 reduction2L"
 524   %}
 525   ins_encode %{
 526     __ umov(as_Register($tmp$$reg), as_FloatRegister($vsrc$$reg), __ D, 0);
 527     __ cmp(as_Register($isrc$$reg), as_Register($tmp$$reg));
 528     __ csel(as_Register($dst$$reg), as_Register($isrc$$reg), as_Register($tmp$$reg), Assembler::$3);
 529     __ umov(as_Register($tmp$$reg), as_FloatRegister($vsrc$$reg), __ D, 1);
 530     __ cmp(as_Register($dst$$reg), as_Register($tmp$$reg));
 531     __ csel(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($tmp$$reg), Assembler::$3);
 532   %}
 533   ins_pipe(pipe_slow);
 534 %}')dnl
 535 dnl               $1   $2   $3
 536 REDUCE_MAX_MIN_2L(max, Max, GT)
 537 REDUCE_MAX_MIN_2L(min, Min, LT)
  538 dnl
      // Logical reduction of 8 bytes for and, orr and eor. The two 32-bit halves
      // of the vector are moved to tmp and dst and combined. The result is then
      // folded onto its low byte by combining it with itself shifted right by
      // 16 and by 8 bits. Finally the scalar input isrc is combined in and sxtb
      // narrows the result to a byte.
  539 define(`REDUCE_LOGIC_OP_8B', `
  540 instruct reduce_$1`'8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
  541 %{
  542   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  543   match(Set dst ($2ReductionV isrc vsrc));
  544   ins_cost(INSN_COST);
  545   effect(TEMP_DEF dst, TEMP tmp);
  546   format %{ "umov   $tmp, $vsrc, S, 0\n\t"
  547             "umov   $dst, $vsrc, S, 1\n\t"
  548             "$1w   $dst, $dst, $tmp\n\t"
  549             "$1w   $dst, $dst, $dst, LSR #16\n\t"
  550             "$1w   $dst, $dst, $dst, LSR #8\n\t"
  551             "$1w   $dst, $isrc, $dst\n\t"
  552             "sxtb   $dst, $dst\t# $1 reduction8B"
  553   %}
  554   ins_encode %{
  555     __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0);
  556     __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ S, 1);
  557     __ $1w($dst$$Register, $dst$$Register, $tmp$$Register);
  558     __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);
  559     __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 8);
  560     __ $1w($dst$$Register, $isrc$$Register, $dst$$Register);
  561     __ sxtb($dst$$Register, $dst$$Register);
  562   %}
  563   ins_pipe(pipe_slow);
  564 %}')dnl
  565 dnl                $1   $2
  566 REDUCE_LOGIC_OP_8B(and, And)
  567 REDUCE_LOGIC_OP_8B(orr, Or)
  568 REDUCE_LOGIC_OP_8B(eor, Xor)
  569 define(`REDUCE_LOGIC_OP_16B', `
      // Logical reduction of 16 bytes. The two 64-bit halves of the vector are
      // moved to tmp and dst and combined with the 64-bit form of the operation.
      // The result is folded onto its low byte by combining it with itself
      // shifted right by 32 bits and then, in the 32-bit form, by 16 and 8 bits.
      // Finally the scalar input isrc is combined in and sxtb narrows the result.
  570 instruct reduce_$1`'16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
  571 %{
  572   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  573   match(Set dst ($2ReductionV isrc vsrc));
  574   ins_cost(INSN_COST);
  575   effect(TEMP_DEF dst, TEMP tmp);
  576   format %{ "umov   $tmp, $vsrc, D, 0\n\t"
  577             "umov   $dst, $vsrc, D, 1\n\t"
  578             "$3   $dst, $dst, $tmp\n\t"
  579             "$3   $dst, $dst, $dst, LSR #32\n\t"
  580             "$1w   $dst, $dst, $dst, LSR #16\n\t"
  581             "$1w   $dst, $dst, $dst, LSR #8\n\t"
  582             "$1w   $dst, $isrc, $dst\n\t"
  583             "sxtb   $dst, $dst\t# $1 reduction16B"
  584   %}
  585   ins_encode %{
  586     __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
  587     __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
  588     __ $3($dst$$Register, $dst$$Register, $tmp$$Register);
  589     __ $3($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32);
  590     __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);
  591     __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 8);
  592     __ $1w($dst$$Register, $isrc$$Register, $dst$$Register);
  593     __ sxtb($dst$$Register, $dst$$Register);
  594   %}
  595   ins_pipe(pipe_slow);
  596 %}')dnl
  597 dnl                 $1   $2   $3
  598 REDUCE_LOGIC_OP_16B(and, And, andr)
  599 REDUCE_LOGIC_OP_16B(orr, Or,  orr )
  600 REDUCE_LOGIC_OP_16B(eor, Xor, eor )
  601 dnl
      // Logical reduction of 4 shorts for and, orr and eor. The two 32-bit
      // halves of the vector are moved to tmp and dst and combined, the result
      // is folded onto its low halfword by combining it with itself shifted
      // right by 16 bits, the scalar input isrc is combined in, and sxth
      // narrows the result to a short.
  602 define(`REDUCE_LOGIC_OP_4S', `
  603 instruct reduce_$1`'4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
  604 %{
  605   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  606   match(Set dst ($2ReductionV isrc vsrc));
  607   ins_cost(INSN_COST);
  608   effect(TEMP_DEF dst, TEMP tmp);
  609   format %{ "umov   $tmp, $vsrc, S, 0\n\t"
  610             "umov   $dst, $vsrc, S, 1\n\t"
  611             "$1w   $dst, $dst, $tmp\n\t"
  612             "$1w   $dst, $dst, $dst, LSR #16\n\t"
  613             "$1w   $dst, $isrc, $dst\n\t"
  614             "sxth   $dst, $dst\t# $1 reduction4S"
  615   %}
  616   ins_encode %{
  617     __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0);
  618     __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ S, 1);
  619     __ $1w($dst$$Register, $dst$$Register, $tmp$$Register);
  620     __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);
  621     __ $1w($dst$$Register, $isrc$$Register, $dst$$Register);
  622     __ sxth($dst$$Register, $dst$$Register);
  623   %}
  624   ins_pipe(pipe_slow);
  625 %}')dnl
  626 dnl                $1   $2
  627 REDUCE_LOGIC_OP_4S(and, And)
  628 REDUCE_LOGIC_OP_4S(orr, Or)
  629 REDUCE_LOGIC_OP_4S(eor, Xor)
  630 dnl
      // Logical reduction of 8 shorts for and, orr and eor. The two 64-bit
      // halves of the vector are moved to tmp and dst and combined with the
      // 64-bit form of the operation. The result is folded onto its low
      // halfword by combining it with itself shifted right by 32 bits and then,
      // in the 32-bit form, by 16 bits. Finally the scalar input isrc is
      // combined in and sxth narrows the result to a short.
  631 define(`REDUCE_LOGIC_OP_8S', `
  632 instruct reduce_$1`'8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
  633 %{
  634   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  635   match(Set dst ($2ReductionV isrc vsrc));
  636   ins_cost(INSN_COST);
  637   effect(TEMP_DEF dst, TEMP tmp);
  638   format %{ "umov   $tmp, $vsrc, D, 0\n\t"
  639             "umov   $dst, $vsrc, D, 1\n\t"
  640             "$3   $dst, $dst, $tmp\n\t"
  641             "$3   $dst, $dst, $dst, LSR #32\n\t"
  642             "$1w   $dst, $dst, $dst, LSR #16\n\t"
  643             "$1w   $dst, $isrc, $dst\n\t"
  644             "sxth   $dst, $dst\t# $1 reduction8S"
  645   %}
  646   ins_encode %{
  647     __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
  648     __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
  649     __ $3($dst$$Register, $dst$$Register, $tmp$$Register);
  650     __ $3($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32);
  651     __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);
  652     __ $1w($dst$$Register, $isrc$$Register, $dst$$Register);
  653     __ sxth($dst$$Register, $dst$$Register);
  654   %}
  655   ins_pipe(pipe_slow);
  656 %}')dnl
  657 dnl                $1   $2   $3
  658 REDUCE_LOGIC_OP_8S(and, And, andr)
  659 REDUCE_LOGIC_OP_8S(orr, Or,  orr )
  660 REDUCE_LOGIC_OP_8S(eor, Xor, eor )
  661 dnl
      // Logical reduction of 2 ints for and, orr and eor. Each 32-bit lane is
      // moved to tmp in turn and combined in a general register: lane 0 with
      // the scalar input isrc, lane 1 with the running result in dst.
  662 define(`REDUCE_LOGIC_OP_2I', `
  663 instruct reduce_$1`'2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
  664 %{
  665   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  666   match(Set dst ($2ReductionV isrc vsrc));
  667   ins_cost(INSN_COST);
  668   effect(TEMP_DEF dst, TEMP tmp);
  669   format %{ "umov  $tmp, $vsrc, S, 0\n\t"
  670             "$1w  $dst, $tmp, $isrc\n\t"
  671             "umov  $tmp, $vsrc, S, 1\n\t"
  672             "$1w  $dst, $tmp, $dst\t# $1 reduction2I"
  673   %}
  674   ins_encode %{
  675     __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0);
  676     __ $1w($dst$$Register, $tmp$$Register, $isrc$$Register);
  677     __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 1);
  678     __ $1w($dst$$Register, $tmp$$Register, $dst$$Register);
  679   %}
  680   ins_pipe(pipe_slow);
  681 %}')dnl
  682 dnl                $1   $2
  683 REDUCE_LOGIC_OP_2I(and, And)
  684 REDUCE_LOGIC_OP_2I(orr, Or)
  685 REDUCE_LOGIC_OP_2I(eor, Xor)
  686 dnl
      // Logical reduction of 4 ints for and, orr and eor. The two 64-bit halves
      // of the vector are moved to tmp and dst and combined with the 64-bit
      // form of the operation, the result is folded onto its low word by
      // combining it with itself shifted right by 32 bits, and the scalar
      // input isrc is combined in with the 32-bit form.
  687 define(`REDUCE_LOGIC_OP_4I', `
  688 instruct reduce_$1`'4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
  689 %{
  690   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  691   match(Set dst ($2ReductionV isrc vsrc));
  692   ins_cost(INSN_COST);
  693   effect(TEMP_DEF dst, TEMP tmp);
  694   format %{ "umov   $tmp, $vsrc, D, 0\n\t"
  695             "umov   $dst, $vsrc, D, 1\n\t"
  696             "$3   $dst, $dst, $tmp\n\t"
  697             "$3   $dst, $dst, $dst, LSR #32\n\t"
  698             "$1w   $dst, $isrc, $dst\t# $1 reduction4I"
  699   %}
  700   ins_encode %{
  701     __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
  702     __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
  703     __ $3($dst$$Register, $dst$$Register, $tmp$$Register);
  704     __ $3($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32);
  705     __ $1w($dst$$Register, $isrc$$Register, $dst$$Register);
  706   %}
  707   ins_pipe(pipe_slow);
  708 %}')dnl
  709 dnl                $1   $2   $3
  710 REDUCE_LOGIC_OP_4I(and, And, andr)
  711 REDUCE_LOGIC_OP_4I(orr, Or,  orr )
  712 REDUCE_LOGIC_OP_4I(eor, Xor, eor )
  713 dnl
      // Logical reduction of 2 longs for and, orr and eor. Each 64-bit lane is
      // moved to tmp in turn and combined in a general register: lane 0 with
      // the scalar input isrc, lane 1 with the running result in dst.
  714 define(`REDUCE_LOGIC_OP_2L', `
  715 instruct reduce_$1`'2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp)
  716 %{
  717   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  718   match(Set dst ($2ReductionV isrc vsrc));
  719   ins_cost(INSN_COST);
  720   effect(TEMP_DEF dst, TEMP tmp);
  721   format %{ "umov  $tmp, $vsrc, D, 0\n\t"
  722             "$3  $dst, $isrc, $tmp\n\t"
  723             "umov  $tmp, $vsrc, D, 1\n\t"
  724             "$3  $dst, $dst, $tmp\t# $1 reduction2L"
  725   %}
  726   ins_encode %{
  727     __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
  728     __ $3($dst$$Register, $isrc$$Register, $tmp$$Register);
  729     __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
  730     __ $3($dst$$Register, $dst$$Register, $tmp$$Register);
  731   %}
  732   ins_pipe(pipe_slow);
  733 %}')dnl
  734 dnl                $1   $2   $3
  735 REDUCE_LOGIC_OP_2L(and, And, andr)
  736 REDUCE_LOGIC_OP_2L(orr, Or,  orr )
  737 REDUCE_LOGIC_OP_2L(eor, Xor, eor )
 738 dnl VECTOR_INSERT_I (defined below): VectorInsert of a GPR value; src is copied to dst only when the registers differ, then mov writes lane idx.
 739 
 740 // ------------------------------ Vector insert ---------------------------------
 741 define(`VECTOR_INSERT_I', `
 742 instruct insert$1$2`'(vec$3 dst, vec$3 src, iReg$4`'ORL2I($4) val, immI idx)
 743 %{
 744   predicate(n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
 745   match(Set dst (VectorInsert (Binary src val) idx));
 746   ins_cost(INSN_COST);
 747   format %{ "orr    $dst, T$5, $src, $src\n\t"
 748             "mov    $dst, T$1`'iTYPE2SIMD($2), $idx, $val\t# insert into vector($1$2)" %}
 749   ins_encode %{
 750     if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) {
 751       __ orr(as_FloatRegister($dst$$reg), __ T$5,
 752              as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
 753     }
 754     __ mov(as_FloatRegister($dst$$reg), __ T$1`'iTYPE2SIMD($2), $idx$$constant, $val$$Register);
 755   %}
 756   ins_pipe(pipe_slow);
 757 %}')dnl
 758 dnl             $1  $2 $3 $4 $5     <- lanes, element type, vec class, GPR type, orr arrangement
 759 VECTOR_INSERT_I(8,  B, D, I, 8B)
 760 VECTOR_INSERT_I(16, B, X, I, 16B)
 761 VECTOR_INSERT_I(4,  S, D, I, 8B)
 762 VECTOR_INSERT_I(8,  S, X, I, 16B)
 763 VECTOR_INSERT_I(2,  I, D, I, 8B)
 764 VECTOR_INSERT_I(4,  I, X, I, 16B)
 765 VECTOR_INSERT_I(2,  L, X, L, 16B)
 766 dnl VECTOR_INSERT_F: VectorInsert of an FP register value; dst is TEMP_DEF, src is always copied first, then ins writes lane idx.
 767 define(`VECTOR_INSERT_F', `
 768 instruct insert$1`'(vec$2 dst, vec$2 src, vReg$3 val, immI idx)
 769 %{
 770   predicate(n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3));
 771   match(Set dst (VectorInsert (Binary src val) idx));
 772   ins_cost(INSN_COST);
 773   effect(TEMP_DEF dst);
 774   format %{ "orr    $dst, T$4, $src, $src\n\t"
 775             "ins    $dst, $5, $val, $idx, 0\t# insert into vector($1)" %}
 776   ins_encode %{
 777     __ orr(as_FloatRegister($dst$$reg), __ T$4,
 778            as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
 779     __ ins(as_FloatRegister($dst$$reg), __ $5,
 780            as_FloatRegister($val$$reg), $idx$$constant, 0);
 781   %}
 782   ins_pipe(pipe_slow);
 783 %}')dnl
 784 dnl             $1  $2 $3 $4   $5   <- name suffix, vec class, FP type, orr arrangement, ins lane size
 785 VECTOR_INSERT_F(2F, D, F, 8B,  S)
 786 VECTOR_INSERT_F(4F, X, F, 16B, S)
 787 VECTOR_INSERT_F(2D, X, D, 16B, D)
 788 dnl VECTOR_EXTRACT_I (defined below): Extract<type> of lane idx into a GPR, using smov or umov as selected by $5.
 789 
 790 // ------------------------------ Vector extract ---------------------------------
 791 define(`VECTOR_EXTRACT_I', `
 792 instruct extract$1$2`'(iReg$3NoSp dst, vec$4 src, immI idx)
 793 %{
 794   predicate(n->in(1)->bottom_type()->is_vect()->length() == $1);
 795   match(Set dst (Extract$2 src idx));
 796   ins_cost(INSN_COST);
 797   format %{ "$5mov    $dst, $src, $6, $idx\t# extract from vector($1$2)" %}
 798   ins_encode %{
 799     __ $5mov($dst$$Register, as_FloatRegister($src$$reg), __ $6, $idx$$constant);
 800   %}
 801   ins_pipe(pipe_class_default);
 802 %}')dnl
 803 dnl             $1   $2 $3 $4 $5 $6   <- lanes, element type, GPR type, vec class, s or u mov prefix, lane size
 804 VECTOR_EXTRACT_I(8,  B, I, D, s, B)
 805 VECTOR_EXTRACT_I(16, B, I, X, s, B)
 806 VECTOR_EXTRACT_I(4,  S, I, D, s, H)
 807 VECTOR_EXTRACT_I(8,  S, I, X, s, H)
 808 VECTOR_EXTRACT_I(2,  I, I, D, u, S)
 809 VECTOR_EXTRACT_I(4,  I, I, X, u, S)
 810 VECTOR_EXTRACT_I(2,  L, L, X, u, D)
 811 dnl VECTOR_EXTRACT_F: Extract<type> into an FP register via ins with lane arguments 0 and idx (presumably dst lane 0 from src lane idx - confirm).
 812 define(`VECTOR_EXTRACT_F', `
 813 instruct extract$1$2`'(vReg$2 dst, vec$3 src, immI idx)
 814 %{
 815   predicate(n->in(1)->bottom_type()->is_vect()->length() == $1);
 816   match(Set dst (Extract$2 src idx));
 817   ins_cost(INSN_COST);
 818   format %{ "ins   $dst, $4, $src, 0, $idx\t# extract from vector($1$2)" %}
 819   ins_encode %{
 820     __ ins(as_FloatRegister($dst$$reg), __ $4,
 821            as_FloatRegister($src$$reg), 0, $idx$$constant);
 822   %}
 823   ins_pipe(pipe_class_default);
 824 %}')dnl
 825 dnl             $1  $2 $3 $4   <- lanes, FP type, vec class, ins lane size
 826 VECTOR_EXTRACT_F(2, F, D, S)
 827 VECTOR_EXTRACT_F(4, F, X, S)
 828 VECTOR_EXTRACT_F(2, D, X, D)
 829 dnl VECTOR_CMP_EQ_GT_GE (defined below): VectorMaskCmp for eq/gt/ge; emits cm<cond> (integer) or fcm<cond> (FP) on src1, src2.
 830 
 831 // ------------------------------ Vector comparison ---------------------------------
 832 define(`VECTOR_CMP_EQ_GT_GE', `
 833 instruct vcm$1$2$3`'(vec$4 dst, vec$4 src1, vec$4 src2, immI cond)
 834 %{
 835   predicate(n->as_Vector()->length() == $2 &&
 836             n->as_VectorMaskCmp()->get_predicate() == BoolTest::$1 &&
 837             n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3));
 838   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
 839   format %{ "$6cm$1  $dst, $src1, $src2\t# vector cmp ($2$3)" %}
 840   ins_cost(INSN_COST);
 841   ins_encode %{
 842     __ $6cm$1(as_FloatRegister($dst$$reg), __ T$2$5,
 843             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
 844   %}
 845   ins_pipe(vdop$7);
 846 %}')dnl
 847 dnl                $1   $2 $3 $4 $5 $6 $7   <- cond, lanes, element type, vec class, SIMD lane size, f for FP, pipe width
 848 VECTOR_CMP_EQ_GT_GE(eq, 8, B, D, B,  , 64)
 849 VECTOR_CMP_EQ_GT_GE(eq, 16,B, X, B,  , 128)
 850 VECTOR_CMP_EQ_GT_GE(eq, 4, S, D, H,  , 64)
 851 VECTOR_CMP_EQ_GT_GE(eq, 8, S, X, H,  , 128)
 852 VECTOR_CMP_EQ_GT_GE(eq, 2, I, D, S,  , 64)
 853 VECTOR_CMP_EQ_GT_GE(eq, 4, I, X, S,  , 128)
 854 VECTOR_CMP_EQ_GT_GE(eq, 2, L, X, D,  , 128)
 855 VECTOR_CMP_EQ_GT_GE(eq, 2, F, D, S, f, 64)
 856 VECTOR_CMP_EQ_GT_GE(eq, 4, F, X, S, f, 128)
 857 VECTOR_CMP_EQ_GT_GE(eq, 2, D, X, D, f, 128)
 858 VECTOR_CMP_EQ_GT_GE(gt, 8, B, D, B,  , 64)
 859 VECTOR_CMP_EQ_GT_GE(gt, 16,B, X, B,  , 128)
 860 VECTOR_CMP_EQ_GT_GE(gt, 4, S, D, H,  , 64)
 861 VECTOR_CMP_EQ_GT_GE(gt, 8, S, X, H,  , 128)
 862 VECTOR_CMP_EQ_GT_GE(gt, 2, I, D, S,  , 64)
 863 VECTOR_CMP_EQ_GT_GE(gt, 4, I, X, S,  , 128)
 864 VECTOR_CMP_EQ_GT_GE(gt, 2, L, X, D,  , 128)
 865 VECTOR_CMP_EQ_GT_GE(gt, 2, F, D, S, f, 64)
 866 VECTOR_CMP_EQ_GT_GE(gt, 4, F, X, S, f, 128)
 867 VECTOR_CMP_EQ_GT_GE(gt, 2, D, X, D, f, 128)
 868 VECTOR_CMP_EQ_GT_GE(ge, 8, B, D, B,  , 64)
 869 VECTOR_CMP_EQ_GT_GE(ge, 16,B, X, B,  , 128)
 870 VECTOR_CMP_EQ_GT_GE(ge, 4, S, D, H,  , 64)
 871 VECTOR_CMP_EQ_GT_GE(ge, 8, S, X, H,  , 128)
 872 VECTOR_CMP_EQ_GT_GE(ge, 2, I, D, S,  , 64)
 873 VECTOR_CMP_EQ_GT_GE(ge, 4, I, X, S,  , 128)
 874 VECTOR_CMP_EQ_GT_GE(ge, 2, L, X, D,  , 128)
 875 VECTOR_CMP_EQ_GT_GE(ge, 2, F, D, S, f, 64)
 876 VECTOR_CMP_EQ_GT_GE(ge, 4, F, X, S, f, 128)
 877 VECTOR_CMP_EQ_GT_GE(ge, 2, D, X, D, f, 128)
 878 dnl VECTOR_CMP_NE: VectorMaskCmp for ne, emitted as cmeq (integer) or fcmeq (FP) followed by a bitwise not of the result.
 879 define(`VECTOR_CMP_NE', `
 880 instruct vcmne$1$2`'(vec$3 dst, vec$3 src1, vec$3 src2, immI cond)
 881 %{
 882   predicate(n->as_Vector()->length() == $1 &&
 883             n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
 884             n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
 885   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
 886   format %{ "$5cmeq  $dst, $src1, $src2\n\t"
 887             "not   $dst, $dst\t# vector cmp ($1$2)" %}
 888   ins_cost(INSN_COST);
 889   ins_encode %{
 890     __ $5cmeq(as_FloatRegister($dst$$reg), __ T$1$4,
 891             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
 892     __ notr(as_FloatRegister($dst$$reg), __ T$6, as_FloatRegister($dst$$reg));
 893   %}
 894   ins_pipe(pipe_slow);
 895 %}')dnl
 896 dnl           $1 $2 $3 $4 $5 $6   <- lanes, element type, vec class, SIMD lane size, f for FP, notr arrangement
 897 VECTOR_CMP_NE(8, B, D, B,  , 8B)
 898 VECTOR_CMP_NE(16,B, X, B,  , 16B)
 899 VECTOR_CMP_NE(4, S, D, H,  , 8B)
 900 VECTOR_CMP_NE(8, S, X, H,  , 16B)
 901 VECTOR_CMP_NE(2, I, D, S,  , 8B)
 902 VECTOR_CMP_NE(4, I, X, S,  , 16B)
 903 VECTOR_CMP_NE(2, L, X, D,  , 16B)
 904 VECTOR_CMP_NE(2, F, D, S, f, 8B)
 905 VECTOR_CMP_NE(4, F, X, S, f, 16B)
 906 VECTOR_CMP_NE(2, D, X, D, f, 16B)
 907 dnl VECTOR_CMP_LT_LE: VectorMaskCmp for lt/le, emitted as the gt/ge compare named by $7 with src1 and src2 swapped.
 908 define(`VECTOR_CMP_LT_LE', `
 909 instruct vcm$1$2$3`'(vec$4 dst, vec$4 src1, vec$4 src2, immI cond)
 910 %{
 911   predicate(n->as_Vector()->length() == $2 &&
 912             n->as_VectorMaskCmp()->get_predicate() == BoolTest::$1 &&
 913             n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3));
 914   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
 915   format %{ "$6cm$7  $dst, $src2, $src1\t# vector cmp ($2$3)" %}
 916   ins_cost(INSN_COST);
 917   ins_encode %{
 918     __ $6cm$7(as_FloatRegister($dst$$reg), __ T$2$5,
 919             as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
 920   %}
 921   ins_pipe(vdop$8);
 922 %}')dnl
 923 dnl              $1  $2 $3 $4 $5 $6 $7  $8   <- cond, lanes, element type, vec class, SIMD lane size, f for FP, emitted cond, pipe width
 924 VECTOR_CMP_LT_LE(lt, 8, B, D, B,  , gt, 64)
 925 VECTOR_CMP_LT_LE(lt, 16,B, X, B,  , gt, 128)
 926 VECTOR_CMP_LT_LE(lt, 4, S, D, H,  , gt, 64)
 927 VECTOR_CMP_LT_LE(lt, 8, S, X, H,  , gt, 128)
 928 VECTOR_CMP_LT_LE(lt, 2, I, D, S,  , gt, 64)
 929 VECTOR_CMP_LT_LE(lt, 4, I, X, S,  , gt, 128)
 930 VECTOR_CMP_LT_LE(lt, 2, L, X, D,  , gt, 128)
 931 VECTOR_CMP_LT_LE(lt, 2, F, D, S, f, gt, 64)
 932 VECTOR_CMP_LT_LE(lt, 4, F, X, S, f, gt, 128)
 933 VECTOR_CMP_LT_LE(lt, 2, D, X, D, f, gt, 128)
 934 VECTOR_CMP_LT_LE(le, 8, B, D, B,  , ge, 64)
 935 VECTOR_CMP_LT_LE(le, 16,B, X, B,  , ge, 128)
 936 VECTOR_CMP_LT_LE(le, 4, S, D, H,  , ge, 64)
 937 VECTOR_CMP_LT_LE(le, 8, S, X, H,  , ge, 128)
 938 VECTOR_CMP_LT_LE(le, 2, I, D, S,  , ge, 64)
 939 VECTOR_CMP_LT_LE(le, 4, I, X, S,  , ge, 128)
 940 VECTOR_CMP_LT_LE(le, 2, L, X, D,  , ge, 128)
 941 VECTOR_CMP_LT_LE(le, 2, F, D, S, f, ge, 64)
 942 VECTOR_CMP_LT_LE(le, 4, F, X, S, f, ge, 128)
 943 VECTOR_CMP_LT_LE(le, 2, D, X, D, f, ge, 128)
 944 dnl vmul2L (defined below): MulVL on 2 longs, one lane at a time: umov both operands to GPRs, mul, then insert the product into dst.
 945 
 946 // ------------------------------ Vector mul -----------------------------------
 947 
 948 instruct vmul2L(vecX dst, vecX src1, vecX src2, iRegLNoSp tmp1, iRegLNoSp tmp2)
 949 %{
 950   predicate(n->as_Vector()->length() == 2);
 951   match(Set dst (MulVL src1 src2));
 952   ins_cost(INSN_COST);
 953   effect(TEMP tmp1, TEMP tmp2);
 954   format %{ "umov   $tmp1, $src1, D, 0\n\t"
 955             "umov   $tmp2, $src2, D, 0\n\t"
 956             "mul    $tmp2, $tmp2, $tmp1\n\t"
 957             "mov    $dst,  T2D,   0, $tmp2\t# insert into vector(2L)\n\t"
 958             "umov   $tmp1, $src1, D, 1\n\t"
 959             "umov   $tmp2, $src2, D, 1\n\t"
 960             "mul    $tmp2, $tmp2, $tmp1\n\t"
 961             "mov    $dst,  T2D,   1, $tmp2\t# insert into vector(2L)"
 962   %}
 963   ins_encode %{
 964     __ umov($tmp1$$Register, as_FloatRegister($src1$$reg), __ D, 0);
 965     __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ D, 0);
 966     __ mul(as_Register($tmp2$$reg), as_Register($tmp2$$reg), as_Register($tmp1$$reg));
 967     __ mov(as_FloatRegister($dst$$reg), __ T2D, 0, $tmp2$$Register);
 968     __ umov($tmp1$$Register, as_FloatRegister($src1$$reg), __ D, 1);
 969     __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ D, 1);
 970     __ mul(as_Register($tmp2$$reg), as_Register($tmp2$$reg), as_Register($tmp1$$reg));
 971     __ mov(as_FloatRegister($dst$$reg), __ T2D, 1, $tmp2$$Register);
 972   %}
 973   ins_pipe(pipe_slow);
 974 %}
 975 
 976 // --------------------------------- Vector not --------------------------------
 977 dnl MATCH_RULE: match rules for vector not; for I it matches XorV against ReplicateB, ReplicateS and ReplicateI of m1, otherwise ReplicateL.
 978 define(`MATCH_RULE', `ifelse($1, I,
 979 `match(Set dst (XorV src (ReplicateB m1)));
 980   match(Set dst (XorV src (ReplicateS m1)));
 981   match(Set dst (XorV src (ReplicateI m1)));',
 982 `match(Set dst (XorV src (ReplicateL m1)));')')dnl
 983 dnl VECTOR_NOT: XorV of src with a replicated m1 (operand class imm<type>_M1, presumably the constant -1) becomes one notr.
 984 define(`VECTOR_NOT', `
 985 instruct vnot$1$2`'(vec$3 dst, vec$3 src, imm$2_M1 m1)
 986 %{
 987   predicate(n->as_Vector()->length_in_bytes() == $4);
 988   MATCH_RULE($2)
 989   ins_cost(INSN_COST);
 990   format %{ "not  $dst, $src\t# vector ($5)" %}
 991   ins_encode %{
 992     __ notr(as_FloatRegister($dst$$reg), __ T$5,
 993             as_FloatRegister($src$$reg));
 994   %}
 995   ins_pipe(pipe_class_default);
 996 %}')dnl
 997 dnl        $1 $2 $3 $4  $5    <- lanes, imm and replicate type, vec class, vector bytes, notr arrangement
 998 VECTOR_NOT(2, I, D, 8,  8B)
 999 VECTOR_NOT(4, I, X, 16, 16B)
1000 VECTOR_NOT(2, L, X, 16, 16B)
1001 undefine(MATCH_RULE)
1002 dnl
1003 // ------------------------------ Vector max/min -------------------------------
1004 dnl PREDICATE: when $1 is 8B accepts byte vectors of length 4 or 8; otherwise requires length $2 and element type T_$3.
1005 define(`PREDICATE', `ifelse($1, 8B,
1006 `predicate((n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8) &&
1007              n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);',
1008 `predicate(n->as_Vector()->length() == $2 && n->bottom_type()->is_vect()->element_basic_type() == T_$3);')')dnl
1009 dnl VECTOR_MAX_MIN_INT: MaxV/MinV on byte, short and int vectors, emitted as a single maxv or minv.
1010 define(`VECTOR_MAX_MIN_INT', `
1011 instruct v$1$2$3`'(vec$4 dst, vec$4 src1, vec$4 src2)
1012 %{
1013   PREDICATE(`$2$3', $2, TYPE2DATATYPE($3))
1014   match(Set dst ($5V src1 src2));
1015   ins_cost(INSN_COST);
1016   format %{ "$1v  $dst, $src1, $src2\t# vector ($2$3)" %}
1017   ins_encode %{
1018     __ $1v(as_FloatRegister($dst$$reg), __ T$2`'iTYPE2SIMD($3),
1019             as_FloatRegister($src1$$reg),
1020             as_FloatRegister($src2$$reg));
1021   %}
1022   ins_pipe(vdop$6);
1023 %}')dnl
1024 dnl                $1   $2  $3 $4 $5   $6    <- op stem, lanes, element type, vec class, ideal node prefix, pipe width
1025 VECTOR_MAX_MIN_INT(max, 8,  B, D, Max, 64)
1026 VECTOR_MAX_MIN_INT(max, 16, B, X, Max, 128)
1027 VECTOR_MAX_MIN_INT(max, 4,  S, D, Max, 64)
1028 VECTOR_MAX_MIN_INT(max, 8,  S, X, Max, 128)
1029 VECTOR_MAX_MIN_INT(max, 2,  I, D, Max, 64)
1030 VECTOR_MAX_MIN_INT(max, 4,  I, X, Max, 128)
1031 VECTOR_MAX_MIN_INT(min, 8,  B, D, Min, 64)
1032 VECTOR_MAX_MIN_INT(min, 16, B, X, Min, 128)
1033 VECTOR_MAX_MIN_INT(min, 4,  S, D, Min, 64)
1034 VECTOR_MAX_MIN_INT(min, 8,  S, X, Min, 128)
1035 VECTOR_MAX_MIN_INT(min, 2,  I, D, Min, 64)
1036 VECTOR_MAX_MIN_INT(min, 4,  I, X, Min, 128)
1037 undefine(PREDICATE)
1038 dnl VECTOR_MAX_MIN_LONG: MaxV/MinV on 2 longs: cmgt of src1, src2 leaves a mask in dst, then bsl picks between operands $3 and $4.
1039 define(`VECTOR_MAX_MIN_LONG', `
1040 instruct v$1`'2L`'(vecX dst, vecX src1, vecX src2)
1041 %{
1042   predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
1043   match(Set dst ($2V src1 src2));
1044   ins_cost(INSN_COST);
1045   effect(TEMP dst);
1046   format %{ "cmgt  $dst, $src1, $src2\t# vector (2L)\n\t"
1047             "bsl   $dst, $$3, $$4\t# vector (16B)" %}
1048   ins_encode %{
1049     __ cmgt(as_FloatRegister($dst$$reg), __ T2D,
1050             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
1051     __ bsl(as_FloatRegister($dst$$reg), __ T16B,
1052            as_FloatRegister($$3$$reg), as_FloatRegister($$4$$reg));
1053   %}
1054   ins_pipe(vdop128);
1055 %}')dnl
1056 dnl                $1   $2   $3    $4     <- name stem, ideal node prefix, first bsl source, second bsl source
1057 VECTOR_MAX_MIN_LONG(max, Max, src1, src2)
1058 VECTOR_MAX_MIN_LONG(min, Min, src2, src1)
1059 dnl
1060 
1061 // --------------------------------- blend (bsl) ----------------------------
1062 dnl VECTOR_BSL: VectorBlend whose third input is dst itself; bsl is issued with src2 as first source and src1 as second.
1063 define(`VECTOR_BSL', `
1064 instruct vbsl$1B`'(vec$2 dst, vec$2 src1, vec$2 src2)
1065 %{
1066   predicate(n->as_Vector()->length_in_bytes() == $1);
1067   match(Set dst (VectorBlend (Binary src1 src2) dst));
1068   ins_cost(INSN_COST);
1069   format %{ "bsl  $dst, $src2, $src1\t# vector ($1B)" %}
1070   ins_encode %{
1071     __ bsl(as_FloatRegister($dst$$reg), __ T$1B,
1072            as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
1073   %}
1074   ins_pipe(vlogical$3);
1075 %}')dnl
1076 dnl        $1  $2 $3    <- vector bytes, vec class, pipe width
1077 VECTOR_BSL(8,  D, 64)
1078 VECTOR_BSL(16, X, 128)
1079 dnl
1080 
1081 // --------------------------------- Load/store Mask ----------------------------
1082 dnl PREDICATE: load variants also require element type T_BYTE; store variants check only the vector length.
1083 define(`PREDICATE', `ifelse($1, load,
1084 `predicate(n->as_Vector()->length() == $2 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);',
1085 `predicate(n->as_Vector()->length() == $2);')')dnl
1086 dnl VECTOR_LOAD_STORE_MASK_B: VectorLoadMask/VectorStoreMask for byte lanes: a single negr from src into dst.
1087 define(`VECTOR_LOAD_STORE_MASK_B', `
1088 instruct $1mask$2B`'(vec$3 dst, vec$3 src $5 $6)
1089 %{
1090   PREDICATE($1, $2)
1091   match(Set dst (Vector$4Mask src $6));
1092   ins_cost(INSN_COST);
1093   format %{ "negr  $dst, $src\t# $1 mask ($2B to $2B)" %}
1094   ins_encode %{
1095     __ negr(as_FloatRegister($dst$$reg), __ T$2B, as_FloatRegister($src$$reg));
1096   %}
1097   ins_pipe(pipe_class_default);
1098 %}')dnl
1099 dnl                      $1     $2  $3 $4     $5      $6    <- load or store, lanes, vec class, ideal node infix, extra operand declaration, extra operand name
1100 VECTOR_LOAD_STORE_MASK_B(load,  8,  D, Load)
1101 VECTOR_LOAD_STORE_MASK_B(load,  16, X, Load)
1102 VECTOR_LOAD_STORE_MASK_B(store, 8,  D, Store, `, immI_1', size)
1103 VECTOR_LOAD_STORE_MASK_B(store, 16, X, Store, `, immI_1', size)
1104 undefine(PREDICATE)dnl NOTE(review): the name is unquoted, so m4 presumably expands PREDICATE before undefine sees it - confirm intent
1105 dnl PREDICATE: load variants also require element type T_SHORT; store variants check only the vector length.
1106 define(`PREDICATE', `ifelse($1, load,
1107 `predicate(n->as_Vector()->length() == $2 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);',
1108 `predicate(n->as_Vector()->length() == $2);')')dnl
1109 dnl VECTOR_LOAD_STORE_MASK_S: mask conversion between byte and short lanes: uxtl (load) or xtn (store), then negr.
1110 define(`VECTOR_LOAD_STORE_MASK_S', `
1111 instruct $1mask$2S`'(vec$3 dst, vec$4 src $9 $10)
1112 %{
1113   PREDICATE($1, $2)
1114   match(Set dst (Vector$5Mask src $10));
1115   ins_cost(INSN_COST);
1116   format %{ "$6  $dst, $src\n\t"
1117             "negr  $dst, $dst\t# $1 mask ($2$7 to $2$8)" %}
1118   ins_encode %{
1119     __ $6(as_FloatRegister($dst$$reg), __ T8$8, as_FloatRegister($src$$reg), __ T8$7);
1120     __ negr(as_FloatRegister($dst$$reg), __ T8$8, as_FloatRegister($dst$$reg));
1121   %}
1122   ins_pipe(pipe_slow);
1123 %}')dnl
1124 dnl                      $1     $2 $3 $4 $5     $6    $7 $8    $9       $10    <- load or store, lanes, dst class, src class, ideal node infix, convert op, src lane, dst lane, extra operand declaration, extra operand name
1125 VECTOR_LOAD_STORE_MASK_S(load,  4, D, D, Load,  uxtl, B, H)
1126 VECTOR_LOAD_STORE_MASK_S(load,  8, X, D, Load,  uxtl, B, H)
1127 VECTOR_LOAD_STORE_MASK_S(store, 4, D, D, Store, xtn,  H, B, `, immI_2', size)
1128 VECTOR_LOAD_STORE_MASK_S(store, 8, D, X, Store, xtn,  H, B, `, immI_2', size)
1129 undefine(PREDICATE)dnl NOTE(review): the name is unquoted, so m4 presumably expands PREDICATE before undefine sees it - confirm intent
1130 dnl PREDICATE: load variants also require element type T_INT or T_FLOAT; store variants check only the vector length.
1131 define(`PREDICATE', `ifelse($1, load,
1132 `predicate(n->as_Vector()->length() == $2 &&
1133             (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
1134              n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));',
1135 `predicate(n->as_Vector()->length() == $2);')')dnl
1136 dnl VECTOR_LOAD_STORE_MASK_I: mask conversion between byte and int lanes: two uxtl (load) or two xtn (store) steps, then negr.
1137 define(`VECTOR_LOAD_STORE_MASK_I', `
1138 instruct $1mask$2I`'(vec$3 dst, vec$4 src $12 $13)
1139 %{
1140   PREDICATE($1, $2)
1141   match(Set dst (Vector$5Mask src $13));
1142   ins_cost(INSN_COST);
1143   format %{ "$6  $dst, $src\t# $2$7 to $2$8\n\t"
1144             "$6  $dst, $dst\t# $2$8 to $2$9\n\t"
1145             "negr   $dst, $dst\t# $1 mask ($2$7 to $2$9)" %}
1146   ins_encode %{
1147     __ $6(as_FloatRegister($dst$$reg), __ T$10$8, as_FloatRegister($src$$reg), __ T$10$7);
1148     __ $6(as_FloatRegister($dst$$reg), __ T$11$9, as_FloatRegister($dst$$reg), __ T$11$8);
1149     __ negr(as_FloatRegister($dst$$reg), __ T$11$9, as_FloatRegister($dst$$reg));
1150   %}
1151   ins_pipe(pipe_slow);
1152 %}')dnl
1153 dnl                      $1     $2 $3 $4 $5     $6    $7 $8 $9 $10$11   $12      $13    <- load or store, lanes, dst class, src class, ideal node infix, convert op, src/middle/dst lane, lane count of step 1 and step 2, extra operand declaration, extra operand name
1154 VECTOR_LOAD_STORE_MASK_I(load,  2, D, D, Load,  uxtl, B, H, S, 8, 4)
1155 VECTOR_LOAD_STORE_MASK_I(load,  4, X, D, Load,  uxtl, B, H, S, 8, 4)
1156 VECTOR_LOAD_STORE_MASK_I(store, 2, D, D, Store, xtn,  S, H, B, 4, 8, `, immI_4', size)
1157 VECTOR_LOAD_STORE_MASK_I(store, 4, D, X, Store, xtn,  S, H, B, 4, 8, `, immI_4', size)
1158 undefine(PREDICATE)
1159 dnl loadmask2L: VectorLoadMask for 2 long or double lanes: three uxtl widening steps from T8B up to T2D, then negr on T2D.
1160 instruct loadmask2L(vecX dst, vecD src)
1161 %{
1162   predicate(n->as_Vector()->length() == 2 &&
1163             (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
1164              n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
1165   match(Set dst (VectorLoadMask src));
1166   ins_cost(INSN_COST);
1167   format %{ "uxtl  $dst, $src\t# 2B to 2S\n\t"
1168             "uxtl  $dst, $dst\t# 2S to 2I\n\t"
1169             "uxtl  $dst, $dst\t# 2I to 2L\n\t"
1170             "neg   $dst, $dst\t# load mask (2B to 2L)" %}
1171   ins_encode %{
1172     __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
1173     __ uxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H);
1174     __ uxtl(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($dst$$reg), __ T2S);
1175     __ negr(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($dst$$reg));
1176   %}
1177   ins_pipe(pipe_slow);
1178 %}
1179 
1180 instruct storemask2L(vecD dst, vecX src, immI_8 size)
1181 %{
1182   predicate(n->as_Vector()->length() == 2);
1183   match(Set dst (VectorStoreMask src size));
1184   ins_cost(INSN_COST);
1185   format %{ "xtn  $dst, $src\t# 2L to 2I\n\t"
1186             "xtn  $dst, $dst\t# 2I to 2S\n\t"
1187             "xtn  $dst, $dst\t# 2S to 2B\n\t"
1188             "neg  $dst, $dst\t# store mask (2L to 2B)" %}
1189   ins_encode %{
1190     __ xtn(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg), __ T2D);  // narrow 64-bit lanes to 32-bit
1191     __ xtn(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($dst$$reg), __ T4S);  // then 32-bit lanes to 16-bit
1192     __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg), __ T8H);  // then 16-bit lanes to 8-bit
1193     __ negr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg));  // negate every byte lane of the narrowed mask
1194   %}
1195   ins_pipe(pipe_slow);
1196 %}
1197 
1198 //-------------------------------- LOAD_IOTA_INDICES----------------------------------
1199 dnl PREDICATE: when $1 is 8 accepts byte vectors of length 2, 4 or 8; otherwise requires a byte vector of length 16.
1200 define(`PREDICATE', `ifelse($1, 8,
1201 `predicate((n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4 ||
1202              n->as_Vector()->length() == 8) &&
1203              n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);',
1204 `predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);')')dnl
1205 dnl VECTOR_LOAD_CON: VectorLoadConst of immI0: puts the vector_iota_indices stub address in rscratch1 and loads dst from it.
1206 define(`VECTOR_LOAD_CON', `
1207 instruct loadcon$1B`'(vec$2 dst, immI0 src)
1208 %{
1209   PREDICATE($1)
1210   match(Set dst (VectorLoadConst src));
1211   ins_cost(INSN_COST);
1212   format %{ "ldr $dst, CONSTANT_MEMORY\t# load iota indices" %}
1213   ins_encode %{
1214     __ lea(rscratch1, ExternalAddress(StubRoutines::aarch64::vector_iota_indices()));
1215     __ ldr$3(as_FloatRegister($dst$$reg), rscratch1);
1216   %}
1217   ins_pipe(pipe_class_memory);
1218 %}')dnl
1219 dnl             $1  $2 $3    <- vector bytes, vec class, ldr suffix
1220 VECTOR_LOAD_CON(8,  D, d)
1221 VECTOR_LOAD_CON(16, X, q)
1222 undefine(PREDICATE)
1223 dnl
1224 //-------------------------------- LOAD_SHUFFLE ----------------------------------
1225 dnl VECTOR_LOAD_SHUFFLE_B: VectorLoadShuffle for byte lanes is a plain register copy, emitted as orr of src with itself.
1226 define(`VECTOR_LOAD_SHUFFLE_B', `
1227 instruct loadshuffle$1B`'(vec$2 dst, vec$2 src)
1228 %{
1229   predicate(n->as_Vector()->length() == $1 &&
1230             n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
1231   match(Set dst (VectorLoadShuffle src));
1232   ins_cost(INSN_COST);
1233   format %{ "mov  $dst, $src\t# get $1B shuffle" %}
1234   ins_encode %{
1235     __ orr(as_FloatRegister($dst$$reg), __ T$1B,
1236            as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
1237   %}
1238   ins_pipe(pipe_class_default);
1239 %}')dnl
1240 dnl                   $1  $2    <- lanes, vec class
1241 VECTOR_LOAD_SHUFFLE_B(8,  D)
1242 VECTOR_LOAD_SHUFFLE_B(16, X)
1243 dnl VECTOR_LOAD_SHUFFLE_S: VectorLoadShuffle for short lanes: one uxtl widens the shuffle from T8B to T8H.
1244 define(`VECTOR_LOAD_SHUFFLE_S', `
1245 instruct loadshuffle$1S`'(vec$2 dst, vec$3 src)
1246 %{
1247   predicate(n->as_Vector()->length() == $1 &&
1248             n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
1249   match(Set dst (VectorLoadShuffle src));
1250   ins_cost(INSN_COST);
1251   format %{ "uxtl  $dst, $src\t# $1B to $1H" %}
1252   ins_encode %{
1253     __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
1254   %}
1255   ins_pipe(pipe_class_default);
1256 %}')dnl
1257 dnl                   $1 $2 $3    <- lanes, dst vec class, src vec class
1258 VECTOR_LOAD_SHUFFLE_S(4, D, D)
1259 VECTOR_LOAD_SHUFFLE_S(8, X, D)
1260 dnl loadshuffle4I (defined below): VectorLoadShuffle for 4 int or float lanes: two uxtl steps widen the shuffle from T8B to T4S.
1261 
1262 instruct loadshuffle4I(vecX dst, vecD src)
1263 %{
1264   predicate(n->as_Vector()->length() == 4 &&
1265            (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
1266             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
1267   match(Set dst (VectorLoadShuffle src));
1268   ins_cost(INSN_COST);
1269   format %{ "uxtl  $dst, $src\t# 4B to 4H \n\t"
1270             "uxtl  $dst, $dst\t# 4H to 4S" %}
1271   ins_encode %{
1272     __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
1273     __ uxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H);
1274   %}
1275   ins_pipe(pipe_slow);
1276 %}
1277 
1278 //-------------------------------- Rearrange -------------------------------------
1279 // Here is an example that rearranges a NEON vector with 4 ints:
1280 // Rearrange V1 int[a0, a1, a2, a3] to V2 int[a2, a3, a0, a1]
1281 //   1. Get the indices of V1 and store them as Vi byte[0, 1, 2, 3].
1282 //   2. Convert Vi byte[0, 1, 2, 3] to the indices of V2 and also store them as Vi byte[2, 3, 0, 1].
1283 //   3. Unsigned extend Long Vi from byte[2, 3, 0, 1] to int[2, 3, 0, 1].
1284 //   4. Multiply Vi int[2, 3, 0, 1] with constant int[0x04040404, 0x04040404, 0x04040404, 0x04040404]
1285 //      and get tbl base Vm int[0x08080808, 0x0c0c0c0c, 0x00000000, 0x04040404].
1286 //   5. Add Vm with constant int[0x03020100, 0x03020100, 0x03020100, 0x03020100]
1287 //      and get tbl index Vm int[0x0b0a0908, 0x0f0e0d0c, 0x03020100, 0x07060504]
1288 //   6. Use Vm as index register, and use V1 as table register.
1289 //      Then get V2 as the result by tbl NEON instructions.
1290 // Notes:
1291 //   Step 1 matches VectorLoadConst.
1292 //   Step 3 matches VectorLoadShuffle.
1293 //   Step 4, 5, 6 match VectorRearrange.
1294 //   For VectorRearrange short/int, the reason why such complex calculation is
1295 //   required is because NEON tbl supports bytes table only, so for short/int, we
1296 //   need to lookup 2/4 bytes as a group. For VectorRearrange long, we use bsl
1297 //   to implement rearrange.
1298 define(`VECTOR_REARRANGE_B', `
1299 instruct rearrange$1B`'(vec$2 dst, vec$2 src, vec$2 shuffle)
1300 %{
1301   predicate(n->as_Vector()->length() == $1 &&
1302             n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
1303   match(Set dst (VectorRearrange src shuffle));
1304   ins_cost(INSN_COST);
1305   effect(TEMP_DEF dst);
1306   format %{ "tbl $dst, {$src}, $shuffle\t# rearrange $1B" %}
1307   ins_encode %{
1308     __ tbl(as_FloatRegister($dst$$reg), __ T$1B,
1309            as_FloatRegister($src$$reg), 1, as_FloatRegister($shuffle$$reg));
1310   %}
1311   ins_pipe(pipe_slow);
1312 %}')dnl
1313 dnl                $1  $2    <- vector bytes, vec class; VectorRearrange on bytes is one tbl with src as the table and shuffle as the indices
1314 VECTOR_REARRANGE_B(8,  D)
1315 VECTOR_REARRANGE_B(16, X)
1316 dnl VECTOR_REARRANGE_S: VectorRearrange on shorts: shuffle times 0x0202 per lane, plus 0x0100 per lane, gives the byte indices fed to tbl.
1317 define(`VECTOR_REARRANGE_S', `
1318 instruct rearrange$1S`'(vec$2 dst, vec$2 src, vec$2 shuffle, vec$2 tmp0, vec$2 tmp1)
1319 %{
1320   predicate(n->as_Vector()->length() == $1 &&
1321             n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
1322   match(Set dst (VectorRearrange src shuffle));
1323   ins_cost(INSN_COST);
1324   effect(TEMP_DEF dst, TEMP tmp0, TEMP tmp1);
1325   format %{ "mov   $tmp0, CONSTANT\t# constant 0x0202020202020202\n\t"
1326             "mov   $tmp1, CONSTANT\t# constant 0x0100010001000100\n\t"
1327             "mulv  $dst, T$1H, $shuffle, $tmp0\n\t"
1328             "addv  $dst, T$3B, $dst, $tmp1\n\t"
1329             "tbl   $dst, {$src}, $dst\t# rearrange $1S" %}
1330   ins_encode %{
1331     __ mov(as_FloatRegister($tmp0$$reg), __ T$3B, 0x02);
1332     __ mov(as_FloatRegister($tmp1$$reg), __ T$1H, 0x0100);
1333     __ mulv(as_FloatRegister($dst$$reg), __ T$1H,
1334             as_FloatRegister($shuffle$$reg), as_FloatRegister($tmp0$$reg));
1335     __ addv(as_FloatRegister($dst$$reg), __ T$3B,
1336             as_FloatRegister($dst$$reg), as_FloatRegister($tmp1$$reg));
1337     __ tbl(as_FloatRegister($dst$$reg), __ T$3B,
1338            as_FloatRegister($src$$reg), 1, as_FloatRegister($dst$$reg));
1339   %}
1340   ins_pipe(pipe_slow);
1341 %}')dnl
1342 dnl                $1 $2 $3    <- lanes, vec class, vector bytes
1343 VECTOR_REARRANGE_S(4, D, 8)
1344 VECTOR_REARRANGE_S(8, X, 16)
1345 
1346 instruct rearrange4I(vecX dst, vecX src, vecX shuffle, vecX tmp0, vecX tmp1)
1347 %{
1348   predicate(n->as_Vector()->length() == 4 &&
1349            (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
1350             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
1351   match(Set dst (VectorRearrange src shuffle));
1352   ins_cost(INSN_COST);
1353   effect(TEMP_DEF dst, TEMP tmp0, TEMP tmp1);
1354   format %{ "mov   $tmp0, CONSTANT\t# constant 0x0404040404040404\n\t"
1355             "mov   $tmp1, CONSTANT\t# constant 0x0302010003020100\n\t"
1356             "mulv  $dst, T4S, $shuffle, $tmp0\n\t"
1357             "addv  $dst, T16B, $dst, $tmp1\n\t"
1358             "tbl   $dst, {$src}, $dst\t# rearrange 4I" %}
1359   ins_encode %{
1360     __ mov(as_FloatRegister($tmp0$$reg), __ T16B, 0x04);  // 0x04 in every byte, so 0x04040404 in each 32-bit lane
1361     __ mov(as_FloatRegister($tmp1$$reg), __ T4S, 0x03020100);  // byte offsets 0 to 3 in each 32-bit lane
1362     __ mulv(as_FloatRegister($dst$$reg), __ T4S,
1363             as_FloatRegister($shuffle$$reg), as_FloatRegister($tmp0$$reg));
1364     __ addv(as_FloatRegister($dst$$reg), __ T16B,
1365             as_FloatRegister($dst$$reg), as_FloatRegister($tmp1$$reg));
1366     __ tbl(as_FloatRegister($dst$$reg), __ T16B,
1367            as_FloatRegister($src$$reg), 1, as_FloatRegister($dst$$reg));  // src is the table, dst holds the byte indices
1368   %}
1369   ins_pipe(pipe_slow);
1370 %}
1371 
1372 //-------------------------------- Anytrue/alltrue -----------------------------
1373 dnl ANYTRUE_IN_MASK: VectorTest with BoolTest::ne: addv sums the bytes of src1; dst becomes 1 when byte 0 of the sum is nonzero, else 0.
1374 define(`ANYTRUE_IN_MASK', `
1375 instruct anytrue_in_mask$1B`'(iRegINoSp dst, vec$2 src1, vec$2 src2, vec$2 tmp, rFlagsReg cr)
1376 %{
1377   predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
1378   match(Set dst (VectorTest src1 src2 ));
1379   ins_cost(INSN_COST);
1380   effect(TEMP tmp, KILL cr);
1381   format %{ "addv  $tmp, T$1B, $src1\t# src1 and src2 are the same\n\t"
1382             "umov  $dst, $tmp, B, 0\n\t"
1383             "cmp   $dst, 0\n\t"
1384             "cset  $dst" %}
1385   ins_encode %{
1386     __ addv(as_FloatRegister($tmp$$reg), __ T$1B, as_FloatRegister($src1$$reg));
1387     __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
1388     __ cmpw($dst$$Register, zr);
1389     __ csetw($dst$$Register, Assembler::NE);
1390   %}
1391   ins_pipe(pipe_slow);
1392 %}')dnl
1393 dnl             $1  $2    <- vector bytes, vec class
1394 ANYTRUE_IN_MASK(8,  D)
1395 ANYTRUE_IN_MASK(16, X)
1396 dnl ALLTRUE_IN_MASK: VectorTest with BoolTest::overflow: dst becomes 1 when byte 0 of the addv sum over not(src1 and src2) is zero, else 0.
1397 define(`ALLTRUE_IN_MASK', `
1398 instruct alltrue_in_mask$1B`'(iRegINoSp dst, vec$2 src1, vec$2 src2, vec$2 tmp, rFlagsReg cr)
1399 %{
1400   predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
1401   match(Set dst (VectorTest src1 src2 ));
1402   ins_cost(INSN_COST);
1403   effect(TEMP tmp, KILL cr);
1404   format %{ "andr  $tmp, T$1B, $src1, $src2\t# src2 is maskAllTrue\n\t"
1405             "notr  $tmp, T$1B, $tmp\n\t"
1406             "addv  $tmp, T$1B, $tmp\n\t"
1407             "umov  $dst, $tmp, B, 0\n\t"
1408             "cmp   $dst, 0\n\t"
1409             "cset  $dst" %}
1410   ins_encode %{
1411     __ andr(as_FloatRegister($tmp$$reg), __ T$1B,
1412             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
1413     __ notr(as_FloatRegister($tmp$$reg), __ T$1B, as_FloatRegister($tmp$$reg));
1414     __ addv(as_FloatRegister($tmp$$reg), __ T$1B, as_FloatRegister($tmp$$reg));
1415     __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
1416     __ cmpw($dst$$Register, zr);
1417     __ csetw($dst$$Register, Assembler::EQ);
1418   %}
1419   ins_pipe(pipe_slow);
1420 %}')dnl
1421 dnl             $1  $2    <- vector bytes, vec class
1422 ALLTRUE_IN_MASK(8,  D)
1423 ALLTRUE_IN_MASK(16, X)
1424 dnl