1 //
   2 // Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2014, 2018, Red Hat, Inc. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //
  24 //
  25 
  26 // AArch64 Architecture Description File
  27 
  28 //----------REGISTER DEFINITION BLOCK------------------------------------------
  29 // This information is used by the matcher and the register allocator to
  30 // describe individual registers and classes of registers within the target
// architecture.
  32 
  33 register %{
  34 //----------Architecture Description Register Definitions----------------------
  35 // General Registers
  36 // "reg_def"  name ( register save type, C convention save type,
  37 //                   ideal register type, encoding );
  38 // Register Save Types:
  39 //
  40 // NS  = No-Save:       The register allocator assumes that these registers
  41 //                      can be used without saving upon entry to the method, &
  42 //                      that they do not need to be saved at call sites.
  43 //
  44 // SOC = Save-On-Call:  The register allocator assumes that these registers
  45 //                      can be used without saving upon entry to the method,
  46 //                      but that they must be saved at call sites.
  47 //
  48 // SOE = Save-On-Entry: The register allocator assumes that these registers
  49 //                      must be saved before using them upon entry to the
  50 //                      method, but they do not need to be saved at call
  51 //                      sites.
  52 //
  53 // AS  = Always-Save:   The register allocator assumes that these registers
  54 //                      must be saved before using them upon entry to the
  55 //                      method, & that they must be saved at call sites.
  56 //
  57 // Ideal Register Type is used to determine how to save & restore a
  58 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  59 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  60 //
  61 // The encoding number is the actual bit-pattern placed into the opcodes.
  62 
  63 // We must define the 64 bit int registers in two 32 bit halves, the
  64 // real lower register and a virtual upper half register. upper halves
  65 // are used by the register allocator but are not actually supplied as
  66 // operands to memory ops.
  67 //
  68 // follow the C1 compiler in making registers
  69 //
  70 //   r0-r7,r10-r26 volatile (caller save)
//   r27-r31 system (no save, no allocate)
  72 //   r8-r9 invisible to the allocator (so we can use them as scratch regs)
  73 //
// as regards Java usage, we don't use any callee save registers
  75 // because this makes it difficult to de-optimise a frame (see comment
  76 // in x86 implementation of Deoptimization::unwind_callee_save_values)
  77 //
  78 
  79 // General Registers
  80 
// r0-r18: caller save in both the Java and C calling conventions
// (r8 and r9 are deliberately not defined here: per the comment above
// they are kept invisible to the allocator as scratch registers)
reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
// r19-r26: caller save for Java (first column SOC), callee save for
// the C convention (second column SOE)
reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
// r27-r31: system registers, No-Save for Java (not allocated; see the
// "non-allocatable registers" section of alloc_class chunk0)
reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());
reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
 141 
 142 // ----------------------------
 143 // Float/Double Registers
 144 // ----------------------------
 145 
 146 // Double Registers
 147 
 148 // The rules of ADL require that double registers be defined in pairs.
 149 // Each pair must be two 32-bit values, but not necessarily a pair of
 150 // single float registers. In each pair, ADLC-assigned register numbers
 151 // must be adjacent, with the lower number even. Finally, when the
 152 // CPU stores such a register pair to memory, the word associated with
 153 // the lower ADLC-assigned number must be stored to the lower address.
 154 
// AArch64 has 32 floating-point registers, each 128 bits wide. A
// register can hold a vector of single or double precision
// floating-point values: up to 4 * 32 bit floats or 2 * 64 bit
// doubles.  We currently only use the first float or double element
// of the vector.
 159 
// for Java use, float registers v0-v15 are always save on call
// (whereas the platform ABI treats v8-v15 as callee save). float
// registers v16-v31 are SOC as per the platform spec
 163 
  // each vector register V<n> is described to the allocator as four
  // 32 bit words: the real register V<n> plus the virtual slices
  // V<n>_H (next()), V<n>_J (next(2)) and V<n>_K (next(3))
  reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
  reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
  reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
  reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );

  reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
  reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
  reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
  reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );

  reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
  reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
  reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
  reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );

  reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
  reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
  reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
  reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );

  reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
  reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
  reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
  reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );

  reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
  reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
  reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
  reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );

  reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
  reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
  reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
  reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );

  reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
  reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
  reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
  reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );

  // note: v8-v15 are SOC here for Java use even though the platform
  // ABI treats them as callee save (see comment above)
  reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
  reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
  reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
  reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );

  reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
  reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
  reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
  reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );

  reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
  reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
  reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
  reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));

  reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
  reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
  reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
  reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));

  reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
  reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
  reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
  reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));

  reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
  reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
  reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
  reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));

  reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
  reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
  reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
  reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));

  reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
  reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
  reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
  reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));

  reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
  reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
  reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
  reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));

  reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
  reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
  reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
  reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));

  reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
  reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
  reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
  reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));

  reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
  reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
  reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
  reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));

  reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
  reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
  reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
  reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));

  reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
  reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
  reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
  reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));

  reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
  reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
  reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
  reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));

  reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
  reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
  reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
  reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));

  reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
  reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
  reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
  reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));

  reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
  reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
  reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
  reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));

  reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
  reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
  reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
  reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));

  reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
  reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
  reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
  reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));

  reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
  reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
  reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
  reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));

  reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
  reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
  reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
  reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));

  reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
  reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
  reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
  reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));

  reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
  reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
  reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
  reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
 323 
 324 // ----------------------------
 325 // Special Registers
 326 // ----------------------------
 327 
// the AArch64 CPSR status flag register is not directly accessible as
// an instruction operand. the FPSR status flag register is a system
// register which can be written/read using MSR/MRS but again does not
// appear as an operand (a code identifying the FPSR occurs as an
// immediate value in the instruction).
 333 
// the condition-flags register: a placeholder definition with no
// backing VMReg (VMRegImpl::Bad()) since the flags are not directly
// addressable as an instruction operand
reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
 335 
 336 
 337 // Specify priority of register selection within phases of register
 338 // allocation.  Highest priority is first.  A useful heuristic is to
 339 // give registers a low priority when they are required by machine
 340 // instructions, like EAX and EDX on I486, and choose no-save registers
 341 // before save-on-call, & save-on-call before save-on-entry.  Registers
 342 // which participate in fixed calling sequences should come last.
 343 // Registers which are used as pairs must fall on an even boundary.
 344 
// allocation order for the general registers (highest priority first)
alloc_class chunk0(
    // volatiles
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,

    // arg registers
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,

    // non-volatiles
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,

    // non-allocatable registers

    R27, R27_H, // heapbase
    R28, R28_H, // thread
    R29, R29_H, // fp
    R30, R30_H, // lr
    R31, R31_H, // sp
);

// allocation order for the float/vector registers (highest priority first)
alloc_class chunk1(

    // no save
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K,

    // arg registers
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,

    // non-volatiles
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
);

// the flag register is allocated in its own chunk
alloc_class chunk2(RFLAGS);
 428 
 429 //----------Architecture Description Register Classes--------------------------
 430 // Several register classes are automatically defined based upon information in
 431 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 436 //
 437 
 438 // Class for all 32 bit integer registers -- excludes SP which will
 439 // never be used as an integer register
reg_class any_reg32(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26,
    R27,
    R28,
    R29,
    R30
);

// Singleton class for R0 int register
reg_class int_r0_reg(R0);

// Singleton class for R2 int register
reg_class int_r2_reg(R2);

// Singleton class for R3 int register
reg_class int_r3_reg(R3);

// Singleton class for R4 int register
reg_class int_r4_reg(R4);

// Class for all long integer registers (including SP == r31)
reg_class any_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);
 517 
// Class for all non-special integer registers
// (variant with r29/fp excluded; see no_special_reg32 below)
reg_class no_special_reg32_no_fp(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,                        // rmethod
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26
 /* R27, */                     // heapbase
 /* R28, */                     // thread
 /* R29, */                     // fp
 /* R30, */                     // lr
 /* R31 */                      // sp
);

// Class for all non-special integer registers, with r29/fp allocatable
reg_class no_special_reg32_with_fp(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,                        // rmethod
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26
 /* R27, */                     // heapbase
 /* R28, */                     // thread
    R29,                        // fp
 /* R30, */                     // lr
 /* R31 */                      // sp
);

// selects the no_fp variant when PreserveFramePointer is enabled,
// keeping r29 out of the allocatable set
reg_class_dynamic no_special_reg32(no_special_reg32_no_fp, no_special_reg32_with_fp, %{ PreserveFramePointer %});
 586 
// Class for all non-special long integer registers
// (variant with r29/fp excluded; see no_special_reg below)
reg_class no_special_reg_no_fp(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,                 // rmethod
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);

// Class for all non-special long integer registers, with r29/fp allocatable
reg_class no_special_reg_with_fp(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,                 // rmethod
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
    R29, R29_H,                 // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);

// selects the no_fp variant when PreserveFramePointer is enabled,
// keeping r29 out of the allocatable set
reg_class_dynamic no_special_reg(no_special_reg_no_fp, no_special_reg_with_fp, %{ PreserveFramePointer %});
 655 
// Class for 64 bit register r0
reg_class r0_reg(
    R0, R0_H
);

// Class for 64 bit register r1
reg_class r1_reg(
    R1, R1_H
);

// Class for 64 bit register r2
reg_class r2_reg(
    R2, R2_H
);

// Class for 64 bit register r3
reg_class r3_reg(
    R3, R3_H
);

// Class for 64 bit register r4
reg_class r4_reg(
    R4, R4_H
);

// Class for 64 bit register r5
reg_class r5_reg(
    R5, R5_H
);

// Class for 64 bit register r10
reg_class r10_reg(
    R10, R10_H
);

// Class for 64 bit register r11
reg_class r11_reg(
    R11, R11_H
);

// Class for method register (rmethod == r12)
reg_class method_reg(
    R12, R12_H
);

// Class for heapbase register (r27)
reg_class heapbase_reg(
    R27, R27_H
);

// Class for thread register (r28)
reg_class thread_reg(
    R28, R28_H
);

// Class for frame pointer register (r29)
reg_class fp_reg(
    R29, R29_H
);

// Class for link register (r30)
reg_class lr_reg(
    R30, R30_H
);

// Class for long sp register (r31)
reg_class sp_reg(
  R31, R31_H
);
 725 
// Class for all pointer registers (includes the system registers,
// even sp)
reg_class ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);

// Class for all non_special pointer registers (system registers
// heapbase/thread/fp/lr/sp excluded)
reg_class no_special_ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);
 793 
// Class for all float registers (only the first 32 bit word of each
// vector register is used for a single-precision value)
reg_class float_reg(
    V0,
    V1,
    V2,
    V3,
    V4,
    V5,
    V6,
    V7,
    V8,
    V9,
    V10,
    V11,
    V12,
    V13,
    V14,
    V15,
    V16,
    V17,
    V18,
    V19,
    V20,
    V21,
    V22,
    V23,
    V24,
    V25,
    V26,
    V27,
    V28,
    V29,
    V30,
    V31
);

// Double precision float registers have virtual `high halves' that
// are needed by the allocator.
// Class for all double registers
reg_class double_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 867 
// Class for all 64bit vector registers
// The slot layout is identical to double_reg above: each V register
// plus its virtual high half.
reg_class vectord_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 903 
// Class for all 128bit vector registers
// Each V register occupies four 32-bit allocator slots: the base name
// plus the virtual _H, _J and _K halves.
reg_class vectorx_reg(
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K
);
 939 
// Class for 128 bit register v0
// n.b. v0..v3 below list only the base slot and _H half, unlike the
// four-slot entries in vectorx_reg -- presumably the masks only need
// the low slots; TODO confirm the _J/_K omission is deliberate
reg_class v0_reg(
    V0, V0_H
);

// Class for 128 bit register v1
reg_class v1_reg(
    V1, V1_H
);

// Class for 128 bit register v2
reg_class v2_reg(
    V2, V2_H
);

// Class for 128 bit register v3
reg_class v3_reg(
    V3, V3_H
);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);
 962 
 963 %}
 964 
 965 //----------DEFINITION BLOCK---------------------------------------------------
 966 // Define name --> value mappings to inform the ADLC of an integer valued name
 967 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 968 // Format:
 969 //        int_def  <name>         ( <int_value>, <expression>);
 970 // Generated Code in ad_<arch>.hpp
 971 //        #define  <name>   (<expression>)
 972 //        // value == <int_value>
 973 // Generated code in ad_<arch>.cpp adlc_verification()
 974 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 975 //
 976 
 977 // we follow the ppc-aix port in using a simple cost model which ranks
 978 // register operations as cheap, memory ops as more expensive and
 979 // branches as most expensive. the first two have a low as well as a
 980 // normal cost. huge cost appears to be a way of saying don't do
 981 // something
 982 
definitions %{
  // The default cost (of a register move instruction).
  int_def INSN_COST            (    100,     100);
  // Branches and calls are ranked twice as expensive as a plain insn.
  int_def BRANCH_COST          (    200,     2 * INSN_COST);
  int_def CALL_COST            (    200,     2 * INSN_COST);
  // Volatile references are the most expensive operations in this model.
  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
%}
 990 
 991 
 992 //----------SOURCE BLOCK-------------------------------------------------------
 993 // This is a block of C++ code which provides values, functions, and
 994 // definitions necessary in the rest of the architecture description
 995 
 996 source_hpp %{
 997 
 998 #include "asm/macroAssembler.hpp"
 999 #include "gc/shared/cardTable.hpp"
1000 #include "gc/shared/cardTableBarrierSet.hpp"
1001 #include "gc/shared/collectedHeap.hpp"
1002 #include "opto/addnode.hpp"
1003 
class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  // AArch64 does not use call trampolines, so both queries report zero.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};
1021 
class HandlerImpl {

 public:

  // Emitters for the exception and deopt handler stubs; implemented
  // elsewhere in the ad file (not visible here).
  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  // The exception handler is a single far branch to the exception blob.
  static uint size_exception_handler() {
    return MacroAssembler::far_branch_size();
  }

  static uint size_deopt_handler() {
    // count one adr and one far branch instruction
    return 4 * NativeInstruction::instruction_size;
  }
};
1038 
1039  bool is_CAS(int opcode, bool maybe_volatile);
1040 
1041   // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
1042 
1043   bool unnecessary_acquire(const Node *barrier);
1044   bool needs_acquiring_load(const Node *load);
1045 
1046   // predicates controlling emit of str<x>/stlr<x> and associated dmbs
1047 
1048   bool unnecessary_release(const Node *barrier);
1049   bool unnecessary_volatile(const Node *barrier);
1050   bool needs_releasing_store(const Node *store);
1051 
1052   // predicate controlling translation of CompareAndSwapX
1053   bool needs_acquiring_load_exclusive(const Node *load);
1054 
1055   // predicate controlling translation of StoreCM
1056   bool unnecessary_storestore(const Node *storecm);
1057 
1058   // predicate controlling addressing modes
1059   bool size_fits_all_mem_uses(AddPNode* addp, int shift);
1060 %}
1061 
1062 source %{
1063 
  // Optimization of volatile gets and puts
1065   // -------------------------------------
1066   //
1067   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1068   // use to implement volatile reads and writes. For a volatile read
1069   // we simply need
1070   //
1071   //   ldar<x>
1072   //
1073   // and for a volatile write we need
1074   //
1075   //   stlr<x>
1076   //
1077   // Alternatively, we can implement them by pairing a normal
1078   // load/store with a memory barrier. For a volatile read we need
1079   //
1080   //   ldr<x>
1081   //   dmb ishld
1082   //
1083   // for a volatile write
1084   //
1085   //   dmb ish
1086   //   str<x>
1087   //   dmb ish
1088   //
1089   // We can also use ldaxr and stlxr to implement compare and swap CAS
1090   // sequences. These are normally translated to an instruction
1091   // sequence like the following
1092   //
1093   //   dmb      ish
1094   // retry:
1095   //   ldxr<x>   rval raddr
1096   //   cmp       rval rold
1097   //   b.ne done
  //   stlxr<x>  rval, rnew, [raddr]
1099   //   cbnz      rval retry
1100   // done:
1101   //   cset      r0, eq
1102   //   dmb ishld
1103   //
1104   // Note that the exclusive store is already using an stlxr
1105   // instruction. That is required to ensure visibility to other
1106   // threads of the exclusive write (assuming it succeeds) before that
1107   // of any subsequent writes.
1108   //
1109   // The following instruction sequence is an improvement on the above
1110   //
1111   // retry:
1112   //   ldaxr<x>  rval raddr
1113   //   cmp       rval rold
1114   //   b.ne done
  //   stlxr<x>  rval, rnew, [raddr]
1116   //   cbnz      rval retry
1117   // done:
1118   //   cset      r0, eq
1119   //
1120   // We don't need the leading dmb ish since the stlxr guarantees
1121   // visibility of prior writes in the case that the swap is
1122   // successful. Crucially we don't have to worry about the case where
1123   // the swap is not successful since no valid program should be
1124   // relying on visibility of prior changes by the attempting thread
1125   // in the case where the CAS fails.
1126   //
1127   // Similarly, we don't need the trailing dmb ishld if we substitute
1128   // an ldaxr instruction since that will provide all the guarantees we
1129   // require regarding observation of changes made by other threads
1130   // before any change to the CAS address observed by the load.
1131   //
1132   // In order to generate the desired instruction sequence we need to
1133   // be able to identify specific 'signature' ideal graph node
1134   // sequences which i) occur as a translation of a volatile reads or
1135   // writes or CAS operations and ii) do not occur through any other
1136   // translation or graph transformation. We can then provide
  // alternative adlc matching rules which translate these node
1138   // sequences to the desired machine code sequences. Selection of the
1139   // alternative rules can be implemented by predicates which identify
1140   // the relevant node sequences.
1141   //
1142   // The ideal graph generator translates a volatile read to the node
1143   // sequence
1144   //
1145   //   LoadX[mo_acquire]
1146   //   MemBarAcquire
1147   //
1148   // As a special case when using the compressed oops optimization we
1149   // may also see this variant
1150   //
1151   //   LoadN[mo_acquire]
1152   //   DecodeN
1153   //   MemBarAcquire
1154   //
1155   // A volatile write is translated to the node sequence
1156   //
1157   //   MemBarRelease
1158   //   StoreX[mo_release] {CardMark}-optional
1159   //   MemBarVolatile
1160   //
1161   // n.b. the above node patterns are generated with a strict
1162   // 'signature' configuration of input and output dependencies (see
1163   // the predicates below for exact details). The card mark may be as
1164   // simple as a few extra nodes or, in a few GC configurations, may
1165   // include more complex control flow between the leading and
1166   // trailing memory barriers. However, whatever the card mark
1167   // configuration these signatures are unique to translated volatile
1168   // reads/stores -- they will not appear as a result of any other
1169   // bytecode translation or inlining nor as a consequence of
1170   // optimizing transforms.
1171   //
1172   // We also want to catch inlined unsafe volatile gets and puts and
1173   // be able to implement them using either ldar<x>/stlr<x> or some
1174   // combination of ldr<x>/stlr<x> and dmb instructions.
1175   //
1176   // Inlined unsafe volatiles puts manifest as a minor variant of the
1177   // normal volatile put node sequence containing an extra cpuorder
1178   // membar
1179   //
1180   //   MemBarRelease
1181   //   MemBarCPUOrder
1182   //   StoreX[mo_release] {CardMark}-optional
1183   //   MemBarCPUOrder
1184   //   MemBarVolatile
1185   //
1186   // n.b. as an aside, a cpuorder membar is not itself subject to
1187   // matching and translation by adlc rules.  However, the rule
1188   // predicates need to detect its presence in order to correctly
1189   // select the desired adlc rules.
1190   //
1191   // Inlined unsafe volatile gets manifest as a slightly different
1192   // node sequence to a normal volatile get because of the
1193   // introduction of some CPUOrder memory barriers to bracket the
  // Load. However, the same basic skeleton of a LoadX feeding a
  // MemBarAcquire, possibly through an optional DecodeN, is still
1196   // present
1197   //
1198   //   MemBarCPUOrder
1199   //        ||       \\
1200   //   MemBarCPUOrder LoadX[mo_acquire]
1201   //        ||            |
1202   //        ||       {DecodeN} optional
1203   //        ||       /
1204   //     MemBarAcquire
1205   //
1206   // In this case the acquire membar does not directly depend on the
1207   // load. However, we can be sure that the load is generated from an
1208   // inlined unsafe volatile get if we see it dependent on this unique
1209   // sequence of membar nodes. Similarly, given an acquire membar we
1210   // can know that it was added because of an inlined unsafe volatile
1211   // get if it is fed and feeds a cpuorder membar and if its feed
1212   // membar also feeds an acquiring load.
1213   //
1214   // Finally an inlined (Unsafe) CAS operation is translated to the
1215   // following ideal graph
1216   //
1217   //   MemBarRelease
1218   //   MemBarCPUOrder
1219   //   CompareAndSwapX {CardMark}-optional
1220   //   MemBarCPUOrder
1221   //   MemBarAcquire
1222   //
1223   // So, where we can identify these volatile read and write
1224   // signatures we can choose to plant either of the above two code
1225   // sequences. For a volatile read we can simply plant a normal
1226   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1227   // also choose to inhibit translation of the MemBarAcquire and
1228   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1229   //
1230   // When we recognise a volatile store signature we can choose to
1231   // plant at a dmb ish as a translation for the MemBarRelease, a
1232   // normal str<x> and then a dmb ish for the MemBarVolatile.
1233   // Alternatively, we can inhibit translation of the MemBarRelease
1234   // and MemBarVolatile and instead plant a simple stlr<x>
1235   // instruction.
1236   //
1237   // when we recognise a CAS signature we can choose to plant a dmb
1238   // ish as a translation for the MemBarRelease, the conventional
1239   // macro-instruction sequence for the CompareAndSwap node (which
1240   // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
1241   // Alternatively, we can elide generation of the dmb instructions
1242   // and plant the alternative CompareAndSwap macro-instruction
1243   // sequence (which uses ldaxr<x>).
1244   //
1245   // Of course, the above only applies when we see these signature
1246   // configurations. We still want to plant dmb instructions in any
1247   // other cases where we may see a MemBarAcquire, MemBarRelease or
1248   // MemBarVolatile. For example, at the end of a constructor which
1249   // writes final/volatile fields we will see a MemBarRelease
1250   // instruction and this needs a 'dmb ish' lest we risk the
1251   // constructed object being visible without making the
1252   // final/volatile field writes visible.
1253   //
1254   // n.b. the translation rules below which rely on detection of the
1255   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1256   // If we see anything other than the signature configurations we
1257   // always just translate the loads and stores to ldr<x> and str<x>
1258   // and translate acquire, release and volatile membars to the
1259   // relevant dmb instructions.
1260   //
1261 
// is_CAS(int opcode, bool maybe_volatile)
//
// return true if opcode is one of the possible CompareAndSwapX
// values otherwise false.  GetAndSetX/GetAndAddX always count as CAS
// here; CompareAndExchangeX and the weak CAS variants count only when
// maybe_volatile is true.

bool is_CAS(int opcode, bool maybe_volatile)
{
  switch(opcode) {
    // We handle these
  case Op_CompareAndSwapI:
  case Op_CompareAndSwapL:
  case Op_CompareAndSwapP:
  case Op_CompareAndSwapN:
  case Op_ShenandoahCompareAndSwapP:
  case Op_ShenandoahCompareAndSwapN:
  case Op_CompareAndSwapB:
  case Op_CompareAndSwapS:
  case Op_GetAndSetI:
  case Op_GetAndSetL:
  case Op_GetAndSetP:
  case Op_GetAndSetN:
  case Op_GetAndAddI:
  case Op_GetAndAddL:
    return true;
    // These count as CAS only when they occur as part of a volatile
    // access sequence, i.e. when the caller passes maybe_volatile
  case Op_CompareAndExchangeI:
  case Op_CompareAndExchangeN:
  case Op_CompareAndExchangeB:
  case Op_CompareAndExchangeS:
  case Op_CompareAndExchangeL:
  case Op_CompareAndExchangeP:
  case Op_WeakCompareAndSwapB:
  case Op_WeakCompareAndSwapS:
  case Op_WeakCompareAndSwapI:
  case Op_WeakCompareAndSwapL:
  case Op_WeakCompareAndSwapP:
  case Op_WeakCompareAndSwapN:
  case Op_ShenandoahWeakCompareAndSwapP:
  case Op_ShenandoahWeakCompareAndSwapN:
  case Op_ShenandoahCompareAndExchangeP:
  case Op_ShenandoahCompareAndExchangeN:
    return maybe_volatile;
  default:
    return false;
  }
}
1307 
1308   // helper to determine the maximum number of Phi nodes we may need to
1309   // traverse when searching from a card mark membar for the merge mem
1310   // feeding a trailing membar or vice versa
1311 
1312 // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
1313 
1314 bool unnecessary_acquire(const Node *barrier)
1315 {
1316   assert(barrier->is_MemBar(), "expecting a membar");
1317 
1318   if (UseBarriersForVolatile) {
1319     // we need to plant a dmb
1320     return false;
1321   }
1322 
1323   MemBarNode* mb = barrier->as_MemBar();
1324 
1325   if (mb->trailing_load()) {
1326     return true;
1327   }
1328 
1329   if (mb->trailing_load_store()) {
1330     Node* load_store = mb->in(MemBarNode::Precedent);
1331     assert(load_store->is_LoadStore(), "unexpected graph shape");
1332     return is_CAS(load_store->Opcode(), true);
1333   }
1334 
1335   return false;
1336 }
1337 
1338 bool needs_acquiring_load(const Node *n)
1339 {
1340   assert(n->is_Load(), "expecting a load");
1341   if (UseBarriersForVolatile) {
1342     // we use a normal load and a dmb
1343     return false;
1344   }
1345 
1346   LoadNode *ld = n->as_Load();
1347 
1348   return ld->is_acquire();
1349 }
1350 
1351 bool unnecessary_release(const Node *n)
1352 {
1353   assert((n->is_MemBar() &&
1354           n->Opcode() == Op_MemBarRelease),
1355          "expecting a release membar");
1356 
1357   if (UseBarriersForVolatile) {
1358     // we need to plant a dmb
1359     return false;
1360   }
1361 
1362   MemBarNode *barrier = n->as_MemBar();
1363   if (!barrier->leading()) {
1364     return false;
1365   } else {
1366     Node* trailing = barrier->trailing_membar();
1367     MemBarNode* trailing_mb = trailing->as_MemBar();
1368     assert(trailing_mb->trailing(), "Not a trailing membar?");
1369     assert(trailing_mb->leading_membar() == n, "inconsistent leading/trailing membars");
1370 
1371     Node* mem = trailing_mb->in(MemBarNode::Precedent);
1372     if (mem->is_Store()) {
1373       assert(mem->as_Store()->is_release(), "");
1374       assert(trailing_mb->Opcode() == Op_MemBarVolatile, "");
1375       return true;
1376     } else {
1377       assert(mem->is_LoadStore(), "");
1378       assert(trailing_mb->Opcode() == Op_MemBarAcquire, "");
1379       return is_CAS(mem->Opcode(), true);
1380     }
1381   }
1382   return false;
1383 }
1384 
bool unnecessary_volatile(const Node *n)
{
  // assert n->is_MemBar();
  if (UseBarriersForVolatile) {
    // we need to plant a dmb
    return false;
  }

  MemBarNode *mbvol = n->as_MemBar();

  // The volatile membar is elidable exactly when it is the trailing
  // membar of a volatile store sequence -- the stlr<x> planted for the
  // store already supplies the required ordering.
  bool release = mbvol->trailing_store();
  assert(!release || (mbvol->in(MemBarNode::Precedent)->is_Store() && mbvol->in(MemBarNode::Precedent)->as_Store()->is_release()), "");
#ifdef ASSERT
  if (release) {
    // sanity check the leading/trailing pairing recorded in the graph
    Node* leading = mbvol->leading_membar();
    assert(leading->Opcode() == Op_MemBarRelease, "");
    assert(leading->as_MemBar()->leading_store(), "");
    assert(leading->as_MemBar()->trailing_membar() == mbvol, "");
  }
#endif

  return release;
}
1408 
1409 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
1410 
1411 bool needs_releasing_store(const Node *n)
1412 {
1413   // assert n->is_Store();
1414   if (UseBarriersForVolatile) {
1415     // we use a normal store and dmb combination
1416     return false;
1417   }
1418 
1419   StoreNode *st = n->as_Store();
1420 
1421   return st->trailing_membar() != NULL;
1422 }
1423 
1424 // predicate controlling translation of CAS
1425 //
1426 // returns true if CAS needs to use an acquiring load otherwise false
1427 
1428 bool needs_acquiring_load_exclusive(const Node *n)
1429 {
1430   assert(is_CAS(n->Opcode(), true), "expecting a compare and swap");
1431   if (UseBarriersForVolatile) {
1432     return false;
1433   }
1434 
1435   LoadStoreNode* ldst = n->as_LoadStore();
1436   if (is_CAS(n->Opcode(), false)) {
1437     assert(ldst->trailing_membar() != NULL, "expected trailing membar");
1438   } else {
1439     return ldst->trailing_membar() != NULL;
1440   }
1441 
1442   // so we can just return true here
1443   return true;
1444 }
1445 
1446 // predicate controlling translation of StoreCM
1447 //
1448 // returns true if a StoreStore must precede the card write otherwise
1449 // false
1450 
1451 bool unnecessary_storestore(const Node *storecm)
1452 {
1453   assert(storecm->Opcode()  == Op_StoreCM, "expecting a StoreCM");
1454 
1455   // we need to generate a dmb ishst between an object put and the
1456   // associated card mark when we are using CMS without conditional
1457   // card marking
1458 
1459   if (UseConcMarkSweepGC && !UseCondCardMark) {
1460     return false;
1461   }
1462 
1463   // a storestore is unnecesary in all other cases
1464 
1465   return true;
1466 }
1467 
1468 
1469 #define __ _masm.
1470 
1471 // advance declarations for helper functions to convert register
1472 // indices to register objects
1473 
1474 // the ad file has to provide implementations of certain methods
1475 // expected by the generic code
1476 //
1477 // REQUIRED FUNCTIONALITY
1478 
1479 //=============================================================================
1480 
1481 // !!!!! Special hack to get all types of calls to specify the byte offset
1482 //       from the start of the call to the point where the return address
1483 //       will point.
1484 
1485 int MachCallStaticJavaNode::ret_addr_offset()
1486 {
1487   // call should be a simple bl
1488   int off = 4;
1489   return off;
1490 }
1491 
int MachCallDynamicJavaNode::ret_addr_offset()
{
  // movz/movk/movk to materialise the inline-cache value, then the bl:
  // four 4-byte instructions
  return 16; // movz, movk, movk, bl
}
1496 
1497 int MachCallRuntimeNode::ret_addr_offset() {
1498   // for generated stubs the call will be
1499   //   far_call(addr)
1500   // for real runtime callouts it will be six instructions
1501   // see aarch64_enc_java_to_runtime
1502   //   adr(rscratch2, retaddr)
1503   //   lea(rscratch1, RuntimeAddress(addr)
1504   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
1505   //   blrt rscratch1
1506   CodeBlob *cb = CodeCache::find_blob(_entry_point);
1507   if (cb) {
1508     return MacroAssembler::far_branch_size();
1509   } else {
1510     return 6 * NativeInstruction::instruction_size;
1511   }
1512 }
1513 
1514 // Indicate if the safepoint node needs the polling page as an input
1515 
1516 // the shared code plants the oop data at the start of the generated
// code for the safepoint node and that needs to be at the load
1518 // instruction itself. so we cannot plant a mov of the safepoint poll
1519 // address followed by a load. setting this to true means the mov is
1520 // scheduled as a prior instruction. that's better for scheduling
1521 // anyway.
1522 
bool SafePointNode::needs_polling_address_input()
{
  // see the comment above: the poll address must be materialised by a
  // separate, independently schedulable mov rather than fused with
  // the poll load
  return true;
}
1527 
1528 //=============================================================================
1529 
#ifndef PRODUCT
// Print the breakpoint pseudo-instruction in debug listings.
void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  st->print("BREAKPOINT");
}
#endif
1535 
// A breakpoint is emitted as a single brk instruction.
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);
  __ brk(0);
}
1540 
uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
  // let the generic machinery compute the size from the emitted code
  return MachNode::size(ra_);
}
1544 
1545 //=============================================================================
1546 
#ifndef PRODUCT
  // Print the nop-padding pseudo-instruction and its byte count.
  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("nop \t# %d bytes pad for loops and calls", _count);
  }
#endif
1552 
1553   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
1554     MacroAssembler _masm(&cbuf);
1555     for (int i = 0; i < _count; i++) {
1556       __ nop();
1557     }
1558   }
1559 
  uint MachNopNode::size(PhaseRegAlloc*) const {
    // each nop is exactly one machine instruction
    return _count * NativeInstruction::instruction_size;
  }
1563 
1564 //=============================================================================
1565 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
1566 
// Constants are addressed absolutely, so the table base needs no offset.
int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}
1570 
// No post-register-allocation expansion is needed for the constant
// table base node on this platform.
bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  // must never be called since requires_postalloc_expand() is false
  ShouldNotReachHere();
}
1575 
void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

// The node emits nothing, so it occupies no code space.
uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}
1583 
#ifndef PRODUCT
// Debug listing for the (empty) constant table base node.
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("-- \t// MachConstantBaseNode (empty encoding)");
}
#endif
1589 
#ifndef PRODUCT
// Print an assembly-level rendering of the prolog that emit() below
// generates; the two branches mirror the small-frame and large-frame
// strategies -- TODO confirm this exactly tracks build_frame().
void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_slots() << LogBytesPerInt;

  if (C->need_stack_bang(framesize))
    st->print("# stack bang size=%d\n\t", framesize);

  // small frames: allocate with one sub, then save rfp/lr at the top
  if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("sub  sp, sp, #%d\n\t", framesize);
    st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
    if (PreserveFramePointer) st->print("\n\tadd  rfp, sp, #%d", framesize - 2 * wordSize);
  } else {
    // large frames: push lr/rfp first, then drop sp via a scratch register
    st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
    if (PreserveFramePointer) st->print("mov  rfp, sp\n\t");
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("sub  sp, sp, rscratch1");
  }
}
#endif
1611 
// Emit the method prolog: patchable nop, optional stack bang, frame
// build, optional simulator notification, and finally record the
// frame-complete offset and constant table base offset.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  // n.b. frame size includes space for return pc and rfp
  const long framesize = C->frame_size_in_bytes();
  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  int bangsize = C->bang_size_in_bytes();
  if (C->need_stack_bang(bangsize) && UseStackBanging)
    __ generate_stack_overflow_check(bangsize);

  __ build_frame(framesize);

  if (NotifySimulator) {
    __ notify(Assembler::method_entry);
  }

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
1647 
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

// the prolog itself requires no relocation entries
int MachPrologNode::reloc() const
{
  return 0;
}
1658 
1659 //=============================================================================
1660 
#ifndef PRODUCT
// Print an assembly-level rendering of the epilog emitted below:
// restore lr/rfp, pop the frame and, for method compilations, touch
// the safepoint polling page.
void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;
  int framesize = C->frame_slots() << LogBytesPerInt;

  st->print("# pop frame %d\n\t",framesize);

  if (framesize == 0) {
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, #%d\n\t", framesize);
  } else {
    // large frames need a scratch register to carry the adjustment
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, rscratch1\n\t");
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  }

  if (do_polling() && C->is_method_compilation()) {
    st->print("# touch polling page\n\t");
    st->print("mov  rscratch1, #0x%lx\n\t", p2i(os::get_polling_page()));
    st->print("ldr zr, [rscratch1]");
  }
}
#endif
1686 
// Emit the method epilog: pop the frame, notify the simulator if
// required, check the reserved stack zone and, for method
// compilations, read the polling page to allow a safepoint trap.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);
  int framesize = C->frame_slots() << LogBytesPerInt;

  __ remove_frame(framesize);

  if (NotifySimulator) {
    __ notify(Assembler::method_reentry);
  }

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
  }
}
1706 
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  // Variable size. Determine dynamically.
  return MachNode::size(ra_);
}

int MachEpilogNode::reloc() const {
  // Return number of relocatable values contained in this instruction.
  return 1; // 1 for polling page.
}

// Use the default pipeline description for the epilog.
const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

// This method seems to be obsolete. It is declared in machnode.hpp
// and defined in all *.ad files, but it is never called. Should we
// get rid of it?
int MachEpilogNode::safepoint_offset() const {
  assert(do_polling(), "no return for this epilog node");
  return 4;
}
1728 
1729 //=============================================================================
1730 
// Figure out which register class each belongs in: rc_int, rc_float or
// rc_stack.
enum RC { rc_bad, rc_int, rc_float, rc_stack };

// Map an allocator register name to its coarse register class.
static enum RC rc_class(OptoReg::Name reg) {

  if (reg == OptoReg::Bad) {
    return rc_bad;
  }

  // we have 30 int registers * 2 halves
  // (rscratch1 and rscratch2 are omitted)

  if (reg < 60) {
    return rc_int;
  }

  // we have 32 float registers, each occupying 4 allocator slots
  // (base plus _H/_J/_K, cf. vectorx_reg), hence the range of 128
  if (reg < 60 + 128) {
    return rc_float;
  }

  // Between float regs & stack is the flags regs.
  assert(OptoReg::is_stack(reg), "blow up if spilling flags");

  return rc_stack;
}
1758 
1759 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
1760   Compile* C = ra_->C;
1761 
1762   // Get registers to move.
1763   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
1764   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
1765   OptoReg::Name dst_hi = ra_->get_reg_second(this);
1766   OptoReg::Name dst_lo = ra_->get_reg_first(this);
1767 
1768   enum RC src_hi_rc = rc_class(src_hi);
1769   enum RC src_lo_rc = rc_class(src_lo);
1770   enum RC dst_hi_rc = rc_class(dst_hi);
1771   enum RC dst_lo_rc = rc_class(dst_lo);
1772 
1773   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
1774 
1775   if (src_hi != OptoReg::Bad) {
1776     assert((src_lo&1)==0 && src_lo+1==src_hi &&
1777            (dst_lo&1)==0 && dst_lo+1==dst_hi,
1778            "expected aligned-adjacent pairs");
1779   }
1780 
1781   if (src_lo == dst_lo && src_hi == dst_hi) {
1782     return 0;            // Self copy, no move.
1783   }
1784 
1785   bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
1786               (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
1787   int src_offset = ra_->reg2offset(src_lo);
1788   int dst_offset = ra_->reg2offset(dst_lo);
1789 
1790   if (bottom_type()->isa_vect() != NULL) {
1791     uint ireg = ideal_reg();
1792     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
1793     if (cbuf) {
1794       MacroAssembler _masm(cbuf);
1795       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
1796       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
1797         // stack->stack
1798         assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
1799         if (ireg == Op_VecD) {
1800           __ unspill(rscratch1, true, src_offset);
1801           __ spill(rscratch1, true, dst_offset);
1802         } else {
1803           __ spill_copy128(src_offset, dst_offset);
1804         }
1805       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
1806         __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1807                ireg == Op_VecD ? __ T8B : __ T16B,
1808                as_FloatRegister(Matcher::_regEncode[src_lo]));
1809       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
1810         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
1811                        ireg == Op_VecD ? __ D : __ Q,
1812                        ra_->reg2offset(dst_lo));
1813       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
1814         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1815                        ireg == Op_VecD ? __ D : __ Q,
1816                        ra_->reg2offset(src_lo));
1817       } else {
1818         ShouldNotReachHere();
1819       }
1820     }
1821   } else if (cbuf) {
1822     MacroAssembler _masm(cbuf);
1823     switch (src_lo_rc) {
1824     case rc_int:
1825       if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
1826         if (is64) {
1827             __ mov(as_Register(Matcher::_regEncode[dst_lo]),
1828                    as_Register(Matcher::_regEncode[src_lo]));
1829         } else {
1830             MacroAssembler _masm(cbuf);
1831             __ movw(as_Register(Matcher::_regEncode[dst_lo]),
1832                     as_Register(Matcher::_regEncode[src_lo]));
1833         }
1834       } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
1835         if (is64) {
1836             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1837                      as_Register(Matcher::_regEncode[src_lo]));
1838         } else {
1839             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1840                      as_Register(Matcher::_regEncode[src_lo]));
1841         }
1842       } else {                    // gpr --> stack spill
1843         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1844         __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
1845       }
1846       break;
1847     case rc_float:
1848       if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
1849         if (is64) {
1850             __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
1851                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1852         } else {
1853             __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
1854                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1855         }
1856       } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
1857           if (cbuf) {
1858             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1859                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1860         } else {
1861             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1862                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1863         }
1864       } else {                    // fpr --> stack spill
1865         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1866         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
1867                  is64 ? __ D : __ S, dst_offset);
1868       }
1869       break;
1870     case rc_stack:
1871       if (dst_lo_rc == rc_int) {  // stack --> gpr load
1872         __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
1873       } else if (dst_lo_rc == rc_float) { // stack --> fpr load
1874         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1875                    is64 ? __ D : __ S, src_offset);
1876       } else {                    // stack --> stack copy
1877         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1878         __ unspill(rscratch1, is64, src_offset);
1879         __ spill(rscratch1, is64, dst_offset);
1880       }
1881       break;
1882     default:
1883       assert(false, "bad rc_class for spill");
1884       ShouldNotReachHere();
1885     }
1886   }
1887 
1888   if (st) {
1889     st->print("spill ");
1890     if (src_lo_rc == rc_stack) {
1891       st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
1892     } else {
1893       st->print("%s -> ", Matcher::regName[src_lo]);
1894     }
1895     if (dst_lo_rc == rc_stack) {
1896       st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
1897     } else {
1898       st->print("%s", Matcher::regName[dst_lo]);
1899     }
1900     if (bottom_type()->isa_vect() != NULL) {
1901       st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
1902     } else {
1903       st->print("\t# spill size = %d", is64 ? 64:32);
1904     }
1905   }
1906 
1907   return 0;
1908 
1909 }
1910 
1911 #ifndef PRODUCT
1912 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1913   if (!ra_)
1914     st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
1915   else
1916     implementation(NULL, ra_, false, st);
1917 }
1918 #endif
1919 
// Emit the spill-copy instructions into the code buffer (no textual output).
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation(&cbuf, ra_, false, NULL);
}

// Size of this spill copy, computed generically from the emitted code.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
1927 
1928 //=============================================================================
1929 
1930 #ifndef PRODUCT
1931 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1932   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1933   int reg = ra_->get_reg_first(this);
1934   st->print("add %s, rsp, #%d]\t# box lock",
1935             Matcher::regName[reg], offset);
1936 }
1937 #endif
1938 
1939 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1940   MacroAssembler _masm(&cbuf);
1941 
1942   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1943   int reg    = ra_->get_encode(this);
1944 
1945   if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
1946     __ add(as_Register(reg), sp, offset);
1947   } else {
1948     ShouldNotReachHere();
1949   }
1950 }
1951 
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
  // emit() above always produces exactly one 4-byte add instruction.
  return 4;
}
1956 
1957 //=============================================================================
#ifndef PRODUCT
// Debug-only pretty printer for the value-type verified entry point node.
void MachVVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("MachVVEPNode");
}
#endif

void MachVVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  // Unpack all value type args passed as oop and then jump to
  // the verified entry point (skipping the unverified entry).
  MacroAssembler _masm(&cbuf);

  __ unpack_value_args(ra_->C);
  __ b(*_verified_entry);
}

uint MachVVEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}
1979 
1980 
1981 //=============================================================================
1982 
#ifndef PRODUCT
// Debug-only pretty printer for the unverified entry point (inline cache
// check).  Mirrors the instruction sequence produced by emit() below.
void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("# MachUEPNode");
  if (UseCompressedClassPointers) {
    st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
    if (Universe::narrow_klass_shift() != 0) {
      st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
    }
  } else {
   // NOTE(review): this branch is the UNcompressed case, yet the printed
   // text still says "compressed klass" — the string is runtime output and
   // left unchanged here; confirm against upstream before editing.
   st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
  }
  st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
  st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
}
#endif

void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  // This is the unverified entry point.
  MacroAssembler _masm(&cbuf);

  // Compare the receiver's klass (j_rarg0) against the expected klass
  // (rscratch1); on mismatch, jump to the inline-cache miss stub.
  __ cmp_klass(j_rarg0, rscratch2, rscratch1);
  Label skip;
  // TODO
  // can we avoid this skip and still use a reloc?
  __ br(Assembler::EQ, skip);
  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  __ bind(skip);
}

uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_);
}
2018 
2019 // REQUIRED EMIT CODE
2020 
2021 //=============================================================================
2022 
// Emit exception handler code.  Returns the offset of the handler within
// the stub section, or 0 if stub allocation failed.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
{
  // mov rscratch1 #exception_blob_entry_point
  // br rscratch1
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    // Stub space exhausted: record the failure so compilation bails out.
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}

// Emit deopt handler code.  Returns the offset of the handler within the
// stub section, or 0 if stub allocation failed.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
{
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

  // Materialize this handler's address in lr, then jump to the deopt
  // blob's unpack entry.
  __ adr(lr, __ pc());
  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));

  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
2063 
2064 // REQUIRED MATCHER CODE
2065 
2066 //=============================================================================
2067 
2068 const bool Matcher::match_rule_supported(int opcode) {
2069 
2070   switch (opcode) {
2071   default:
2072     break;
2073   }
2074 
2075   if (!has_match_rule(opcode)) {
2076     return false;
2077   }
2078 
2079   return true;  // Per default match rules are supported.
2080 }
2081 
2082 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
2083 
2084   // TODO
2085   // identify extra cases that we might want to provide match rules for
2086   // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
2087   bool ret_value = match_rule_supported(opcode);
2088   // Add rules here.
2089 
2090   return ret_value;  // Per default match rules are supported.
2091 }
2092 
// Predicated (masked) vector operations are not supported by this port.
const bool Matcher::has_predicated_vectors(void) {
  return false;
}

// Use the caller-supplied float register-pressure threshold unchanged.
const int Matcher::float_pressure(int default_pressure_threshold) {
  return default_pressure_threshold;
}

// Not applicable on AArch64; calling this is a fatal error (Unimplemented).
int Matcher::regnum_to_fpu_offset(int regnum)
{
  Unimplemented();
  return 0;
}
2106 
2107 // Is this branch offset short enough that a short branch can be used?
2108 //
2109 // NOTE: If the platform does not provide any short branch variants, then
2110 //       this method should return false for offset 0.
2111 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
2112   // The passed offset is relative to address of the branch.
2113 
2114   return (-32768 <= offset && offset < 32768);
2115 }
2116 
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  // Probably always true, even if a temp register is required.
  return true;
}

// true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}
2127 
2128 // Vector width in bytes.
2129 const int Matcher::vector_width_in_bytes(BasicType bt) {
2130   int size = MIN2(16,(int)MaxVectorSize);
2131   // Minimum 2 values in vector
2132   if (size < 2*type2aelembytes(bt)) size = 0;
2133   // But never < 4
2134   if (size < 4) size = 0;
2135   return size;
2136 }
2137 
// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
2142 const int Matcher::min_vector_size(const BasicType bt) {
2143 //  For the moment limit the vector size to 8 bytes
2144     int size = 8 / type2aelembytes(bt);
2145     if (size < 2) size = 2;
2146     return size;
2147 }
2148 
2149 // Vector ideal reg.
2150 const uint Matcher::vector_ideal_reg(int len) {
2151   switch(len) {
2152     case  8: return Op_VecD;
2153     case 16: return Op_VecX;
2154   }
2155   ShouldNotReachHere();
2156   return 0;
2157 }
2158 
2159 const uint Matcher::vector_shift_count_ideal_reg(int size) {
2160   switch(size) {
2161     case  8: return Op_VecD;
2162     case 16: return Op_VecX;
2163   }
2164   ShouldNotReachHere();
2165   return 0;
2166 }
2167 
// AES support not yet implemented
const bool Matcher::pass_original_key_for_aes() {
  return false;
}

// Misaligned vector loads/stores are allowed unless AlignVector is set.
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}

// false => size gets scaled to BytesPerLong, ok.
const bool Matcher::init_array_count_is_in_bytes = false;

// Use conditional move (CMOVL)
const int Matcher::long_cmove_cost() {
  // long cmoves are no more expensive than int cmoves
  return 0;
}

const int Matcher::float_cmove_cost() {
  // float cmoves are no more expensive than int cmoves
  return 0;
}

// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;
2198 
// This affects two different things:
//  - how Decode nodes are matched
//  - how ImplicitNullCheck opportunities are recognized
// If true, the matcher will try to remove all Decodes and match them
// (as operands) into nodes. NullChecks are not prepared to deal with
// Decodes by final_graph_reshaping().
// If false, final_graph_reshaping() forces the decode behind the Cmp
// for a NullCheck. The matcher matches the Decode node into a register.
// Implicit_null_check optimization moves the Decode along with the
// memory operation back up before the NullCheck.
bool Matcher::narrow_oop_use_complex_address() {
  // Complex addressing only works with an unshifted narrow oop.
  return Universe::narrow_oop_shift() == 0;
}

bool Matcher::narrow_klass_use_complex_address() {
// TODO
// decide whether we need to set this to true
  return false;
}

bool Matcher::const_oop_prefer_decode() {
  // Prefer ConN+DecodeN over ConP in simple compressed oops mode.
  return Universe::narrow_oop_base() == NULL;
}

bool Matcher::const_klass_prefer_decode() {
  // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode.
  return Universe::narrow_klass_base() == NULL;
}
2228 
// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
const bool Matcher::rematerialize_float_constants = false;

// If CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;

// Unused on AArch64; reaching here is a fatal error (Unimplemented).
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  Unimplemented();
}

// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Are floats converted to double when stored to stack during
// deoptimization?
bool Matcher::float_in_double() { return false; }

// Do ints take an entire long register or just half?
// The relevant question is how the int is callee-saved:
// the whole long is written but de-opt'ing will have to extract
// the relevant 32 bits.
const bool Matcher::int_in_long = true;
2260 
// Return whether or not this register is ever used as an argument.
// This function is used on startup to build the trampoline stubs in
// generateOptoStub.  Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers not be
// available to the callee.
bool Matcher::can_be_java_arg(int reg)
{
  // r0..r7 and v0..v7 (both halves of each) are the Java argument registers.
  return
    reg ==  R0_num || reg == R0_H_num ||
    reg ==  R1_num || reg == R1_H_num ||
    reg ==  R2_num || reg == R2_H_num ||
    reg ==  R3_num || reg == R3_H_num ||
    reg ==  R4_num || reg == R4_H_num ||
    reg ==  R5_num || reg == R5_H_num ||
    reg ==  R6_num || reg == R6_H_num ||
    reg ==  R7_num || reg == R7_H_num ||
    reg ==  V0_num || reg == V0_H_num ||
    reg ==  V1_num || reg == V1_H_num ||
    reg ==  V2_num || reg == V2_H_num ||
    reg ==  V3_num || reg == V3_H_num ||
    reg ==  V4_num || reg == V4_H_num ||
    reg ==  V5_num || reg == V5_H_num ||
    reg ==  V6_num || reg == V6_H_num ||
    reg ==  V7_num || reg == V7_H_num;
}

// Any Java argument register is also spillable.
bool Matcher::is_spillable_arg(int reg)
{
  return can_be_java_arg(reg);
}
2291 
// No hand-written assembler path for long division by constant.
bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
  return false;
}

// Register for DIVI projection of divmodI.
RegMask Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODI projection of divmodI.
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for DIVL projection of divmodL.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// The frame pointer register mask is used to preserve SP across a
// method-handle invoke.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return FP_REG_mask();
}
2322 
2323 bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
2324   for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
2325     Node* u = addp->fast_out(i);
2326     if (u->is_Mem()) {
2327       int opsize = u->as_Mem()->memory_size();
2328       assert(opsize > 0, "unexpected memory operand size");
2329       if (u->as_Mem()->memory_size() != (1<<shift)) {
2330         return false;
2331       }
2332     }
2333   }
2334   return true;
2335 }
2336 
const bool Matcher::convi2l_type_required = false;

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  // Simple base+offset addresses are handled by the shared helper.
  if (clone_base_plus_offset_address(m, mstack, address_visited)) {
    return true;
  }

  // Otherwise look for (AddP base (LShiftL (ConvI2L x) con)) or
  // (AddP base (ConvI2L x)) shapes that can fold into a scaled/extended
  // addressing mode, provided the sub-expressions have no other users.
  Node *off = m->in(AddPNode::Offset);
  if (off->Opcode() == Op_LShiftL && off->in(2)->is_Con() &&
      size_fits_all_mem_uses(m, off->in(2)->get_int()) &&
      // Are there other uses besides address expressions?
      !is_visited(off)) {
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(2), Visit);
    Node *conv = off->in(1);
    if (conv->Opcode() == Op_ConvI2L &&
        // Are there other uses besides address expressions?
        !is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Pre_Visit);
    } else {
      mstack.push(conv, Pre_Visit);
    }
    // NOTE(review): test_set here vs plain set above — both set the bit;
    // the returned previous value is ignored. Confirm intentional.
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  } else if (off->Opcode() == Op_ConvI2L &&
             // Are there other uses besides address expressions?
             !is_visited(off)) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(1), Pre_Visit);
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}

// No platform-specific AddP reshaping is done on AArch64.
void Compile::reshape_address(AddPNode* addp) {
}
2382 
// helper for encoding java_to_runtime calls on sim
//
// this is needed to compute the extra arguments required when
// planting a call to the simulator blrt instruction. the TypeFunc
// can be queried to identify the counts for integral, and floating
// arguments and the return type

// Count the integral (gpcnt) and floating (fpcnt) arguments of 'tf' and
// classify its return type into 'rtype' for the simulator call encoding.
static void getCallInfo(const TypeFunc *tf, int &gpcnt, int &fpcnt, int &rtype)
{
  int gps = 0;
  int fps = 0;
  const TypeTuple *domain = tf->domain_cc();
  int max = domain->cnt();
  for (int i = TypeFunc::Parms; i < max; i++) {
    const Type *t = domain->field_at(i);
    switch(t->basic_type()) {
    case T_FLOAT:
    case T_DOUBLE:
      fps++;
      // NOTE(review): no break here, so float/double arguments fall through
      // and also increment gps — confirm this over-count is intended for
      // the simulator's blrt encoding.
    default:
      gps++;
    }
  }
  gpcnt = gps;
  fpcnt = fps;
  BasicType rt = tf->return_type();
  switch (rt) {
  case T_VOID:
    rtype = MacroAssembler::ret_type_void;
    break;
  default:
    // Anything not handled explicitly below is returned in a gp register.
    rtype = MacroAssembler::ret_type_integral;
    break;
  case T_FLOAT:
    rtype = MacroAssembler::ret_type_float;
    break;
  case T_DOUBLE:
    rtype = MacroAssembler::ret_type_double;
    break;
  }
}

// Helper for volatile accesses: the addressing mode must be a bare base
// register (no index, scale or displacement); INSN is the access to emit.
#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
  MacroAssembler _masm(&cbuf);                                          \
  {                                                                     \
    guarantee(INDEX == -1, "mode not permitted for volatile");          \
    guarantee(DISP == 0, "mode not permitted for volatile");            \
    guarantee(SCALE == 0, "mode not permitted for volatile");           \
    __ INSN(REG, as_Register(BASE));                                    \
  }

// Member-function-pointer types for the loadStore() helpers below.
typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
                                  MacroAssembler::SIMD_RegVariant T, const Address &adr);
2438 
  // Used for all non-volatile memory accesses.  The use of
  // $mem->opcode() to discover whether this pattern uses sign-extended
  // offsets is something of a kludge.
  static void loadStore(MacroAssembler masm, mem_insn insn,
                         Register reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    // Hooboy, this is fugly.  We need a way to communicate to the
    // encoder that the index needs to be sign extended, so we have to
    // enumerate all the cases.
    switch (opcode) {
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDI2LN:
    case INDINDEXI2L:
    case INDINDEXI2LN:
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

    if (index == -1) {
      // No index register: plain base + displacement addressing.
      (masm.*insn)(reg, Address(base, disp));
    } else {
      assert(disp == 0, "unsupported address mode: disp = %d", disp);
      (masm.*insn)(reg, Address(base, as_Register(index), scale));
    }
  }

  // Float-register variant of the helper above.
  // NOTE(review): unlike the integer variant, only the SCALED I2L opcodes
  // select sxtw here (no INDINDEXI2L/INDINDEXI2LN cases) — presumably those
  // patterns never arise for float accesses; confirm against the AD rules.
  static void loadStore(MacroAssembler masm, mem_float_insn insn,
                         FloatRegister reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    switch (opcode) {
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDI2LN:
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

     if (index == -1) {
      (masm.*insn)(reg, Address(base, disp));
    } else {
      assert(disp == 0, "unsupported address mode: disp = %d", disp);
      (masm.*insn)(reg, Address(base, as_Register(index), scale));
    }
  }
2492 
2493   static void loadStore(MacroAssembler masm, mem_vector_insn insn,
2494                          FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
2495                          int opcode, Register base, int index, int size, int disp)
2496   {
2497     if (index == -1) {
2498       (masm.*insn)(reg, T, Address(base, disp));
2499     } else {
2500       assert(disp == 0, "unsupported address mode");
2501       (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
2502     }
2503   }
2504 
2505 %}
2506 
2507 
2508 
2509 //----------ENCODING BLOCK-----------------------------------------------------
2510 // This block specifies the encoding classes used by the compiler to
2511 // output byte streams.  Encoding classes are parameterized macros
2512 // used by Machine Instruction Nodes in order to generate the bit
2513 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are currently
// supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
2517 // which returns its register number when queried.  CONST_INTER causes
2518 // an operand to generate a function which returns the value of the
2519 // constant when queried.  MEMORY_INTER causes an operand to generate
2520 // four functions which return the Base Register, the Index Register,
2521 // the Scale Value, and the Offset Value of the operand when queried.
2522 // COND_INTER causes an operand to generate six functions which return
2523 // the encoding code (ie - encoding bits for the instruction)
2524 // associated with each basic boolean condition for a conditional
2525 // instruction.
2526 //
2527 // Instructions specify two basic values for encoding.  Again, a
2528 // function is available to check if the constant displacement is an
2529 // oop. They use the ins_encode keyword to specify their encoding
2530 // classes (which must be a sequence of enc_class names, and their
2531 // parameters, specified in the encoding block), and they use the
2532 // opcode keyword to specify, in order, their primary, secondary, and
2533 // tertiary opcode.  Only the opcode sections which a particular
2534 // instruction needs for encoding need to be specified.
2535 encode %{
2536   // Build emit functions for each basic byte or larger field in the
2537   // intel encoding scheme (opcode, rm, sib, immediate), and call them
2538   // from C++ code in the enc_class source block.  Emit functions will
2539   // live in the main source block for now.  In future, we can
2540   // generalize this by adding a syntax that specifies the sizes of
2541   // fields in an order, so that the adlc can build the emit functions
2542   // automagically
2543 
  // catch all for unimplemented encodings: emits code that reports
  // "C2 catch all" if it is ever executed.
  enc_class enc_unimplemented %{
    MacroAssembler _masm(&cbuf);
    __ unimplemented("C2 catch all");
  %}
2549 
  // BEGIN Non-volatile memory access
  //
  // Each enc_class below emits one load via the loadStore() helper, which
  // picks the addressing mode from the memory operand's components.
  // The iRegL-destination variants use the same instruction as their
  // iRegI counterparts but target a long result.

  enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Float and double loads into FP registers.
  enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2635 
2636   enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
2637     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
2638     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
2639        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2640   %}
2641 
2642   enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
2643     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
2644     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
2645        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2646   %}
2647 
2648   enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
2649     FloatRegister dst_reg = as_FloatRegister($dst$$reg);
2650     loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
2651        $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
2652   %}
2653 
  // Non-volatile memory stores, mirroring the load encodings above.
  // The *0 variants store the zero register (zr) so no source register
  // needs to be allocated for storing a constant zero.

  // Byte store from an int register (strb).
  enc_class aarch64_enc_strb(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store a zero byte (uses zr, no source register needed).
  enc_class aarch64_enc_strb0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Zero-byte store preceded by a StoreStore barrier so prior stores
  // are ordered before this one.
  enc_class aarch64_enc_strb0_ordered(memory mem) %{
    MacroAssembler _masm(&cbuf);
    __ membar(Assembler::StoreStore);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Halfword store (strh).
  enc_class aarch64_enc_strh(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store a zero halfword.
  enc_class aarch64_enc_strh0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Word store (strw).
  enc_class aarch64_enc_strw(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store a zero word.
  enc_class aarch64_enc_strw0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Doubleword store (str).  The stack pointer cannot be the source of
  // a plain str on AArch64, so when asked to store sp (only expected
  // for saving into the current thread) it is first copied to rscratch2.
  enc_class aarch64_enc_str(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    if (src_reg == r31_sp) {
      MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store a zero doubleword.
  enc_class aarch64_enc_str0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Single-precision float store (strs).
  enc_class aarch64_enc_strs(vRegF src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Double-precision float store (strd).
  enc_class aarch64_enc_strd(vRegD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector stores; MacroAssembler::S/D/Q selects the SIMD operand size.
  enc_class aarch64_enc_strvS(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvD(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2746 
2747   // END Non-volatile memory access
2748 
2749   // volatile loads and stores
2750 
  // Volatile accesses go through the MOV_VOLATILE macro (defined earlier
  // in this file, not visible in this chunk).  It takes the decomposed
  // address components, a scratch register for address formation, and the
  // acquire/release instruction mnemonic to emit.

  // Release-store of a byte (stlrb).
  enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrb);
  %}

  // Release-store of a halfword (stlrh).
  enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrh);
  %}

  // Release-store of a word (stlrw).
  enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}


  // Acquire-load of a byte, then sign-extend in the destination.
  // ldarb has no sign-extending form, hence the explicit sxtbw/sxtb.
  // NOTE(review): the __ here relies on the assembler set up inside
  // MOV_VOLATILE remaining in scope -- see the macro's definition.
  enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtbw(dst_reg, dst_reg);
  %}

  // Acquire-load of a byte, sign-extended to 64 bits.
  enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtb(dst_reg, dst_reg);
  %}

  // Acquire-load of a byte, zero-extended (ldarb), int result.
  enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  // Acquire-load of a byte, zero-extended (ldarb), long result.
  enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  // Acquire-load of a halfword, then sign-extend to 32 bits.
  enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxthw(dst_reg, dst_reg);
  %}

  // Acquire-load of a halfword, then sign-extend to 64 bits.
  enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxth(dst_reg, dst_reg);
  %}

  // Acquire-load of a halfword, zero-extended (ldarh), int result.
  enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  // Acquire-load of a halfword, zero-extended (ldarh), long result.
  enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  // Acquire-load of a word (ldarw), int result.
  enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  // Acquire-load of a word (ldarw), long result.
  enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  // Acquire-load of a doubleword (ldar).
  enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
  %}

  // Acquire-load of a float: no FP form of ldar, so load the bits into
  // rscratch1 with ldarw and transfer them with fmovs.
  enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
    __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
  %}

  // Acquire-load of a double via ldar + fmovd, same technique as above.
  enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
    __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
  %}
2841 
2842   enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
2843     Register src_reg = as_Register($src$$reg);
2844     // we sometimes get asked to store the stack pointer into the
2845     // current thread -- we cannot do that directly on AArch64
2846     if (src_reg == r31_sp) {
2847         MacroAssembler _masm(&cbuf);
2848       assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
2849       __ mov(rscratch2, sp);
2850       src_reg = rscratch2;
2851     }
2852     MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
2853                  rscratch1, stlr);
2854   %}
2855 
  // Release-store of a float: there is no FP form of stlrw, so the bits
  // are first transferred to rscratch2 with fmovs, then released with
  // stlrw.  The inner block scopes the local _masm so it cannot clash
  // with the assembler that MOV_VOLATILE sets up.
  enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovs(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}

  // Release-store of a double via fmovd + stlr, same technique as above.
  enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovd(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}
2875 
2876   // synchronized read/update encodings
2877 
2878   enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
2879     MacroAssembler _masm(&cbuf);
2880     Register dst_reg = as_Register($dst$$reg);
2881     Register base = as_Register($mem$$base);
2882     int index = $mem$$index;
2883     int scale = $mem$$scale;
2884     int disp = $mem$$disp;
2885     if (index == -1) {
2886        if (disp != 0) {
2887         __ lea(rscratch1, Address(base, disp));
2888         __ ldaxr(dst_reg, rscratch1);
2889       } else {
2890         // TODO
2891         // should we ever get anything other than this case?
2892         __ ldaxr(dst_reg, base);
2893       }
2894     } else {
2895       Register index_reg = as_Register(index);
2896       if (disp == 0) {
2897         __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
2898         __ ldaxr(dst_reg, rscratch1);
2899       } else {
2900         __ lea(rscratch1, Address(base, disp));
2901         __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
2902         __ ldaxr(dst_reg, rscratch1);
2903       }
2904     }
2905   %}
2906 
2907   enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
2908     MacroAssembler _masm(&cbuf);
2909     Register src_reg = as_Register($src$$reg);
2910     Register base = as_Register($mem$$base);
2911     int index = $mem$$index;
2912     int scale = $mem$$scale;
2913     int disp = $mem$$disp;
2914     if (index == -1) {
2915        if (disp != 0) {
2916         __ lea(rscratch2, Address(base, disp));
2917         __ stlxr(rscratch1, src_reg, rscratch2);
2918       } else {
2919         // TODO
2920         // should we ever get anything other than this case?
2921         __ stlxr(rscratch1, src_reg, base);
2922       }
2923     } else {
2924       Register index_reg = as_Register(index);
2925       if (disp == 0) {
2926         __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
2927         __ stlxr(rscratch1, src_reg, rscratch2);
2928       } else {
2929         __ lea(rscratch2, Address(base, disp));
2930         __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
2931         __ stlxr(rscratch1, src_reg, rscratch2);
2932       }
2933     }
2934     __ cmpw(rscratch1, zr);
2935   %}
2936 
  // Compare-and-exchange encodings.  All delegate to
  // MacroAssembler::cmpxchg with the operand size as the only variation;
  // addressing must be a bare base register (guaranteed below).  These
  // plain variants use release semantics only (acquire == false).

  // 64-bit CAS (xword).
  enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 32-bit CAS (word).
  enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 16-bit CAS (halfword).
  enc_class aarch64_enc_cmpxchgs(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 8-bit CAS (byte).
  enc_class aarch64_enc_cmpxchgb(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}


  // The only difference between aarch64_enc_cmpxchg and
  // aarch64_enc_cmpxchg_acq is that we use load-acquire in the
  // CompareAndSwap sequence to serve as a barrier on acquiring a
  // lock.
  enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 32-bit acquiring CAS.
  enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 16-bit acquiring CAS.
  enc_class aarch64_enc_cmpxchgs_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 8-bit acquiring CAS.
  enc_class aarch64_enc_cmpxchgb_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // auxiliary used for CompareAndSwapX to set result register
  // Sets res to 1 if the preceding comparison left EQ, else 0.
  enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
    MacroAssembler _masm(&cbuf);
    Register res_reg = as_Register($res$$reg);
    __ cset(res_reg, Assembler::EQ);
  %}
3012 
3013   // prefetch encodings
3014 
  // Prefetch for write: emits prfm PSTL1KEEP on the effective address.
  // When both a displacement and an index are present the displacement
  // is folded into rscratch1 first, since prfm's addressing mode cannot
  // express disp + scaled index at once.
  enc_class aarch64_enc_prefetchw(memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      __ prfm(Address(base, disp), PSTL1KEEP);
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
      } else {
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
      }
    }
  %}
3033 
  /// mov encodings
3035 
  // Move a 32-bit immediate into an int register; zero goes through zr.
  enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
    MacroAssembler _masm(&cbuf);
    u_int32_t con = (u_int32_t)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    if (con == 0) {
      __ movw(dst_reg, zr);
    } else {
      __ movw(dst_reg, con);
    }
  %}

  // Move a 64-bit immediate into a long register; zero goes through zr.
  enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    u_int64_t con = (u_int64_t)$src$$constant;
    if (con == 0) {
      __ mov(dst_reg, zr);
    } else {
      __ mov(dst_reg, con);
    }
  %}

  // Move a pointer constant.  Oops and metadata are materialized with
  // their relocations; otherwise small values (below the VM page size)
  // are moved directly and anything else uses adrp + add.
  // NULL and (address)1 are handled by separate encodings and must not
  // reach here.
  enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL || con == (address)1) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      if (rtype == relocInfo::oop_type) {
        __ movoop(dst_reg, (jobject)con, /*immediate*/true);
      } else if (rtype == relocInfo::metadata_type) {
        __ mov_metadata(dst_reg, (Metadata*)con);
      } else {
        assert(rtype == relocInfo::none, "unexpected reloc type");
        if (con < (address)(uintptr_t)os::vm_page_size()) {
          __ mov(dst_reg, con);
        } else {
          unsigned long offset;
          __ adrp(dst_reg, con, offset);
          __ add(dst_reg, dst_reg, offset);
        }
      }
    }
  %}

  // Null pointer constant.
  enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Pointer constant one.
  enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, (u_int64_t)1);
  %}

  // Address of the safepoint polling page, with a poll relocation.
  // The page is page-aligned so adrp must leave a zero offset.
  enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
    MacroAssembler _masm(&cbuf);
    address page = (address)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    unsigned long off;
    __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
    assert(off == 0, "assumed offset == 0");
  %}

  // Card-table byte map base, via the MacroAssembler helper.
  enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
    MacroAssembler _masm(&cbuf);
    __ load_byte_map_base($dst$$Register);
  %}

  // Narrow (compressed) oop constant; must be non-null and carry an oop
  // relocation.
  enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::oop_type, "unexpected reloc type");
      __ set_narrow_oop(dst_reg, (jobject)con);
    }
  %}

  // Narrow null.
  enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Narrow (compressed) klass constant; must be non-null and carry a
  // metadata relocation.
  enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
      __ set_narrow_klass(dst_reg, (Klass *)con);
    }
  %}
3140 
3141   // arithmetic encodings
3142 
  // 32-bit add/subtract with immediate.  The instruct's $primary field
  // selects the operation (0 = add, 1 = subtract) by negating the
  // constant; a negative effective constant is then emitted as the
  // opposite operation on its magnitude.
  // NOTE(review): con = -con would overflow for INT_MIN; presumably the
  // immIAddSub operand range excludes that value -- confirm against the
  // operand definition.
  enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ subw(dst_reg, src_reg, -con);
    } else {
      __ addw(dst_reg, src_reg, con);
    }
  %}

  // 64-bit add/subtract with immediate; same scheme as above.  Note the
  // constant is still handled as a 32-bit value here.
  enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ sub(dst_reg, src_reg, -con);
    } else {
      __ add(dst_reg, src_reg, con);
    }
  %}
3170 
3171   enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
3172     MacroAssembler _masm(&cbuf);
3173    Register dst_reg = as_Register($dst$$reg);
3174    Register src1_reg = as_Register($src1$$reg);
3175    Register src2_reg = as_Register($src2$$reg);
3176     __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
3177   %}
3178 
3179   enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
3180     MacroAssembler _masm(&cbuf);
3181    Register dst_reg = as_Register($dst$$reg);
3182    Register src1_reg = as_Register($src1$$reg);
3183    Register src2_reg = as_Register($src2$$reg);
3184     __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
3185   %}
3186 
3187   enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
3188     MacroAssembler _masm(&cbuf);
3189    Register dst_reg = as_Register($dst$$reg);
3190    Register src1_reg = as_Register($src1$$reg);
3191    Register src2_reg = as_Register($src2$$reg);
3192     __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
3193   %}
3194 
3195   enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
3196     MacroAssembler _masm(&cbuf);
3197    Register dst_reg = as_Register($dst$$reg);
3198    Register src1_reg = as_Register($src1$$reg);
3199    Register src2_reg = as_Register($src2$$reg);
3200     __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
3201   %}
3202 
3203   // compare instruction encodings
3204 
  // 32-bit register-register compare.
  enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // 32-bit compare against an add/sub-encodable immediate: emitted as a
  // flag-setting subtract (or add for a negative immediate) into zr.
  enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int32_t val = $src2$$constant;
    if (val >= 0) {
      __ subsw(zr, reg, val);
    } else {
      __ addsw(zr, reg, -val);
    }
  %}

  // 32-bit compare against an arbitrary immediate: materialize it in
  // rscratch1 first.
  enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int32_t val = (u_int32_t)$src2$$constant;
    __ movw(rscratch1, val);
    __ cmpw(reg1, rscratch1);
  %}

  // 64-bit register-register compare.
  enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // 64-bit compare against a 12-bit immediate.  val != -val detects the
  // one value whose negation is itself (Long.MIN_VALUE), which cannot be
  // negated and is loaded through rscratch1 instead.
  enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int64_t val = $src2$$constant;
    if (val >= 0) {
      __ subs(zr, reg, val);
    } else if (val != -val) {
      __ adds(zr, reg, -val);
    } else {
    // aargh, Long.MIN_VALUE is a special case
      __ orr(rscratch1, zr, (u_int64_t)val);
      __ subs(zr, reg, rscratch1);
    }
  %}

  // 64-bit compare against an arbitrary immediate via rscratch1.
  enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int64_t val = (u_int64_t)$src2$$constant;
    __ mov(rscratch1, val);
    __ cmp(reg1, rscratch1);
  %}

  // Pointer compare (full-width).
  enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // Narrow-oop compare (32-bit).
  enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // Pointer null test.
  enc_class aarch64_enc_testp(iRegP src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmp(reg, zr);
  %}

  // Narrow-oop null test.
  enc_class aarch64_enc_testn(iRegN src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmpw(reg, zr);
  %}
3286 
  // Unconditional branch to a label.
  enc_class aarch64_enc_b(label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ b(*L);
  %}

  // Conditional branch, signed condition codes.
  enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}

  // Conditional branch, unsigned condition codes (the cmpOpU operand
  // supplies the unsigned encoding; the emitted sequence is identical).
  enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}

  // Slow-path subtype check via check_klass_subtype_slow_path.  Falls
  // through on success; branches to 'miss' on failure.  When $primary
  // is set the result register is zeroed on the success path before the
  // miss label is bound.
  enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
  %{
     Register sub_reg = as_Register($sub$$reg);
     Register super_reg = as_Register($super$$reg);
     Register temp_reg = as_Register($temp$$reg);
     Register result_reg = as_Register($result$$reg);

     Label miss;
     MacroAssembler _masm(&cbuf);
     __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
     if ($primary) {
       __ mov(result_reg, zr);
     }
     __ bind(miss);
  %}
3322 
  // Static Java call.  A null _method means the target is a runtime
  // wrapper and is called with a plain runtime-call relocation; a real
  // method gets an opt-virtual or static-call relocation (by resolved
  // method index) plus a to-interpreter stub.  Either the stub emission
  // or the trampoline can fail when the code cache is full, in which
  // case the compilation is bailed out via record_failure.
  enc_class aarch64_enc_java_static_call(method meth) %{
    MacroAssembler _masm(&cbuf);

    address addr = (address)$meth$$method;
    address call;
    if (!_method) {
      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
      call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
    } else {
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      call = __ trampoline_call(Address(addr, rspec), &cbuf);

      // Emit stub for static call
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  // Dynamic (inline-cache) Java call; bails out if the code cache is
  // full.
  enc_class aarch64_enc_java_dynamic_call(method meth) %{
    MacroAssembler _masm(&cbuf);
    int method_index = resolved_method_index(cbuf);
    address call = __ ic_call((address)$meth$$method, method_index);
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  // Post-call verification hook; stack-depth checking is unimplemented
  // on this port.
  enc_class aarch64_enc_call_epilog() %{
    MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find magic cookie on stack
      __ call_Unimplemented();
    }
  %}
3367 
  // Call from compiled Java code into the VM runtime.
  enc_class aarch64_enc_java_to_runtime(method meth) %{
    MacroAssembler _masm(&cbuf);

    // some calls to generated routines (arraycopy code) are scheduled
    // by C2 as runtime calls. if so we can call them using a br (they
    // will be in a reachable segment) otherwise we have to use a blrt
    // which loads the absolute address into a register.
    address entry = (address)$meth$$method;
    CodeBlob *cb = CodeCache::find_blob(entry);
    if (cb) {
      // Target lives in the code cache: a trampoline call reaches it.
      address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
      if (call == NULL) {
        // No room for the trampoline stub; abandon the compilation.
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    } else {
      // Target is outside the code cache (a native VM entry point).
      // Argument-register counts and return type used by blrt,
      // extracted from the call's TypeFunc by getCallInfo.
      int gpcnt;
      int fpcnt;
      int rtype;
      getCallInfo(tf(), gpcnt, fpcnt, rtype);
      Label retaddr;
      __ adr(rscratch2, retaddr);
      __ lea(rscratch1, RuntimeAddress(entry));
      // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc():
      // push a (zr, return-address) pair just below sp.
      __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
      __ blrt(rscratch1, gpcnt, fpcnt, rtype);
      __ bind(retaddr);
      // Pop the breadcrumb pair pushed above.
      __ add(sp, sp, 2 * wordSize);
    }
  %}
3398 
  // Jump (not call) to the shared rethrow stub; far_jump is used
  // because the stub may be outside direct branch range.
  enc_class aarch64_enc_rethrow() %{
    MacroAssembler _masm(&cbuf);
    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
  %}
3403 
  // Method return: branch to the address held in the link register.
  enc_class aarch64_enc_ret() %{
    MacroAssembler _masm(&cbuf);
    __ ret(lr);
  %}
3408 
3409   enc_class aarch64_enc_tail_call(iRegP jump_target) %{
3410     MacroAssembler _masm(&cbuf);
3411     Register target_reg = as_Register($jump_target$$reg);
3412     __ br(target_reg);
3413   %}
3414 
3415   enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
3416     MacroAssembler _masm(&cbuf);
3417     Register target_reg = as_Register($jump_target$$reg);
3418     // exception oop should be in r0
3419     // ret addr has been popped into lr
3420     // callee expects it in r3
3421     __ mov(r3, lr);
3422     __ br(target_reg);
3423   %}
3424 
  // Emit the C2 fast path for monitorenter.
  // Inputs: object = oop to lock, box = this frame's BasicLock,
  //         tmp/tmp2 = scratch registers.
  // The result is reported in the condition flags:
  //   EQ => locked, NE => caller must take the slow path.
  enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Load markOop from object into displaced_header.
    __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
    }

    // Handle existing monitor.
    // We could use AArch64's bit test and branch here, but
    // markOopDesc does not define a bit index, just the bit value,
    // so assert in case the bit position changes.
#   define __monitor_value_log2 1
    assert(markOopDesc::monitor_value == (1 << __monitor_value_log2), "incorrect bit position");
    __ tbnz(disp_hdr, __monitor_value_log2, object_has_monitor);
#   undef __monitor_value_log2

    // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
    __ orr(disp_hdr, disp_hdr, markOopDesc::unlocked_value);

    // Initialize the box. (Must happen before we update the object mark!)
    __ str(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Compare object markOop with the (unlocked) displaced header and,
    // if equal, exchange the box address into the object markOop.
    if (UseLSE) {
      __ mov(tmp, disp_hdr);
      __ casal(Assembler::xword, tmp, box, oop);
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::EQ, cont);
    } else {
      Label retry_load;
      if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
        __ prfm(Address(oop), PSTL1STRM);
      __ bind(retry_load);
      __ ldaxr(tmp, oop);
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::NE, cas_failed);
      // use stlxr to ensure update is immediately visible
      __ stlxr(tmp, box, oop);
      __ cbzw(tmp, cont);
      __ b(retry_load);
    }

    // Formerly:
    // __ cmpxchgptr(/*oldv=*/disp_hdr,
    //               /*newv=*/box,
    //               /*addr=*/oop,
    //               /*tmp=*/tmp,
    //               cont,
    //               /*fail*/NULL);

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // If the compare-and-exchange succeeded, then we found an unlocked
    // object, have now locked it, and continue at label cont.

    __ bind(cas_failed);
    // We did not see an unlocked object so try the fast recursive case.

    // Check if the owner is self by comparing the value in the
    // markOop of object (disp_hdr) with the stack pointer.
    __ mov(rscratch1, sp);
    __ sub(disp_hdr, disp_hdr, rscratch1);
    __ mov(tmp, (address) (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
    // If all masked bits are zero, the lock owner is a frame on our
    // own stack, i.e. a recursive lock: store 0 as the displaced
    // header in the box to mark it as such.
    __ ands(tmp/*==0?*/, disp_hdr, tmp);
    __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Skip the inflated-monitor path; flags already hold the result.
    __ b(cont);

    __ bind(object_has_monitor);
    // The object's monitor m is unlocked iff m->owner == NULL,
    // otherwise m->owner may contain a thread or a stack address.
    //
    // Try to CAS m->owner from NULL to current thread.
    __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
    __ mov(disp_hdr, zr);

    if (UseLSE) {
      __ mov(rscratch1, disp_hdr);
      __ casal(Assembler::xword, rscratch1, rthread, tmp);
      // Leaves EQ if the CAS saw NULL (we acquired the monitor).
      __ cmp(rscratch1, disp_hdr);
    } else {
      Label retry_load, fail;
      if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH)) {
        __ prfm(Address(tmp), PSTL1STRM);
      }
      __ bind(retry_load);
      __ ldaxr(rscratch1, tmp);
      __ cmp(disp_hdr, rscratch1);
      __ br(Assembler::NE, fail);
      // use stlxr to ensure update is immediately visible
      __ stlxr(rscratch1, rthread, tmp);
      __ cbnzw(rscratch1, retry_load);
      __ bind(fail);
    }

    // Label next;
    // __ cmpxchgptr(/*oldv=*/disp_hdr,
    //               /*newv=*/rthread,
    //               /*addr=*/tmp,
    //               /*tmp=*/rscratch1,
    //               /*succeed*/next,
    //               /*fail*/NULL);
    // __ bind(next);

    // store a non-null value into the box.
    __ str(box, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // PPC port checks the following invariants
    // #ifdef ASSERT
    // bne(flag, cont);
    // We have acquired the monitor, check some invariants.
    // addw(/*monitor=*/tmp, tmp, -ObjectMonitor::owner_offset_in_bytes());
    // Invariant 1: _recursions should be 0.
    // assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
    // assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), tmp,
    //                        "monitor->_recursions should be 0", -1);
    // Invariant 2: OwnerIsThread shouldn't be 0.
    // assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
    //assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), tmp,
    //                           "monitor->OwnerIsThread shouldn't be 0", -1);
    // #endif

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure

  %}
3570 
  // TODO
  // reimplement this with custom cmpxchgptr code
  // which avoids some of the unnecessary branching
  //
  // Emit the C2 fast path for monitorexit.
  // Inputs: object = oop being unlocked, box = this frame's BasicLock,
  //         tmp/tmp2 = scratch registers.
  // The result is reported in the condition flags:
  //   EQ => unlocked, NE => caller must take the slow path.
  enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_exit(oop, tmp, cont);
    }

    // Find the lock address and load the displaced header from the stack.
    __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // If the displaced header is 0, we have a recursive unlock.
    __ cmp(disp_hdr, zr);
    __ br(Assembler::EQ, cont);

    // Handle existing monitor.
    __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
    __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);

    // Check if it is still a light weight lock; this is true if we
    // see the stack address of the basicLock in the markOop of the
    // object.

      if (UseLSE) {
        __ mov(tmp, box);
        __ casl(Assembler::xword, tmp, disp_hdr, oop);
        __ cmp(tmp, box);
      } else {
        Label retry_load;
        if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
          __ prfm(Address(oop), PSTL1STRM);
        __ bind(retry_load);
        __ ldxr(tmp, oop);
        __ cmp(box, tmp);
        __ br(Assembler::NE, cas_failed);
        // use stlxr to ensure update is immediately visible
        __ stlxr(tmp, disp_hdr, oop);
        __ cbzw(tmp, cont);
        __ b(retry_load);
      }

    // __ cmpxchgptr(/*compare_value=*/box,
    //               /*exchange_value=*/disp_hdr,
    //               /*where=*/oop,
    //               /*result=*/tmp,
    //               cont,
    //               /*cas_failed*/NULL);
    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    __ bind(cas_failed);

    // Handle existing monitor.
    __ b(cont);

    __ bind(object_has_monitor);
    __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
    __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
    __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
    __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
    __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
    __ cmp(rscratch1, zr);
    __ br(Assembler::NE, cont);

    // We own the monitor with no recursions; it can be released only
    // if both its EntryList and cxq are empty.
    __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
    __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
    __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
    __ cmp(rscratch1, zr); // sets NE (failure) before branching to cont
    __ cbnz(rscratch1, cont);
    // need a release store here
    __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
    __ stlr(rscratch1, tmp); // rscratch1 is zero

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
3659 
3660 %}
3661 
3662 //----------FRAME--------------------------------------------------------------
3663 // Definition of frame structure and management information.
3664 //
3665 //  S T A C K   L A Y O U T    Allocators stack-slot number
3666 //                             |   (to get allocators register number
3667 //  G  Owned by    |        |  v    add OptoReg::stack0())
3668 //  r   CALLER     |        |
3669 //  o     |        +--------+      pad to even-align allocators stack-slot
3670 //  w     V        |  pad0  |        numbers; owned by CALLER
3671 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3672 //  h     ^        |   in   |  5
3673 //        |        |  args  |  4   Holes in incoming args owned by SELF
3674 //  |     |        |        |  3
3675 //  |     |        +--------+
3676 //  V     |        | old out|      Empty on Intel, window on Sparc
3677 //        |    old |preserve|      Must be even aligned.
3678 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3679 //        |        |   in   |  3   area for Intel ret address
3680 //     Owned by    |preserve|      Empty on Sparc.
3681 //       SELF      +--------+
3682 //        |        |  pad2  |  2   pad to align old SP
3683 //        |        +--------+  1
3684 //        |        | locks  |  0
3685 //        |        +--------+----> OptoReg::stack0(), even aligned
3686 //        |        |  pad1  | 11   pad to align new SP
3687 //        |        +--------+
3688 //        |        |        | 10
3689 //        |        | spills |  9   spills
3690 //        V        |        |  8   (pad0 slot for callee)
3691 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3692 //        ^        |  out   |  7
3693 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3694 //     Owned by    +--------+
3695 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3696 //        |    new |preserve|      Must be even-aligned.
3697 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3698 //        |        |        |
3699 //
3700 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3701 //         known from SELF's arguments and the Java calling convention.
3702 //         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
3710 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3711 //         even aligned with pad0 as needed.
3712 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3713 //           (the latter is true on Intel but is it false on AArch64?)
3714 //         region 6-11 is even aligned; it may be padded out more so that
3715 //         the region from SP to FP meets the minimum stack alignment.
3716 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
3717 //         alignment.  Region 11, pad1, may be dynamically extended so that
3718 //         SP meets the minimum alignment.
3719 
frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.

  // Inline Cache Register or methodOop for I2C.
  inline_cache_reg(R12);

  // Method Oop Register when calling interpreter.
  interpreter_method_oop_reg(R12);

  // Number of stack slots consumed by locking an object
  // (2 slots of 32 bits == one 64-bit word)
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(R31);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(R29);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots. aarch64 needs two slots for
  // return address and fp.
  // TODO think this is correct but check
  in_preserve_stack_slots(4);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // TODO this may well be correct but need to check why that - 2 is there
  // ppc port uses 0 but we definitely need to allow for fixed_slots
  // which folds in the space used for monitors
  return_addr(STACK - 2 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    // TODO do we allow ideal_reg == Op_RegN???
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // lo/hi are indexed by ideal register opcode and give the low and
    // high halves of the OptoReg pair holding the return value.
    static const int lo[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      R0_num,                            // Op_RegN
      R0_num,                            // Op_RegI
      R0_num,                            // Op_RegP
      V0_num,                            // Op_RegF
      V0_num,                            // Op_RegD
      R0_num                             // Op_RegL
    };

    static const int hi[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      OptoReg::Bad,                       // Op_RegN
      OptoReg::Bad,                      // Op_RegI
      R0_H_num,                          // Op_RegP
      OptoReg::Bad,                      // Op_RegF
      V0_H_num,                          // Op_RegD
      R0_H_num                           // Op_RegL
    };

    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
3823 
//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(1);        // Required cost attribute (default per-operand cost)

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(INSN_COST); // Required cost attribute (default per-instruction cost)
ins_attrib ins_size(32);        // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(4);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction
3841 
3842 //----------OPERANDS-----------------------------------------------------------
3843 // Operand definitions must precede instruction definitions for correct parsing
3844 // in the ADLC because operands constitute user defined types which are used in
3845 // instruction definitions.
3846 
3847 //----------Simple Operands----------------------------------------------------
3848 
// Integer operands 32 bit
// 32 bit immediate
operand immI()
%{
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit zero
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit increment
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit decrement
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift values for add/sub extension shift (valid range 0..4)
operand immIExt()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3903 
// 32 bit constant no greater than 4 (note: no lower bound)
operand immI_le_4()
%{
  predicate(n->get_int() <= 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 31
operand immI_31()
%{
  predicate(n->get_int() == 31);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3923 
// 32 bit constant 8
operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 16
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 24
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 32
operand immI_32()
%{
  predicate(n->get_int() == 32);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 48
operand immI_48()
%{
  predicate(n->get_int() == 48);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 56
operand immI_56()
%{
  predicate(n->get_int() == 56);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 63
operand immI_63()
%{
  predicate(n->get_int() == 63);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 64
operand immI_64()
%{
  predicate(n->get_int() == 64);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 255 (0xFF)
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 65535 (0xFFFF)
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4023 
// 64 bit constant 255 (0xFF)
operand immL_255()
%{
  predicate(n->get_long() == 255L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 65535 (0xFFFF)
operand immL_65535()
%{
  predicate(n->get_long() == 65535L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 4294967295 (0xFFFFFFFF)
operand immL_4294967295()
%{
  predicate(n->get_long() == 4294967295L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4053 
// 64 bit mask of contiguous low-order one bits (value+1 is a power
// of two) with the top two bits clear.
// Note: the literal is written with an explicit UL suffix; the old
// lowercase 'l' suffix was easily misread as the digit '1', and the
// constant does not fit in a signed long anyway, so it was already
// implicitly unsigned.
operand immL_bitmask()
%{
  predicate(((n->get_long() & 0xc000000000000000UL) == 0)
            && is_power_of_2(n->get_long() + 1));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4064 
// 32 bit mask of contiguous low-order one bits (value+1 is a power
// of two) with the top two bits clear.
operand immI_bitmask()
%{
  predicate(((n->get_int() & 0xc0000000) == 0)
            && is_power_of_2(n->get_int() + 1));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4075 
// Scale values for scaled offset addressing modes (up to long but not quad)
operand immIScale()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 26 bit signed offset -- for pc-relative branches
operand immI26()
%{
  predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 19 bit signed offset -- for pc-relative loads
operand immI19()
%{
  predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset -- for base plus immediate loads
operand immIU12()
%{
  predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset (64 bit) -- for base plus immediate loads
operand immLU12()
%{
  predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4129 
// Offset for scaled or unscaled immediate loads and stores.
// The integer argument to offset_ok_for_immed in the sized variants
// below is presumably the log2 of the access size (2 = 4 bytes,
// 3 = 8 bytes, 4 = 16 bytes) -- TODO confirm against
// Address::offset_ok_for_immed.
operand immIOffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// int offset valid for a 4-byte access
operand immIOffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 2));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// int offset valid for an 8-byte access
operand immIOffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// int offset valid for a 16-byte access
operand immIOffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// long offset for scaled or unscaled immediate loads and stores
operand immLoffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_long()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// long offset valid for a 4-byte access
operand immLoffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 2));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// long offset valid for an 8-byte access
operand immLoffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 3));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// long offset valid for a 16-byte access
operand immLoffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 4));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4210 
4211 // 32 bit integer valid for add sub immediate
4212 operand immIAddSub()
4213 %{
4214   predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
4215   match(ConI);
4216   op_cost(0);
4217   format %{ %}
4218   interface(CONST_INTER);
4219 %}
4220 
4221 // 32 bit unsigned integer valid for logical immediate
4222 // TODO -- check this is right when e.g the mask is 0x80000000
4223 operand immILog()
4224 %{
4225   predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
4226   match(ConI);
4227 
4228   op_cost(0);
4229   format %{ %}
4230   interface(CONST_INTER);
4231 %}
4232 
4233 // Integer operands 64 bit
4234 // 64 bit immediate
4235 operand immL()
4236 %{
4237   match(ConL);
4238 
4239   op_cost(0);
4240   format %{ %}
4241   interface(CONST_INTER);
4242 %}
4243 
4244 // 64 bit zero
4245 operand immL0()
4246 %{
4247   predicate(n->get_long() == 0);
4248   match(ConL);
4249 
4250   op_cost(0);
4251   format %{ %}
4252   interface(CONST_INTER);
4253 %}
4254 
4255 // 64 bit unit increment
4256 operand immL_1()
4257 %{
4258   predicate(n->get_long() == 1);
4259   match(ConL);
4260 
4261   op_cost(0);
4262   format %{ %}
4263   interface(CONST_INTER);
4264 %}
4265 
4266 // 64 bit unit decrement
4267 operand immL_M1()
4268 %{
4269   predicate(n->get_long() == -1);
4270   match(ConL);
4271 
4272   op_cost(0);
4273   format %{ %}
4274   interface(CONST_INTER);
4275 %}
4276 
4277 // 32 bit offset of pc in thread anchor
4278 
4279 operand immL_pc_off()
4280 %{
4281   predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
4282                              in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
4283   match(ConL);
4284 
4285   op_cost(0);
4286   format %{ %}
4287   interface(CONST_INTER);
4288 %}
4289 
4290 // 64 bit integer valid for add sub immediate
4291 operand immLAddSub()
4292 %{
4293   predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
4294   match(ConL);
4295   op_cost(0);
4296   format %{ %}
4297   interface(CONST_INTER);
4298 %}
4299 
4300 // 64 bit integer valid for logical immediate
4301 operand immLLog()
4302 %{
4303   predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
4304   match(ConL);
4305   op_cost(0);
4306   format %{ %}
4307   interface(CONST_INTER);
4308 %}
4309 
4310 // Long Immediate: low 32-bit mask
4311 operand immL_32bits()
4312 %{
4313   predicate(n->get_long() == 0xFFFFFFFFL);
4314   match(ConL);
4315   op_cost(0);
4316   format %{ %}
4317   interface(CONST_INTER);
4318 %}
4319 
4320 // Pointer operands
4321 // Pointer Immediate
4322 operand immP()
4323 %{
4324   match(ConP);
4325 
4326   op_cost(0);
4327   format %{ %}
4328   interface(CONST_INTER);
4329 %}
4330 
4331 // NULL Pointer Immediate
4332 operand immP0()
4333 %{
4334   predicate(n->get_ptr() == 0);
4335   match(ConP);
4336 
4337   op_cost(0);
4338   format %{ %}
4339   interface(CONST_INTER);
4340 %}
4341 
4342 // Pointer Immediate One
4343 // this is used in object initialization (initial object header)
operand immP_1()
%{
  // matches only the pointer constant 1 (initial object header value)
  predicate(n->get_ptr() == 1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4353 
4354 // Polling Page Pointer Immediate
operand immPollPage()
%{
  // matches only the address of the VM's safepoint polling page
  predicate((address)n->get_ptr() == os::get_polling_page());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4364 
4365 // Card Table Byte Map Base
operand immByteMapBase()
%{
  // Get base of card map
  // matches only when the active barrier set is a card-table barrier and
  // the constant equals that card table's byte_map_base
  predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) &&
            (jbyte*)n->get_ptr() == ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4377 
4378 // Pointer Immediate Minus One
4379 // this is used when we want to write the current PC to the thread anchor
operand immP_M1()
%{
  // NOTE(review): -1 appears to be a sentinel pointer value written to the
  // thread anchor (per the comment above) — confirm against callers
  predicate(n->get_ptr() == -1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus Two
// this is used when we want to write the current PC to the thread anchor
operand immP_M2()
%{
  // NOTE(review): -2 likewise looks like a sentinel marker value; the
  // comment above is identical to immP_M1's — verify it applies here too
  predicate(n->get_ptr() == -2);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4401 
4402 // Float and Double operands
4403 // Double Immediate
operand immD()
%{
  // any compile-time double constant (no predicate restriction)
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4411 
4412 // Double Immediate: +0.0d
operand immD0()
%{
  // bit-pattern comparison, so this matches +0.0d only (not -0.0d)
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4422 
// Double Immediate: any value encodable as an 8-bit packed FP
// immediate (valid for fmov #imm), not just +0.0.
operand immDPacked()
%{
  // double constant encodable as a packed FP immediate (fmov #imm)
  predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4432 
4433 // Float Immediate
operand immF()
%{
  // any compile-time float constant (no predicate restriction)
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4441 
4442 // Float Immediate: +0.0f.
operand immF0()
%{
  // bit-pattern comparison, so this matches +0.0f only (not -0.0f)
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4452 
4453 //
operand immFPacked()
%{
  // float constant encodable as a packed FP immediate (fmov #imm);
  // widened to double for the shared validity check
  predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4462 
4463 // Narrow pointer operands
4464 // Narrow Pointer Immediate
operand immN()
%{
  // any compile-time narrow-oop constant (no predicate restriction)
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4473 
4474 // Narrow NULL Pointer Immediate
operand immN0()
%{
  // matches only the narrow NULL constant
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4484 
// Narrow Klass pointer immediate
operand immNKlass()
%{
  match(ConNKlass);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4493 
// Integer 32 bit Register Operands
// Integer 32 bit Register (excludes SP)
operand iRegI()
%{
  // any 32-bit integer register; also accepts the no-special subset
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4505 
4506 // Integer 32 bit Register not Special
operand iRegINoSp()
%{
  // 32-bit integer register excluding the special registers
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4515 
4516 // Integer 64 bit Register Operands
4517 // Integer 64 bit Register (includes SP)
operand iRegL()
%{
  // any 64-bit integer register; also accepts the no-special subset
  constraint(ALLOC_IN_RC(any_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4527 
4528 // Integer 64 bit Register not Special
// 64-bit integer register excluding the special registers
operand iRegLNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg));
  match(RegL);
  match(iRegL_R0);
  // op_cost(0) added for consistency: every sibling register operand
  // (iRegINoSp, iRegPNoSp, iRegNNoSp, ...) declares an explicit zero cost
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4537 
4538 // Pointer Register Operands
4539 // Pointer Register
operand iRegP()
%{
  // any pointer register; also accepts the more constrained subsets below
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(iRegPNoSp);
  match(iRegP_R0);
  //match(iRegP_R2);
  //match(iRegP_R4);
  //match(iRegP_R5);
  match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4554 
4555 // Pointer 64 bit Register not Special
operand iRegPNoSp()
%{
  // pointer register excluding the special registers
  constraint(ALLOC_IN_RC(no_special_ptr_reg));
  match(RegP);
  // match(iRegP);
  // match(iRegP_R0);
  // match(iRegP_R2);
  // match(iRegP_R4);
  // match(iRegP_R5);
  // match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4570 
// Fixed-register pointer operands: each one is constrained to a single
// physical register so instruct rules can pin an input or result there.

// Pointer 64 bit Register R0 only
operand iRegP_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R1 only
operand iRegP_R1()
%{
  constraint(ALLOC_IN_RC(r1_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R2 only
operand iRegP_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R3 only
operand iRegP_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R4 only
operand iRegP_R4()
%{
  constraint(ALLOC_IN_RC(r4_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R5 only
operand iRegP_R5()
%{
  constraint(ALLOC_IN_RC(r5_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R10 only
operand iRegP_R10()
%{
  constraint(ALLOC_IN_RC(r10_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4654 
4655 // Long 64 bit Register R0 only
// Fixed-register long operands, each pinned to a single physical register.
operand iRegL_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R2 only
operand iRegL_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R3 only
operand iRegL_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R11 only
operand iRegL_R11()
%{
  constraint(ALLOC_IN_RC(r11_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4698 
4699 // Pointer 64 bit Register FP only
operand iRegP_FP()
%{
  // pointer operand pinned to the frame pointer register
  constraint(ALLOC_IN_RC(fp_reg));
  match(RegP);
  // match(iRegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4709 
// Fixed-register 32-bit integer operands, each pinned to one register.

// Register R0 only
operand iRegI_R0()
%{
  constraint(ALLOC_IN_RC(int_r0_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R2 only
operand iRegI_R2()
%{
  constraint(ALLOC_IN_RC(int_r2_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R3 only
operand iRegI_R3()
%{
  constraint(ALLOC_IN_RC(int_r3_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}


// Register R4 only
operand iRegI_R4()
%{
  constraint(ALLOC_IN_RC(int_r4_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4754 
4755 
// Narrow Pointer Register Operands
// Narrow Pointer Register
operand iRegN()
%{
  // narrow (compressed) oop register; 32-bit register class
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegN);
  match(iRegNNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4767 
// Fixed-register narrow-oop operands, each pinned to one register.
operand iRegN_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand iRegN_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand iRegN_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4794 
// Narrow Pointer Register not Special
operand iRegNNoSp()
%{
  // narrow-oop register excluding the special registers
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4804 
4805 // heap base register -- used for encoding immN0
4806 
operand iRegIHeapbase()
%{
  // pinned to the compressed-oop heap base register (see comment above)
  constraint(ALLOC_IN_RC(heapbase_reg));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4815 
4816 // Float Register
4817 // Float register operands
operand vRegF()
%{
  // single-precision FP value in a SIMD/FP register
  constraint(ALLOC_IN_RC(float_reg));
  match(RegF);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4827 
4828 // Double Register
4829 // Double register operands
operand vRegD()
%{
  // double-precision FP value in a SIMD/FP register
  constraint(ALLOC_IN_RC(double_reg));
  match(RegD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4839 
// 64-bit (D) vector register operand
operand vecD()
%{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 128-bit (Q/X) vector register operand
operand vecX()
%{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4859 
// Fixed-register double operands, each pinned to one of v0..v3.
operand vRegD_V0()
%{
  constraint(ALLOC_IN_RC(v0_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V1()
%{
  constraint(ALLOC_IN_RC(v1_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V2()
%{
  constraint(ALLOC_IN_RC(v2_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V3()
%{
  constraint(ALLOC_IN_RC(v3_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4895 
4896 // Flags register, used as output of signed compare instructions
4897 
// note that on AArch64 we also use this register as the output
// for floating point compare instructions (CmpF CmpD). this ensures
4900 // that ordered inequality tests use GT, GE, LT or LE none of which
4901 // pass through cases where the result is unordered i.e. one or both
4902 // inputs to the compare is a NaN. this means that the ideal code can
4903 // replace e.g. a GT with an LE and not end up capturing the NaN case
4904 // (where the comparison should always fail). EQ and NE tests are
4905 // always generated in ideal code so that unordered folds into the NE
4906 // case, matching the behaviour of AArch64 NE.
4907 //
4908 // This differs from x86 where the outputs of FP compares use a
4909 // special FP flags registers and where compares based on this
4910 // register are distinguished into ordered inequalities (cmpOpUCF) and
4911 // EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
4912 // to explicitly handle the unordered case in branches. x86 also has
4913 // to include extra CMoveX rules to accept a cmpOpUCF input.
4914 
operand rFlagsReg()
%{
  // condition flags produced by signed (and FP) compares; see note above
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGS" %}
  interface(REG_INTER);
%}
4924 
4925 // Flags register, used as output of unsigned compare instructions
operand rFlagsRegU()
%{
  // condition flags produced by unsigned compares; same physical register
  // as rFlagsReg, distinguished only so rules pick the right cond codes
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGSU" %}
  interface(REG_INTER);
%}
4935 
4936 // Special Registers
4937 
4938 // Method Register
operand inline_cache_RegP(iRegP reg)
%{
  // pointer operand pinned to the inline-cache (method) register
  constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4948 
// Interpreter's Method* register
operand interpreter_method_oop_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4958 
4959 // Thread Register
operand thread_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(thread_reg)); // thread_reg (JavaThread*)
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4968 
// Link register
operand lr_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(lr_reg)); // link_reg
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4977 
4978 //----------Memory Operands----------------------------------------------------
4979 
// Base-register-only addressing: [reg]
operand indirect(iRegP reg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);
  op_cost(0);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);  // 0xffffffff encodes "no index register"
    scale(0x0);
    disp(0x0);
  %}
%}
4993 
// Base plus sign-extended (I2L) 32-bit index shifted by a scale:
// [reg, ireg sxtw #scale]; only legal when every memory user of the
// AddP can use the scaled-offset form (see size_fits_all_mem_uses)
operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}
5008 
// Base plus 64-bit index shifted by a scale: [reg, lreg lsl #scale];
// guarded by the same all-uses-fit check as indIndexScaledI2L
operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}
5023 
// Base plus sign-extended 32-bit index, no scaling: [reg, ireg sxtw]
operand indIndexI2L(iRegP reg, iRegI ireg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}
5037 
// Base plus 64-bit index, no scaling: [reg, lreg]
operand indIndex(iRegP reg, iRegL lreg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg lreg);
  op_cost(0);
  format %{ "$reg, $lreg" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}
5051 
// Base plus 32-bit immediate offset: [reg, #off]. The 4/8/16 variants
// restrict the offset via the corresponding immIOffsetN operand so the
// displacement fits the access size's encodable range.
operand indOffI(iRegP reg, immIOffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI4(iRegP reg, immIOffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI8(iRegP reg, immIOffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI16(iRegP reg, immIOffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
5107 
// Base plus 64-bit immediate offset: [reg, #off]. The 4/8/16 variants
// restrict the offset via the corresponding immLoffsetN operand so the
// displacement fits the access size's encodable range.
operand indOffL(iRegP reg, immLoffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL4(iRegP reg, immLoffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL8(iRegP reg, immLoffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL16(iRegP reg, immLoffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
5163 
// Narrow-oop base addressing: the DecodeN is free only when the
// compressed-oop shift is zero, hence the predicate
operand indirectN(iRegN reg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);
  op_cost(0);
  format %{ "[$reg]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp(0x0);
  %}
%}
5178 
// Narrow-oop-base variants of the scaled-index addressing modes; both
// require narrow_oop_shift() == 0 so the DecodeN can be folded away
operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}
5208 
// Narrow-oop-base variants of the unscaled-index addressing modes
operand indIndexI2LN(iRegN reg, iRegI ireg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

operand indIndexN(iRegN reg, iRegL lreg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) lreg);
  op_cost(0);
  format %{ "$reg, $lreg\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}
5238 
// Narrow-oop-base variants of base-plus-immediate-offset addressing
operand indOffIN(iRegN reg, immIOffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffLN(iRegN reg, immLoffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
5268 
5269 
5270 
5271 // AArch64 opto stubs need to write to the pc slot in the thread anchor
// Address of the saved-PC slot in the current thread's frame anchor:
// base is the thread register, offset is the immL_pc_off constant
operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
5285 
5286 //----------Special Memory Operands--------------------------------------------
5287 // Stack Slot Operand - This operand is used for loading and storing temporary
5288 //                      values on the stack where a match requires a value to
5289 //                      flow through memory.
operand stackSlotP(sRegP reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  // No match rule because this operand is only generated in matching
  // match(RegP);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP — 0x1e encodes the stack pointer (comment style
                 // inherited from the x86 AD file)
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
5304 
// Stack-slot operands for int/float/double/long spill locations; all
// address relative to the stack pointer (base 0x1e), displacement is
// the slot's stack offset.
operand stackSlotI(sRegI reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegI);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegF);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegD);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegL);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
5360 
5361 // Operands for expressing Control Flow
5362 // NOTE: Label is a predefined operand which should not be redefined in
5363 //       the AD file. It is generically handled within the ADLC.
5364 
5365 //----------Conditional Branch Operands----------------------------------------
5366 // Comparison Op  - This is the operation of the comparison, and is limited to
5367 //                  the following set of codes:
5368 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5369 //
5370 // Other attributes of the comparison, such as unsignedness, are specified
5371 // by the comparison instruction that sets a condition code flags register.
5372 // That result is represented by a flags operand whose subtype is appropriate
5373 // to the unsignedness (etc.) of the comparison.
5374 //
5375 // Later, the instruction which matches both the Comparison Op (a Bool) and
5376 // the flags (produced by the Cmp) specifies the coding of the comparison op
5377 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5378 
5379 // used for signed integral comparisons and fp comparisons
5380 
operand cmpOp()
%{
  match(Bool);

  format %{ "" %}
  // hex values are the AArch64 condition-code encodings
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
5397 
5398 // used for unsigned integral comparisons
5399 
operand cmpOpU()
%{
  match(Bool);

  format %{ "" %}
  // unsigned condition codes: lo/hs/ls/hi instead of lt/ge/le/gt
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0x3, "lo");
    greater_equal(0x2, "hs");
    less_equal(0x9, "ls");
    greater(0x8, "hi");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
5416 
5417 // used for certain integral comparisons which can be
5418 // converted to cbxx or tbxx instructions
5419 
operand cmpOpEqNe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);
  // restricted to eq/ne so the rule can emit cbz/cbnz or tbz/tbnz
  predicate(n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::eq);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
5440 
5441 // used for certain integral comparisons which can be
5442 // converted to cbxx or tbxx instructions
5443 
operand cmpOpLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  // restricted to lt/ge (sign-bit tests) for tbz/tbnz-style lowering
  predicate(n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
5465 
5466 // used for certain unsigned integral comparisons which can be
5467 // converted to cbxx or tbxx instructions
5468 
operand cmpOpUEqNeLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  // union of the eq/ne and lt/ge restrictions (see operands above)
  predicate(n->as_Bool()->_test._test == BoolTest::eq
            || n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
5492 
5493 // Special operand allowing long args to int ops to be truncated for free
5494 
// Matches (ConvL2I reg) directly so the truncation costs nothing: the
// consuming 32-bit instruction just reads the low half of the register.
operand iRegL2I(iRegL reg) %{

  op_cost(0);

  match(ConvL2I reg);

  format %{ "l2i($reg)" %}

  // semicolon added for consistency: every other operand in this file
  // terminates its interface declaration with one
  interface(REG_INTER);
%}
5505 
// Vector memory operand classes, grouped by access size in bytes so the
// immediate-offset variant matches the size's encodable range.
opclass vmem4(indirect, indIndex, indOffI4, indOffL4);
opclass vmem8(indirect, indIndex, indOffI8, indOffL8);
opclass vmem16(indirect, indIndex, indOffI16, indOffL16);
5509 
5510 //----------OPERAND CLASSES----------------------------------------------------
5511 // Operand Classes are groups of operands that are used as to simplify
5512 // instruction definitions by not requiring the AD writer to specify
5513 // separate instructions for every form of operand when the
5514 // instruction accepts multiple operand types with the same basic
5515 // encoding and format. The classic case of this is memory operands.
5516 
5517 // memory is used to define read/write location for load/store
5518 // instruction defs. we can turn a memory op into an Address
5519 
// all addressing modes accepted by ordinary loads/stores: the plain
// pointer-base forms plus their narrow-oop-base (N) counterparts
opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI, indOffL,
               indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);
5522 
5523 // iRegIorL2I is used for src inputs in rules for 32 bit int (I)
5524 // operations. it allows the src to be either an iRegI or a (ConvL2I
5525 // iRegL). in the latter case the l2i normally planted for a ConvL2I
5526 // can be elided because the 32-bit instruction will just employ the
5527 // lower 32 bits anyway.
5528 //
5529 // n.b. this does not elide all L2I conversions. if the truncated
5530 // value is consumed by more than one operation then the ConvL2I
5531 // cannot be bundled into the consuming nodes so an l2i gets planted
5532 // (actually a movw $dst $src) and the downstream instructions consume
5533 // the result of the l2i as an iRegI input. That's a shame since the
5534 // movw is actually redundant but its not too costly.
5535 
// see note above: accepts a plain iRegI or an elided (ConvL2I iRegL)
opclass iRegIorL2I(iRegI, iRegL2I);
5537 
5538 //----------PIPELINE-----------------------------------------------------------
5539 // Rules which define the behavior of the target architectures pipeline.
5540 
5541 // For specific pipelines, eg A53, define the stages of that pipeline
5542 //pipe_desc(ISS, EX1, EX2, WR);
// Map A53-style stage names (issue, execute 1/2, writeback) onto the
// generic pipe_desc stages S0..S5 declared below.
#define ISS S0
#define EX1 S1
#define EX2 S2
#define WR  S3
5547 
5548 // Integer ALU reg operation
5549 pipeline %{
5550 
attributes %{
  // ARM instructions are of fixed length
  fixed_size_instructions;        // Fixed size instructions
  max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
  // ARM instructions come in 32-bit word units
  instruction_unit_size = 4;         // An instruction is 4 bytes long
  instruction_fetch_unit_size = 64;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 64 bytes

  // List of nop instructions
  nops( MachNop );
%}
5563 
5564 // We don't use an actual pipeline model so don't care about resources
5565 // or description. we do use pipeline classes to introduce fixed
5566 // latencies
5567 
5568 //----------RESOURCES----------------------------------------------------------
5569 // Resources are the functional units available to the machine
5570 
// Two issue slots (INS0/INS1; INS01 matches either), two integer ALUs,
// a multiply-accumulate unit, a divider, a branch unit, a load/store
// unit and a NEON/FP unit.  Per the note above, these model issue
// constraints and latencies rather than a faithful pipeline.
resources( INS0, INS1, INS01 = INS0 | INS1,
           ALU0, ALU1, ALU = ALU0 | ALU1,
           MAC,
           DIV,
           BRANCH,
           LDST,
           NEON_FP);
5578 
5579 //----------PIPELINE DESCRIPTION-----------------------------------------------
5580 // Pipeline Description specifies the stages in the machine's pipeline
5581 
5582 // Define the pipeline as a generic 6 stage pipeline
5583 pipe_desc(S0, S1, S2, S3, S4, S5);
5584 
5585 //----------PIPELINE CLASSES---------------------------------------------------
5586 // Pipeline Classes describe the stages in which input and output are
5587 // referenced by the hardware pipeline.
5588 
5589 pipe_class fp_dop_reg_reg_s(vRegF dst, vRegF src1, vRegF src2)
5590 %{
5591   single_instruction;
5592   src1   : S1(read);
5593   src2   : S2(read);
5594   dst    : S5(write);
5595   INS01  : ISS;
5596   NEON_FP : S5;
5597 %}
5598 
5599 pipe_class fp_dop_reg_reg_d(vRegD dst, vRegD src1, vRegD src2)
5600 %{
5601   single_instruction;
5602   src1   : S1(read);
5603   src2   : S2(read);
5604   dst    : S5(write);
5605   INS01  : ISS;
5606   NEON_FP : S5;
5607 %}
5608 
5609 pipe_class fp_uop_s(vRegF dst, vRegF src)
5610 %{
5611   single_instruction;
5612   src    : S1(read);
5613   dst    : S5(write);
5614   INS01  : ISS;
5615   NEON_FP : S5;
5616 %}
5617 
5618 pipe_class fp_uop_d(vRegD dst, vRegD src)
5619 %{
5620   single_instruction;
5621   src    : S1(read);
5622   dst    : S5(write);
5623   INS01  : ISS;
5624   NEON_FP : S5;
5625 %}
5626 
5627 pipe_class fp_d2f(vRegF dst, vRegD src)
5628 %{
5629   single_instruction;
5630   src    : S1(read);
5631   dst    : S5(write);
5632   INS01  : ISS;
5633   NEON_FP : S5;
5634 %}
5635 
5636 pipe_class fp_f2d(vRegD dst, vRegF src)
5637 %{
5638   single_instruction;
5639   src    : S1(read);
5640   dst    : S5(write);
5641   INS01  : ISS;
5642   NEON_FP : S5;
5643 %}
5644 
5645 pipe_class fp_f2i(iRegINoSp dst, vRegF src)
5646 %{
5647   single_instruction;
5648   src    : S1(read);
5649   dst    : S5(write);
5650   INS01  : ISS;
5651   NEON_FP : S5;
5652 %}
5653 
5654 pipe_class fp_f2l(iRegLNoSp dst, vRegF src)
5655 %{
5656   single_instruction;
5657   src    : S1(read);
5658   dst    : S5(write);
5659   INS01  : ISS;
5660   NEON_FP : S5;
5661 %}
5662 
5663 pipe_class fp_i2f(vRegF dst, iRegIorL2I src)
5664 %{
5665   single_instruction;
5666   src    : S1(read);
5667   dst    : S5(write);
5668   INS01  : ISS;
5669   NEON_FP : S5;
5670 %}
5671 
5672 pipe_class fp_l2f(vRegF dst, iRegL src)
5673 %{
5674   single_instruction;
5675   src    : S1(read);
5676   dst    : S5(write);
5677   INS01  : ISS;
5678   NEON_FP : S5;
5679 %}
5680 
5681 pipe_class fp_d2i(iRegINoSp dst, vRegD src)
5682 %{
5683   single_instruction;
5684   src    : S1(read);
5685   dst    : S5(write);
5686   INS01  : ISS;
5687   NEON_FP : S5;
5688 %}
5689 
5690 pipe_class fp_d2l(iRegLNoSp dst, vRegD src)
5691 %{
5692   single_instruction;
5693   src    : S1(read);
5694   dst    : S5(write);
5695   INS01  : ISS;
5696   NEON_FP : S5;
5697 %}
5698 
5699 pipe_class fp_i2d(vRegD dst, iRegIorL2I src)
5700 %{
5701   single_instruction;
5702   src    : S1(read);
5703   dst    : S5(write);
5704   INS01  : ISS;
5705   NEON_FP : S5;
5706 %}
5707 
5708 pipe_class fp_l2d(vRegD dst, iRegIorL2I src)
5709 %{
5710   single_instruction;
5711   src    : S1(read);
5712   dst    : S5(write);
5713   INS01  : ISS;
5714   NEON_FP : S5;
5715 %}
5716 
5717 pipe_class fp_div_s(vRegF dst, vRegF src1, vRegF src2)
5718 %{
5719   single_instruction;
5720   src1   : S1(read);
5721   src2   : S2(read);
5722   dst    : S5(write);
5723   INS0   : ISS;
5724   NEON_FP : S5;
5725 %}
5726 
5727 pipe_class fp_div_d(vRegD dst, vRegD src1, vRegD src2)
5728 %{
5729   single_instruction;
5730   src1   : S1(read);
5731   src2   : S2(read);
5732   dst    : S5(write);
5733   INS0   : ISS;
5734   NEON_FP : S5;
5735 %}
5736 
5737 pipe_class fp_cond_reg_reg_s(vRegF dst, vRegF src1, vRegF src2, rFlagsReg cr)
5738 %{
5739   single_instruction;
5740   cr     : S1(read);
5741   src1   : S1(read);
5742   src2   : S1(read);
5743   dst    : S3(write);
5744   INS01  : ISS;
5745   NEON_FP : S3;
5746 %}
5747 
5748 pipe_class fp_cond_reg_reg_d(vRegD dst, vRegD src1, vRegD src2, rFlagsReg cr)
5749 %{
5750   single_instruction;
5751   cr     : S1(read);
5752   src1   : S1(read);
5753   src2   : S1(read);
5754   dst    : S3(write);
5755   INS01  : ISS;
5756   NEON_FP : S3;
5757 %}
5758 
5759 pipe_class fp_imm_s(vRegF dst)
5760 %{
5761   single_instruction;
5762   dst    : S3(write);
5763   INS01  : ISS;
5764   NEON_FP : S3;
5765 %}
5766 
5767 pipe_class fp_imm_d(vRegD dst)
5768 %{
5769   single_instruction;
5770   dst    : S3(write);
5771   INS01  : ISS;
5772   NEON_FP : S3;
5773 %}
5774 
5775 pipe_class fp_load_constant_s(vRegF dst)
5776 %{
5777   single_instruction;
5778   dst    : S4(write);
5779   INS01  : ISS;
5780   NEON_FP : S4;
5781 %}
5782 
5783 pipe_class fp_load_constant_d(vRegD dst)
5784 %{
5785   single_instruction;
5786   dst    : S4(write);
5787   INS01  : ISS;
5788   NEON_FP : S4;
5789 %}
5790 
5791 pipe_class vmul64(vecD dst, vecD src1, vecD src2)
5792 %{
5793   single_instruction;
5794   dst    : S5(write);
5795   src1   : S1(read);
5796   src2   : S1(read);
5797   INS01  : ISS;
5798   NEON_FP : S5;
5799 %}
5800 
5801 pipe_class vmul128(vecX dst, vecX src1, vecX src2)
5802 %{
5803   single_instruction;
5804   dst    : S5(write);
5805   src1   : S1(read);
5806   src2   : S1(read);
5807   INS0   : ISS;
5808   NEON_FP : S5;
5809 %}
5810 
// Vector multiply-accumulate, 64-bit.  Note that dst appears twice:
// it is read in S1 (the accumulator input) and written in S5 (the
// result), so back-to-back accumulations chain through dst.
pipe_class vmla64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// Vector multiply-accumulate, 128-bit.  As vmla64, but may only issue
// in slot 0 (INS0).
pipe_class vmla128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}
5832 
5833 pipe_class vdop64(vecD dst, vecD src1, vecD src2)
5834 %{
5835   single_instruction;
5836   dst    : S4(write);
5837   src1   : S2(read);
5838   src2   : S2(read);
5839   INS01  : ISS;
5840   NEON_FP : S4;
5841 %}
5842 
5843 pipe_class vdop128(vecX dst, vecX src1, vecX src2)
5844 %{
5845   single_instruction;
5846   dst    : S4(write);
5847   src1   : S2(read);
5848   src2   : S2(read);
5849   INS0   : ISS;
5850   NEON_FP : S4;
5851 %}
5852 
5853 pipe_class vlogical64(vecD dst, vecD src1, vecD src2)
5854 %{
5855   single_instruction;
5856   dst    : S3(write);
5857   src1   : S2(read);
5858   src2   : S2(read);
5859   INS01  : ISS;
5860   NEON_FP : S3;
5861 %}
5862 
5863 pipe_class vlogical128(vecX dst, vecX src1, vecX src2)
5864 %{
5865   single_instruction;
5866   dst    : S3(write);
5867   src1   : S2(read);
5868   src2   : S2(read);
5869   INS0   : ISS;
5870   NEON_FP : S3;
5871 %}
5872 
5873 pipe_class vshift64(vecD dst, vecD src, vecX shift)
5874 %{
5875   single_instruction;
5876   dst    : S3(write);
5877   src    : S1(read);
5878   shift  : S1(read);
5879   INS01  : ISS;
5880   NEON_FP : S3;
5881 %}
5882 
5883 pipe_class vshift128(vecX dst, vecX src, vecX shift)
5884 %{
5885   single_instruction;
5886   dst    : S3(write);
5887   src    : S1(read);
5888   shift  : S1(read);
5889   INS0   : ISS;
5890   NEON_FP : S3;
5891 %}
5892 
5893 pipe_class vshift64_imm(vecD dst, vecD src, immI shift)
5894 %{
5895   single_instruction;
5896   dst    : S3(write);
5897   src    : S1(read);
5898   INS01  : ISS;
5899   NEON_FP : S3;
5900 %}
5901 
5902 pipe_class vshift128_imm(vecX dst, vecX src, immI shift)
5903 %{
5904   single_instruction;
5905   dst    : S3(write);
5906   src    : S1(read);
5907   INS0   : ISS;
5908   NEON_FP : S3;
5909 %}
5910 
5911 pipe_class vdop_fp64(vecD dst, vecD src1, vecD src2)
5912 %{
5913   single_instruction;
5914   dst    : S5(write);
5915   src1   : S1(read);
5916   src2   : S1(read);
5917   INS01  : ISS;
5918   NEON_FP : S5;
5919 %}
5920 
5921 pipe_class vdop_fp128(vecX dst, vecX src1, vecX src2)
5922 %{
5923   single_instruction;
5924   dst    : S5(write);
5925   src1   : S1(read);
5926   src2   : S1(read);
5927   INS0   : ISS;
5928   NEON_FP : S5;
5929 %}
5930 
5931 pipe_class vmuldiv_fp64(vecD dst, vecD src1, vecD src2)
5932 %{
5933   single_instruction;
5934   dst    : S5(write);
5935   src1   : S1(read);
5936   src2   : S1(read);
5937   INS0   : ISS;
5938   NEON_FP : S5;
5939 %}
5940 
5941 pipe_class vmuldiv_fp128(vecX dst, vecX src1, vecX src2)
5942 %{
5943   single_instruction;
5944   dst    : S5(write);
5945   src1   : S1(read);
5946   src2   : S1(read);
5947   INS0   : ISS;
5948   NEON_FP : S5;
5949 %}
5950 
5951 pipe_class vsqrt_fp128(vecX dst, vecX src)
5952 %{
5953   single_instruction;
5954   dst    : S5(write);
5955   src    : S1(read);
5956   INS0   : ISS;
5957   NEON_FP : S5;
5958 %}
5959 
5960 pipe_class vunop_fp64(vecD dst, vecD src)
5961 %{
5962   single_instruction;
5963   dst    : S5(write);
5964   src    : S1(read);
5965   INS01  : ISS;
5966   NEON_FP : S5;
5967 %}
5968 
5969 pipe_class vunop_fp128(vecX dst, vecX src)
5970 %{
5971   single_instruction;
5972   dst    : S5(write);
5973   src    : S1(read);
5974   INS0   : ISS;
5975   NEON_FP : S5;
5976 %}
5977 
5978 pipe_class vdup_reg_reg64(vecD dst, iRegI src)
5979 %{
5980   single_instruction;
5981   dst    : S3(write);
5982   src    : S1(read);
5983   INS01  : ISS;
5984   NEON_FP : S3;
5985 %}
5986 
5987 pipe_class vdup_reg_reg128(vecX dst, iRegI src)
5988 %{
5989   single_instruction;
5990   dst    : S3(write);
5991   src    : S1(read);
5992   INS01  : ISS;
5993   NEON_FP : S3;
5994 %}
5995 
5996 pipe_class vdup_reg_freg64(vecD dst, vRegF src)
5997 %{
5998   single_instruction;
5999   dst    : S3(write);
6000   src    : S1(read);
6001   INS01  : ISS;
6002   NEON_FP : S3;
6003 %}
6004 
6005 pipe_class vdup_reg_freg128(vecX dst, vRegF src)
6006 %{
6007   single_instruction;
6008   dst    : S3(write);
6009   src    : S1(read);
6010   INS01  : ISS;
6011   NEON_FP : S3;
6012 %}
6013 
6014 pipe_class vdup_reg_dreg128(vecX dst, vRegD src)
6015 %{
6016   single_instruction;
6017   dst    : S3(write);
6018   src    : S1(read);
6019   INS01  : ISS;
6020   NEON_FP : S3;
6021 %}
6022 
6023 pipe_class vmovi_reg_imm64(vecD dst)
6024 %{
6025   single_instruction;
6026   dst    : S3(write);
6027   INS01  : ISS;
6028   NEON_FP : S3;
6029 %}
6030 
6031 pipe_class vmovi_reg_imm128(vecX dst)
6032 %{
6033   single_instruction;
6034   dst    : S3(write);
6035   INS0   : ISS;
6036   NEON_FP : S3;
6037 %}
6038 
6039 pipe_class vload_reg_mem64(vecD dst, vmem8 mem)
6040 %{
6041   single_instruction;
6042   dst    : S5(write);
6043   mem    : ISS(read);
6044   INS01  : ISS;
6045   NEON_FP : S3;
6046 %}
6047 
6048 pipe_class vload_reg_mem128(vecX dst, vmem16 mem)
6049 %{
6050   single_instruction;
6051   dst    : S5(write);
6052   mem    : ISS(read);
6053   INS01  : ISS;
6054   NEON_FP : S3;
6055 %}
6056 
6057 pipe_class vstore_reg_mem64(vecD src, vmem8 mem)
6058 %{
6059   single_instruction;
6060   mem    : ISS(read);
6061   src    : S2(read);
6062   INS01  : ISS;
6063   NEON_FP : S3;
6064 %}
6065 
// Vector store, 128-bit.  The source is a 128-bit vector (vecX),
// matching vload_reg_mem128 and the 16-byte vmem16 memory operand;
// the previous vecD here was a copy-paste from the 64-bit variant.
pipe_class vstore_reg_mem128(vecX src, vmem16 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
6074 
6075 //------- Integer ALU operations --------------------------
6076 
6077 // Integer ALU reg-reg operation
6078 // Operands needed in EX1, result generated in EX2
6079 // Eg.  ADD     x0, x1, x2
6080 pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6081 %{
6082   single_instruction;
6083   dst    : EX2(write);
6084   src1   : EX1(read);
6085   src2   : EX1(read);
6086   INS01  : ISS; // Dual issue as instruction 0 or 1
6087   ALU    : EX2;
6088 %}
6089 
6090 // Integer ALU reg-reg operation with constant shift
6091 // Shifted register must be available in LATE_ISS instead of EX1
6092 // Eg.  ADD     x0, x1, x2, LSL #2
6093 pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
6094 %{
6095   single_instruction;
6096   dst    : EX2(write);
6097   src1   : EX1(read);
6098   src2   : ISS(read);
6099   INS01  : ISS;
6100   ALU    : EX2;
6101 %}
6102 
6103 // Integer ALU reg operation with constant shift
6104 // Eg.  LSL     x0, x1, #shift
6105 pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
6106 %{
6107   single_instruction;
6108   dst    : EX2(write);
6109   src1   : ISS(read);
6110   INS01  : ISS;
6111   ALU    : EX2;
6112 %}
6113 
6114 // Integer ALU reg-reg operation with variable shift
6115 // Both operands must be available in LATE_ISS instead of EX1
6116 // Result is available in EX1 instead of EX2
6117 // Eg.  LSLV    x0, x1, x2
6118 pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
6119 %{
6120   single_instruction;
6121   dst    : EX1(write);
6122   src1   : ISS(read);
6123   src2   : ISS(read);
6124   INS01  : ISS;
6125   ALU    : EX1;
6126 %}
6127 
// Integer ALU reg-reg operation with extract
// As for _vshift above, but result generated in EX2
// Eg.  EXTR    x0, x1, x2, #N
pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS1   : ISS; // Can only dual issue as Instruction 1
  ALU    : EX1; // NOTE(review): comment above says result in EX2, yet the
                // ALU is only held in EX1 (cf. ialu_reg_reg which uses
                // ALU : EX2) -- confirm which is intended
%}
6140 
6141 // Integer ALU reg operation
6142 // Eg.  NEG     x0, x1
6143 pipe_class ialu_reg(iRegI dst, iRegI src)
6144 %{
6145   single_instruction;
6146   dst    : EX2(write);
6147   src    : EX1(read);
6148   INS01  : ISS;
6149   ALU    : EX2;
6150 %}
6151 
// Integer ALU reg immediate operation
6153 // Eg.  ADD     x0, x1, #N
6154 pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
6155 %{
6156   single_instruction;
6157   dst    : EX2(write);
6158   src1   : EX1(read);
6159   INS01  : ISS;
6160   ALU    : EX2;
6161 %}
6162 
6163 // Integer ALU immediate operation (no source operands)
6164 // Eg.  MOV     x0, #N
6165 pipe_class ialu_imm(iRegI dst)
6166 %{
6167   single_instruction;
6168   dst    : EX1(write);
6169   INS01  : ISS;
6170   ALU    : EX1;
6171 %}
6172 
6173 //------- Compare operation -------------------------------
6174 
6175 // Compare reg-reg
6176 // Eg.  CMP     x0, x1
6177 pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
6178 %{
6179   single_instruction;
6180 //  fixed_latency(16);
6181   cr     : EX2(write);
6182   op1    : EX1(read);
6183   op2    : EX1(read);
6184   INS01  : ISS;
6185   ALU    : EX2;
6186 %}
6187 
6188 // Compare reg-reg
6189 // Eg.  CMP     x0, #N
6190 pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
6191 %{
6192   single_instruction;
6193 //  fixed_latency(16);
6194   cr     : EX2(write);
6195   op1    : EX1(read);
6196   INS01  : ISS;
6197   ALU    : EX2;
6198 %}
6199 
6200 //------- Conditional instructions ------------------------
6201 
6202 // Conditional no operands
6203 // Eg.  CSINC   x0, zr, zr, <cond>
6204 pipe_class icond_none(iRegI dst, rFlagsReg cr)
6205 %{
6206   single_instruction;
6207   cr     : EX1(read);
6208   dst    : EX2(write);
6209   INS01  : ISS;
6210   ALU    : EX2;
6211 %}
6212 
6213 // Conditional 2 operand
6214 // EG.  CSEL    X0, X1, X2, <cond>
6215 pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
6216 %{
6217   single_instruction;
6218   cr     : EX1(read);
6219   src1   : EX1(read);
6220   src2   : EX1(read);
6221   dst    : EX2(write);
6222   INS01  : ISS;
6223   ALU    : EX2;
6224 %}
6225 
6226 // Conditional 2 operand
6227 // EG.  CSEL    X0, X1, X2, <cond>
6228 pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
6229 %{
6230   single_instruction;
6231   cr     : EX1(read);
6232   src    : EX1(read);
6233   dst    : EX2(write);
6234   INS01  : ISS;
6235   ALU    : EX2;
6236 %}
6237 
6238 //------- Multiply pipeline operations --------------------
6239 
6240 // Multiply reg-reg
6241 // Eg.  MUL     w0, w1, w2
6242 pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6243 %{
6244   single_instruction;
6245   dst    : WR(write);
6246   src1   : ISS(read);
6247   src2   : ISS(read);
6248   INS01  : ISS;
6249   MAC    : WR;
6250 %}
6251 
6252 // Multiply accumulate
6253 // Eg.  MADD    w0, w1, w2, w3
6254 pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
6255 %{
6256   single_instruction;
6257   dst    : WR(write);
6258   src1   : ISS(read);
6259   src2   : ISS(read);
6260   src3   : ISS(read);
6261   INS01  : ISS;
6262   MAC    : WR;
6263 %}
6264 
6265 // Eg.  MUL     w0, w1, w2
6266 pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6267 %{
6268   single_instruction;
6269   fixed_latency(3); // Maximum latency for 64 bit mul
6270   dst    : WR(write);
6271   src1   : ISS(read);
6272   src2   : ISS(read);
6273   INS01  : ISS;
6274   MAC    : WR;
6275 %}
6276 
6277 // Multiply accumulate
6278 // Eg.  MADD    w0, w1, w2, w3
6279 pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
6280 %{
6281   single_instruction;
6282   fixed_latency(3); // Maximum latency for 64 bit mul
6283   dst    : WR(write);
6284   src1   : ISS(read);
6285   src2   : ISS(read);
6286   src3   : ISS(read);
6287   INS01  : ISS;
6288   MAC    : WR;
6289 %}
6290 
6291 //------- Divide pipeline operations --------------------
6292 
6293 // Eg.  SDIV    w0, w1, w2
6294 pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6295 %{
6296   single_instruction;
6297   fixed_latency(8); // Maximum latency for 32 bit divide
6298   dst    : WR(write);
6299   src1   : ISS(read);
6300   src2   : ISS(read);
6301   INS0   : ISS; // Can only dual issue as instruction 0
6302   DIV    : WR;
6303 %}
6304 
6305 // Eg.  SDIV    x0, x1, x2
6306 pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6307 %{
6308   single_instruction;
6309   fixed_latency(16); // Maximum latency for 64 bit divide
6310   dst    : WR(write);
6311   src1   : ISS(read);
6312   src2   : ISS(read);
6313   INS0   : ISS; // Can only dual issue as instruction 0
6314   DIV    : WR;
6315 %}
6316 
6317 //------- Load pipeline operations ------------------------
6318 
6319 // Load - prefetch
6320 // Eg.  PFRM    <mem>
6321 pipe_class iload_prefetch(memory mem)
6322 %{
6323   single_instruction;
6324   mem    : ISS(read);
6325   INS01  : ISS;
6326   LDST   : WR;
6327 %}
6328 
6329 // Load - reg, mem
6330 // Eg.  LDR     x0, <mem>
6331 pipe_class iload_reg_mem(iRegI dst, memory mem)
6332 %{
6333   single_instruction;
6334   dst    : WR(write);
6335   mem    : ISS(read);
6336   INS01  : ISS;
6337   LDST   : WR;
6338 %}
6339 
6340 // Load - reg, reg
6341 // Eg.  LDR     x0, [sp, x1]
6342 pipe_class iload_reg_reg(iRegI dst, iRegI src)
6343 %{
6344   single_instruction;
6345   dst    : WR(write);
6346   src    : ISS(read);
6347   INS01  : ISS;
6348   LDST   : WR;
6349 %}
6350 
6351 //------- Store pipeline operations -----------------------
6352 
6353 // Store - zr, mem
6354 // Eg.  STR     zr, <mem>
6355 pipe_class istore_mem(memory mem)
6356 %{
6357   single_instruction;
6358   mem    : ISS(read);
6359   INS01  : ISS;
6360   LDST   : WR;
6361 %}
6362 
6363 // Store - reg, mem
6364 // Eg.  STR     x0, <mem>
6365 pipe_class istore_reg_mem(iRegI src, memory mem)
6366 %{
6367   single_instruction;
6368   mem    : ISS(read);
6369   src    : EX2(read);
6370   INS01  : ISS;
6371   LDST   : WR;
6372 %}
6373 
6374 // Store - reg, reg
6375 // Eg. STR      x0, [sp, x1]
6376 pipe_class istore_reg_reg(iRegI dst, iRegI src)
6377 %{
6378   single_instruction;
6379   dst    : ISS(read);
6380   src    : EX2(read);
6381   INS01  : ISS;
6382   LDST   : WR;
6383 %}
6384 
//------- Branch pipeline operations ----------------------
6386 
6387 // Branch
6388 pipe_class pipe_branch()
6389 %{
6390   single_instruction;
6391   INS01  : ISS;
6392   BRANCH : EX1;
6393 %}
6394 
6395 // Conditional branch
6396 pipe_class pipe_branch_cond(rFlagsReg cr)
6397 %{
6398   single_instruction;
6399   cr     : EX1(read);
6400   INS01  : ISS;
6401   BRANCH : EX1;
6402 %}
6403 
6404 // Compare & Branch
6405 // EG.  CBZ/CBNZ
6406 pipe_class pipe_cmp_branch(iRegI op1)
6407 %{
6408   single_instruction;
6409   op1    : EX1(read);
6410   INS01  : ISS;
6411   BRANCH : EX1;
6412 %}
6413 
6414 //------- Synchronisation operations ----------------------
6415 
6416 // Any operation requiring serialization.
6417 // EG.  DMB/Atomic Ops/Load Acquire/Str Release
6418 pipe_class pipe_serial()
6419 %{
6420   single_instruction;
6421   force_serialization;
6422   fixed_latency(16);
6423   INS01  : ISS(2); // Cannot dual issue with any other instruction
6424   LDST   : WR;
6425 %}
6426 
6427 // Generic big/slow expanded idiom - also serialized
6428 pipe_class pipe_slow()
6429 %{
6430   instruction_count(10);
6431   multiple_bundles;
6432   force_serialization;
6433   fixed_latency(16);
6434   INS01  : ISS(2); // Cannot dual issue with any other instruction
6435   LDST   : WR;
6436 %}
6437 
6438 // Empty pipeline class
6439 pipe_class pipe_class_empty()
6440 %{
6441   single_instruction;
6442   fixed_latency(0);
6443 %}
6444 
6445 // Default pipeline class.
6446 pipe_class pipe_class_default()
6447 %{
6448   single_instruction;
6449   fixed_latency(2);
6450 %}
6451 
6452 // Pipeline class for compares.
6453 pipe_class pipe_class_compare()
6454 %{
6455   single_instruction;
6456   fixed_latency(16);
6457 %}
6458 
6459 // Pipeline class for memory operations.
6460 pipe_class pipe_class_memory()
6461 %{
6462   single_instruction;
6463   fixed_latency(16);
6464 %}
6465 
6466 // Pipeline class for call.
6467 pipe_class pipe_class_call()
6468 %{
6469   single_instruction;
6470   fixed_latency(100);
6471 %}
6472 
6473 // Define the class for the Nop node.
6474 define %{
6475    MachNop = pipe_class_empty;
6476 %}
6477 
6478 %}
6479 //----------INSTRUCTIONS-------------------------------------------------------
6480 //
6481 // match      -- States which machine-independent subtree may be replaced
6482 //               by this instruction.
6483 // ins_cost   -- The estimated cost of this instruction is used by instruction
6484 //               selection to identify a minimum cost tree of machine
6485 //               instructions that matches a tree of machine-independent
6486 //               instructions.
6487 // format     -- A string providing the disassembly for this instruction.
6488 //               The value of an instruction's operand may be inserted
6489 //               by referring to it with a '$' prefix.
// opcode     -- Three instruction opcodes may be provided.  These are referred
//               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
//               indicate the type of machine instruction, while secondary
//               and tertiary are often used for prefix options or addressing
//               modes.
6496 // ins_encode -- A list of encode classes with parameters. The encode class
6497 //               name must have been defined in an 'enc_class' specification
6498 //               in the encode section of the architecture description.
6499 
6500 // ============================================================================
6501 // Memory (Load/Store) Instructions
6502 
6503 // Load Instructions
6504 
6505 // Load Byte (8 bit signed)
6506 instruct loadB(iRegINoSp dst, memory mem)
6507 %{
6508   match(Set dst (LoadB mem));
6509   predicate(!needs_acquiring_load(n));
6510 
6511   ins_cost(4 * INSN_COST);
6512   format %{ "ldrsbw  $dst, $mem\t# byte" %}
6513 
6514   ins_encode(aarch64_enc_ldrsbw(dst, mem));
6515 
6516   ins_pipe(iload_reg_mem);
6517 %}
6518 
6519 // Load Byte (8 bit signed) into long
6520 instruct loadB2L(iRegLNoSp dst, memory mem)
6521 %{
6522   match(Set dst (ConvI2L (LoadB mem)));
6523   predicate(!needs_acquiring_load(n->in(1)));
6524 
6525   ins_cost(4 * INSN_COST);
6526   format %{ "ldrsb  $dst, $mem\t# byte" %}
6527 
6528   ins_encode(aarch64_enc_ldrsb(dst, mem));
6529 
6530   ins_pipe(iload_reg_mem);
6531 %}
6532 
6533 // Load Byte (8 bit unsigned)
6534 instruct loadUB(iRegINoSp dst, memory mem)
6535 %{
6536   match(Set dst (LoadUB mem));
6537   predicate(!needs_acquiring_load(n));
6538 
6539   ins_cost(4 * INSN_COST);
6540   format %{ "ldrbw  $dst, $mem\t# byte" %}
6541 
6542   ins_encode(aarch64_enc_ldrb(dst, mem));
6543 
6544   ins_pipe(iload_reg_mem);
6545 %}
6546 
6547 // Load Byte (8 bit unsigned) into long
6548 instruct loadUB2L(iRegLNoSp dst, memory mem)
6549 %{
6550   match(Set dst (ConvI2L (LoadUB mem)));
6551   predicate(!needs_acquiring_load(n->in(1)));
6552 
6553   ins_cost(4 * INSN_COST);
6554   format %{ "ldrb  $dst, $mem\t# byte" %}
6555 
6556   ins_encode(aarch64_enc_ldrb(dst, mem));
6557 
6558   ins_pipe(iload_reg_mem);
6559 %}
6560 
6561 // Load Short (16 bit signed)
6562 instruct loadS(iRegINoSp dst, memory mem)
6563 %{
6564   match(Set dst (LoadS mem));
6565   predicate(!needs_acquiring_load(n));
6566 
6567   ins_cost(4 * INSN_COST);
6568   format %{ "ldrshw  $dst, $mem\t# short" %}
6569 
6570   ins_encode(aarch64_enc_ldrshw(dst, mem));
6571 
6572   ins_pipe(iload_reg_mem);
6573 %}
6574 
6575 // Load Short (16 bit signed) into long
6576 instruct loadS2L(iRegLNoSp dst, memory mem)
6577 %{
6578   match(Set dst (ConvI2L (LoadS mem)));
6579   predicate(!needs_acquiring_load(n->in(1)));
6580 
6581   ins_cost(4 * INSN_COST);
6582   format %{ "ldrsh  $dst, $mem\t# short" %}
6583 
6584   ins_encode(aarch64_enc_ldrsh(dst, mem));
6585 
6586   ins_pipe(iload_reg_mem);
6587 %}
6588 
6589 // Load Char (16 bit unsigned)
6590 instruct loadUS(iRegINoSp dst, memory mem)
6591 %{
6592   match(Set dst (LoadUS mem));
6593   predicate(!needs_acquiring_load(n));
6594 
6595   ins_cost(4 * INSN_COST);
6596   format %{ "ldrh  $dst, $mem\t# short" %}
6597 
6598   ins_encode(aarch64_enc_ldrh(dst, mem));
6599 
6600   ins_pipe(iload_reg_mem);
6601 %}
6602 
6603 // Load Short/Char (16 bit unsigned) into long
6604 instruct loadUS2L(iRegLNoSp dst, memory mem)
6605 %{
6606   match(Set dst (ConvI2L (LoadUS mem)));
6607   predicate(!needs_acquiring_load(n->in(1)));
6608 
6609   ins_cost(4 * INSN_COST);
6610   format %{ "ldrh  $dst, $mem\t# short" %}
6611 
6612   ins_encode(aarch64_enc_ldrh(dst, mem));
6613 
6614   ins_pipe(iload_reg_mem);
6615 %}
6616 
6617 // Load Integer (32 bit signed)
6618 instruct loadI(iRegINoSp dst, memory mem)
6619 %{
6620   match(Set dst (LoadI mem));
6621   predicate(!needs_acquiring_load(n));
6622 
6623   ins_cost(4 * INSN_COST);
6624   format %{ "ldrw  $dst, $mem\t# int" %}
6625 
6626   ins_encode(aarch64_enc_ldrw(dst, mem));
6627 
6628   ins_pipe(iload_reg_mem);
6629 %}
6630 
6631 // Load Integer (32 bit signed) into long
6632 instruct loadI2L(iRegLNoSp dst, memory mem)
6633 %{
6634   match(Set dst (ConvI2L (LoadI mem)));
6635   predicate(!needs_acquiring_load(n->in(1)));
6636 
6637   ins_cost(4 * INSN_COST);
6638   format %{ "ldrsw  $dst, $mem\t# int" %}
6639 
6640   ins_encode(aarch64_enc_ldrsw(dst, mem));
6641 
6642   ins_pipe(iload_reg_mem);
6643 %}
6644 
6645 // Load Integer (32 bit unsigned) into long
// A 32-bit ldrw zero-extends into the full 64-bit register, so the
// explicit (AndL ... mask) in the ideal subtree needs no extra
// instruction.  n.b. immL_32bits is presumably the 0xFFFFFFFF
// constant operand defined elsewhere in this file -- verify there.
instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  // acquiring-load check must reach through AndL and ConvI2L to the LoadI
  predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6658 
6659 // Load Long (64 bit signed)
// Load a 64-bit long from memory with a plain (non-acquiring) ldr.
instruct loadL(iRegLNoSp dst, memory mem)
%{
  match(Set dst (LoadL mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  // was "\t# int": disassembly annotation copy-pasted from loadI
  format %{ "ldr  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6672 
6673 // Load Range
6674 instruct loadRange(iRegINoSp dst, memory mem)
6675 %{
6676   match(Set dst (LoadRange mem));
6677 
6678   ins_cost(4 * INSN_COST);
6679   format %{ "ldrw  $dst, $mem\t# range" %}
6680 
6681   ins_encode(aarch64_enc_ldrw(dst, mem));
6682 
6683   ins_pipe(iload_reg_mem);
6684 %}
6685 
6686 // Load Pointer
6687 instruct loadP(iRegPNoSp dst, memory mem)
6688 %{
6689   match(Set dst (LoadP mem));
6690   predicate(!needs_acquiring_load(n));
6691 
6692   ins_cost(4 * INSN_COST);
6693   format %{ "ldr  $dst, $mem\t# ptr" %}
6694 
6695   ins_encode(aarch64_enc_ldr(dst, mem));
6696 
6697   ins_pipe(iload_reg_mem);
6698 %}
6699 
6700 // Load Compressed Pointer
6701 instruct loadN(iRegNNoSp dst, memory mem)
6702 %{
6703   match(Set dst (LoadN mem));
6704   predicate(!needs_acquiring_load(n));
6705 
6706   ins_cost(4 * INSN_COST);
6707   format %{ "ldrw  $dst, $mem\t# compressed ptr" %}
6708 
6709   ins_encode(aarch64_enc_ldrw(dst, mem));
6710 
6711   ins_pipe(iload_reg_mem);
6712 %}
6713 
6714 // Load Klass Pointer
6715 instruct loadKlass(iRegPNoSp dst, memory mem)
6716 %{
6717   match(Set dst (LoadKlass mem));
6718   predicate(!needs_acquiring_load(n));
6719 
6720   ins_cost(4 * INSN_COST);
6721   format %{ "ldr  $dst, $mem\t# class" %}
6722 
6723   ins_encode(aarch64_enc_ldr(dst, mem));
6724 
6725   ins_pipe(iload_reg_mem);
6726 %}
6727 
6728 // Load Narrow Klass Pointer
6729 instruct loadNKlass(iRegNNoSp dst, memory mem)
6730 %{
6731   match(Set dst (LoadNKlass mem));
6732   predicate(!needs_acquiring_load(n));
6733 
6734   ins_cost(4 * INSN_COST);
6735   format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}
6736 
6737   ins_encode(aarch64_enc_ldrw(dst, mem));
6738 
6739   ins_pipe(iload_reg_mem);
6740 %}
6741 
6742 // Load Float
6743 instruct loadF(vRegF dst, memory mem)
6744 %{
6745   match(Set dst (LoadF mem));
6746   predicate(!needs_acquiring_load(n));
6747 
6748   ins_cost(4 * INSN_COST);
6749   format %{ "ldrs  $dst, $mem\t# float" %}
6750 
6751   ins_encode( aarch64_enc_ldrs(dst, mem) );
6752 
6753   ins_pipe(pipe_class_memory);
6754 %}
6755 
6756 // Load Double
6757 instruct loadD(vRegD dst, memory mem)
6758 %{
6759   match(Set dst (LoadD mem));
6760   predicate(!needs_acquiring_load(n));
6761 
6762   ins_cost(4 * INSN_COST);
6763   format %{ "ldrd  $dst, $mem\t# double" %}
6764 
6765   ins_encode( aarch64_enc_ldrd(dst, mem) );
6766 
6767   ins_pipe(pipe_class_memory);
6768 %}
6769 
6770 
6771 // Load Int Constant
6772 instruct loadConI(iRegINoSp dst, immI src)
6773 %{
6774   match(Set dst src);
6775 
6776   ins_cost(INSN_COST);
6777   format %{ "mov $dst, $src\t# int" %}
6778 
6779   ins_encode( aarch64_enc_movw_imm(dst, src) );
6780 
6781   ins_pipe(ialu_imm);
6782 %}
6783 
6784 // Load Long Constant
6785 instruct loadConL(iRegLNoSp dst, immL src)
6786 %{
6787   match(Set dst src);
6788 
6789   ins_cost(INSN_COST);
6790   format %{ "mov $dst, $src\t# long" %}
6791 
6792   ins_encode( aarch64_enc_mov_imm(dst, src) );
6793 
6794   ins_pipe(ialu_imm);
6795 %}
6796 
6797 // Load Pointer Constant
6798 
6799 instruct loadConP(iRegPNoSp dst, immP con)
6800 %{
6801   match(Set dst con);
6802 
6803   ins_cost(INSN_COST * 4);
6804   format %{
6805     "mov  $dst, $con\t# ptr\n\t"
6806   %}
6807 
6808   ins_encode(aarch64_enc_mov_p(dst, con));
6809 
6810   ins_pipe(ialu_imm);
6811 %}
6812 
6813 // Load Null Pointer Constant
6814 
6815 instruct loadConP0(iRegPNoSp dst, immP0 con)
6816 %{
6817   match(Set dst con);
6818 
6819   ins_cost(INSN_COST);
6820   format %{ "mov  $dst, $con\t# NULL ptr" %}
6821 
6822   ins_encode(aarch64_enc_mov_p0(dst, con));
6823 
6824   ins_pipe(ialu_imm);
6825 %}
6826 
// Load Pointer Constant One
// Materializes the pointer value 1 (a sentinel, via aarch64_enc_mov_p1).

instruct loadConP1(iRegPNoSp dst, immP_1 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  // Fixed format text: this loads constant 1, not NULL; the previous
  // "# NULL ptr" comment was a copy-paste from loadConP0 and produced
  // misleading debug disassembly.
  format %{ "mov  $dst, $con\t# ptr 1" %}

  ins_encode(aarch64_enc_mov_p1(dst, con));

  ins_pipe(ialu_imm);
%}
6840 
// Load Poll Page Constant
// Address of the safepoint polling page, formed PC-relatively with adr.

instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Poll Page Ptr" %}

  ins_encode(aarch64_enc_mov_poll_page(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Byte Map Base Constant
// Card-table byte map base, also formed PC-relatively with adr.

instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Byte Map Base" %}

  ins_encode(aarch64_enc_mov_byte_map_base(dst, con));

  ins_pipe(ialu_imm);
%}
6868 
// Load Narrow Pointer Constant
// General compressed-oop constant; may need a multi-insn sequence.

instruct loadConN(iRegNNoSp dst, immN con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{ "mov  $dst, $con\t# compressed ptr" %}

  ins_encode(aarch64_enc_mov_n(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Null Pointer Constant
// Compressed NULL is a single instruction.

instruct loadConN0(iRegNNoSp dst, immN0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed NULL ptr" %}

  ins_encode(aarch64_enc_mov_n0(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Klass Constant

instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_mov_nk(dst, con));

  ins_pipe(ialu_imm);
%}
6910 
// Load Packed Float Constant
// immFPacked matches floats expressible as an fmov 8-bit immediate,
// so no constant-table load is needed.

instruct loadConF_packed(vRegF dst, immFPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST * 4);
  format %{ "fmovs  $dst, $con"%}
  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
  %}

  ins_pipe(fp_imm_s);
%}

// Load Float Constant
// General float constant: loaded from the constant table.

instruct loadConF(vRegF dst, immF con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 4);

  format %{
    "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
  %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_s);
%}
6941 
// Load Packed Double Constant
// immDPacked matches doubles expressible as an fmov 8-bit immediate.

instruct loadConD_packed(vRegD dst, immDPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST);
  format %{ "fmovd  $dst, $con"%}
  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
  %}

  ins_pipe(fp_imm_d);
%}
6954 
// Load Double Constant
// General double constant: loaded from the constant table.

instruct loadConD(vRegD dst, immD con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 5);
  // Fixed format text: this loads a double; it previously said
  // "float=$con" (copy-paste from loadConF).
  format %{
    "ldrd $dst, [$constantaddress]\t# load from constant table: double=$con\n\t"
  %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_d);
%}
6971 
// Store Instructions

// Store CMS card-mark Immediate
// Card-mark store where the preceding StoreStore barrier can be elided
// (predicate unnecessary_storestore); just a zero-byte store.
instruct storeimmCM0(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));
  predicate(unnecessary_storestore(n));

  ins_cost(INSN_COST);
  format %{ "storestore (elided)\n\t"
            "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}

// Store CMS card-mark Immediate with intervening StoreStore
// needed when using CMS with no conditional card marking
instruct storeimmCM0_ordered(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));

  ins_cost(INSN_COST * 2);
  format %{ "storestore\n\t"
            "dmb ishst"
            "\n\tstrb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0_ordered(mem));

  ins_pipe(istore_mem);
%}
7004 
// Store Byte
// Plain (non-releasing) byte store; releasing stores match storeB_volatile.
instruct storeB(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreB mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb(src, mem));

  ins_pipe(istore_reg_mem);
%}
7018 
7019 
7020 instruct storeimmB0(immI0 zero, memory mem)
7021 %{
7022   match(Set mem (StoreB mem zero));
7023   predicate(!needs_releasing_store(n));
7024 
7025   ins_cost(INSN_COST);
7026   format %{ "strb rscractch2, $mem\t# byte" %}
7027 
7028   ins_encode(aarch64_enc_strb0(mem));
7029 
7030   ins_pipe(istore_mem);
7031 %}
7032 
// Store Char/Short
// Plain (non-releasing) 16-bit store.
instruct storeC(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreC mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_strh(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store zero to a char/short slot using the zero register.
instruct storeimmC0(immI0 zero, memory mem)
%{
  match(Set mem (StoreC mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  zr, $mem\t# short" %}

  ins_encode(aarch64_enc_strh0(mem));

  ins_pipe(istore_mem);
%}
7059 
// Store Integer

// Plain (non-releasing) 32-bit store.
instruct storeI(iRegIorL2I src, memory mem)
%{
  match(Set mem(StoreI mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store zero to an int slot using the zero register.
instruct storeimmI0(immI0 zero, memory mem)
%{
  match(Set mem(StoreI mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  zr, $mem\t# int" %}

  ins_encode(aarch64_enc_strw0(mem));

  ins_pipe(istore_mem);
%}
7087 
// Store Long (64 bit signed)
// Plain (non-releasing) 64-bit store.
instruct storeL(iRegL src, memory mem)
%{
  match(Set mem (StoreL mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Fixed format comment: this is a long store, not "# int".
  format %{ "str  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}
7101 
// Store Long Immediate Zero (64 bit)
// Stores zero to a long slot using the zero register.
instruct storeimmL0(immL0 zero, memory mem)
%{
  match(Set mem (StoreL mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Fixed format comment: this is a long store, not "# int".
  format %{ "str  zr, $mem\t# long" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}
7115 
// Store Pointer
// Plain (non-releasing) 64-bit pointer store.
instruct storeP(iRegP src, memory mem)
%{
  match(Set mem (StoreP mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Pointer
// NULL pointer store using the zero register.
instruct storeimmP0(immP0 zero, memory mem)
%{
  match(Set mem (StoreP mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str zr, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}
7143 
// Store Compressed Pointer
// Plain (non-releasing) 32-bit compressed-oop store.
instruct storeN(iRegN src, memory mem)
%{
  match(Set mem (StoreN mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store compressed NULL by reusing rheapbase, which holds zero when both
// the oop and klass encodings have no base (see predicate).
instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
%{
  match(Set mem (StoreN mem zero));
  predicate(Universe::narrow_oop_base() == NULL &&
            Universe::narrow_klass_base() == NULL &&
            (!needs_releasing_store(n)));

  ins_cost(INSN_COST);
  format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}

  ins_encode(aarch64_enc_strw(heapbase, mem));

  ins_pipe(istore_reg_mem);
%}
7172 
// Store Float
// Plain (non-releasing) 32-bit FP store.
instruct storeF(vRegF src, memory mem)
%{
  match(Set mem (StoreF mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_strs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
// Plain (non-releasing) 64-bit FP store.
instruct storeD(vRegD src, memory mem)
%{
  match(Set mem (StoreD mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_strd(src, mem) );

  ins_pipe(pipe_class_memory);
%}
7203 
// Store Compressed Klass Pointer
// 32-bit compressed-klass store (same encoding as storeN).
instruct storeNKlass(iRegN src, memory mem)
%{
  predicate(!needs_releasing_store(n));
  match(Set mem (StoreNKlass mem src));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}
7217 
7218 // TODO
7219 // implement storeImmD0 and storeDImmPacked
7220 
// prefetch instructions
// Must be safe to execute with invalid address (cannot fault).

// Prefetch-for-write ahead of object allocation.
instruct prefetchalloc( memory mem ) %{
  match(PrefetchAllocation mem);

  ins_cost(INSN_COST);
  format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}

  ins_encode( aarch64_enc_prefetchw(mem) );

  ins_pipe(iload_prefetch);
%}
7234 
//  ---------------- volatile loads and stores ----------------
//
// These acquiring (ldar*) forms have no predicate; they are selected for
// the acquiring loads that the plain forms above exclude via
// !needs_acquiring_load(n).  They use a bare indirect address because the
// ldar* instructions take only a base register.

// Load Byte (8 bit signed)
instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit signed) into long
instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned)
instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned) into long
instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short (16 bit signed)
instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarshw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Char/Short (16 bit unsigned)
instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarhw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarhw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short/Char (16 bit unsigned) into long
instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarh(dst, mem));

  ins_pipe(pipe_serial);
%}
7326 
// Load Short/Char (16 bit signed) into long
instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(VOLATILE_REF_COST);
  // Fixed format text: the encoding emits the sign-extending ldarsh,
  // not the zero-extending ldarh the old format claimed.
  format %{ "ldarsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarsh(dst, mem));

  ins_pipe(pipe_serial);
%}
7339 
// Load Integer (32 bit signed)
instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadI mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Integer (32 bit unsigned) into long
// The AndL with the 0xFFFFFFFF mask is absorbed: ldarw already zero-extends.
instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}
7365 
// Load Long (64 bit signed)
instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(VOLATILE_REF_COST);
  // Fixed format comment: this is a long load, not "# int".
  format %{ "ldar  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}
7378 
// Load Pointer
instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadP mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Compressed Pointer
instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadN mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Float
// Acquiring FP load: ldar into a scratch GPR then moved to the FP register
// (see aarch64_enc_fldars) since ldar has no FP-register form.
instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadF mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldars  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_fldars(dst, mem) );

  ins_pipe(pipe_serial);
%}

// Load Double
instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadD mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldard  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_fldard(dst, mem) );

  ins_pipe(pipe_serial);
%}
7430 
// Store Byte
// Releasing (stlr*) stores, matched when needs_releasing_store(n) holds
// and the plain forms are therefore excluded.
instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreB mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_stlrb(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Char/Short
instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreC mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_stlrh(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Integer

instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem(StoreI mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}
7470 
// Store Long (64 bit signed)
instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreL mem src));

  ins_cost(VOLATILE_REF_COST);
  // Fixed format comment: this is a long store, not "# int".
  format %{ "stlr  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}
7483 
// Store Pointer
instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreP mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Pointer
instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreN mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Float
// Releasing FP store via scratch GPR (see aarch64_enc_fstlrs), since
// stlr has no FP-register form.
instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreF mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_fstlrs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreD mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_fstlrd(src, mem) );

  ins_pipe(pipe_class_memory);
%}
7538 
7539 //  ---------------- end of volatile loads and stores ----------------
7540 
7541 // ============================================================================
7542 // BSWAP Instructions
7543 
7544 instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
7545   match(Set dst (ReverseBytesI src));
7546 
7547   ins_cost(INSN_COST);
7548   format %{ "revw  $dst, $src" %}
7549 
7550   ins_encode %{
7551     __ revw(as_Register($dst$$reg), as_Register($src$$reg));
7552   %}
7553 
7554   ins_pipe(ialu_reg);
7555 %}
7556 
7557 instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
7558   match(Set dst (ReverseBytesL src));
7559 
7560   ins_cost(INSN_COST);
7561   format %{ "rev  $dst, $src" %}
7562 
7563   ins_encode %{
7564     __ rev(as_Register($dst$$reg), as_Register($src$$reg));
7565   %}
7566 
7567   ins_pipe(ialu_reg);
7568 %}
7569 
7570 instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
7571   match(Set dst (ReverseBytesUS src));
7572 
7573   ins_cost(INSN_COST);
7574   format %{ "rev16w  $dst, $src" %}
7575 
7576   ins_encode %{
7577     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
7578   %}
7579 
7580   ins_pipe(ialu_reg);
7581 %}
7582 
7583 instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
7584   match(Set dst (ReverseBytesS src));
7585 
7586   ins_cost(INSN_COST);
7587   format %{ "rev16w  $dst, $src\n\t"
7588             "sbfmw $dst, $dst, #0, #15" %}
7589 
7590   ins_encode %{
7591     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
7592     __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
7593   %}
7594 
7595   ins_pipe(ialu_reg);
7596 %}
7597 
7598 // ============================================================================
7599 // Zero Count Instructions
7600 
7601 instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
7602   match(Set dst (CountLeadingZerosI src));
7603 
7604   ins_cost(INSN_COST);
7605   format %{ "clzw  $dst, $src" %}
7606   ins_encode %{
7607     __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
7608   %}
7609 
7610   ins_pipe(ialu_reg);
7611 %}
7612 
7613 instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
7614   match(Set dst (CountLeadingZerosL src));
7615 
7616   ins_cost(INSN_COST);
7617   format %{ "clz   $dst, $src" %}
7618   ins_encode %{
7619     __ clz(as_Register($dst$$reg), as_Register($src$$reg));
7620   %}
7621 
7622   ins_pipe(ialu_reg);
7623 %}
7624 
7625 instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
7626   match(Set dst (CountTrailingZerosI src));
7627 
7628   ins_cost(INSN_COST * 2);
7629   format %{ "rbitw  $dst, $src\n\t"
7630             "clzw   $dst, $dst" %}
7631   ins_encode %{
7632     __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
7633     __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
7634   %}
7635 
7636   ins_pipe(ialu_reg);
7637 %}
7638 
7639 instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
7640   match(Set dst (CountTrailingZerosL src));
7641 
7642   ins_cost(INSN_COST * 2);
7643   format %{ "rbit   $dst, $src\n\t"
7644             "clz    $dst, $dst" %}
7645   ins_encode %{
7646     __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
7647     __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
7648   %}
7649 
7650   ins_pipe(ialu_reg);
7651 %}
7652 
//---------- Population Count Instructions -------------------------------------
//
// Population count has no GPR instruction; the value is moved to a SIMD
// register, counted per byte with cnt, and summed with addv.

instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "movw   $src, $src\n\t"
            "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    // NOTE(review): the movw writes $src in place; it is a self-move that
    // only clears the upper 32 bits, so the int value is unchanged —
    // confirm this is why no USE_KILL effect is declared on src.
    __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Memory-operand form: load the int straight into the SIMD register
// (ldrs) and count there, skipping the GPR entirely.
instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrs   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Memory-operand form of popCountL: ldrd directly into the SIMD register.
instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrd   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
7742 
7743 // ============================================================================
7744 // MemBar Instruction
7745 
7746 instruct load_fence() %{
7747   match(LoadFence);
7748   ins_cost(VOLATILE_REF_COST);
7749 
7750   format %{ "load_fence" %}
7751 
7752   ins_encode %{
7753     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
7754   %}
7755   ins_pipe(pipe_serial);
7756 %}
7757 
// Acquire barrier elided when the preceding load already has acquire
// semantics (predicate unnecessary_acquire); only a comment is emitted.
instruct unnecessary_membar_acquire() %{
  predicate(unnecessary_acquire(n));
  match(MemBarAcquire);
  ins_cost(0);

  format %{ "membar_acquire (elided)" %}

  ins_encode %{
    __ block_comment("membar_acquire (elided)");
  %}

  ins_pipe(pipe_class_empty);
%}

// Full acquire barrier: LoadLoad|LoadStore.
instruct membar_acquire() %{
  match(MemBarAcquire);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_acquire\n\t"
            "dmb ish" %}

  ins_encode %{
    __ block_comment("membar_acquire");
    __ membar(Assembler::LoadLoad|Assembler::LoadStore);
  %}

  ins_pipe(pipe_serial);
%}
7786 
7787 
// Lock-acquire barrier is always elided: the lock acquisition itself
// provides the ordering, so only a comment is emitted.
instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_acquire_lock (elided)" %}

  ins_encode %{
    __ block_comment("membar_acquire_lock (elided)");
  %}

  ins_pipe(pipe_serial);
%}

// StoreFence: orders prior loads and stores before subsequent stores.
instruct store_fence() %{
  match(StoreFence);
  ins_cost(VOLATILE_REF_COST);

  format %{ "store_fence" %}

  ins_encode %{
    __ membar(Assembler::LoadStore|Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}
7812 
// Release barrier elided when the following store already has release
// semantics (predicate unnecessary_release).
instruct unnecessary_membar_release() %{
  predicate(unnecessary_release(n));
  match(MemBarRelease);
  ins_cost(0);

  format %{ "membar_release (elided)" %}

  ins_encode %{
    __ block_comment("membar_release (elided)");
  %}
  ins_pipe(pipe_serial);
%}

// Full release barrier: LoadStore|StoreStore.
instruct membar_release() %{
  match(MemBarRelease);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_release\n\t"
            "dmb ish" %}

  ins_encode %{
    __ block_comment("membar_release");
    __ membar(Assembler::LoadStore|Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}

// Store-store barrier only.
instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(VOLATILE_REF_COST);

  format %{ "MEMBAR-store-store" %}

  ins_encode %{
    __ membar(Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}
7851 
// Lock-release barrier is always elided: the lock release itself
// provides the ordering.
instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_release_lock (elided)" %}

  ins_encode %{
    __ block_comment("membar_release_lock (elided)");
  %}

  ins_pipe(pipe_serial);
%}

// Volatile barrier elided when surrounding acquiring/releasing accesses
// already provide the ordering (predicate unnecessary_volatile).
instruct unnecessary_membar_volatile() %{
  predicate(unnecessary_volatile(n));
  match(MemBarVolatile);
  ins_cost(0);

  format %{ "membar_volatile (elided)" %}

  ins_encode %{
    __ block_comment("membar_volatile (elided)");
  %}

  ins_pipe(pipe_serial);
%}

// Full volatile barrier (StoreLoad).  Costed very high so the matcher
// strongly prefers the elided form where legal.
instruct membar_volatile() %{
  match(MemBarVolatile);
  ins_cost(VOLATILE_REF_COST*100);

  format %{ "membar_volatile\n\t"
             "dmb ish"%}

  ins_encode %{
    __ block_comment("membar_volatile");
    __ membar(Assembler::StoreLoad);
  %}

  ins_pipe(pipe_serial);
%}
7893 
7894 // ============================================================================
7895 // Cast/Convert Instructions
7896 
7897 instruct castX2P(iRegPNoSp dst, iRegL src) %{
7898   match(Set dst (CastX2P src));
7899 
7900   ins_cost(INSN_COST);
7901   format %{ "mov $dst, $src\t# long -> ptr" %}
7902 
7903   ins_encode %{
7904     if ($dst$$reg != $src$$reg) {
7905       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
7906     }
7907   %}
7908 
7909   ins_pipe(ialu_reg);
7910 %}
7911 
// CastP2X applied to a narrow (compressed) pointer register; plain move,
// elided when the registers coincide.
instruct castN2X(iRegLNoSp dst, iRegN src) %{
  match(Set dst (CastP2X src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# ptr -> long" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}
7926 
// Reinterpret a pointer as a long; plain move, elided when the registers
// coincide.
instruct castP2X(iRegLNoSp dst, iRegP src) %{
  match(Set dst (CastP2X src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# ptr -> long" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}

// Convert oop into int for vectors alignment masking
instruct convP2I(iRegINoSp dst, iRegP src) %{
  match(Set dst (ConvL2I (CastP2X src)));

  ins_cost(INSN_COST);
  format %{ "movw $dst, $src\t# ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
7954 
// Convert compressed oop into int for vectors alignment masking
// in case of 32bit oops (heap < 4Gb).
instruct convN2I(iRegINoSp dst, iRegN src)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  ins_cost(INSN_COST);
  // Fixed format text: use the movw mnemonic actually emitted and the
  // '$dst' operand syntax — the old "mov dst" lacked the '$', so the
  // destination register was never substituted into debug output.
  format %{ "movw $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
7970 
7971 
7972 // Convert oop pointer into compressed form
7973 instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
7974   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
7975   match(Set dst (EncodeP src));
7976   effect(KILL cr);
7977   ins_cost(INSN_COST * 3);
7978   format %{ "encode_heap_oop $dst, $src" %}
7979   ins_encode %{
7980     Register s = $src$$Register;
7981     Register d = $dst$$Register;
7982     __ encode_heap_oop(d, s);
7983   %}
7984   ins_pipe(ialu_reg);
7985 %}
7986 
instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  // Encode an oop statically known non-null; no null check is needed.
  // NOTE(review): cr is declared but there is no effect(KILL cr), unlike
  // encodeHeapOop above -- confirm encode_heap_oop_not_null leaves the
  // flags intact, or drop/kill the operand as appropriate.
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  match(Set dst (EncodeP src));
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
7997 
instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  // Decode a possibly-null narrow oop (not provably NotNull/Constant).
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop $dst, $src" %}
  ins_encode %{
    // Null-preserving decode: zero stays zero.
    __ decode_heap_oop($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
8011 
instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  // Decode a narrow oop known non-null (NotNull or Constant type).
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    // No null check required, so the cheaper decode variant is used.
    __ decode_heap_oop_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
8025 
8026 // n.b. AArch64 implementations of encode_klass_not_null and
8027 // decode_klass_not_null do not modify the flags register so, unlike
8028 // Intel, we don't kill CR as a side effect here
8029 
instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
  match(Set dst (EncodePKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "encode_klass_not_null $dst,$src" %}

  ins_encode %{
    // Compress the (never-null) klass pointer; on AArch64 this leaves
    // the flags untouched, hence no KILL cr (see comment above).
    __ encode_klass_not_null($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
8044 
instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
  match(Set dst (DecodeNKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "decode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register d = $dst$$Register;
    Register s = $src$$Register;
    // The single-register (in-place) variant is only used when the
    // allocator assigned the same register to src and dst.
    if (d == s) {
      __ decode_klass_not_null(d);
    } else {
      __ decode_klass_not_null(d, s);
    }
  %}

  ins_pipe(ialu_reg);
%}
8063 
instruct checkCastPP(iRegPNoSp dst)
%{
  match(Set dst (CheckCastPP dst));

  // Pure type-system node: src and dst share a register, no code emitted.
  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}
8073 
instruct castPP(iRegPNoSp dst)
%{
  match(Set dst (CastPP dst));

  // Compiler bookkeeping only; emits no instructions.
  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}
8083 
instruct castII(iRegI dst)
%{
  match(Set dst (CastII dst));

  // Compiler bookkeeping only; emits no instructions and costs nothing.
  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(pipe_class_empty);
%}
8094 
8095 // ============================================================================
8096 // Atomic operation instructions
8097 //
8098 // Intel and SPARC both implement Ideal Node LoadPLocked and
8099 // Store{PIL}Conditional instructions using a normal load for the
8100 // LoadPLocked and a CAS for the Store{PIL}Conditional.
8101 //
8102 // The ideal code appears only to use LoadPLocked/StorePLocked as a
8103 // pair to lock object allocations from Eden space when not using
8104 // TLABs.
8105 //
8106 // There does not appear to be a Load{IL}Locked Ideal Node and the
8107 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
8108 // and to use StoreIConditional only for 32-bit and StoreLConditional
8109 // only for 64-bit.
8110 //
8111 // We implement LoadPLocked and StorePLocked instructions using,
8112 // respectively the AArch64 hw load-exclusive and store-conditional
8113 // instructions. Whereas we must implement each of
8114 // Store{IL}Conditional using a CAS which employs a pair of
8115 // instructions comprising a load-exclusive followed by a
8116 // store-conditional.
8117 
8118 
8119 // Locked-load (linked load) of the current heap-top
8120 // used when updating the eden heap top
8121 // implemented using ldaxr on AArch64
8122 
instruct loadPLocked(iRegPNoSp dst, indirect mem)
%{
  match(Set dst (LoadPLocked mem));

  ins_cost(VOLATILE_REF_COST);

  format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}

  // Load-exclusive with acquire; pairs with storePConditional below to
  // form the lock/update cycle on the heap top.
  ins_encode(aarch64_enc_ldaxr(dst, mem));

  ins_pipe(pipe_serial);
%}
8135 
8136 // Conditional-store of the updated heap-top.
8137 // Used during allocation of the shared heap.
8138 // Sets flag (EQ) on success.
8139 // implemented using stlxr on AArch64.
8140 
instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
%{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

 // TODO
 // do we need to do a store-conditional release or can we just use a
 // plain store-conditional?

  format %{
    "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  // Store-exclusive (release form). stlxr writes 0 to rscratch1 on
  // success, so the trailing cmpw sets EQ exactly when the store took.
  // NOTE(review): oldval is not passed to the encoding -- correctness
  // relies on the exclusive monitor set by loadPLocked, not a compare.
  ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));

  ins_pipe(pipe_serial);
%}
8160 
8161 
8162 // storeLConditional is used by PhaseMacroExpand::expand_lock_node
8163 // when attempting to rebias a lock towards the current thread.  We
8164 // must use the acquire form of cmpxchg in order to guarantee acquire
8165 // semantics in this case.
instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  // Acquire form of the 64-bit CAS -- required for the lock-rebias path
  // (see the comment above this rule).
  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
8181 
8182 // storeIConditional also has acquire semantics, for no better reason
8183 // than matching storeLConditional.  At the time of writing this
8184 // comment storeIConditional was not used anywhere by AArch64.
instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  // 32-bit acquire CAS, mirroring storeLConditional (see comment above).
  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
8200 
8201 // standard CompareAndSwapX when we are using barriers
8202 // these have higher priority than the rules selected by a predicate
8203 
8204 // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
8205 // can't match them
8206 
instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  // Strong byte CAS: res <- 1 if the swap succeeded, else 0 (via cset).
  match(Set res (CompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgb $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgb(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8224 
instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  // Strong short CAS: res <- 1 on success, 0 on failure.
  match(Set res (CompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgs $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgs(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8242 
instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  // Strong int CAS: res <- 1 on success, 0 on failure.
  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8260 
instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  // Strong long CAS: res (an int) <- 1 on success, 0 on failure.
  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8278 
instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  // Strong pointer CAS: res (an int) <- 1 on success, 0 on failure.
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8296 
instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  // Strong narrow-oop CAS (32-bit): res <- 1 on success, 0 on failure.
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8314 
8315 // alternative CompareAndSwapX when we are eliding barriers
8316 
instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  // Acquire form of byte CAS, selected when the CAS itself must provide
  // acquire semantics (needs_acquiring_load_exclusive); lower ins_cost
  // makes it preferred over the plain rule when the predicate holds.
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapB mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgb_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgb_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8335 
instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  // Acquire form of short CAS (see compareAndSwapBAcq note).
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapS mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgs_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgs_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8354 
instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  // Acquire form of int CAS (see compareAndSwapBAcq note).
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8373 
instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  // Acquire form of long CAS (see compareAndSwapBAcq note).
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8392 
instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  // Acquire form of pointer CAS (see compareAndSwapBAcq note).
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8411 
instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  // Acquire form of narrow-oop CAS (see compareAndSwapBAcq note).
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8430 
8431 
8432 // ---------------------------------------------------------------------
8433 
8434 
8435 // BEGIN This section of the file is automatically generated. Do not edit --------------
8436 
8437 // Sundry CAS operations.  Note that release is always true,
8438 // regardless of the memory ordering of the CAS.  This is because we
8439 // need the volatile case to be sequentially consistent but there is
8440 // no trailing StoreLoad barrier emitted by C2.  Unfortunately we
8441 // can't check the type of memory ordering here, so we always emit a
8442 // STLXR.
8443 
8444 // This section is generated from aarch64_ad_cas.m4
8445 
8446 
8447 
instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  // Strong byte CAE: res <- previous value at mem, sign-extended to int.
  // NOTE(review): format says "weak" but /*weak*/ is false -- cosmetic
  // m4 template quirk; any fix belongs in aarch64_ad_cas.m4, not here.
  match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgb $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxtbw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8463 
instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  // Strong short CAE: res <- previous value, sign-extended via sxthw.
  // ("weak" in the format string is an m4 template quirk; weak is false.)
  match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgs $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxthw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8479 
instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  // Strong int CAE: res <- previous 32-bit value at mem.
  // ("weak" in the format string is an m4 template quirk; weak is false.)
  match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgw $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8494 
instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  // Strong long CAE: res <- previous 64-bit value at mem.
  // ("weak" in the format string is an m4 template quirk; weak is false.)
  match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8509 
instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  // Strong narrow-oop CAE: res <- previous 32-bit narrow oop at mem.
  // ("weak" in the format string is an m4 template quirk; weak is false.)
  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgw $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8524 
instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  // Strong pointer CAE: res <- previous 64-bit pointer at mem.
  // ("weak" in the format string is an m4 template quirk; weak is false.)
  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8539 
instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  // Acquire form of byte CAE (acquire=true in the cmpxchg call),
  // selected when needs_acquiring_load_exclusive(n).
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgb_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxtbw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8556 
instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  // Acquire form of short CAE (acquire=true in the cmpxchg call).
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgs_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxthw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8573 
8574 
instruct compareAndExchangeIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  // Acquire form of int CAE (acquire=true in the cmpxchg call).
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgw_acq $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8590 
instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  // Acquire form of long CAE (acquire=true in the cmpxchg call).
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8606 
8607 
instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  // Acquire form of narrow-oop CAE (acquire=true in the cmpxchg call).
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgw_acq $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8623 
instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  // Acquire form of pointer CAE (acquire=true in the cmpxchg call).
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8639 
instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  // Weak byte CAS (may fail spuriously): res <- 1 on success, else 0.
  // The old value is discarded (noreg), only the success flag survives.
  match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgb $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8656 
instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  // Weak short CAS (may fail spuriously): res <- 1 on success, else 0.
  match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgs $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8673 
instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  // Weak int CAS (may fail spuriously): res <- 1 on success, else 0.
  match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgw $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8690 
instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  // Weak long CAS (may fail spuriously): res (int) <- 1 on success, else 0.
  match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8707 
instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  // Weak narrow-oop CAS (may fail spuriously): res <- 1 on success, else 0.
  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgw $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8724 
instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  // Weak pointer CAS (may fail spuriously): res <- 1 on success, else 0.
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8741 
instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  // Acquire form of weak byte CAS (acquire=true in the cmpxchg call),
  // selected when needs_acquiring_load_exclusive(n).
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgb_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8759 
instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  // Acquire form of weak short CAS (acquire=true in the cmpxchg call).
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgs_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8777 
instruct weakCompareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  // Acquire form of weak int CAS (acquire=true in the cmpxchg call).
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgw_acq $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8795 
instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  // Acquire form of weak long CAS (acquire=true in the cmpxchg call).
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8813 
instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  // Acquire form of weak narrow-oop CAS (acquire=true in the cmpxchg call).
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgw_acq $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8831 
// Weak compare-and-swap of a pointer (xword) with acquire/release
// ordering; $res receives 1 on success, 0 on failure.
// NOTE(review): inside the auto-generated section -- prefer changing the
// generator over hand edits.
instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    // /*weak*/ true: no retry loop; success is reported via the flags.
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8849 
8850 // END This section of the file is automatically generated. Do not edit --------------
8851 // ---------------------------------------------------------------------
8852 
// Atomic exchange (int): stores $newv to [$mem]; $prev receives the
// GetAndSet result (the value previously held there).
instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetI mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8862 
// Atomic exchange (long): stores $newv to [$mem]; $prev receives the
// GetAndSet result (the value previously held there).
instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) %{
  match(Set prev (GetAndSetL mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8872 
// Atomic exchange (narrow oop, 32-bit): stores $newv to [$mem]; $prev
// receives the GetAndSet result.
instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetN mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8882 
// Atomic exchange (pointer): stores $newv to [$mem]; $prev receives the
// GetAndSet result.
instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) %{
  match(Set prev (GetAndSetP mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8892 
// Acquiring variant of get_and_setI (atomic_xchgalw); selected when
// needs_acquiring_load_exclusive(n) holds, at lower cost.
instruct get_and_setIAcq(indirect mem, iRegI newv, iRegINoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetI mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchgw_acq  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8903 
// Acquiring variant of get_and_setL (atomic_xchgal); selected when
// needs_acquiring_load_exclusive(n) holds, at lower cost.
instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetL mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchg_acq  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8914 
// Acquiring variant of get_and_setN (atomic_xchgalw); selected when
// needs_acquiring_load_exclusive(n) holds, at lower cost.
instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetN mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchgw_acq $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8925 
// Acquiring variant of get_and_setP (atomic_xchgal); selected when
// needs_acquiring_load_exclusive(n) holds, at lower cost.
instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetP mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchg_acq  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8936 
8937 
// Atomic add (long, register increment): [$mem] += $incr; $newval
// receives the GetAndAdd result fetched by the atomic op.
instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8947 
// As get_and_addL, but matched only when the fetched value is unused
// (result_not_used()): the result register is noreg, so it is discarded.
instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8958 
// Atomic add (long, immediate increment): the increment is an encodable
// add/sub immediate (immLAddSub), passed as $incr$$constant.
instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8968 
// As get_and_addLi, but matched only when the fetched value is unused
// (result_not_used()); result discarded via noreg.
instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8979 
// Atomic add (int, register increment, 32-bit atomic_addw): [$mem] +=
// $incr; $newval receives the GetAndAdd result.
instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8989 
// As get_and_addI, but matched only when the fetched value is unused
// (result_not_used()); result discarded via noreg.
instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9000 
// Atomic add (int, immediate increment): increment is an encodable
// add/sub immediate (immIAddSub), passed as $incr$$constant.
instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9010 
// As get_and_addIi, but matched only when the fetched value is unused
// (result_not_used()); result discarded via noreg.
instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9021 
// Acquiring variant of get_and_addL (atomic_addal); selected when
// needs_acquiring_load_exclusive(n) holds, at lower cost.
instruct get_and_addLAcq(indirect mem, iRegLNoSp newval, iRegL incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addL_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9032 
// Acquiring, result-unused variant of get_and_addL: both predicates must
// hold; the fetched value is discarded via noreg.
instruct get_and_addL_no_resAcq(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addL_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9043 
// Acquiring variant of get_and_addLi (immediate increment, atomic_addal).
instruct get_and_addLiAcq(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addL_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9054 
// Acquiring, result-unused variant of get_and_addLi; result discarded
// via noreg.
instruct get_and_addLi_no_resAcq(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addL_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9065 
// Acquiring variant of get_and_addI (32-bit atomic_addalw).
instruct get_and_addIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addI_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9076 
// Acquiring, result-unused variant of get_and_addI; result discarded
// via noreg.
instruct get_and_addI_no_resAcq(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addI_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9087 
// Acquiring variant of get_and_addIi (immediate increment, atomic_addalw).
instruct get_and_addIiAcq(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addI_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9098 
// Acquiring, result-unused variant of get_and_addIi; result discarded
// via noreg.
instruct get_and_addIi_no_resAcq(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addI_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9109 
9110 // Manifest a CmpL result in an integer register.
9111 // (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
// Three-way long compare manifested into an int register:
//   $dst := ($src1 < $src2) ? -1 : (($src1 > $src2) ? 1 : 0)
// cmp sets the flags, csetw gives 0/1 for NE, and cnegw negates the
// result when the comparison was LT, producing -1/0/1.
instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    __ cmp($src1$$Register, $src2$$Register);
    __ csetw($dst$$Register, Assembler::NE);                 // 0 if EQ, else 1
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT); // negate if LT
  %}

  ins_pipe(pipe_class_default);
%}
9132 
// Three-way long compare against an add/sub-encodable immediate,
// manifested into an int register as -1/0/1 (see cmpL3_reg_reg).
instruct cmpL3_reg_imm(iRegINoSp dst, iRegL src1, immLAddSub src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    // A negative constant cannot be encoded as a subs immediate, so
    // compare by adding its negation instead. NOTE(review): assumes
    // immLAddSub admits only encodable add/sub immediates, so -con
    // cannot overflow -- confirm against the operand definition.
    int32_t con = (int32_t)$src2$$constant;
    if (con < 0) {
      __ adds(zr, $src1$$Register, -con);
    } else {
      __ subs(zr, $src1$$Register, con);
    }
    __ csetw($dst$$Register, Assembler::NE);                 // 0 if EQ, else 1
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT); // negate if LT
  %}

  ins_pipe(pipe_class_default);
%}
9157 
9158 // ============================================================================
9159 // Conditional Move Instructions
9160 
9161 // n.b. we have identical rules for both a signed compare op (cmpOp)
9162 // and an unsigned compare op (cmpOpU). it would be nice if we could
9163 // define an op class which merged both inputs and use it to type the
// argument to a single rule. Unfortunately this fails because the
9165 // opclass does not live up to the COND_INTER interface of its
9166 // component operands. When the generic code tries to negate the
// operand it ends up running the generic MachOper::negate method
9168 // which throws a ShouldNotHappen. So, we have to provide two flavours
9169 // of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
9170 
// Conditional move (int, signed compare): $dst := $cmp ? $src2 : $src1.
// csel selects its first source operand when the condition holds, which
// is why $src2 is passed first.
instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9186 
// Conditional move (int, unsigned compare): identical encoding to
// cmovI_reg_reg; duplicated because cmpOp/cmpOpU cannot share an opclass
// (see the note above the cmov rules).
instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9202 
9203 // special cases where one arg is zero
9204 
9205 // n.b. this is selected in preference to the rule above because it
9206 // avoids loading constant 0 into a source register
9207 
9208 // TODO
9209 // we ought only to be able to cull one of these variants as the ideal
9210 // transforms ought always to order the zero consistently (to left/right?)
9211 
// Conditional move (int, signed) with a zero first arm:
// $dst := $cmp ? $src : 0 -- uses zr instead of loading the constant.
instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9227 
// Unsigned-compare twin of cmovI_zero_reg: $dst := $cmp ? $src : 0.
instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9243 
// Conditional move (int, signed) with a zero second arm:
// $dst := $cmp ? 0 : $src -- uses zr instead of loading the constant.
instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9259 
// Unsigned-compare twin of cmovI_reg_zero: $dst := $cmp ? 0 : $src.
instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9275 
9276 // special case for creating a boolean 0 or 1
9277 
9278 // n.b. this is selected in preference to the rule above because it
9279 // avoids loading constants 0 and 1 into a source register
9280 
// Boolean materialization (signed compare): $dst := $cmp ? 0 : 1, done
// with a single csincw of the zero register (zr + 1 on the false arm) --
// no constant loads needed.
instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
9299 
// Unsigned-compare twin of cmovI_reg_zero_one: $dst := $cmp ? 0 : 1 via
// a single csincw of zr.
instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
9318 
// Conditional move (long, signed compare): $dst := $cmp ? $src2 : $src1
// via 64-bit csel ($src2 first: csel picks it when the condition holds).
instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9334 
// Unsigned-compare twin of cmovL_reg_reg.
instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9350 
9351 // special cases where one arg is zero
9352 
// Conditional move (long, signed) with zero second arm:
// $dst := $cmp ? 0 : $src, using zr for the constant.
instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9368 
// Unsigned-compare twin of cmovL_reg_zero: $dst := $cmp ? 0 : $src.
instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9384 
// Conditional move (long, signed) with zero first arm:
// $dst := $cmp ? $src : 0, using zr for the constant.
instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9400 
// Unsigned-compare twin of cmovL_zero_reg: $dst := $cmp ? $src : 0.
instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9416 
// Conditional move (pointer, signed compare): $dst := $cmp ? $src2 : $src1.
instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9432 
// Unsigned-compare twin of cmovP_reg_reg.
instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9448 
9449 // special cases where one arg is zero
9450 
// Conditional move (pointer, signed) with null second arm:
// $dst := $cmp ? null : $src, using zr for the null constant.
instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9466 
// Unsigned-compare twin of cmovP_reg_zero: $dst := $cmp ? null : $src.
instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9482 
// Conditional move (pointer, signed) with null first arm:
// $dst := $cmp ? $src : null, using zr for the null constant.
instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9498 
// Unsigned-compare twin of cmovP_zero_reg: $dst := $cmp ? $src : null.
instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9514 
// Conditional move (narrow oop, signed compare, 32-bit cselw):
// $dst := $cmp ? $src2 : $src1.
instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9530 
// Conditional move (narrow oop, unsigned compare, 32-bit cselw):
// $dst := $cmp ? $src2 : $src1.
// Fixed: the format comment previously said "signed" although this is
// the cmpOpU (unsigned) rule -- now matches its siblings.
instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9546 
9547 // special cases where one arg is zero
9548 
// Conditional move (narrow oop, signed) with null second arm:
// $dst := $cmp ? 0 : $src, using zr for the constant.
instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9564 
// Unsigned-compare twin of cmovN_reg_zero: $dst := $cmp ? 0 : $src.
instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9580 
// Conditional move (narrow oop, signed) with null first arm:
// $dst := $cmp ? $src : 0, using zr for the constant.
instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9596 
// Unsigned-compare twin of cmovN_zero_reg: $dst := $cmp ? $src : 0.
instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9612 
// Conditional select of float registers (signed compare):
// $dst := $cmp ? $src2 : $src1 via fcsels ($src2 passed first).
instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
9630 
// Unsigned-compare twin of cmovF_reg.
instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
9648 
// Conditional select of double registers (signed compare):
// $dst := $cmp ? $src2 : $src1 via fcseld.
// Fixed: the format comment said "cmove float" on this double rule.
instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
9666 
// Unsigned-compare twin of cmovD_reg (double conditional select).
// Fixed: the format comment said "cmove float" on this double rule.
instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
9684 
9685 // ============================================================================
9686 // Arithmetic Instructions
9687 //
9688 
9689 // Integer Addition
9690 
9691 // TODO
9692 // these currently employ operations which do not set CR and hence are
9693 // not flagged as killing CR but we would like to isolate the cases
9694 // where we want to set flags from those where we don't. need to work
9695 // out how to do that.
9696 
// Integer addition, register + register (32-bit addw).
instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9711 
// Integer addition, register + encodable immediate; shares the
// aarch64_enc_addsubw_imm encoder with the subtract rules.
instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
9725 
9726 instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
9727   match(Set dst (AddI (ConvL2I src1) src2));
9728 
9729   ins_cost(INSN_COST);
9730   format %{ "addw $dst, $src1, $src2" %}
9731 
9732   // use opcode to indicate that this is an add not a sub
9733   opcode(0x0);
9734 
9735   ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));
9736 
9737   ins_pipe(ialu_reg_imm);
9738 %}
9739 
9740 // Pointer Addition
// Pointer plus 64-bit register offset.
instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Pointer plus int offset: folds the ConvI2L into the add's sxtw
// extended-register form.
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
  match(Set dst (AddP src1 (ConvI2L src2)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Pointer plus scaled long index: folds the shift into an lsl-scaled
// address computed via lea.
instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL src2 scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::lsl($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Pointer plus scaled int index: folds both the ConvI2L and the shift
// into a sign-extended, scaled address computed via lea.
instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::sxtw($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Sign-extend-then-shift collapsed into a single sbfiz: lsb is the
// shift amount, field width is capped at the 32 source bits.
instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
  match(Set dst (LShiftL (ConvI2L src) scale));

  ins_cost(INSN_COST);
  format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}

  ins_encode %{
    __ sbfiz(as_Register($dst$$reg),
          as_Register($src$$reg),
          $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
  %}

  ins_pipe(ialu_reg_shift);
%}
9815 
9816 // Pointer Immediate Addition
9817 // n.b. this needs to be more expensive than using an indirect memory
9818 // operand
// Pointer plus add/sub-encodable immediate.
instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}

// Long Addition
// 64-bit add of two registers.
instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9849 
// Long Immediate Addition. No constant pool entries required.
// 64-bit add of a register and an add/sub-encodable immediate.
instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9864 
9865 // Integer Subtraction
// 32-bit subtract of two registers.
instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Immediate Subtraction
// 32-bit subtract of an add/sub-encodable immediate.
instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}

// Long Subtraction
// 64-bit subtract of two registers.
instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9912 
// Long Immediate Subtraction. No constant pool entries required.
// 64-bit subtract of an add/sub-encodable immediate.
instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  // was "sub$dst" -- missing separator between mnemonic and operands
  format %{ "sub $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9927 
9928 // Integer Negation (special case for sub)
9929 
// 32-bit negate: matches 0 - src.
instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
  match(Set dst (SubI zero src));

  ins_cost(INSN_COST);
  format %{ "negw $dst, $src\t# int" %}

  ins_encode %{
    __ negw(as_Register($dst$$reg),
            as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Long Negation

// 64-bit negate: matches 0 - src.
instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero, rFlagsReg cr) %{
  match(Set dst (SubL zero src));

  ins_cost(INSN_COST);
  format %{ "neg $dst, $src\t# long" %}

  ins_encode %{
    __ neg(as_Register($dst$$reg),
           as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
9959 
9960 // Integer Multiply
9961 
// 32-bit multiply.
instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulI src1 src2));

  ins_cost(INSN_COST * 3);
  format %{ "mulw  $dst, $src1, $src2" %}

  ins_encode %{
    __ mulw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}

// Widening 32x32->64 signed multiply: folds both ConvI2L inputs into
// a single smull.
instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));

  ins_cost(INSN_COST * 3);
  format %{ "smull  $dst, $src1, $src2" %}

  ins_encode %{
    __ smull(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}

// Long Multiply

// 64-bit multiply.
instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (MulL src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "mul  $dst, $src1, $src2" %}

  ins_encode %{
    __ mul(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
10008 
// High 64 bits of the 128-bit signed product of two longs.
instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
%{
  match(Set dst (MulHiL src1 src2));

  ins_cost(INSN_COST * 7);
  // dropped stray comma that preceded the tab in the old format string
  format %{ "smulh   $dst, $src1, $src2\t# mulhi" %}

  ins_encode %{
    __ smulh(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
10024 
10025 // Combined Integer Multiply & Add/Sub
10026 
// Fused 32-bit multiply-add: dst = src3 + src1 * src2.
instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (AddI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // format now shows the 32-bit mnemonic actually emitted (maddw)
  format %{ "maddw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ maddw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
10042 
// Fused 32-bit multiply-subtract: dst = src3 - src1 * src2.
instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (SubI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // format now shows the 32-bit mnemonic actually emitted (msubw)
  format %{ "msubw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msubw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
10058 
10059 // Combined Integer Multiply & Neg
10060 
// Fused 32-bit multiply-negate: dst = -(src1 * src2); matches the
// negation on either multiplicand.
instruct mnegI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI0 zero) %{
  match(Set dst (MulI (SubI zero src1) src2));
  match(Set dst (MulI src1 (SubI zero src2)));

  ins_cost(INSN_COST * 3);
  // format now shows the 32-bit mnemonic actually emitted (mnegw)
  format %{ "mnegw  $dst, $src1, $src2" %}

  ins_encode %{
    __ mnegw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
10076 
10077 // Combined Long Multiply & Add/Sub
10078 
// Fused 64-bit multiply-add: dst = src3 + src1 * src2.
instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (AddL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "madd  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ madd(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}

// Fused 64-bit multiply-subtract: dst = src3 - src1 * src2.
instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (SubL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "msub  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msub(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}

// Combined Long Multiply & Neg

// Fused 64-bit multiply-negate: dst = -(src1 * src2); matches the
// negation on either multiplicand.
instruct mnegL(iRegLNoSp dst, iRegL src1, iRegL src2, immL0 zero) %{
  match(Set dst (MulL (SubL zero src1) src2));
  match(Set dst (MulL src1 (SubL zero src2)));

  ins_cost(INSN_COST * 5);
  format %{ "mneg  $dst, $src1, $src2" %}

  ins_encode %{
    __ mneg(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
10128 
10129 // Integer Divide
10130 
// 32-bit signed divide.
instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (DivI src1 src2));

  ins_cost(INSN_COST * 19);
  format %{ "sdivw  $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_divw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}

// (src >> 31) >>> 31 extracts the sign bit (0 or 1) with a single lsrw.
instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
  match(Set dst (URShiftI (RShiftI src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, $div1" %}
  ins_encode %{
    __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// src + sign-bit(src) in one addw with a shifted operand; used by the
// round-towards-zero idiom for signed division by 2.
instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
  match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "addw $dst, $src, LSR $div1" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 31);
  %}
  ins_pipe(ialu_reg);
%}
10164 
10165 // Long Divide
10166 
// 64-bit signed divide.
instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (DivL src1 src2));

  ins_cost(INSN_COST * 35);
  format %{ "sdiv   $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_div(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}

// (src >> 63) >>> 63 extracts the sign bit (0 or 1) with a single lsr.
instruct signExtractL(iRegLNoSp dst, iRegL src1, immI_63 div1, immI_63 div2) %{
  match(Set dst (URShiftL (RShiftL src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, $div1" %}
  ins_encode %{
    __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63);
  %}
  ins_pipe(ialu_reg_shift);
%}
10186 
// src + sign-bit(src) in one add with a shifted operand; used by the
// round-towards-zero idiom for signed long division by 2.
instruct div2RoundL(iRegLNoSp dst, iRegL src, immI_63 div1, immI_63 div2) %{
  match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
  ins_cost(INSN_COST);
  // show the LSR shift in the format, matching div2Round above
  format %{ "add $dst, $src, LSR $div1" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 63);
  %}
  ins_pipe(ialu_reg);
%}
10200 
10201 // Integer Remainder
10202 
// 32-bit remainder: sdivw into rscratch1 then msubw recovers
// src1 - (src1/src2)*src2.
instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (ModI src1 src2));

  ins_cost(INSN_COST * 22);
  // second line of the old format was garbled ("msubw($dst, ...")
  format %{ "sdivw  rscratch1, $src1, $src2\n\t"
            "msubw  $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_modw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
10213 
10214 // Long Remainder
10215 
// 64-bit remainder: sdiv into rscratch1 then msub recovers
// src1 - (src1/src2)*src2.
instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (ModL src1 src2));

  ins_cost(INSN_COST * 38);
  // second line of the old format was garbled ("msub($dst, ...");
  // also use "\n\t" for consistency with modI
  format %{ "sdiv   rscratch1, $src1, $src2\n\t"
            "msub   $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_mod(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
10226 
10227 // Integer Shifts
10228 
10229 // Shift Left Register
// 32-bit shift left by a register amount.
instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Left Immediate
// The shift amount is masked to 0..31, matching Java shift semantics.
instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lslw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Logical Register
instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Logical Immediate
instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lsrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Arithmetic Register
instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Arithmetic Immediate
instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
10324 
10325 // Combined Int Mask and Right Shift (using UBFM)
10326 // TODO
10327 
10328 // Long Shifts
10329 
10330 // Shift Left Register
// 64-bit shift left by a register amount.
instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Left Immediate
// The shift amount is masked to 0..63, matching Java shift semantics.
instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsl(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Logical Register
instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Logical Immediate
instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// A special-case pattern for card table stores: shifts the raw
// (CastP2X) pointer bits right by an immediate.
instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
  match(Set dst (URShiftL (CastP2X src1) src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Arithmetic Register
instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Arithmetic Immediate
instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ asr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
10441 
10442 // BEGIN This section of the file is automatically generated. Do not edit --------------
10443 
// NOTE(review): auto-generated section (see BEGIN marker above) --
// comments here will be lost on regeneration; code left untouched.
// Bitwise NOT via xor with -1: eon dst, src1, zr.
instruct regL_not_reg(iRegLNoSp dst,
                         iRegL src1, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL src1 m1));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, zr" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
// 32-bit variant of the above.
instruct regI_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI src1 m1));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, zr" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}

// src1 & ~src2 folded into bicw.
instruct AndI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bicw  $dst, $src1, $src2" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// src1 & ~src2 folded into bic.
instruct AndL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bic  $dst, $src1, $src2" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// src1 | ~src2 folded into ornw.
instruct OrI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "ornw  $dst, $src1, $src2" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// src1 | ~src2 folded into orn.
instruct OrL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "orn  $dst, $src1, $src2" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// ~(src1 ^ src2) folded into eonw.
instruct XorI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI m1 (XorI src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, $src2" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// ~(src1 ^ src2) folded into eon.
instruct XorL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL m1 (XorL src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, $src2" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
10578 
// NOTE(review): auto-generated section -- comments here will be lost
// on regeneration; code left untouched.
// src1 & ~(src2 >>> src3) folded into bicw with an LSR operand.
instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit variant: src1 & ~(src2 >>> src3).
instruct AndL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// src1 & ~(src2 >> src3) folded into bicw with an ASR operand.
instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit variant: src1 & ~(src2 >> src3).
instruct AndL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// src1 & ~(src2 << src3) folded into bicw with an LSL operand.
instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit variant: src1 & ~(src2 << src3).
instruct AndL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10686 
// NOTE(review): auto-generated section -- comments here will be lost
// on regeneration; code left untouched.
// ~(src1 ^ (src2 >>> src3)) folded into eonw with an LSR operand.
instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit variant: ~(src1 ^ (src2 >>> src3)).
instruct XorL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// ~(src1 ^ (src2 >> src3)) folded into eonw with an ASR operand.
instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// 64-bit variant: ~(src1 ^ (src2 >> src3)).
instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// ~(src1 ^ (src2 << src3)) folded into eonw with an LSL operand.
instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10776 
10777 instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
10778                          iRegL src1, iRegL src2,
10779                          immI src3, immL_M1 src4, rFlagsReg cr) %{
10780   match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
10781   ins_cost(1.9 * INSN_COST);
10782   format %{ "eon  $dst, $src1, $src2, LSL $src3" %}
10783 
10784   ins_encode %{
10785     __ eon(as_Register($dst$$reg),
10786               as_Register($src1$$reg),
10787               as_Register($src2$$reg),
10788               Assembler::LSL,
10789               $src3$$constant & 0x3f);
10790   %}
10791 
10792   ins_pipe(ialu_reg_reg_shift);
10793 %}
10794 
// Fused "or-not with shifted operand" rules.
// (XorI (shift src2 src3) src4) with src4 == -1 is ~(src2 shift src3),
// so the whole pattern  src1 | ~(src2 shift src3)  maps to a single
// ORN/ORNW with the shift folded into the second operand.
// The shift amount is masked to 0x1f (32-bit) / 0x3f (64-bit),
// matching the hardware's own shift-amount truncation.
// NOTE(review): rFlagsReg cr is listed as an operand but no
// effect(KILL cr) is declared — appears unused; confirm.

// dst = src1 | ~(src2 >>> src3)  ==>  ornw dst, src1, src2, LSR #imm
instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 >>> src3)  ==>  orn dst, src1, src2, LSR #imm (64-bit)
instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 >> src3)  ==>  ornw dst, src1, src2, ASR #imm
instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 >> src3)  ==>  orn dst, src1, src2, ASR #imm (64-bit)
instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 << src3)  ==>  ornw dst, src1, src2, LSL #imm
instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 << src3)  ==>  orn dst, src1, src2, LSL #imm (64-bit)
instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10902 
// AND with a shifted register operand: fold (src2 shift imm) into the
// AND instruction's shifted-register form (AND/ANDW Rd, Rn, Rm, <shift> #imm),
// saving a separate shift instruction. The shift amount is masked to
// 0x1f (32-bit) / 0x3f (64-bit), matching the hardware's truncation.
// NOTE(review): rFlagsReg cr is listed but no effect(KILL cr) is declared —
// appears unused; confirm.
instruct AndI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AndL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AndI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AndL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AndI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AndL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11016 
// XOR with a shifted register operand: fold (src2 shift imm) into the
// EOR instruction's shifted-register form (EOR/EORW Rd, Rn, Rm, <shift> #imm).
// Shift amount masked to 0x1f (32-bit) / 0x3f (64-bit) per hardware behavior.
// NOTE(review): rFlagsReg cr is listed but no effect(KILL cr) is declared —
// appears unused; confirm.
instruct XorI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct XorL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct XorI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct XorL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct XorI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct XorL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11130 
// OR with a shifted register operand: fold (src2 shift imm) into the
// ORR instruction's shifted-register form (ORR/ORRW Rd, Rn, Rm, <shift> #imm).
// Shift amount masked to 0x1f (32-bit) / 0x3f (64-bit) per hardware behavior.
// NOTE(review): rFlagsReg cr is listed but no effect(KILL cr) is declared —
// appears unused; confirm.
instruct OrI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct OrL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct OrI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct OrL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct OrI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct OrL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11244 
// ADD with a shifted register operand: fold (src2 shift imm) into the
// ADD instruction's shifted-register form (ADD/ADDW Rd, Rn, Rm, <shift> #imm).
// Shift amount masked to 0x1f (32-bit) / 0x3f (64-bit) per hardware behavior.
// NOTE(review): rFlagsReg cr is listed but no effect(KILL cr) is declared —
// appears unused; confirm.
instruct AddI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11358 
// SUB with a shifted register operand: fold (src2 shift imm) into the
// SUB instruction's shifted-register form (SUB/SUBW Rd, Rn, Rm, <shift> #imm).
// Note the shifted operand is always the subtrahend (second operand).
// Shift amount masked to 0x1f (32-bit) / 0x3f (64-bit) per hardware behavior.
// NOTE(review): rFlagsReg cr is listed but no effect(KILL cr) is declared —
// appears unused; confirm.
instruct SubI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11472 
11473 
11474 
11475 // Shift Left followed by Shift Right.
11476 // This idiom is used by the compiler for the i2b bytecode etc.
// Signed bitfield move: (src << lshift) >> rshift (arithmetic).
// SBFM Rd, Rn, #r, #s sign-extends the field Rn<s:0> rotated right by r;
// choosing r = (rshift - lshift) & 63 and s = 63 - lshift reproduces the
// left-then-arithmetic-right shift pair in one instruction.
instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;                 // imms: top bit of the source field
    int r = (rshift - lshift) & 63;      // immr: net right-rotate amount
    __ sbfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
11497 
11498 // Shift Left followed by Shift Right.
11499 // This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit signed bitfield move: (src << lshift) >> rshift (arithmetic).
// Same derivation as sbfmL with the 31-bit width: r = (rshift - lshift) & 31,
// s = 31 - lshift.  Used by the compiler for i2b/i2s narrowing idioms.
instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;                 // imms: top bit of the source field
    int r = (rshift - lshift) & 31;      // immr: net right-rotate amount
    __ sbfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
11520 
11521 // Shift Left followed by Shift Right.
11522 // This idiom is used by the compiler for the i2b bytecode etc.
// Unsigned bitfield move: (src << lshift) >>> rshift (logical).
// UBFM Rd, Rn, #r, #s zero-extends the field Rn<s:0> rotated right by r;
// r = (rshift - lshift) & 63 and s = 63 - lshift reproduce the
// left-then-logical-right shift pair in one instruction.
instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 63 - lshift;                 // imms: top bit of the source field
    int r = (rshift - lshift) & 63;      // immr: net right-rotate amount
    __ ubfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
11543 
11544 // Shift Left followed by Shift Right.
11545 // This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit unsigned bitfield move: (src << lshift) >>> rshift (logical).
// Same derivation as ubfmL with the 31-bit width: r = (rshift - lshift) & 31,
// s = 31 - lshift.
instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    int s = 31 - lshift;                 // imms: top bit of the source field
    int r = (rshift - lshift) & 31;      // immr: net right-rotate amount
    __ ubfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
11566 // Bitfield extract with shift & mask
11567 
// Unsigned bitfield extract (32-bit): (src >>> rshift) & mask  ==>
// ubfxw dst, src, #rshift, #width.  immI_bitmask guarantees mask is a
// contiguous low-bit mask, so width = log2(mask+1) is exact.
instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (AndI (URShiftI src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfxw $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);      // mask+1 is a power of two (immI_bitmask)
    __ ubfxw(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// Unsigned bitfield extract (64-bit): (src >>> rshift) & mask  ==>
// ubfx dst, src, #rshift, #width.  immL_bitmask guarantees mask is a
// contiguous low-bit mask, so width = log2(mask+1) is exact.
instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
%{
  match(Set dst (AndL (URShiftL src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    // Use the 64-bit-safe log2 here: mask is a long, and for wide masks
    // mask+1 does not fit the int-based exact_log2 (cf. exact_log2_long
    // in ubfizL's predicate).
    int width = exact_log2_long(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
11598 
// We can use ubfx when extending an And with a mask when we know mask
// is positive.  We know that because immI_bitmask guarantees it.
// ubfx zero-extends into the 64-bit destination, so the ConvI2L is free.
instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));

  ins_cost(INSN_COST * 2);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);      // 32-bit mask: int-based log2 is safe
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
11616 
11617 // We can use ubfiz when masking by a positive number and then left shifting the result.
11618 // We know that the mask is positive because immI_bitmask guarantees it.
// Unsigned bitfield insert-in-zero (32-bit): (src & mask) << lshift  ==>
// ubfizw dst, src, #lshift, #width.  The predicate ensures the shifted
// field still fits in 32 bits (width + lshift <= 32).
instruct ubfizwI(iRegINoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
%{
  match(Set dst (LShiftI (AndI src mask) lshift));
  predicate((unsigned int)n->in(2)->get_int() <= 31 &&
    (exact_log2(n->in(1)->in(2)->get_int()+1) + (unsigned int)n->in(2)->get_int()) <= (31+1));

  ins_cost(INSN_COST);
  format %{ "ubfizw $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);      // 32-bit mask: int-based log2 is safe
    __ ubfizw(as_Register($dst$$reg),
          as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
11636 // We can use ubfiz when masking by a positive number and then left shifting the result.
11637 // We know that the mask is positive because immL_bitmask guarantees it.
// Unsigned bitfield insert-in-zero (64-bit): (src & mask) << lshift  ==>
// ubfiz dst, src, #lshift, #width.  The predicate ensures the shifted
// field still fits in 64 bits (width + lshift <= 64).
instruct ubfizL(iRegLNoSp dst, iRegL src, immI lshift, immL_bitmask mask)
%{
  match(Set dst (LShiftL (AndL src mask) lshift));
  predicate((unsigned int)n->in(2)->get_int() <= 63 &&
    (exact_log2_long(n->in(1)->in(2)->get_long()+1) + (unsigned int)n->in(2)->get_int()) <= (63+1));

  ins_cost(INSN_COST);
  format %{ "ubfiz $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    // mask is a long; use the 64-bit-safe log2, consistent with the
    // exact_log2_long call in this instruct's predicate above.
    int width = exact_log2_long(mask+1);
    __ ubfiz(as_Register($dst$$reg),
          as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
11655 
// If there is a convert I to L block between an AndI and a LShiftL, we can also match ubfiz
// Pattern: ((src & mask) zero-extended to long) << lshift.  The predicate
// ensures lshift <= 31 and lshift + field width <= 32, so the inserted field
// lies entirely within the low 32 bits and a 64-bit UBFIZ is equivalent.
instruct ubfizIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
%{
  match(Set dst (LShiftL (ConvI2L(AndI src mask)) lshift));
  predicate((unsigned int)n->in(2)->get_int() <= 31 &&
    (exact_log2((unsigned int)n->in(1)->in(1)->in(2)->get_int()+1) + (unsigned int)n->in(2)->get_int()) <= 32);

  ins_cost(INSN_COST);
  format %{ "ubfiz $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    int width = exact_log2(mask+1);
    __ ubfiz(as_Register($dst$$reg),
             as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
11674 
// Rotations

// (src1 << lshift) | (src2 >>> rshift) with lshift + rshift == 64 is a
// double-word extract: EXTR concatenates src1:src2 and pulls out 64 bits
// starting at bit `rshift`.  The predicate enforces lshift + rshift == 0 mod 64.
instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// 32-bit variant: lshift + rshift must be 0 mod 32; emits EXTRW.
instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// Same as extrOrL but the two shifted halves are combined with AddL; since the
// shifted-in bit ranges are disjoint, Add and Or produce identical results.
instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// 32-bit AddI form of the extract pattern.
instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
11736 
11737 
// rol expander
// Rotate-left by a variable amount.  AArch64 only has rotate-right (RORV),
// so rol(x, s) is implemented as rorv(x, -s): the shift is negated with
// subw into rscratch1, then RORV rotates right by that (mod 64) amount.

instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// rol expander
// 32-bit variant of the rol-via-negated-ror expansion (RORVW, mod 32).

instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}
11769 
// Match the ideal-graph shapes the optimizer produces for a variable rotate
// left: (x << s) | (x >>> (64 - s)), where the "64 - s" may appear either as
// (SubI 64 s) or, after masking, as (SubI 0 s).  Both expand to rolL_rReg.
instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-left shapes; the complement constant is 32 (or 0).
instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}

instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}
11805 
// ror expander
// Rotate-right by a variable amount maps directly onto RORV (no negation
// needed), hence the lower cost than the rol expanders above.

instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// ror expander
// 32-bit variant (RORVW).

instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}
11835 
// Ideal-graph shapes for a variable rotate right:
// (x >>> s) | (x << (64 - s)), with "64 - s" spelled as (SubI 64 s) or
// (SubI 0 s).  All expand to the ror expanders above.
instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

// 32-bit rotate-right shapes.
instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}

instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
11871 
// Add/subtract (extended)
// AArch64 add/sub can sign- or zero-extend the second operand for free.
// src1 +/- (long)src2 folds the ConvI2L into the instruction's sxtw extend.

instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};

instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};
11899 
11900 
// Add with extend, where the extension appears in the ideal graph as a
// left-shift/right-shift pair: (src2 << k) >> k is a sign extension of the
// low (32-k) or (64-k) bits ((src2 << k) >>> k the zero extension), which
// maps onto add's sxtb/sxth/sxtw/uxtb extend forms.

instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11991 
11992 
// Add with zero-extend, where the extension appears as an And with the
// matching all-ones mask: & 0xff -> uxtb, & 0xffff -> uxth,
// & 0xffffffff -> uxtw.

instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
12057 
// Subtract with zero-extend via And-mask, mirroring the Add forms above.

instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
12122 
12123 
// Add/subtract with sign-extend AND a shifted operand: the extend is the
// (src2 << k) >> k shift pair (as above) and the extended value is then
// shifted left by lshift2 (immIExt limits it to the range add/sub's
// extended-register form accepts), all folded into a single instruction.

instruct AddExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
12253 
12254 
// src1 +/- ((long)src2 << lshift): the ConvI2L plus shift folds into
// add/sub's sxtw-with-shift extended-register form.

instruct AddExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (ConvI2L src2) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%};

instruct SubExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (ConvI2L src2) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%};
12280 
12281 
// Long add/subtract with zero-extend via And-mask plus a left shift:
// src1 +/- ((src2 & mask) << lshift) becomes one add/sub with a
// uxtb/uxth/uxtw extend and shift amount.

instruct AddExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
12359 
// 32-bit counterparts of the masked-and-shifted forms above (addw/subw).

instruct AddExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
12411 // END This section of the file is automatically generated. Do not edit --------------
12412 
12413 // ============================================================================
12414 // Floating Point Arithmetic Instructions
12415 
// Single-precision FP add: FADDS.
instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (AddF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fadds   $dst, $src1, $src2" %}

  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Double-precision FP add: FADDD.
instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (AddD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "faddd   $dst, $src1, $src2" %}

  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
12445 
// Single-precision FP subtract: FSUBS.
instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (SubF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Double-precision FP subtract: FSUBD.
instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (SubD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
12475 
// Single-precision FP multiply: FMULS (cost 6, slightly above add/sub).
instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (MulF src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuls   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Double-precision FP multiply: FMULD.
instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (MulD src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuld   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
12505 
// Fused multiply-add/subtract rules. All are guarded by the UseFMA
// flag; each maps one FmaF/FmaD ideal node to a single fused
// AArch64 instruction.

// src1 * src2 + src3
instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary src1 src2)));

  format %{ "fmadds   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg),
             as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// src1 * src2 + src3
instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary src1 src2)));

  format %{ "fmaddd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmaddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg),
             as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 + src3
// Two match rules: the negation may appear on either multiplicand.
instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary (NegF src1) src2)));
  match(Set dst (FmaF src3 (Binary src1 (NegF src2))));

  format %{ "fmsubs   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmsubs(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 + src3
// Two match rules: the negation may appear on either multiplicand.
instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary (NegD src1) src2)));
  match(Set dst (FmaD src3 (Binary src1 (NegD src2))));

  format %{ "fmsubd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmsubd(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 - src3
instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2)));
  match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2))));

  format %{ "fnmadds  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmadds(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 - src3
instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2)));
  match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2))));

  format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmaddd(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12611 
// src1 * src2 - src3
// NOTE(review): the `zero` operand is declared but appears in neither
// the match rule nor the encoding — looks like a leftover from an
// earlier match pattern; confirm before removing.
instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary src1 src2)));

  format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmsubs(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// src1 * src2 - src3
// NOTE(review): same unused `zero` operand as mnsubF_reg_reg above.
instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary src1 src2)));

  format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
  // n.b. insn name should be fnmsubd
    __ fnmsub(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12646 
12647 
// Math.max(FF)F
// Single fmaxs instruction for the Math.max float intrinsic.
instruct maxF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (MaxF src1 src2));

  format %{ "fmaxs   $dst, $src1, $src2" %}
  ins_encode %{
    __ fmaxs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Math.min(FF)F
instruct minF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (MinF src1 src2));

  format %{ "fmins   $dst, $src1, $src2" %}
  ins_encode %{
    __ fmins(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Math.max(DD)D
instruct maxD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (MaxD src1 src2));

  format %{ "fmaxd   $dst, $src1, $src2" %}
  ins_encode %{
    __ fmaxd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}

// Math.min(DD)D
instruct minD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (MinD src1 src2));

  format %{ "fmind   $dst, $src1, $src2" %}
  ins_encode %{
    __ fmind(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
12703 
12704 
// Single-precision FP divide. The high ins_cost reflects the long
// latency of hardware division relative to add/mul.
instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (DivF src1  src2));

  ins_cost(INSN_COST * 18);
  format %{ "fdivs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_s);
%}

// Double-precision FP divide; costed even higher than the float case.
instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (DivD src1  src2));

  ins_cost(INSN_COST * 32);
  format %{ "fdivd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_d);
%}
12734 
// Single-precision FP negate: matches NegF and emits fnegs.
// Fix: the format string previously said "fneg" although the
// encoding emits fnegs; the debug format now matches the emitted
// instruction (and the fnegd rule below).
instruct negF_reg_reg(vRegF dst, vRegF src) %{
  match(Set dst (NegF src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegs   $dst, $src" %}

  ins_encode %{
    __ fnegs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}
12748 
// Double-precision FP negate: matches NegD and emits fnegd.
instruct negD_reg_reg(vRegD dst, vRegD src) %{
  match(Set dst (NegD src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegd   $dst, $src" %}

  ins_encode %{
    __ fnegd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}

// Single-precision FP absolute value (AbsF -> fabss).
instruct absF_reg(vRegF dst, vRegF src) %{
  match(Set dst (AbsF src));

  ins_cost(INSN_COST * 3);
  format %{ "fabss   $dst, $src" %}
  ins_encode %{
    __ fabss(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}

// Double-precision FP absolute value (AbsD -> fabsd).
instruct absD_reg(vRegD dst, vRegD src) %{
  match(Set dst (AbsD src));

  ins_cost(INSN_COST * 3);
  format %{ "fabsd   $dst, $src" %}
  ins_encode %{
    __ fabsd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
12788 
// Double-precision square root (SqrtD -> fsqrtd).
// Fix: this rule was scheduled on fp_div_s (the single-precision
// divide/sqrt pipe) while sqrtF_reg used fp_div_d — the two pipe
// classes were swapped. A double-precision sqrt belongs on the
// double-precision divide/sqrt pipeline.
instruct sqrtD_reg(vRegD dst, vRegD src) %{
  match(Set dst (SqrtD src));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrtd  $dst, $src" %}
  ins_encode %{
    __ fsqrtd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_div_d);
%}
12801 
// Single-precision square root. Matches the pattern the ideal graph
// produces for Math.sqrt on a float: ConvD2F(SqrtD(ConvF2D(src))),
// which a single fsqrts computes exactly.
// Fix: this rule was scheduled on fp_div_d (the double-precision
// divide/sqrt pipe) while sqrtD_reg used fp_div_s — the two pipe
// classes were swapped. A single-precision sqrt belongs on the
// single-precision divide/sqrt pipeline.
instruct sqrtF_reg(vRegF dst, vRegF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrts  $dst, $src" %}
  ins_encode %{
    __ fsqrts(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_div_s);
%}
12814 
12815 // ============================================================================
12816 // Logical Instructions
12817 
12818 // Integer Logical Instructions
12819 
12820 // And Instructions
12821 
12822 
12823 instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
12824   match(Set dst (AndI src1 src2));
12825 
12826   format %{ "andw  $dst, $src1, $src2\t# int" %}
12827 
12828   ins_cost(INSN_COST);
12829   ins_encode %{
12830     __ andw(as_Register($dst$$reg),
12831             as_Register($src1$$reg),
12832             as_Register($src2$$reg));
12833   %}
12834 
12835   ins_pipe(ialu_reg_reg);
12836 %}
12837 
// 32-bit bitwise AND of a register with a logical immediate.
// Fix: the format string previously said "andsw" (the flag-setting
// form) but the encoding emits plain andw, which does not set flags;
// the debug format now matches the emitted instruction.
instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12852 
// Or Instructions

// 32-bit bitwise OR of two registers (OrI -> orrw).
instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// 32-bit bitwise OR with a logical immediate.
instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Xor Instructions

// 32-bit bitwise XOR of two registers (XorI -> eorw).
instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// 32-bit bitwise XOR with a logical immediate.
instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12916 
// Long Logical Instructions
// TODO

// Fix applied to this group: all six format strings below previously
// said "# int" although these rules handle 64-bit (long) operands;
// the debug comments now say "# long" to match the matched ideal ops.

// 64-bit bitwise AND of two registers (AndL -> andr).
instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// 64-bit bitwise AND with a logical immediate.
instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Or Instructions

// 64-bit bitwise OR of two registers (OrL -> orr).
instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// 64-bit bitwise OR with a logical immediate.
instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Xor Instructions

// 64-bit bitwise XOR of two registers (XorL -> eor).
instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (XorL src1 src2));

  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// 64-bit bitwise XOR with a logical immediate.
instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (XorL src1 src2));

  ins_cost(INSN_COST);
  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13013 
// Sign-extending int-to-long conversion. sbfm with immr=0, imms=31
// is the canonical encoding of sxtw.
instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(INSN_COST);
  format %{ "sxtw  $dst, $src\t# i2l" %}
  ins_encode %{
    __ sbfm($dst$$Register, $src$$Register, 0, 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// this pattern occurs in bigmath arithmetic
// Unsigned (zero-extending) int-to-long: the AndL with a 32-bit mask
// folds into a single ubfm (uxtw) instead of sxtw + and.
instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  ins_cost(INSN_COST);
  format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
  ins_encode %{
    __ ubfm($dst$$Register, $src$$Register, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Long-to-int conversion: a 32-bit register move keeps the low word.
instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
  match(Set dst (ConvL2I src));

  ins_cost(INSN_COST);
  format %{ "movw  $dst, $src \t// l2i" %}

  ins_encode %{
    __ movw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
13052 
// Int-to-boolean: dst = (src != 0) ? 1 : 0, via compare-with-zero
// and conditional set. Clobbers the flags (KILL cr).
instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmpw $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmpw(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}

// Pointer-to-boolean: same scheme as convI2B but with a full-width
// (64-bit) compare since src is a pointer.
instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmp  $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmp(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}
13088 
// Double-to-float narrowing conversion (fcvtd).
instruct convD2F_reg(vRegF dst, vRegD src) %{
  match(Set dst (ConvD2F src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtd  $dst, $src \t// d2f" %}

  ins_encode %{
    __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2f);
%}

// Float-to-double widening conversion (fcvts).
instruct convF2D_reg(vRegD dst, vRegF src) %{
  match(Set dst (ConvF2D src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvts  $dst, $src \t// f2d" %}

  ins_encode %{
    __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2d);
%}

// Float-to-int: convert to signed 32-bit, rounding toward zero.
instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
  match(Set dst (ConvF2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzsw  $dst, $src \t// f2i" %}

  ins_encode %{
    __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);
%}

// Float-to-long: convert to signed 64-bit, rounding toward zero.
instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
  match(Set dst (ConvF2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzs  $dst, $src \t// f2l" %}

  ins_encode %{
    __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2l);
%}

// Int-to-float: signed 32-bit integer to single precision.
instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
  match(Set dst (ConvI2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfws  $dst, $src \t// i2f" %}

  ins_encode %{
    __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2f);
%}

// Long-to-float: signed 64-bit integer to single precision.
instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
  match(Set dst (ConvL2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfs  $dst, $src \t// l2f" %}

  ins_encode %{
    __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2f);
%}

// Double-to-int: convert to signed 32-bit, rounding toward zero.
instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
  match(Set dst (ConvD2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzdw  $dst, $src \t// d2i" %}

  ins_encode %{
    __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2i);
%}

// Double-to-long: convert to signed 64-bit, rounding toward zero.
instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
  match(Set dst (ConvD2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzd  $dst, $src \t// d2l" %}

  ins_encode %{
    __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);
%}

// Int-to-double: signed 32-bit integer to double precision.
instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
  match(Set dst (ConvI2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfwd  $dst, $src \t// i2d" %}

  ins_encode %{
    __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2d);
%}

// Long-to-double: signed 64-bit integer to double precision.
instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
  match(Set dst (ConvL2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfd  $dst, $src \t// l2d" %}

  ins_encode %{
    __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2d);
%}
13218 
// stack <-> reg and reg <-> reg shuffles with no conversion
// These reinterpret raw bits between FP and integer registers (or
// via a stack slot); no value conversion is performed.

// Load the 32 raw bits of a stack float into an integer register.
instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}

  ins_encode %{
    __ ldrw($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Load 32 bits from a stack int slot into an FP register.
instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Load the 64 raw bits of a stack double into an integer register.
instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}

  ins_encode %{
    __ ldr($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Load 64 bits from a stack long slot into an FP register.
instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Store an FP register's 32 bits to a stack int slot.
instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}

  ins_encode %{
    __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Store an integer register's 32 bits to a stack float slot.
instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}

  ins_encode %{
    __ strw($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
13328 
// Store an FP register's 64 bits to a stack long slot.
// Fix: the format string previously printed "strd $dst, $src",
// reversing the operands relative to both the actual store (src is
// the stored register, dst the stack slot) and every sibling
// Move*_reg_stack rule; it now reads "strd $src, $dst".
instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strd $src, $dst\t# MoveD2L_reg_stack" %}

  ins_encode %{
    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
13346 
// Store an integer register's 64 bits to a stack double slot.
instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}

  ins_encode %{
    __ str($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
13364 
// Direct register-to-register bit moves via fmov (no stack round trip).

// Move 32 raw bits FP -> integer register.
instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}

  ins_encode %{
    __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);

%}

// Move 32 raw bits integer -> FP register.
instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}

  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_i2f);

%}

// Move 64 raw bits FP -> integer register.
instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}

  ins_encode %{
    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);

%}

// Move 64 raw bits integer -> FP register.
instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}

  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_l2d);

%}
13436 
13437 // ============================================================================
13438 // clearing of an array
13439 
13440 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, iRegL val, Universe dummy, rFlagsReg cr)
13441 %{
13442   match(Set dummy (ClearArray (Binary cnt base) val));
13443   effect(USE_KILL cnt, USE_KILL base);
13444 
13445   ins_cost(4 * INSN_COST);
13446   format %{ "ClearArray $cnt, $base" %}
13447 
13448   ins_encode %{
13449     __ zero_words($base$$Register, $cnt$$Register);
13450   %}
13451 
13452   ins_pipe(pipe_class_memory);
13453 %}
13454 
13455 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL val, Universe dummy, rFlagsReg cr)
13456 %{
13457   predicate((u_int64_t)n->in(3)->get_long()
13458             < (u_int64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
13459 
13460   match(Set dummy (ClearArray (Binary cnt base) val));
13461   effect(USE_KILL base);
13462 
13463   ins_cost(4 * INSN_COST);
13464   format %{ "ClearArray $cnt, $base" %}
13465 
13466   ins_encode %{
13467     __ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
13468   %}
13469 
13470   ins_pipe(pipe_class_memory);
13471 %}
13472 
13473 // ============================================================================
13474 // Overflow Math Instructions
13475 
13476 instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
13477 %{
13478   match(Set cr (OverflowAddI op1 op2));
13479 
13480   format %{ "cmnw  $op1, $op2\t# overflow check int" %}
13481   ins_cost(INSN_COST);
13482   ins_encode %{
13483     __ cmnw($op1$$Register, $op2$$Register);
13484   %}
13485 
13486   ins_pipe(icmp_reg_reg);
13487 %}
13488 
13489 instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
13490 %{
13491   match(Set cr (OverflowAddI op1 op2));
13492 
13493   format %{ "cmnw  $op1, $op2\t# overflow check int" %}
13494   ins_cost(INSN_COST);
13495   ins_encode %{
13496     __ cmnw($op1$$Register, $op2$$constant);
13497   %}
13498 
13499   ins_pipe(icmp_reg_imm);
13500 %}
13501 
13502 instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
13503 %{
13504   match(Set cr (OverflowAddL op1 op2));
13505 
13506   format %{ "cmn   $op1, $op2\t# overflow check long" %}
13507   ins_cost(INSN_COST);
13508   ins_encode %{
13509     __ cmn($op1$$Register, $op2$$Register);
13510   %}
13511 
13512   ins_pipe(icmp_reg_reg);
13513 %}
13514 
13515 instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
13516 %{
13517   match(Set cr (OverflowAddL op1 op2));
13518 
13519   format %{ "cmn   $op1, $op2\t# overflow check long" %}
13520   ins_cost(INSN_COST);
13521   ins_encode %{
13522     __ cmn($op1$$Register, $op2$$constant);
13523   %}
13524 
13525   ins_pipe(icmp_reg_imm);
13526 %}
13527 
13528 instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
13529 %{
13530   match(Set cr (OverflowSubI op1 op2));
13531 
13532   format %{ "cmpw  $op1, $op2\t# overflow check int" %}
13533   ins_cost(INSN_COST);
13534   ins_encode %{
13535     __ cmpw($op1$$Register, $op2$$Register);
13536   %}
13537 
13538   ins_pipe(icmp_reg_reg);
13539 %}
13540 
13541 instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
13542 %{
13543   match(Set cr (OverflowSubI op1 op2));
13544 
13545   format %{ "cmpw  $op1, $op2\t# overflow check int" %}
13546   ins_cost(INSN_COST);
13547   ins_encode %{
13548     __ cmpw($op1$$Register, $op2$$constant);
13549   %}
13550 
13551   ins_pipe(icmp_reg_imm);
13552 %}
13553 
13554 instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
13555 %{
13556   match(Set cr (OverflowSubL op1 op2));
13557 
13558   format %{ "cmp   $op1, $op2\t# overflow check long" %}
13559   ins_cost(INSN_COST);
13560   ins_encode %{
13561     __ cmp($op1$$Register, $op2$$Register);
13562   %}
13563 
13564   ins_pipe(icmp_reg_reg);
13565 %}
13566 
13567 instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
13568 %{
13569   match(Set cr (OverflowSubL op1 op2));
13570 
13571   format %{ "cmp   $op1, $op2\t# overflow check long" %}
13572   ins_cost(INSN_COST);
13573   ins_encode %{
13574     __ subs(zr, $op1$$Register, $op2$$constant);
13575   %}
13576 
13577   ins_pipe(icmp_reg_imm);
13578 %}
13579 
13580 instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
13581 %{
13582   match(Set cr (OverflowSubI zero op1));
13583 
13584   format %{ "cmpw  zr, $op1\t# overflow check int" %}
13585   ins_cost(INSN_COST);
13586   ins_encode %{
13587     __ cmpw(zr, $op1$$Register);
13588   %}
13589 
13590   ins_pipe(icmp_reg_imm);
13591 %}
13592 
13593 instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
13594 %{
13595   match(Set cr (OverflowSubL zero op1));
13596 
13597   format %{ "cmp   zr, $op1\t# overflow check long" %}
13598   ins_cost(INSN_COST);
13599   ins_encode %{
13600     __ cmp(zr, $op1$$Register);
13601   %}
13602 
13603   ins_pipe(icmp_reg_imm);
13604 %}
13605 
13606 instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
13607 %{
13608   match(Set cr (OverflowMulI op1 op2));
13609 
13610   format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
13611             "cmp   rscratch1, rscratch1, sxtw\n\t"
13612             "movw  rscratch1, #0x80000000\n\t"
13613             "cselw rscratch1, rscratch1, zr, NE\n\t"
13614             "cmpw  rscratch1, #1" %}
13615   ins_cost(5 * INSN_COST);
13616   ins_encode %{
13617     __ smull(rscratch1, $op1$$Register, $op2$$Register);
13618     __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
13619     __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
13620     __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
13621     __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
13622   %}
13623 
13624   ins_pipe(pipe_slow);
13625 %}
13626 
// Fused int-multiply-overflow-check plus branch.  Only matches when the
// If tests overflow/no_overflow, so we can branch directly on the NE/EQ
// result of the sign-extension compare instead of materializing V.
instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "b$cmp   $labl" %}
  ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    // VS (overflow requested) maps to NE of the compare; VC maps to EQ.
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
13648 
// Overflow check for long multiply.  mul/smulh give the 128-bit product;
// it fits in 64 bits iff the high half equals the sign extension
// (ASR #63) of the low half.  The trailing sequence converts that NE
// condition into the V flag (VS), as in overflowMulI_reg above.
instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(6 * INSN_COST);
  ins_encode %{
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}
13671 
// Fused long-multiply-overflow-check plus branch; long counterpart of
// overflowMulI_reg_branch.  Branches on NE/EQ of the high-half compare
// instead of materializing the V flag.
instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "b$cmp $labl" %}
  ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
13695 
13696 // ============================================================================
13697 // Compare Instructions
13698 
// Signed int compare of two registers: cmpw op1, op2.
instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}
13712 
// Signed int compare against zero; uses the add/sub-immediate encoding.
instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpI op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, 0" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}
13726 
// Signed int compare against an immediate that fits the add/sub
// immediate encoding (single instruction).
instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13740 
// Signed int compare against an arbitrary immediate; may need the
// constant materialized first, hence the doubled cost.
instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13754 
13755 // Unsigned compare Instructions; really, same as signed compare
13756 // except it should only be used to feed an If or a CMovI which takes a
13757 // cmpOpU.
13758 
// Unsigned int compare: same cmpw as the signed case, but produces an
// rFlagsRegU so only cmpOpU consumers may test the result.
instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}
13772 
// Unsigned int compare against zero.
instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpU op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, #0\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}
13786 
// Unsigned int compare against an add/sub-encodable immediate.
instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13800 
// Unsigned int compare against an arbitrary immediate (may need a
// constant-materializing move, hence the doubled cost).
instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13814 
// Signed long compare of two registers: cmp op1, op2.
instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}
13828 
// Signed long compare against zero.  The encoding emits
// "subs zr, $op1, #0" (i.e. cmp), so the format now prints cmp -- the
// old "tst $op1" text misdescribed the emitted instruction (tst is ands).
instruct compL_reg_immL0(rFlagsReg cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, #0" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}
13842 
// Signed long compare against an add/sub-encodable immediate.
instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13856 
// Signed long compare against an arbitrary immediate (may need a
// constant-materializing move, hence the doubled cost).
instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13870 
// Unsigned long compare: same cmp as the signed case but defines
// rFlagsRegU, so only cmpOpU consumers may test the result.
instruct compUL_reg_reg(rFlagsRegU cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}
13884 
// Unsigned long compare against zero.  The encoding emits
// "subs zr, $op1, #0" (i.e. cmp), so the format now prints cmp -- the
// old "tst $op1" text misdescribed the emitted instruction (tst is ands).
instruct compUL_reg_immL0(rFlagsRegU cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpUL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, #0" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}
13898 
// Unsigned long compare against an add/sub-encodable immediate.
instruct compUL_reg_immLAddSub(rFlagsRegU cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13912 
// Unsigned long compare against an arbitrary immediate (may need a
// constant-materializing move, hence the doubled cost).
instruct compUL_reg_immL(rFlagsRegU cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13926 
// Pointer compare; pointer comparisons are unsigned, hence rFlagsRegU.
instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // ptr" %}

  ins_encode(aarch64_enc_cmpp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}
13940 
// Compressed-pointer (narrow oop) compare; unsigned, hence rFlagsRegU.
instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // compressed ptr" %}

  ins_encode(aarch64_enc_cmpn(op1, op2));

  ins_pipe(icmp_reg_reg);
%}
13954 
// Pointer null check: compare a pointer register against NULL.
instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
%{
  match(Set cr (CmpP op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // ptr" %}

  ins_encode(aarch64_enc_testp(op1));

  ins_pipe(icmp_reg_imm);
%}
13968 
// Narrow-oop null check: compare a compressed pointer against zero.
instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
%{
  match(Set cr (CmpN op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // compressed ptr" %}

  ins_encode(aarch64_enc_testn(op1));

  ins_pipe(icmp_reg_imm);
%}
13982 
13983 // FP comparisons
13984 //
13985 // n.b. CmpF/CmpD set a normal flags reg which then gets compared
13986 // using normal cmpOp. See declaration of rFlagsReg for details.
13987 
// Float compare of two FP registers; fcmps writes NZCV, which the
// normal (signed) cmpOp conditions then interpret -- see the comment
// at the top of this FP-comparison section.
instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, $src2" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}
14001 
// Float compare against 0.0, using the fcmp-with-zero immediate form.
instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, 0.0" %}

  ins_encode %{
    // Plain C++ literal: the Java-style "0.0D" suffix is a GNU
    // extension, rejected by conforming C++ compilers.
    __ fcmps(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
14015 // FROM HERE
14016 
// Double compare of two FP registers; fcmpd writes NZCV for the normal
// (signed) cmpOp conditions to interpret.
instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, $src2" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}
14030 
// Double compare against 0.0, using the fcmp-with-zero immediate form.
instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, 0.0" %}

  ins_encode %{
    // Plain C++ literal: the Java-style "0.0D" suffix is a GNU
    // extension, rejected by conforming C++ compilers.
    __ fcmpd(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
14044 
// Three-way float compare (Java fcmpl semantics): dst = -1, 0 or 1,
// with unordered treated as "less".
// Fixes: balanced the parentheses in the format text, and removed the
// unused "Label done" -- it was bound but never branched to.
instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);

%}
14072 
// Three-way double compare (Java dcmpl semantics): dst = -1, 0 or 1,
// with unordered treated as "less".
// Fixes: balanced the parentheses in the format text, and removed the
// unused "Label done" -- it was bound but never branched to.
instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}
  ins_pipe(pipe_class_default);

%}
14099 
// Three-way float compare against 0.0 (unordered counts as "less").
// Fixes: replaced the non-ISO "0.0D" literal suffix with "0.0",
// balanced the format parentheses, and removed the unused "Label done".
instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);

%}
14126 
// Three-way double compare against 0.0 (unordered counts as "less").
// Fixes: replaced the non-ISO "0.0D" literal suffix with "0.0",
// balanced the format parentheses, and removed the unused "Label done".
instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}
  ins_pipe(pipe_class_default);

%}
14152 
// CmpLTMask: dst = (p < q) ? -1 : 0.  csetw materializes 0/1 from the
// LT condition and the subtraction from zr turns 1 into all-ones.
instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(3 * INSN_COST);

  format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
            "csetw $dst, lt\n\t"
            "subw $dst, zr, $dst"
  %}

  ins_encode %{
    __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
    __ csetw(as_Register($dst$$reg), Assembler::LT);
    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
14173 
// CmpLTMask against zero: an arithmetic shift right by 31 broadcasts
// the sign bit, yielding -1 for negative src and 0 otherwise in a
// single instruction (flags are still declared KILLed by the match).
instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask src zero));
  effect(KILL cr);

  ins_cost(INSN_COST);

  format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
14189 
14190 // ============================================================================
14191 // Max and Min
14192 
// Signed int minimum via compare + conditional select (cselw on LT).
instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MinI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 lt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::LT);
  %}

  ins_pipe(ialu_reg_reg);
%}
14217 // FROM HERE
14218 
// Signed int maximum via compare + conditional select (cselw on GT).
instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MaxI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 gt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::GT);
  %}

  ins_pipe(ialu_reg_reg);
%}
14243 
14244 // ============================================================================
14245 // Branch Instructions
14246 
14247 // Direct Branch.
// Unconditional direct branch.
instruct branch(label lbl)
%{
  match(Goto);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  format %{ "b  $lbl" %}

  ins_encode(aarch64_enc_b(lbl));

  ins_pipe(pipe_branch);
%}
14261 
14262 // Conditional Near Branch
// Conditional near branch on signed condition codes.
instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}
14282 
14283 // Conditional Near Branch Unsigned
// Conditional near branch on unsigned condition codes (rFlagsRegU).
instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl\t# unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}
14303 
14304 // Make use of CBZ and CBNZ.  These instructions, as well as being
14305 // shorter than (cmp; branch), have the additional benefit of not
14306 // killing the flags.
14307 
// Fuse "int ==/!= 0" with a branch into cbzw/cbnzw; shorter than
// cmp+branch and leaves the flags untouched.
instruct cmpI_imm0_branch(cmpOpEqNe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14324 
// Fuse "long ==/!= 0" with a branch into cbz/cbnz.
instruct cmpL_imm0_branch(cmpOpEqNe cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14341 
// Fuse a pointer null test with a branch into cbz/cbnz.
instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14358 
// Fuse a narrow-oop null test with a branch into cbzw/cbnzw.
instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpN op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14375 
// Null test of a decoded narrow oop: the decoded pointer is null iff
// the compressed form is zero, so test the narrow register directly
// and skip the DecodeN.
instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP (DecodeN oop) zero));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $oop, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($oop$$Register, *L);
    else
      __ cbnzw($oop$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14392 
// Fuse an unsigned int compare against zero with a branch.
instruct cmpUI_imm0_branch(cmpOpUEqNeLtGe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpU op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    // EQ ("== 0") and LS ("u<= 0") both mean "is zero" -> cbzw.
    // NOTE(review): the operand also admits lt/ge; "u< 0" and "u>= 0"
    // are constant conditions, presumably folded away by C2 before
    // matching -- confirm against cmpOpUEqNeLtGe's definition.
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14409 
// Fuse an unsigned long compare against zero with a branch.
instruct cmpUL_imm0_branch(cmpOpUEqNeLtGe cmp, iRegL op1, immL0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpUL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    // EQ ("== 0") and LS ("u<= 0") both mean "is zero" -> cbz.
    // NOTE(review): lt/ge against zero are constant unsigned tests,
    // presumably folded by C2 before matching -- see cmpUI_imm0_branch.
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14426 
14427 // Test bit and Branch
14428 
14429 // Patterns for short (< 32KiB) variants
// Long "< 0" / ">= 0" branch via a test of the sign bit (bit 63):
// lt => tbnz (sign set), ge => tbz.  Short form, range < 32KiB.
instruct cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
14445 
// Int "< 0" / ">= 0" branch via a test of the sign bit (bit 31):
// lt => tbnz (sign set), ge => tbz.  Short form, range < 32KiB.
instruct cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
14461 
// Branch on a single bit of a long: (op1 & (1<<k)) ==/!= 0 becomes
// tbz/tbnz.  The predicate restricts the mask to a power of two.
instruct cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
14478 
// Branch on a single bit of an int: (op1 & (1<<k)) ==/!= 0 becomes
// tbz/tbnz.  The predicate restricts the mask to a power of two.
instruct cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
14495 
14496 // And far variants
// Far variant of cmpL_branch_sign: same sign-bit test, but tbr is
// asked to emit a sequence reaching beyond the +/-32KiB tbz range.
instruct far_cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14511 
// Far variant of cmpI_branch_sign (sign-bit test with extended range).
instruct far_cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14526 
// Far variant of cmpL_branch_bit (single-bit test with extended range).
instruct far_cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14542 
// Far variant of cmpI_branch_bit (single-bit test with extended range).
instruct far_cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14558 
14559 // Test bits
14560 
// Flag-setting long bit test: (op1 & imm) compared to 0 collapses to a
// single tst when the mask is a valid 64-bit logical immediate.
instruct cmpL_and(cmpOp cmp, iRegL op1, immL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/false, n->in(1)->in(2)->get_long()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
14573 
// Flag-setting int bit test: (op1 & imm) compared to 0 collapses to a
// single tstw when the mask is a valid 32-bit logical immediate.
// Fix: the format previously printed "tst" although the encoding emits
// the 32-bit form (tstw); now consistent with cmpI_and_reg below.
instruct cmpI_and(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/true, n->in(1)->in(2)->get_int()));

  ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
14586 
// Flag-setting long bit test with a register mask: single tst.
instruct cmpL_and_reg(cmpOp cmp, iRegL op1, iRegL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
14597 
// Flag-setting int bit test with a register mask: single tstw.
instruct cmpI_and_reg(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
14608 
14609 
14610 // Conditional Far Branch
14611 // Conditional Far Branch Unsigned
14612 // TODO: fixme
14613 
14614 // counted loop end branch near
// Counted-loop back branch on signed condition codes (near form).
instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch);
%}
14630 
14631 // counted loop end branch near Unsigned
// Counted-loop back branch on unsigned condition codes (near form).
instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch);
%}
14647 
14648 // counted loop end branch far
14649 // counted loop end branch far unsigned
14650 // TODO: fixme
14651 
14652 // ============================================================================
14653 // inlined locking and unlocking
14654 
// Fast-path monitor enter. Produces flags (cr) that the slow-path branch
// tests; clobbers two pointer temporaries.
instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP tmp2);

  // TODO
  // identify correct cost
  ins_cost(5 * INSN_COST);
  format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}

  ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}

// Fast-path monitor exit; mirror of cmpFastLock above.
instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, TEMP tmp2);

  ins_cost(5 * INSN_COST);
  format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}

  ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}
14682 
14683 
14684 // ============================================================================
14685 // Safepoint Instructions
14686 
14687 // TODO
14688 // provide a near and far version of this code
14689 
// Safepoint poll: load from the polling page (result discarded into zr).
// A disarmed page reads cleanly; an armed page faults and the signal
// handler brings the thread to a safepoint (poll_type relocation).
instruct safePoint(iRegP poll)
%{
  match(SafePoint poll);

  format %{
    "ldrw zr, [$poll]\t# Safepoint: poll for GC"
  %}
  ins_encode %{
    __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
  %}
  ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
%}
14702 
14703 
14704 // ============================================================================
14705 // Procedure Call/Return Instructions
14706 
14707 // Call Java Static Instruction
14708 
// Direct call to a statically-bound Java method; the epilog encoding is
// appended after the call.
instruct CallStaticJavaDirect(method meth)
%{
  match(CallStaticJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "call,static $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_static_call(meth),
              aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
14724 
14725 // TO HERE
14726 
14727 // Call Java Dynamic Instruction
// Dynamically-bound (virtual/interface) Java call through the inline-cache
// call encoding; epilog appended after the call.
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL,dynamic $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_dynamic_call(meth),
               aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
14743 
14744 // Call Runtime Instruction
14745 
// Call into the VM runtime (may block / take a safepoint).
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Leaf Instruction — leaf call, same runtime-call encoding.

instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Leaf (no FP) Instruction — as above for CallLeafNoFP nodes.

instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
14794 
14795 // Tail Call; Jump from runtime stub to Java code.
14796 // Also known as an 'interprocedural jump'.
14797 // Target of jump will eventually return to caller.
14798 // TailJump below removes the return address.
// Indirect tail call: jump (not call) to jump_target with the method oop
// in the inline-cache register; target eventually returns to our caller.
instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
%{
  match(TailCall jump_target method_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $method_oop holds method oop" %}

  ins_encode(aarch64_enc_tail_call(jump_target));

  ins_pipe(pipe_class_call);
%}

// Tail jump used for exception forwarding: ex_oop is pinned to r0 by the
// operand type; only jump_target is passed to the encoding.
instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $ex_oop holds exception oop" %}

  ins_encode(aarch64_enc_tail_jmp(jump_target));

  ins_pipe(pipe_class_call);
%}
14824 
14825 // Create exception oop: created by stack-crawling runtime code.
14826 // Created exception is now available to this handler, and is setup
14827 // just prior to jumping to this handler. No code emitted.
14828 // TODO check
14829 // should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted — the rule only
// tells the register allocator the oop arrives in r0.
// TODO check
// should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
instruct CreateException(iRegP_R0 ex_oop)
%{
  match(Set ex_oop (CreateEx));

  format %{ " -- \t// exception oop; no code emitted" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
14842 
14843 // Rethrow exception: The exception oop will come in the first
14844 // argument position. Then JUMP (not call) to the rethrow stub code.
// Rethrow exception: The exception oop will come in the first
// argument position. Then JUMP (not call) to the rethrow stub code.
instruct RethrowException() %{
  match(Rethrow);
  ins_cost(CALL_COST);

  format %{ "b rethrow_stub" %}

  ins_encode( aarch64_enc_rethrow() );

  ins_pipe(pipe_class_call);
%}
14855 
14856 
14857 // Return Instruction
14858 // epilog node loads ret address into lr as part of frame pop
// Method return. The epilog (emitted elsewhere) has already restored lr
// as part of the frame pop; this rule only emits the ret.
instruct Ret()
%{
  match(Return);

  format %{ "ret\t// return register" %}

  ins_encode( aarch64_enc_ret() );

  ins_pipe(pipe_branch);
%}
14869 
14870 // Die now.
// Halt node: emits a trapping dpcs1 so execution can never fall through.
instruct ShouldNotReachHere() %{
  match(Halt);

  ins_cost(CALL_COST);
  format %{ "ShouldNotReachHere" %}

  ins_encode %{
    // +1 so NativeInstruction::is_sigill_zombie_not_entrant() doesn't
    // return true
    __ dpcs1(0xdead + 1);
  %}

  ins_pipe(pipe_class_default);
%}
14885 
14886 // ============================================================================
14887 // Partial Subtype Check
14888 //
14889 // superklass array for an instance of the superklass.  Set a hidden
14890 // internal cache on a hit (cache is checked with exposed code in
14891 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
14892 // encoding ALSO sets flags.
14893 
// Partial subtype check producing a result register: zero on a hit,
// non-zero on a miss. The encoding also sets flags (see header comment).
instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL cr, KILL temp);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x1); // Force zero of result reg on hit

  ins_pipe(pipe_class_memory);
%}

// Variant matched when the check result is immediately compared against
// zero: only the flags are consumed, so the result register need not be
// zeroed on a hit (opcode 0x0) and is killed instead of produced.
instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
%{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL temp, KILL result);

  ins_cost(1100);  // same cost as the version above
  format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x0); // Don't zero result reg on hit

  ins_pipe(pipe_class_memory);
%}
14923 
// String compare, both strings UTF-16 (UU). Returns the comparison result
// in r0; clobbers both temporaries and the flags.
instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  // List both temps: tmp2 is killed as well (matches the effect() above).
  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      fnoreg, fnoreg, fnoreg, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
14941 
// String compare, both strings Latin-1 (LL). Returns the comparison result
// in r0; clobbers both temporaries and the flags.
instruct string_compareL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  // List both temps: tmp2 is killed as well (matches the effect() above).
  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      fnoreg, fnoreg, fnoreg, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
14958 
// Mixed-encoding compare, str1 UTF-16 vs str2 Latin-1 (UL). Uses three
// vector temporaries in addition to the integer ones.
instruct string_compareUL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2,
                        vRegD_V0 vtmp1, vRegD_V1 vtmp2, vRegD_V2 vtmp3, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL vtmp3,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2, $vtmp1, $vtmp2, $vtmp3" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister,
                      $vtmp3$$FloatRegister, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}

// Mixed-encoding compare, str1 Latin-1 vs str2 UTF-16 (LU); mirror of the
// UL rule above.
instruct string_compareLU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2,
                        vRegD_V0 vtmp1, vRegD_V1 vtmp2, vRegD_V2 vtmp3, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL vtmp3,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2, $vtmp1, $vtmp2, $vtmp3" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister,
                      $vtmp3$$FloatRegister,StrIntrinsicNode::LU);
  %}
  ins_pipe(pipe_class_memory);
%}
14998 
// String.indexOf with a runtime-length needle, both strings UTF-16 (UU).
// The -1 constant-count argument tells the stub the needle length is in
// a register (cnt2) rather than known at compile time.
instruct string_indexofUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}

// As above, both strings Latin-1 (LL).
instruct string_indexofLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}

// As above, UTF-16 haystack with Latin-1 needle (UL).
instruct string_indexofUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
15061 
// indexOf with a compile-time-constant needle length (immI_le_4, i.e. at
// most 4), both strings UTF-16 (UU). The constant count is passed to the
// stub and the cnt2/tmp5/tmp6 register slots are filled with zr.
instruct string_indexof_conUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}

// Constant-needle variant, both strings Latin-1 (LL).
instruct string_indexof_conLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}

// Constant-needle variant for the mixed UL case. Note the tighter immI_1
// operand: only a single-element needle is handled here.
instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_1 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
15124 
// indexOf of a single char value (StrIndexOfChar) within a UTF-16 string.
instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
                              iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                              iRegINoSp tmp3, rFlagsReg cr)
%{
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result" %}

  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register,
                           $result$$Register, $tmp1$$Register, $tmp2$$Register,
                           $tmp3$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
15142 
// String equality, Latin-1 (LL): element size 1 passed to string_equals.
instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register, 1);
  %}
  ins_pipe(pipe_class_memory);
%}

// String equality, UTF-16 (UU): element size 2 passed to string_equals.
instruct string_equalsU(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register, 2);
  %}
  ins_pipe(pipe_class_memory);
%}
15174 
// Array equality for byte arrays (LL): element size 1.
instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                       iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
                       iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  // 'ary2' previously lacked the '$', so it was printed literally instead
  // of being substituted with the operand's register.
  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                     $result$$Register, $tmp$$Register, 1);
  %}
  ins_pipe(pipe_class_memory);
%}
15191 
// Array equality for char arrays (UU): element size 2.
instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                       iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
                       iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  // 'ary2' previously lacked the '$', so it was printed literally instead
  // of being substituted with the operand's register.
  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                     $result$$Register, $tmp$$Register, 2);
  %}
  ins_pipe(pipe_class_memory);
%}
15208 
// HasNegatives intrinsic: scans a byte array for any negative byte
// (used by String coders to decide Latin-1 vs UTF-16).
instruct has_negatives(iRegP_R1 ary1, iRegI_R2 len, iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (HasNegatives ary1 len));
  effect(USE_KILL ary1, USE_KILL len, KILL cr);
  format %{ "has negatives byte[] $ary1,$len -> $result" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15219 
15220 // fast char[] to byte[] compression
// fast char[] to byte[] compression
instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                         vRegD_V0 tmp1, vRegD_V1 tmp2,
                         vRegD_V2 tmp3, vRegD_V3 tmp4,
                         iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  // NOTE(review): the KILL list in this format looks stale — the effect()
  // above kills R1/R2/R3 and the vector temps V0-V3, but nothing binds R4.
  format %{ "String Compress $src,$dst -> $result    // KILL R1, R2, R3, R4" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$FloatRegister, $tmp2$$FloatRegister,
                           $tmp3$$FloatRegister, $tmp4$$FloatRegister,
                           $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}
15238 
15239 // fast byte[] to char[] inflation
// fast byte[] to char[] inflation
instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len,
                        vRegD_V0 tmp1, vRegD_V1 tmp2, vRegD_V2 tmp3, iRegP_R3 tmp4, rFlagsReg cr)
%{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  // KILL list completed: the effect() also claims tmp3 and tmp4 as TEMPs.
  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister, $tmp4$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
15253 
15254 // encode char[] to byte[] in ISO_8859_1
// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                          vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
                          vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
                          iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
         $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
         $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
  %}
  ins_pipe( pipe_class_memory );
%}
15272 
15273 // ============================================================================
15274 // This name is KNOWN by the ADLC and cannot be changed.
15275 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
15276 // for this guy.
// ThreadLocal: the dedicated thread register (thread_RegP) already holds
// Thread::current(), so the rule is zero-size and emits nothing.
instruct tlsLoadP(thread_RegP dst)
%{
  match(Set dst (ThreadLocal));

  ins_cost(0);

  format %{ " -- \t// $dst=Thread::current(), empty" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
15291 
15292 // ====================VECTOR INSTRUCTIONS=====================================
15293 
15294 // Load vector (32 bits)
// Load vector (32 bits) — single-word ldrs into the low half of a D reg.
instruct loadV4(vecD dst, vmem4 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_ldrvS(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}

// Load vector (64 bits) — ldrd into a D register.
instruct loadV8(vecD dst, vmem8 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_ldrvD(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}

// Load Vector (128 bits) — ldrq into a Q register.
instruct loadV16(vecX dst, vmem16 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_ldrvQ(dst, mem) );
  ins_pipe(vload_reg_mem128);
%}
15326 
15327 // Store Vector (32 bits)
// Store Vector (32 bits) — strs from the low half of a D register.
instruct storeV4(vecD src, vmem4 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strs   $mem,$src\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_strvS(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}

// Store Vector (64 bits) — strd from a D register.
instruct storeV8(vecD src, vmem8 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strd   $mem,$src\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_strvD(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}

// Store Vector (128 bits) — strq from a Q register.
instruct storeV16(vecX src, vmem16 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strq   $mem,$src\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_strvQ(src, mem) );
  ins_pipe(vstore_reg_mem128);
%}
15359 
// ReplicateB from a GPR into a 64-bit vector (DUP .8B). The predicate
// also accepts length 4, which fits in the same D register.
instruct replicate8B(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

// ReplicateB from a GPR into a 128-bit vector (DUP .16B).
instruct replicate16B(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// ReplicateB of an immediate: MOVI with the constant masked to a byte.
instruct replicate8B_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

// 128-bit variant of replicate8B_imm.
instruct replicate16B_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(16B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
15409 
// ReplicateS from a GPR into a 64-bit vector (DUP .4H). The predicate
// also accepts length 2, which fits in the same D register.
instruct replicate4S(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

// ReplicateS from a GPR into a 128-bit vector (DUP .8H).
instruct replicate8S(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// ReplicateS of an immediate: MOVI with the constant masked to 16 bits.
instruct replicate4S_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

// 128-bit variant of replicate4S_imm.
instruct replicate8S_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
15459 
15460 instruct replicate2I(vecD dst, iRegIorL2I src)
15461 %{
15462   predicate(n->as_Vector()->length() == 2);
15463   match(Set dst (ReplicateI src));
15464   ins_cost(INSN_COST);
15465   format %{ "dup  $dst, $src\t# vector (2I)" %}
15466   ins_encode %{
15467     __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
15468   %}
15469   ins_pipe(vdup_reg_reg64);
15470 %}
15471 
15472 instruct replicate4I(vecX dst, iRegIorL2I src)
15473 %{
15474   predicate(n->as_Vector()->length() == 4);
15475   match(Set dst (ReplicateI src));
15476   ins_cost(INSN_COST);
15477   format %{ "dup  $dst, $src\t# vector (4I)" %}
15478   ins_encode %{
15479     __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
15480   %}
15481   ins_pipe(vdup_reg_reg128);
15482 %}
15483 
15484 instruct replicate2I_imm(vecD dst, immI con)
15485 %{
15486   predicate(n->as_Vector()->length() == 2);
15487   match(Set dst (ReplicateI con));
15488   ins_cost(INSN_COST);
15489   format %{ "movi  $dst, $con\t# vector(2I)" %}
15490   ins_encode %{
15491     __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
15492   %}
15493   ins_pipe(vmovi_reg_imm64);
15494 %}
15495 
15496 instruct replicate4I_imm(vecX dst, immI con)
15497 %{
15498   predicate(n->as_Vector()->length() == 4);
15499   match(Set dst (ReplicateI con));
15500   ins_cost(INSN_COST);
15501   format %{ "movi  $dst, $con\t# vector(4I)" %}
15502   ins_encode %{
15503     __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
15504   %}
15505   ins_pipe(vmovi_reg_imm128);
15506 %}
15507 
15508 instruct replicate2L(vecX dst, iRegL src)
15509 %{
15510   predicate(n->as_Vector()->length() == 2);
15511   match(Set dst (ReplicateL src));
15512   ins_cost(INSN_COST);
15513   format %{ "dup  $dst, $src\t# vector (2L)" %}
15514   ins_encode %{
15515     __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
15516   %}
15517   ins_pipe(vdup_reg_reg128);
15518 %}
15519 
15520 instruct replicate2L_zero(vecX dst, immI0 zero)
15521 %{
15522   predicate(n->as_Vector()->length() == 2);
15523   match(Set dst (ReplicateI zero));
15524   ins_cost(INSN_COST);
15525   format %{ "movi  $dst, $zero\t# vector(4I)" %}
15526   ins_encode %{
15527     __ eor(as_FloatRegister($dst$$reg), __ T16B,
15528            as_FloatRegister($dst$$reg),
15529            as_FloatRegister($dst$$reg));
15530   %}
15531   ins_pipe(vmovi_reg_imm128);
15532 %}
15533 
15534 instruct replicate2F(vecD dst, vRegF src)
15535 %{
15536   predicate(n->as_Vector()->length() == 2);
15537   match(Set dst (ReplicateF src));
15538   ins_cost(INSN_COST);
15539   format %{ "dup  $dst, $src\t# vector (2F)" %}
15540   ins_encode %{
15541     __ dup(as_FloatRegister($dst$$reg), __ T2S,
15542            as_FloatRegister($src$$reg));
15543   %}
15544   ins_pipe(vdup_reg_freg64);
15545 %}
15546 
15547 instruct replicate4F(vecX dst, vRegF src)
15548 %{
15549   predicate(n->as_Vector()->length() == 4);
15550   match(Set dst (ReplicateF src));
15551   ins_cost(INSN_COST);
15552   format %{ "dup  $dst, $src\t# vector (4F)" %}
15553   ins_encode %{
15554     __ dup(as_FloatRegister($dst$$reg), __ T4S,
15555            as_FloatRegister($src$$reg));
15556   %}
15557   ins_pipe(vdup_reg_freg128);
15558 %}
15559 
15560 instruct replicate2D(vecX dst, vRegD src)
15561 %{
15562   predicate(n->as_Vector()->length() == 2);
15563   match(Set dst (ReplicateD src));
15564   ins_cost(INSN_COST);
15565   format %{ "dup  $dst, $src\t# vector (2D)" %}
15566   ins_encode %{
15567     __ dup(as_FloatRegister($dst$$reg), __ T2D,
15568            as_FloatRegister($src$$reg));
15569   %}
15570   ins_pipe(vdup_reg_dreg128);
15571 %}
15572 
15573 // ====================REDUCTION ARITHMETIC====================================
15574 
// Add-reduce a 2-element int vector into a scalar: extract both 32-bit
// lanes with umov into tmp/tmp2, then fold them into src1 with two addw
// instructions.
instruct reduce_add2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegINoSp tmp, iRegINoSp tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "umov  $tmp2, $src2, S, 1\n\t"
            "addw  $dst, $src1, $tmp\n\t"
            "addw  $dst, $dst, $tmp2\t add reduction2i"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ addw($dst$$Register, $src1$$Register, $tmp$$Register);
    __ addw($dst$$Register, $dst$$Register, $tmp2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
15593 
// Add-reduce a 4-element int vector: addv sums all four lanes into lane
// 0 of tmp, umov extracts it, and a final addw folds in the scalar
// accumulator src1.
instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegINoSp tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "addv  $tmp, T4S, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "addw  $dst, $tmp2, $src1\t add reduction4i"
  %}
  ins_encode %{
    __ addv(as_FloatRegister($tmp$$reg), __ T4S,
            as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ addw($dst$$Register, $tmp2$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
15611 
// Multiply-reduce a 2-element int vector: extract each 32-bit lane with
// umov and fold it into the scalar accumulator with mul. dst is a TEMP
// as well as the result because it is written before the last read of
// src2's second lane.
instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegINoSp tmp)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Note: no trailing "\n\t" after the last line -- a dangling
  // continuation would emit a blank line in PrintAssembly output.
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "mul   $dst, $tmp, $src1\n\t"
            "umov  $tmp, $src2, S, 1\n\t"
            "mul   $dst, $tmp, $dst\t mul reduction2i"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp$$Register, $src1$$Register);
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
15630 
// Multiply-reduce a 4-element int vector: ins copies the upper 64 bits
// of src2 down into tmp, mulv multiplies pairwise (lane i * lane i+2),
// then the two surviving 32-bit products are extracted with umov and
// folded into src1 with scalar muls.
instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegINoSp tmp2)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  // Note: no trailing "\n\t" after the last line -- a dangling
  // continuation would emit a blank line in PrintAssembly output.
  format %{ "ins   $tmp, $src2, 0, 1\n\t"
            "mul   $tmp, $tmp, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "mul   $dst, $tmp2, $src1\n\t"
            "umov  $tmp2, $tmp, S, 1\n\t"
            "mul   $dst, $tmp2, $dst\t mul reduction4i"
  %}
  ins_encode %{
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ mulv(as_FloatRegister($tmp$$reg), __ T2S,
           as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp2$$Register, $src1$$Register);
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp2$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
15655 
// Add-reduce a 2-element float vector using sequential scalar fadds
// (lane order preserved, which matters for FP rounding): dst = src1 +
// lane0, then ins moves lane 1 down and dst += lane1.
instruct reduce_add2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\t add reduction2f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15675 
// Add-reduce a 4-element float vector with sequential scalar fadds:
// each ins moves lane 1/2/3 of src2 down to lane 0 of tmp, and the
// running sum in dst is updated one lane at a time (lane order
// preserved for FP rounding).
instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fadds $dst, $dst, $tmp\t add reduction4f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15707 
// Multiply-reduce a 2-element float vector with sequential scalar
// fmuls (lane order preserved for FP rounding): dst = src1 * lane0,
// then ins moves lane 1 down and dst *= lane1.
instruct reduce_mul2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Fixed format comment: this is a 2-element MUL reduction, not
  // "add reduction4f" as previously (copy-paste from reduce_add4F).
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction2f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15727 
// Multiply-reduce a 4-element float vector with sequential scalar
// fmuls: each ins moves lane 1/2/3 of src2 down to lane 0 of tmp and
// the running product in dst is updated one lane at a time (lane order
// preserved for FP rounding).
instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Fixed format comment: this is a MUL reduction, not
  // "add reduction4f" as previously (copy-paste from reduce_add4F).
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction4f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15759 
// Add-reduce a 2-element double vector with sequential scalar faddd
// (lane order preserved for FP rounding): dst = src1 + lane0, then ins
// moves lane 1 down and dst += lane1.
instruct reduce_add2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "faddd $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "faddd $dst, $dst, $tmp\t add reduction2d"
  %}
  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15779 
// Multiply-reduce a 2-element double vector with sequential scalar
// fmuld (lane order preserved for FP rounding): dst = src1 * lane0,
// then ins moves lane 1 down and dst *= lane1.
instruct reduce_mul2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Fixed format comment: this is a MUL reduction, not
  // "add reduction2d" as previously (copy-paste from reduce_add2D).
  format %{ "fmuld $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "fmuld $dst, $dst, $tmp\t mul reduction2d"
  %}
  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15799 
15800 // ====================VECTOR ARITHMETIC=======================================
15801 
15802 // --------------------------------- ADD --------------------------------------
15803 
// Lane-wise integer add, 8 byte lanes in a 64-bit vector; the predicate
// also accepts length 4 (upper lanes are don't-care).
instruct vadd8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Lane-wise integer add, 16 byte lanes in a 128-bit vector.
instruct vadd16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Lane-wise short add, 4 (or 2) 16-bit lanes in a 64-bit vector.
instruct vadd4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Lane-wise short add, 8 16-bit lanes in a 128-bit vector.
instruct vadd8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Lane-wise int add, 2 32-bit lanes in a 64-bit vector.
instruct vadd2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Lane-wise int add, 4 32-bit lanes in a 128-bit vector.
instruct vadd4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Lane-wise long add, 2 64-bit lanes in a 128-bit vector.
instruct vadd2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Lane-wise float add, 2 32-bit lanes in a 64-bit vector.
instruct vadd2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}

// Lane-wise float add, 4 32-bit lanes in a 128-bit vector.
instruct vadd4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
15931 
// Lane-wise double add, 2 64-bit lanes in a 128-bit vector.
// Added the length==2 predicate for consistency with vsub2D/vmul2D;
// 2 is the only AddVD length representable in a 128-bit vector, so
// this does not change which nodes match.
instruct vadd2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
15944 
15945 // --------------------------------- SUB --------------------------------------
15946 
// Lane-wise integer subtract, 8 byte lanes in a 64-bit vector; the
// predicate also accepts length 4 (upper lanes are don't-care).
instruct vsub8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Lane-wise integer subtract, 16 byte lanes in a 128-bit vector.
instruct vsub16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Lane-wise short subtract, 4 (or 2) 16-bit lanes in a 64-bit vector.
instruct vsub4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Lane-wise short subtract, 8 16-bit lanes in a 128-bit vector.
instruct vsub8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Lane-wise int subtract, 2 32-bit lanes in a 64-bit vector.
instruct vsub2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// Lane-wise int subtract, 4 32-bit lanes in a 128-bit vector.
instruct vsub4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Lane-wise long subtract, 2 64-bit lanes in a 128-bit vector.
instruct vsub2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// Lane-wise float subtract, 2 32-bit lanes in a 64-bit vector.
instruct vsub2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}

// Lane-wise float subtract, 4 32-bit lanes in a 128-bit vector.
instruct vsub4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}

// Lane-wise double subtract, 2 64-bit lanes in a 128-bit vector.
instruct vsub2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
16088 
16089 // --------------------------------- MUL --------------------------------------
16090 
// Lane-wise short multiply, 4 (or 2) 16-bit lanes in a 64-bit vector.
// Note there is no byte or long vector multiply rule: NEON has no
// 64-bit lane mul, and byte multiplies are not generated here.
instruct vmul4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}

// Lane-wise short multiply, 8 16-bit lanes in a 128-bit vector.
instruct vmul8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}

// Lane-wise int multiply, 2 32-bit lanes in a 64-bit vector.
instruct vmul2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}

// Lane-wise int multiply, 4 32-bit lanes in a 128-bit vector.
instruct vmul4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}

// Lane-wise float multiply, 2 32-bit lanes in a 64-bit vector.
instruct vmul2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// Lane-wise float multiply, 4 32-bit lanes in a 128-bit vector.
instruct vmul4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// Lane-wise double multiply, 2 64-bit lanes in a 128-bit vector.
instruct vmul2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16189 
16190 // --------------------------------- MLA --------------------------------------
16191 
// Fused multiply-accumulate: match AddV(dst, MulV(src1, src2)) and emit
// a single mla (dst += src1 * src2). 4 (or 2) 16-bit lanes, 64-bit.
instruct vmla4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// dst += src1 * src2, 8 16-bit lanes in a 128-bit vector.
instruct vmla8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// dst += src1 * src2, 2 32-bit lanes in a 64-bit vector.
instruct vmla2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// dst += src1 * src2, 4 32-bit lanes in a 128-bit vector.
instruct vmla4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// dst + src1 * src2 -- FP fused multiply-add (FmaVF), only when the
// JIT is allowed to fuse (UseFMA). 2 32-bit lanes, 64-bit vector.
instruct vmla2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVF  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (2S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// dst + src1 * src2 -- fused, 4 32-bit lanes in a 128-bit vector.
instruct vmla4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set dst (FmaVF  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (4S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// dst + src1 * src2 -- fused, 2 64-bit lanes in a 128-bit vector.
instruct vmla2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVD  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (2D)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16290 
16291 // --------------------------------- MLS --------------------------------------
16292 
16293 instruct vmls4S(vecD dst, vecD src1, vecD src2)
16294 %{
16295   predicate(n->as_Vector()->length() == 2 ||
16296             n->as_Vector()->length() == 4);
16297   match(Set dst (SubVS dst (MulVS src1 src2)));
16298   ins_cost(INSN_COST);
16299   format %{ "mlsv  $dst,$src1,$src2\t# vector (4H)" %}
16300   ins_encode %{
16301     __ mlsv(as_FloatRegister($dst$$reg), __ T4H,
16302             as_FloatRegister($src1$$reg),
16303             as_FloatRegister($src2$$reg));
16304   %}
16305   ins_pipe(vmla64);
16306 %}
16307 
16308 instruct vmls8S(vecX dst, vecX src1, vecX src2)
16309 %{
16310   predicate(n->as_Vector()->length() == 8);
16311   match(Set dst (SubVS dst (MulVS src1 src2)));
16312   ins_cost(INSN_COST);
16313   format %{ "mlsv  $dst,$src1,$src2\t# vector (8H)" %}
16314   ins_encode %{
16315     __ mlsv(as_FloatRegister($dst$$reg), __ T8H,
16316             as_FloatRegister($src1$$reg),
16317             as_FloatRegister($src2$$reg));
16318   %}
16319   ins_pipe(vmla128);
16320 %}
16321 
16322 instruct vmls2I(vecD dst, vecD src1, vecD src2)
16323 %{
16324   predicate(n->as_Vector()->length() == 2);
16325   match(Set dst (SubVI dst (MulVI src1 src2)));
16326   ins_cost(INSN_COST);
16327   format %{ "mlsv  $dst,$src1,$src2\t# vector (2S)" %}
16328   ins_encode %{
16329     __ mlsv(as_FloatRegister($dst$$reg), __ T2S,
16330             as_FloatRegister($src1$$reg),
16331             as_FloatRegister($src2$$reg));
16332   %}
16333   ins_pipe(vmla64);
16334 %}
16335 
16336 instruct vmls4I(vecX dst, vecX src1, vecX src2)
16337 %{
16338   predicate(n->as_Vector()->length() == 4);
16339   match(Set dst (SubVI dst (MulVI src1 src2)));
16340   ins_cost(INSN_COST);
16341   format %{ "mlsv  $dst,$src1,$src2\t# vector (4S)" %}
16342   ins_encode %{
16343     __ mlsv(as_FloatRegister($dst$$reg), __ T4S,
16344             as_FloatRegister($src1$$reg),
16345             as_FloatRegister($src2$$reg));
16346   %}
16347   ins_pipe(vmla128);
16348 %}
16349 
// dst - src1 * src2
// Vector fused multiply-subtract, 2 x float (2S). The two match rules cover
// both placements of the negation produced by the compiler:
// dst + (-src1) * src2 and dst + src1 * (-src2), which are equivalent.
instruct vmls2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
  match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (2S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// dst - src1 * src2
// Vector fused multiply-subtract, 4 x float (4S).
instruct vmls4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
  match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (4S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// dst - src1 * src2
// Vector fused multiply-subtract, 2 x double (2D).
instruct vmls2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVD  dst (Binary (NegVD src1) src2)));
  match(Set dst (FmaVD  dst (Binary src1 (NegVD src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (2D)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16394 
16395 // --------------------------------- DIV --------------------------------------
16396 
16397 instruct vdiv2F(vecD dst, vecD src1, vecD src2)
16398 %{
16399   predicate(n->as_Vector()->length() == 2);
16400   match(Set dst (DivVF src1 src2));
16401   ins_cost(INSN_COST);
16402   format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
16403   ins_encode %{
16404     __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
16405             as_FloatRegister($src1$$reg),
16406             as_FloatRegister($src2$$reg));
16407   %}
16408   ins_pipe(vmuldiv_fp64);
16409 %}
16410 
16411 instruct vdiv4F(vecX dst, vecX src1, vecX src2)
16412 %{
16413   predicate(n->as_Vector()->length() == 4);
16414   match(Set dst (DivVF src1 src2));
16415   ins_cost(INSN_COST);
16416   format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
16417   ins_encode %{
16418     __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
16419             as_FloatRegister($src1$$reg),
16420             as_FloatRegister($src2$$reg));
16421   %}
16422   ins_pipe(vmuldiv_fp128);
16423 %}
16424 
16425 instruct vdiv2D(vecX dst, vecX src1, vecX src2)
16426 %{
16427   predicate(n->as_Vector()->length() == 2);
16428   match(Set dst (DivVD src1 src2));
16429   ins_cost(INSN_COST);
16430   format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
16431   ins_encode %{
16432     __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
16433             as_FloatRegister($src1$$reg),
16434             as_FloatRegister($src2$$reg));
16435   %}
16436   ins_pipe(vmuldiv_fp128);
16437 %}
16438 
16439 // --------------------------------- SQRT -------------------------------------
16440 
16441 instruct vsqrt2D(vecX dst, vecX src)
16442 %{
16443   predicate(n->as_Vector()->length() == 2);
16444   match(Set dst (SqrtVD src));
16445   format %{ "fsqrt  $dst, $src\t# vector (2D)" %}
16446   ins_encode %{
16447     __ fsqrt(as_FloatRegister($dst$$reg), __ T2D,
16448              as_FloatRegister($src$$reg));
16449   %}
16450   ins_pipe(vsqrt_fp128);
16451 %}
16452 
16453 // --------------------------------- ABS --------------------------------------
16454 
16455 instruct vabs2F(vecD dst, vecD src)
16456 %{
16457   predicate(n->as_Vector()->length() == 2);
16458   match(Set dst (AbsVF src));
16459   ins_cost(INSN_COST * 3);
16460   format %{ "fabs  $dst,$src\t# vector (2S)" %}
16461   ins_encode %{
16462     __ fabs(as_FloatRegister($dst$$reg), __ T2S,
16463             as_FloatRegister($src$$reg));
16464   %}
16465   ins_pipe(vunop_fp64);
16466 %}
16467 
16468 instruct vabs4F(vecX dst, vecX src)
16469 %{
16470   predicate(n->as_Vector()->length() == 4);
16471   match(Set dst (AbsVF src));
16472   ins_cost(INSN_COST * 3);
16473   format %{ "fabs  $dst,$src\t# vector (4S)" %}
16474   ins_encode %{
16475     __ fabs(as_FloatRegister($dst$$reg), __ T4S,
16476             as_FloatRegister($src$$reg));
16477   %}
16478   ins_pipe(vunop_fp128);
16479 %}
16480 
16481 instruct vabs2D(vecX dst, vecX src)
16482 %{
16483   predicate(n->as_Vector()->length() == 2);
16484   match(Set dst (AbsVD src));
16485   ins_cost(INSN_COST * 3);
16486   format %{ "fabs  $dst,$src\t# vector (2D)" %}
16487   ins_encode %{
16488     __ fabs(as_FloatRegister($dst$$reg), __ T2D,
16489             as_FloatRegister($src$$reg));
16490   %}
16491   ins_pipe(vunop_fp128);
16492 %}
16493 
16494 // --------------------------------- NEG --------------------------------------
16495 
16496 instruct vneg2F(vecD dst, vecD src)
16497 %{
16498   predicate(n->as_Vector()->length() == 2);
16499   match(Set dst (NegVF src));
16500   ins_cost(INSN_COST * 3);
16501   format %{ "fneg  $dst,$src\t# vector (2S)" %}
16502   ins_encode %{
16503     __ fneg(as_FloatRegister($dst$$reg), __ T2S,
16504             as_FloatRegister($src$$reg));
16505   %}
16506   ins_pipe(vunop_fp64);
16507 %}
16508 
16509 instruct vneg4F(vecX dst, vecX src)
16510 %{
16511   predicate(n->as_Vector()->length() == 4);
16512   match(Set dst (NegVF src));
16513   ins_cost(INSN_COST * 3);
16514   format %{ "fneg  $dst,$src\t# vector (4S)" %}
16515   ins_encode %{
16516     __ fneg(as_FloatRegister($dst$$reg), __ T4S,
16517             as_FloatRegister($src$$reg));
16518   %}
16519   ins_pipe(vunop_fp128);
16520 %}
16521 
16522 instruct vneg2D(vecX dst, vecX src)
16523 %{
16524   predicate(n->as_Vector()->length() == 2);
16525   match(Set dst (NegVD src));
16526   ins_cost(INSN_COST * 3);
16527   format %{ "fneg  $dst,$src\t# vector (2D)" %}
16528   ins_encode %{
16529     __ fneg(as_FloatRegister($dst$$reg), __ T2D,
16530             as_FloatRegister($src$$reg));
16531   %}
16532   ins_pipe(vunop_fp128);
16533 %}
16534 
16535 // --------------------------------- AND --------------------------------------
16536 
16537 instruct vand8B(vecD dst, vecD src1, vecD src2)
16538 %{
16539   predicate(n->as_Vector()->length_in_bytes() == 4 ||
16540             n->as_Vector()->length_in_bytes() == 8);
16541   match(Set dst (AndV src1 src2));
16542   ins_cost(INSN_COST);
16543   format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
16544   ins_encode %{
16545     __ andr(as_FloatRegister($dst$$reg), __ T8B,
16546             as_FloatRegister($src1$$reg),
16547             as_FloatRegister($src2$$reg));
16548   %}
16549   ins_pipe(vlogical64);
16550 %}
16551 
16552 instruct vand16B(vecX dst, vecX src1, vecX src2)
16553 %{
16554   predicate(n->as_Vector()->length_in_bytes() == 16);
16555   match(Set dst (AndV src1 src2));
16556   ins_cost(INSN_COST);
16557   format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
16558   ins_encode %{
16559     __ andr(as_FloatRegister($dst$$reg), __ T16B,
16560             as_FloatRegister($src1$$reg),
16561             as_FloatRegister($src2$$reg));
16562   %}
16563   ins_pipe(vlogical128);
16564 %}
16565 
16566 // --------------------------------- OR ---------------------------------------
16567 
// Vector bitwise OR, 64-bit register; also serves 4-byte vectors
// (upper bytes are don't-care).
instruct vor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  // Format fixed to "orr": it previously printed "and" although the
  // emitted instruction is orr, making PrintAssembly/debug output
  // misleading. Matches the vor16B format below.
  format %{ "orr  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
16582 
// Vector bitwise OR, 128-bit register.
instruct vor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
16596 
16597 // --------------------------------- XOR --------------------------------------
16598 
16599 instruct vxor8B(vecD dst, vecD src1, vecD src2)
16600 %{
16601   predicate(n->as_Vector()->length_in_bytes() == 4 ||
16602             n->as_Vector()->length_in_bytes() == 8);
16603   match(Set dst (XorV src1 src2));
16604   ins_cost(INSN_COST);
16605   format %{ "xor  $dst,$src1,$src2\t# vector (8B)" %}
16606   ins_encode %{
16607     __ eor(as_FloatRegister($dst$$reg), __ T8B,
16608             as_FloatRegister($src1$$reg),
16609             as_FloatRegister($src2$$reg));
16610   %}
16611   ins_pipe(vlogical64);
16612 %}
16613 
16614 instruct vxor16B(vecX dst, vecX src1, vecX src2)
16615 %{
16616   predicate(n->as_Vector()->length_in_bytes() == 16);
16617   match(Set dst (XorV src1 src2));
16618   ins_cost(INSN_COST);
16619   format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
16620   ins_encode %{
16621     __ eor(as_FloatRegister($dst$$reg), __ T16B,
16622             as_FloatRegister($src1$$reg),
16623             as_FloatRegister($src2$$reg));
16624   %}
16625   ins_pipe(vlogical128);
16626 %}
16627 
16628 // ------------------------------ Shift ---------------------------------------
16629 instruct vshiftcnt8B(vecD dst, iRegIorL2I cnt) %{
16630   predicate(n->as_Vector()->length_in_bytes() == 8);
16631   match(Set dst (LShiftCntV cnt));
16632   match(Set dst (RShiftCntV cnt));
16633   format %{ "dup  $dst, $cnt\t# shift count vector (8B)" %}
16634   ins_encode %{
16635     __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($cnt$$reg));
16636   %}
16637   ins_pipe(vdup_reg_reg64);
16638 %}
16639 
16640 instruct vshiftcnt16B(vecX dst, iRegIorL2I cnt) %{
16641   predicate(n->as_Vector()->length_in_bytes() == 16);
16642   match(Set dst (LShiftCntV cnt));
16643   match(Set dst (RShiftCntV cnt));
16644   format %{ "dup  $dst, $cnt\t# shift count vector (16B)" %}
16645   ins_encode %{
16646     __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
16647   %}
16648   ins_pipe(vdup_reg_reg128);
16649 %}
16650 
// Vector left shift of byte lanes by a per-lane (replicated) vector count.
// The 8B form also serves 4-byte vectors (upper lanes are don't-care).
instruct vsll8B(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

instruct vsll16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
16677 
16678 // Right shifts with vector shift count on aarch64 SIMD are implemented
16679 // as left shift by negative shift count.
16680 // There are two cases for vector shift count.
16681 //
16682 // Case 1: The vector shift count is from replication.
16683 //        |            |
16684 //    LoadVector  RShiftCntV
16685 //        |       /
16686 //     RShiftVI
// Note: In the inner loop, multiple neg instructions are used; they can be
// hoisted to the outer loop and merged into one neg instruction.
16689 //
16690 // Case 2: The vector shift count is from loading.
16691 // This case isn't supported by middle-end now. But it's supported by
16692 // panama/vectorIntrinsics(JEP 338: Vector API).
16693 //        |            |
16694 //    LoadVector  LoadVector
16695 //        |       /
16696 //     RShiftVI
16697 //
16698 
// Arithmetic right shift of byte lanes by a vector count. AArch64 has no
// right-shift-by-vector instruction, so the count is negated (negr into a
// TEMP) and sshl shifts left by the negative count, i.e. right.
instruct vsra8B(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (8B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

instruct vsra16B(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (16B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}

// Logical (unsigned) right shift of byte lanes: same negate-then-shift
// pattern, using ushl instead of sshl.
instruct vsrl8B(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (8B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

instruct vsrl16B(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (16B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
16768 
// Immediate shifts of byte lanes. Java shift semantics for counts >= the
// lane width are handled explicitly: left and logical-right shifts of 8 or
// more zero the lanes (via eor of src with itself), while arithmetic right
// shifts clamp the count to 7 so the sign bit fills the lane.
instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      // Shift count >= lane bits: result is all zeroes.
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      // Shift count >= lane bits: result is all zeroes.
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}

instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    // Arithmetic shift: clamp to 7 so every lane becomes its sign.
    if (sh >= 8) sh = 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T8B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}

instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    // Arithmetic shift: clamp to 7 so every lane becomes its sign.
    if (sh >= 8) sh = 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}

instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      // Shift count >= lane bits: result is all zeroes.
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      // Shift count >= lane bits: result is all zeroes.
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
16875 
// Vector shifts of short (16-bit) lanes by a vector count. Left shifts map
// directly to sshl; right shifts negate the count first (see the comment
// block above). The 4H forms also serve 2-lane vectors.
instruct vsll4S(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

instruct vsll8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

// Arithmetic right shift: negate the count (negr is byte-wise; the count
// is replicated so every lane holds the same value) then shift left by it.
instruct vsra4S(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (4H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

instruct vsra8S(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (8H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}

// Logical right shift: same pattern with ushl.
instruct vsrl4S(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (4H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

instruct vsrl8S(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (8H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
16972 
// Immediate shifts of short (16-bit) lanes. As for bytes: counts >= 16 zero
// the lanes for left/logical-right shifts, and are clamped to 15 for
// arithmetic right shifts.
instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      // Shift count >= lane bits: result is all zeroes.
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      // Shift count >= lane bits: result is all zeroes.
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}

instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    // Arithmetic shift: clamp to 15 so every lane becomes its sign.
    if (sh >= 16) sh = 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T4H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}

instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    // Arithmetic shift: clamp to 15 so every lane becomes its sign.
    if (sh >= 16) sh = 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T8H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}

instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      // Shift count >= lane bits: result is all zeroes.
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      // Shift count >= lane bits: result is all zeroes.
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17079 
// Vector shifts of int (32-bit) lanes by a vector count. Left shifts map
// directly to sshl; right shifts negate the count first and shift left by
// the negative count (see the comment block above).
instruct vsll2I(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

instruct vsll4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

instruct vsra2I(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (2S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

instruct vsra4I(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (4S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}

instruct vsrl2I(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (2S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

instruct vsrl4I(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (4S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
17173 
17174 instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
17175   predicate(n->as_Vector()->length() == 2);
17176   match(Set dst (LShiftVI src shift));
17177   ins_cost(INSN_COST);
17178   format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
17179   ins_encode %{
17180     __ shl(as_FloatRegister($dst$$reg), __ T2S,
17181            as_FloatRegister($src$$reg),
17182            (int)$shift$$constant);
17183   %}
17184   ins_pipe(vshift64_imm);
17185 %}
17186 
17187 instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
17188   predicate(n->as_Vector()->length() == 4);
17189   match(Set dst (LShiftVI src shift));
17190   ins_cost(INSN_COST);
17191   format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
17192   ins_encode %{
17193     __ shl(as_FloatRegister($dst$$reg), __ T4S,
17194            as_FloatRegister($src$$reg),
17195            (int)$shift$$constant);
17196   %}
17197   ins_pipe(vshift128_imm);
17198 %}
17199 
// Arithmetic (signed) right shift of a 2-lane int vector (64-bit, 2S) by an
// immediate count, using SSHR (vector, immediate).
// NOTE(review): SSHR encodes counts 1..32 for 32-bit lanes; a count of 0 or
// > 32 would be unencodable -- presumably guaranteed by the ideal graph;
// confirm.
instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}
17212 
// Arithmetic (signed) right shift of a 4-lane int vector (128-bit, 4S) by an
// immediate count, using SSHR (vector, immediate).
// NOTE(review): SSHR encodes counts 1..32 for 32-bit lanes -- presumably the
// ideal graph keeps the constant in range; confirm.
instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
17225 
// Logical (unsigned) right shift of a 2-lane int vector (64-bit, 2S) by an
// immediate count, using USHR (vector, immediate).
// NOTE(review): USHR encodes counts 1..32 for 32-bit lanes -- presumably the
// ideal graph keeps the constant in range; confirm.
instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}
17238 
// Logical (unsigned) right shift of a 4-lane int vector (128-bit, 4S) by an
// immediate count, using USHR (vector, immediate).
// NOTE(review): USHR encodes counts 1..32 for 32-bit lanes -- presumably the
// ideal graph keeps the constant in range; confirm.
instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
17251 
// Left shift of a 2-lane long vector (128-bit, 2D) by a per-lane variable
// count.  SSHL shifts left for positive counts, so the shift vector can be
// used directly -- no negation (and hence no TEMP register) is needed.
instruct vsll2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17264 
// Arithmetic (signed) right shift of a 2-lane long vector (128-bit, 2D) by a
// per-lane variable count.  SSHL shifts left by a signed per-lane count, so
// the counts are first negated into tmp to obtain a right shift.
instruct vsra2L(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  // tmp holds the negated shift counts; it must not alias dst/src/shift.
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (2D)" %}
  ins_encode %{
    // NEG is done bytewise over the full 128-bit register (T16B); this is
    // sufficient because SSHL only consults the low byte of each lane.
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
17281 
// Logical (unsigned) right shift of a 2-lane long vector (128-bit, 2D) by a
// per-lane variable count.  USHL shifts left by a signed per-lane count, so
// the counts are first negated into tmp to obtain a right shift.
instruct vsrl2L(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  // tmp holds the negated shift counts; it must not alias dst/src/shift.
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (2D)" %}
  ins_encode %{
    // NEG is done bytewise over the full 128-bit register (T16B); this is
    // sufficient because USHL only consults the low byte of each lane.
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
17298 
// Left shift of a 2-lane long vector (128-bit, 2D) by an immediate count,
// using SHL (vector, immediate).
// NOTE(review): SHL encodes counts 0..63 for 64-bit lanes -- presumably the
// ideal graph keeps the constant in range; confirm.
instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
17311 
// Arithmetic (signed) right shift of a 2-lane long vector (128-bit, 2D) by
// an immediate count, using SSHR (vector, immediate).
// NOTE(review): SSHR encodes counts 1..64 for 64-bit lanes -- presumably the
// ideal graph keeps the constant in range; confirm.
instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
17324 
// Logical (unsigned) right shift of a 2-lane long vector (128-bit, 2D) by an
// immediate count, using USHR (vector, immediate).
// NOTE(review): USHR encodes counts 1..64 for 64-bit lanes -- presumably the
// ideal graph keeps the constant in range; confirm.
instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
17337 
17338 //----------PEEPHOLE RULES-----------------------------------------------------
17339 // These must follow all instruction definitions as they use the names
17340 // defined in the instructions definitions.
17341 //
17342 // peepmatch ( root_instr_name [preceding_instruction]* );
17343 //
17344 // peepconstraint %{
17345 // (instruction_number.operand_name relational_op instruction_number.operand_name
17346 //  [, ...] );
17347 // // instruction numbers are zero-based using left to right order in peepmatch
17348 //
17349 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
17350 // // provide an instruction_number.operand_name for each operand that appears
17351 // // in the replacement instruction's match rule
17352 //
17353 // ---------VM FLAGS---------------------------------------------------------
17354 //
17355 // All peephole optimizations can be turned off using -XX:-OptoPeephole
17356 //
17357 // Each peephole rule is given an identifying number starting with zero and
17358 // increasing by one in the order seen by the parser.  An individual peephole
17359 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
17360 // on the command-line.
17361 //
17362 // ---------CURRENT LIMITATIONS----------------------------------------------
17363 //
17364 // Only match adjacent instructions in same basic block
17365 // Only equality constraints
17366 // Only constraints between operands, not (0.dest_reg == RAX_enc)
17367 // Only one replacement instruction
17368 //
17369 // ---------EXAMPLE----------------------------------------------------------
17370 //
17371 // // pertinent parts of existing instructions in architecture description
17372 // instruct movI(iRegINoSp dst, iRegI src)
17373 // %{
17374 //   match(Set dst (CopyI src));
17375 // %}
17376 //
17377 // instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
17378 // %{
17379 //   match(Set dst (AddI dst src));
17380 //   effect(KILL cr);
17381 // %}
17382 //
17383 // // Change (inc mov) to lea
17384 // peephole %{
//   // increment preceded by register-register move
17386 //   peepmatch ( incI_iReg movI );
17387 //   // require that the destination register of the increment
17388 //   // match the destination register of the move
17389 //   peepconstraint ( 0.dst == 1.dst );
17390 //   // construct a replacement instruction that sets
17391 //   // the destination to ( move's source register + one )
17392 //   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
17393 // %}
17394 //
17395 
17396 // Implementation no longer uses movX instructions since
17397 // machine-independent system no longer uses CopyX nodes.
17398 //
17399 // peephole
17400 // %{
17401 //   peepmatch (incI_iReg movI);
17402 //   peepconstraint (0.dst == 1.dst);
17403 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
17404 // %}
17405 
17406 // peephole
17407 // %{
17408 //   peepmatch (decI_iReg movI);
17409 //   peepconstraint (0.dst == 1.dst);
17410 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
17411 // %}
17412 
17413 // peephole
17414 // %{
17415 //   peepmatch (addI_iReg_imm movI);
17416 //   peepconstraint (0.dst == 1.dst);
17417 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
17418 // %}
17419 
17420 // peephole
17421 // %{
17422 //   peepmatch (incL_iReg movL);
17423 //   peepconstraint (0.dst == 1.dst);
17424 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
17425 // %}
17426 
17427 // peephole
17428 // %{
17429 //   peepmatch (decL_iReg movL);
17430 //   peepconstraint (0.dst == 1.dst);
17431 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
17432 // %}
17433 
17434 // peephole
17435 // %{
17436 //   peepmatch (addL_iReg_imm movL);
17437 //   peepconstraint (0.dst == 1.dst);
17438 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
17439 // %}
17440 
17441 // peephole
17442 // %{
17443 //   peepmatch (addP_iReg_imm movP);
17444 //   peepconstraint (0.dst == 1.dst);
17445 //   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
17446 // %}
17447 
17448 // // Change load of spilled value to only a spill
17449 // instruct storeI(memory mem, iRegI src)
17450 // %{
17451 //   match(Set mem (StoreI mem src));
17452 // %}
17453 //
17454 // instruct loadI(iRegINoSp dst, memory mem)
17455 // %{
17456 //   match(Set dst (LoadI mem));
17457 // %}
17458 //
17459 
17460 //----------SMARTSPILL RULES---------------------------------------------------
17461 // These must follow all instruction definitions as they use the names
17462 // defined in the instructions definitions.
17463 
17464 // Local Variables:
17465 // mode: c++
17466 // End: