1 //
   2 // Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2014, Red Hat Inc. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //
  24 //
  25 
  26 // AArch64 Architecture Description File
  27 
  28 //----------REGISTER DEFINITION BLOCK------------------------------------------
  29 // This information is used by the matcher and the register allocator to
  30 // describe individual registers and classes of registers within the target
// architecture.
  32 
  33 register %{
  34 //----------Architecture Description Register Definitions----------------------
  35 // General Registers
  36 // "reg_def"  name ( register save type, C convention save type,
  37 //                   ideal register type, encoding );
  38 // Register Save Types:
  39 //
  40 // NS  = No-Save:       The register allocator assumes that these registers
  41 //                      can be used without saving upon entry to the method, &
  42 //                      that they do not need to be saved at call sites.
  43 //
  44 // SOC = Save-On-Call:  The register allocator assumes that these registers
  45 //                      can be used without saving upon entry to the method,
  46 //                      but that they must be saved at call sites.
  47 //
  48 // SOE = Save-On-Entry: The register allocator assumes that these registers
  49 //                      must be saved before using them upon entry to the
  50 //                      method, but they do not need to be saved at call
  51 //                      sites.
  52 //
  53 // AS  = Always-Save:   The register allocator assumes that these registers
  54 //                      must be saved before using them upon entry to the
  55 //                      method, & that they must be saved at call sites.
  56 //
  57 // Ideal Register Type is used to determine how to save & restore a
  58 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  59 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  60 //
  61 // The encoding number is the actual bit-pattern placed into the opcodes.
  62 
  63 // We must define the 64 bit int registers in two 32 bit halves, the
  64 // real lower register and a virtual upper half register. upper halves
  65 // are used by the register allocator but are not actually supplied as
  66 // operands to memory ops.
  67 //
  68 // follow the C1 compiler in making registers
  69 //
  70 //   r0-r7,r10-r26 volatile (caller save)
  71 //   r27-r32 system (no save, no allocate)
  72 //   r8-r9 invisible to the allocator (so we can use them as scratch regs)
  73 //
  74 // as regards Java usage. we don't use any callee save registers
  75 // because this makes it difficult to de-optimise a frame (see comment
  76 // in x86 implementation of Deoptimization::unwind_callee_save_values)
  77 //
  78 
  79 // General Registers
  80 
// Integer registers: each 64-bit register is defined as a real low
// half (Rn) and a virtual high half (Rn_H) for the allocator; the
// fourth field is the hardware encoding.  r8 and r9 are deliberately
// omitted so they remain available as scratch registers (see comment
// above).
reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
// NOTE(review): r18 is reserved as the platform register by some
// AArch64 ABIs — confirm allocating it is safe on all supported targets.
reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
// r19-r26 are SOE for the C convention but SOC for Java (see the
// "we don't use any callee save registers" comment above).
reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
// r27-r31 hold fixed VM/ABI roles and are No-Save for Java use.
reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());
reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
 141 
 142 // ----------------------------
 143 // Float/Double Registers
 144 // ----------------------------
 145 
 146 // Double Registers
 147 
 148 // The rules of ADL require that double registers be defined in pairs.
 149 // Each pair must be two 32-bit values, but not necessarily a pair of
 150 // single float registers. In each pair, ADLC-assigned register numbers
 151 // must be adjacent, with the lower number even. Finally, when the
 152 // CPU stores such a register pair to memory, the word associated with
 153 // the lower ADLC-assigned number must be stored to the lower address.
 154 
// AArch64 has 32 floating-point/SIMD registers, each 128 bits wide,
// i.e. each can hold up to 4 * 32 bit floats or 2 * 64 bit doubles.
// We currently only use the first float or double element of the
// vector.
 159 
// for Java use, float registers v0-v15 are always save-on-call (even
// though the platform ABI treats v8-v15 as callee save). float
// registers v16-v31 are SOC as per the platform spec
 163 
  // SIMD/FP registers: each 128-bit vector register is described as
  // four 32-bit ADLC slices — Vn (low word), Vn_H, Vn_J and Vn_K —
  // so the allocator can track float, double and vector uses.
  reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
  reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
  reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
  reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );

  reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
  reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
  reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
  reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );

  reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
  reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
  reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
  reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );

  reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
  reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
  reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
  reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );

  reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
  reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
  reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
  reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );

  reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
  reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
  reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
  reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );

  reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
  reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
  reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
  reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );

  reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
  reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
  reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
  reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );

  // v8-v15 are SOC here even though the platform ABI treats them as
  // callee save (see comment above this table).
  reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
  reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
  reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
  reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );

  reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
  reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
  reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
  reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );

  reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
  reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
  reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
  reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));

  reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
  reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
  reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
  reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));

  reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
  reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
  reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
  reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));

  reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
  reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
  reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
  reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));

  reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
  reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
  reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
  reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));

  reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
  reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
  reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
  reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));

  reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
  reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
  reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
  reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));

  reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
  reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
  reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
  reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));

  reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
  reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
  reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
  reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));

  reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
  reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
  reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
  reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));

  reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
  reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
  reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
  reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));

  reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
  reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
  reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
  reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));

  reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
  reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
  reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
  reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));

  reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
  reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
  reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
  reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));

  reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
  reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
  reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
  reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));

  reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
  reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
  reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
  reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));

  reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
  reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
  reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
  reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));

  reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
  reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
  reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
  reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));

  reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
  reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
  reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
  reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));

  reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
  reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
  reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
  reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));

  reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
  reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
  reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
  reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));

  reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
  reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
  reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
  reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
 323 
 324 // ----------------------------
 325 // Special Registers
 326 // ----------------------------
 327 
// the AArch64 CPSR status flag register is not directly accessible as
// an instruction operand. the FPSR status flag register is a system
// register which can be written/read using MSR/MRS but again does not
// appear as an operand (a code identifying the FPSR occurs as an
// immediate value in the instruction).
 333 
 334 reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
 335 
 336 
 337 // Specify priority of register selection within phases of register
 338 // allocation.  Highest priority is first.  A useful heuristic is to
 339 // give registers a low priority when they are required by machine
 340 // instructions, like EAX and EDX on I486, and choose no-save registers
 341 // before save-on-call, & save-on-call before save-on-entry.  Registers
 342 // which participate in fixed calling sequences should come last.
 343 // Registers which are used as pairs must fall on an even boundary.
 344 
// Allocation order (priority) for the integer registers: volatiles
// first, then argument registers, then callee-saved, with the
// non-allocatable fixed-role registers last.
alloc_class chunk0(
    // volatiles
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,

    // arg registers
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,

    // non-volatiles
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,

    // non-allocatable registers

    R27, R27_H, // heapbase
    R28, R28_H, // thread
    R29, R29_H, // fp
    R30, R30_H, // lr
    R31, R31_H, // sp
);

// Allocation order for the FP/SIMD registers: the no-save registers
// v16-v31 first, then the argument registers, then v8-v15.
alloc_class chunk1(

    // no save
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K,

    // arg registers
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,

    // non-volatiles
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
);

// The flags register lives in its own chunk.
alloc_class chunk2(RFLAGS);
 428 
 429 //----------Architecture Description Register Classes--------------------------
 430 // Several register classes are automatically defined based upon information in
 431 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 436 //
 437 
 438 // Class for all 32 bit integer registers -- excludes SP which will
 439 // never be used as an integer register
// (R31/sp is deliberately excluded; R29/fp and R30/lr remain allocatable here)
reg_class any_reg32(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26,
    R27,
    R28,
    R29,
    R30
);

// Singleton class for R0 int register
reg_class int_r0_reg(R0);

// Singleton class for R2 int register
reg_class int_r2_reg(R2);

// Singleton class for R3 int register
reg_class int_r3_reg(R3);

// Singleton class for R4 int register
reg_class int_r4_reg(R4);
 483 
// Class for all long integer registers (including SP)
// Unlike any_reg32 this class does include R31 (sp).
reg_class any_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);
 517 
 518 // Class for all non-special integer registers
// Variant that keeps R29 (fp) out of the allocatable set.
reg_class no_special_reg32_no_fp(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,                        // rmethod
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26
 /* R27, */                     // heapbase
 /* R28, */                     // thread
 /* R29, */                     // fp
 /* R30, */                     // lr
 /* R31 */                      // sp
);

// Variant that additionally allows R29 (fp) to be allocated.
reg_class no_special_reg32_with_fp(
    R0,
    R1,
    R2,
    R3,
    R4,
    R5,
    R6,
    R7,
    R10,
    R11,
    R12,                        // rmethod
    R13,
    R14,
    R15,
    R16,
    R17,
    R18,
    R19,
    R20,
    R21,
    R22,
    R23,
    R24,
    R25,
    R26
 /* R27, */                     // heapbase
 /* R28, */                     // thread
    R29,                        // fp
 /* R30, */                     // lr
 /* R31 */                      // sp
);

// Selects between the two variants above based on PreserveFramePointer.
reg_class_dynamic no_special_reg32(no_special_reg32_no_fp, no_special_reg32_with_fp, %{ PreserveFramePointer %});
 586 
 587 // Class for all non-special long integer registers
// 64-bit variant that keeps R29 (fp) out of the allocatable set.
reg_class no_special_reg_no_fp(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,                 // rmethod
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);

// 64-bit variant that additionally allows R29 (fp) to be allocated.
reg_class no_special_reg_with_fp(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,                 // rmethod
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
    R29, R29_H,                 // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);

// Selects between the two variants above based on PreserveFramePointer.
reg_class_dynamic no_special_reg(no_special_reg_no_fp, no_special_reg_with_fp, %{ PreserveFramePointer %});
 655 
// Singleton classes below pin specific 64-bit registers for fixed
// calling-convention and frame roles.

// Class for 64 bit register r0
reg_class r0_reg(
    R0, R0_H
);

// Class for 64 bit register r1
reg_class r1_reg(
    R1, R1_H
);

// Class for 64 bit register r2
reg_class r2_reg(
    R2, R2_H
);

// Class for 64 bit register r3
reg_class r3_reg(
    R3, R3_H
);

// Class for 64 bit register r4
reg_class r4_reg(
    R4, R4_H
);

// Class for 64 bit register r5
reg_class r5_reg(
    R5, R5_H
);

// Class for 64 bit register r10
reg_class r10_reg(
    R10, R10_H
);

// Class for 64 bit register r11
reg_class r11_reg(
    R11, R11_H
);

// Class for method register (rmethod)
reg_class method_reg(
    R12, R12_H
);

// Class for heapbase register
reg_class heapbase_reg(
    R27, R27_H
);

// Class for thread register
reg_class thread_reg(
    R28, R28_H
);

// Class for frame pointer register
reg_class fp_reg(
    R29, R29_H
);

// Class for link register
reg_class lr_reg(
    R30, R30_H
);

// Class for long sp register
reg_class sp_reg(
  R31, R31_H
);
 725 
 726 // Class for all pointer registers
// ptr_reg: every 64-bit register including the fixed-role ones.
reg_class ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
    R27, R27_H,
    R28, R28_H,
    R29, R29_H,
    R30, R30_H,
    R31, R31_H
);

// Class for all non_special pointer registers
// (excludes heapbase, thread, fp, lr and sp).
reg_class no_special_ptr_reg(
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,
 /* R27, R27_H, */              // heapbase
 /* R28, R28_H, */              // thread
 /* R29, R29_H, */              // fp
 /* R30, R30_H, */              // lr
 /* R31, R31_H */               // sp
);
 793 
 794 // Class for all float registers
// float_reg uses only the low 32-bit slice (Vn) of each vector register.
reg_class float_reg(
    V0,
    V1,
    V2,
    V3,
    V4,
    V5,
    V6,
    V7,
    V8,
    V9,
    V10,
    V11,
    V12,
    V13,
    V14,
    V15,
    V16,
    V17,
    V18,
    V19,
    V20,
    V21,
    V22,
    V23,
    V24,
    V25,
    V26,
    V27,
    V28,
    V29,
    V30,
    V31
);

// Double precision float registers have virtual `high halves' that
// are needed by the allocator.
// Class for all double registers (low 64 bits: Vn plus Vn_H)
reg_class double_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 867 
// Class for all 64bit vector registers
// A 64-bit vector occupies two 32-bit slots (Vn, Vn_H), i.e. the same
// register footprint as a double (compare double_reg above).
reg_class vectord_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 903 
// Class for all 128bit vector registers
// A 128-bit vector occupies all four 32-bit slots of a V register
// (Vn, Vn_H, Vn_J, Vn_K).
reg_class vectorx_reg(
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K
);
 939 
// Class for 128 bit register v0
// NOTE(review): the heading says "128 bit" but only two of the four
// 32-bit slots are listed (compare vectorx_reg above, which includes
// the _J/_K slots) -- confirm whether the _J/_K slots are deliberately
// omitted here. The same applies to v1_reg..v3_reg below.
reg_class v0_reg(
    V0, V0_H
);

// Class for 128 bit register v1
reg_class v1_reg(
    V1, V1_H
);

// Class for 128 bit register v2
reg_class v2_reg(
    V2, V2_H
);

// Class for 128 bit register v3
reg_class v3_reg(
    V3, V3_H
);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);
 962 
 963 %}
 964 
 965 //----------DEFINITION BLOCK---------------------------------------------------
 966 // Define name --> value mappings to inform the ADLC of an integer valued name
 967 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 968 // Format:
 969 //        int_def  <name>         ( <int_value>, <expression>);
 970 // Generated Code in ad_<arch>.hpp
 971 //        #define  <name>   (<expression>)
 972 //        // value == <int_value>
 973 // Generated code in ad_<arch>.cpp adlc_verification()
 974 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 975 //
 976 
 977 // we follow the ppc-aix port in using a simple cost model which ranks
 978 // register operations as cheap, memory ops as more expensive and
 979 // branches as most expensive. the first two have a low as well as a
 980 // normal cost. huge cost appears to be a way of saying don't do
 981 // something
 982 
definitions %{
  // The default cost (of a register move instruction).
  int_def INSN_COST            (    100,     100);
  // Branches and calls are ranked twice as expensive as a register op.
  int_def BRANCH_COST          (    200,     2 * INSN_COST);
  int_def CALL_COST            (    200,     2 * INSN_COST);
  // Memory references with volatile semantics are costed much higher.
  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
%}
 990 
 991 
 992 //----------SOURCE BLOCK-------------------------------------------------------
 993 // This is a block of C++ code which provides values, functions, and
 994 // definitions necessary in the rest of the architecture description
 995 
 996 source_hpp %{
 997 
 998 #include "gc/shared/cardTableModRefBS.hpp"
 999 #include "gc/shenandoah/brooksPointer.hpp"
1000 #include "opto/addnode.hpp"
1001 
class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  // Both queries return 0 because this platform emits no call
  // trampolines at all.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};
1019 
class HandlerImpl {

 public:

  // Emitters for the exception and deopt handler stubs; the
  // definitions live elsewhere in this file's source block.
  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  // The exception handler is sized as a single far branch.
  static uint size_exception_handler() {
    return MacroAssembler::far_branch_size();
  }

  static uint size_deopt_handler() {
    // count one adr and one far branch instruction
    // NOTE(review): returns 4 instruction slots although the comment
    // counts two instructions -- presumably a far branch can expand
    // to several instructions; confirm against
    // MacroAssembler::far_branch_size().
    return 4 * NativeInstruction::instruction_size;
  }
};
1036 
  // graph traversal helpers

  // locate the unique membar feeding (parent) or fed by (child) a
  // node via paired Ctl and Mem projections; return NULL when no
  // such membar exists
  MemBarNode *parent_membar(const Node *n);
  MemBarNode *child_membar(const MemBarNode *n);
  // true iff barrier can head a volatile put/CAS signature
  bool leading_membar(const MemBarNode *barrier);

  // true iff barrier is the StoreLoad membar of a GC card mark
  bool is_card_mark_membar(const MemBarNode *barrier);
  // true iff opcode is one of the CompareAndSwapX ideal opcodes
  bool is_CAS(int opcode);

  // navigation between the membars of a recognised volatile put or
  // CAS node sequence (each returns NULL when the signature is absent)
  MemBarNode *leading_to_normal(MemBarNode *leading);
  MemBarNode *normal_to_leading(const MemBarNode *barrier);
  MemBarNode *card_mark_to_trailing(const MemBarNode *barrier);
  MemBarNode *trailing_to_card_mark(const MemBarNode *trailing);
  MemBarNode *trailing_to_leading(const MemBarNode *trailing);

  // predicates controlling emit of ldr<x>/ldar<x> and associated dmb

  bool unnecessary_acquire(const Node *barrier);
  bool needs_acquiring_load(const Node *load);

  // predicates controlling emit of str<x>/stlr<x> and associated dmbs

  bool unnecessary_release(const Node *barrier);
  bool unnecessary_volatile(const Node *barrier);
  bool needs_releasing_store(const Node *store);

  // predicate controlling translation of CompareAndSwapX
  bool needs_acquiring_load_exclusive(const Node *load);

  // predicate controlling translation of StoreCM
  bool unnecessary_storestore(const Node *storecm);

  // predicate controlling addressing modes
  bool size_fits_all_mem_uses(AddPNode* addp, int shift);
1071 %}
1072 
1073 source %{
1074 
  // Optimization of volatile gets and puts
1076   // -------------------------------------
1077   //
1078   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1079   // use to implement volatile reads and writes. For a volatile read
1080   // we simply need
1081   //
1082   //   ldar<x>
1083   //
1084   // and for a volatile write we need
1085   //
1086   //   stlr<x>
1087   //
1088   // Alternatively, we can implement them by pairing a normal
1089   // load/store with a memory barrier. For a volatile read we need
1090   //
1091   //   ldr<x>
1092   //   dmb ishld
1093   //
1094   // for a volatile write
1095   //
1096   //   dmb ish
1097   //   str<x>
1098   //   dmb ish
1099   //
1100   // We can also use ldaxr and stlxr to implement compare and swap CAS
1101   // sequences. These are normally translated to an instruction
1102   // sequence like the following
1103   //
1104   //   dmb      ish
1105   // retry:
1106   //   ldxr<x>   rval raddr
1107   //   cmp       rval rold
1108   //   b.ne done
1109   //   stlxr<x>  rval, rnew, rold
1110   //   cbnz      rval retry
1111   // done:
1112   //   cset      r0, eq
1113   //   dmb ishld
1114   //
1115   // Note that the exclusive store is already using an stlxr
1116   // instruction. That is required to ensure visibility to other
1117   // threads of the exclusive write (assuming it succeeds) before that
1118   // of any subsequent writes.
1119   //
1120   // The following instruction sequence is an improvement on the above
1121   //
1122   // retry:
1123   //   ldaxr<x>  rval raddr
1124   //   cmp       rval rold
1125   //   b.ne done
1126   //   stlxr<x>  rval, rnew, rold
1127   //   cbnz      rval retry
1128   // done:
1129   //   cset      r0, eq
1130   //
1131   // We don't need the leading dmb ish since the stlxr guarantees
1132   // visibility of prior writes in the case that the swap is
1133   // successful. Crucially we don't have to worry about the case where
1134   // the swap is not successful since no valid program should be
1135   // relying on visibility of prior changes by the attempting thread
1136   // in the case where the CAS fails.
1137   //
1138   // Similarly, we don't need the trailing dmb ishld if we substitute
1139   // an ldaxr instruction since that will provide all the guarantees we
1140   // require regarding observation of changes made by other threads
1141   // before any change to the CAS address observed by the load.
1142   //
1143   // In order to generate the desired instruction sequence we need to
1144   // be able to identify specific 'signature' ideal graph node
1145   // sequences which i) occur as a translation of a volatile reads or
1146   // writes or CAS operations and ii) do not occur through any other
1147   // translation or graph transformation. We can then provide
  // alternative adlc matching rules which translate these node
1149   // sequences to the desired machine code sequences. Selection of the
1150   // alternative rules can be implemented by predicates which identify
1151   // the relevant node sequences.
1152   //
1153   // The ideal graph generator translates a volatile read to the node
1154   // sequence
1155   //
1156   //   LoadX[mo_acquire]
1157   //   MemBarAcquire
1158   //
1159   // As a special case when using the compressed oops optimization we
1160   // may also see this variant
1161   //
1162   //   LoadN[mo_acquire]
1163   //   DecodeN
1164   //   MemBarAcquire
1165   //
1166   // A volatile write is translated to the node sequence
1167   //
1168   //   MemBarRelease
1169   //   StoreX[mo_release] {CardMark}-optional
1170   //   MemBarVolatile
1171   //
1172   // n.b. the above node patterns are generated with a strict
1173   // 'signature' configuration of input and output dependencies (see
1174   // the predicates below for exact details). The card mark may be as
1175   // simple as a few extra nodes or, in a few GC configurations, may
1176   // include more complex control flow between the leading and
1177   // trailing memory barriers. However, whatever the card mark
1178   // configuration these signatures are unique to translated volatile
1179   // reads/stores -- they will not appear as a result of any other
1180   // bytecode translation or inlining nor as a consequence of
1181   // optimizing transforms.
1182   //
1183   // We also want to catch inlined unsafe volatile gets and puts and
1184   // be able to implement them using either ldar<x>/stlr<x> or some
1185   // combination of ldr<x>/stlr<x> and dmb instructions.
1186   //
  // Inlined unsafe volatile puts manifest as a minor variant of the
1188   // normal volatile put node sequence containing an extra cpuorder
1189   // membar
1190   //
1191   //   MemBarRelease
1192   //   MemBarCPUOrder
1193   //   StoreX[mo_release] {CardMark}-optional
1194   //   MemBarVolatile
1195   //
1196   // n.b. as an aside, the cpuorder membar is not itself subject to
1197   // matching and translation by adlc rules.  However, the rule
1198   // predicates need to detect its presence in order to correctly
1199   // select the desired adlc rules.
1200   //
1201   // Inlined unsafe volatile gets manifest as a somewhat different
1202   // node sequence to a normal volatile get
1203   //
1204   //   MemBarCPUOrder
1205   //        ||       \\
1206   //   MemBarAcquire LoadX[mo_acquire]
1207   //        ||
1208   //   MemBarCPUOrder
1209   //
1210   // In this case the acquire membar does not directly depend on the
1211   // load. However, we can be sure that the load is generated from an
1212   // inlined unsafe volatile get if we see it dependent on this unique
1213   // sequence of membar nodes. Similarly, given an acquire membar we
1214   // can know that it was added because of an inlined unsafe volatile
1215   // get if it is fed and feeds a cpuorder membar and if its feed
1216   // membar also feeds an acquiring load.
1217   //
1218   // Finally an inlined (Unsafe) CAS operation is translated to the
1219   // following ideal graph
1220   //
1221   //   MemBarRelease
1222   //   MemBarCPUOrder
1223   //   CompareAndSwapX {CardMark}-optional
1224   //   MemBarCPUOrder
1225   //   MemBarAcquire
1226   //
1227   // So, where we can identify these volatile read and write
1228   // signatures we can choose to plant either of the above two code
1229   // sequences. For a volatile read we can simply plant a normal
1230   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1231   // also choose to inhibit translation of the MemBarAcquire and
1232   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1233   //
1234   // When we recognise a volatile store signature we can choose to
  // plant a dmb ish as a translation for the MemBarRelease, a
1236   // normal str<x> and then a dmb ish for the MemBarVolatile.
1237   // Alternatively, we can inhibit translation of the MemBarRelease
1238   // and MemBarVolatile and instead plant a simple stlr<x>
1239   // instruction.
1240   //
1241   // when we recognise a CAS signature we can choose to plant a dmb
1242   // ish as a translation for the MemBarRelease, the conventional
1243   // macro-instruction sequence for the CompareAndSwap node (which
1244   // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
1245   // Alternatively, we can elide generation of the dmb instructions
1246   // and plant the alternative CompareAndSwap macro-instruction
1247   // sequence (which uses ldaxr<x>).
1248   //
1249   // Of course, the above only applies when we see these signature
1250   // configurations. We still want to plant dmb instructions in any
1251   // other cases where we may see a MemBarAcquire, MemBarRelease or
1252   // MemBarVolatile. For example, at the end of a constructor which
1253   // writes final/volatile fields we will see a MemBarRelease
1254   // instruction and this needs a 'dmb ish' lest we risk the
1255   // constructed object being visible without making the
1256   // final/volatile field writes visible.
1257   //
1258   // n.b. the translation rules below which rely on detection of the
1259   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1260   // If we see anything other than the signature configurations we
1261   // always just translate the loads and stores to ldr<x> and str<x>
1262   // and translate acquire, release and volatile membars to the
1263   // relevant dmb instructions.
1264   //
1265 
1266   // graph traversal helpers used for volatile put/get and CAS
1267   // optimization
1268 
1269   // 1) general purpose helpers
1270 
1271   // if node n is linked to a parent MemBarNode by an intervening
1272   // Control and Memory ProjNode return the MemBarNode otherwise return
1273   // NULL.
1274   //
1275   // n may only be a Load or a MemBar.
1276 
1277   MemBarNode *parent_membar(const Node *n)
1278   {
1279     Node *ctl = NULL;
1280     Node *mem = NULL;
1281     Node *membar = NULL;
1282 
1283     if (n->is_Load()) {
1284       ctl = n->lookup(LoadNode::Control);
1285       mem = n->lookup(LoadNode::Memory);
1286     } else if (n->is_MemBar()) {
1287       ctl = n->lookup(TypeFunc::Control);
1288       mem = n->lookup(TypeFunc::Memory);
1289     } else {
1290         return NULL;
1291     }
1292 
1293     if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj()) {
1294       return NULL;
1295     }
1296 
1297     membar = ctl->lookup(0);
1298 
1299     if (!membar || !membar->is_MemBar()) {
1300       return NULL;
1301     }
1302 
1303     if (mem->lookup(0) != membar) {
1304       return NULL;
1305     }
1306 
1307     return membar->as_MemBar();
1308   }
1309 
1310   // if n is linked to a child MemBarNode by intervening Control and
1311   // Memory ProjNodes return the MemBarNode otherwise return NULL.
1312 
1313   MemBarNode *child_membar(const MemBarNode *n)
1314   {
1315     ProjNode *ctl = n->proj_out(TypeFunc::Control);
1316     ProjNode *mem = n->proj_out(TypeFunc::Memory);
1317 
1318     // MemBar needs to have both a Ctl and Mem projection
1319     if (! ctl || ! mem)
1320       return NULL;
1321 
1322     MemBarNode *child = NULL;
1323     Node *x;
1324 
1325     for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
1326       x = ctl->fast_out(i);
1327       // if we see a membar we keep hold of it. we may also see a new
1328       // arena copy of the original but it will appear later
1329       if (x->is_MemBar()) {
1330           child = x->as_MemBar();
1331           break;
1332       }
1333     }
1334 
1335     if (child == NULL) {
1336       return NULL;
1337     }
1338 
1339     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1340       x = mem->fast_out(i);
1341       // if we see a membar we keep hold of it. we may also see a new
1342       // arena copy of the original but it will appear later
1343       if (x == child) {
1344         return child;
1345       }
1346     }
1347     return NULL;
1348   }
1349 
1350   // helper predicate use to filter candidates for a leading memory
1351   // barrier
1352   //
1353   // returns true if barrier is a MemBarRelease or a MemBarCPUOrder
1354   // whose Ctl and Mem feeds come from a MemBarRelease otherwise false
1355 
1356   bool leading_membar(const MemBarNode *barrier)
1357   {
1358     int opcode = barrier->Opcode();
1359     // if this is a release membar we are ok
1360     if (opcode == Op_MemBarRelease) {
1361       return true;
1362     }
1363     // if its a cpuorder membar . . .
1364     if (opcode != Op_MemBarCPUOrder) {
1365       return false;
1366     }
1367     // then the parent has to be a release membar
1368     MemBarNode *parent = parent_membar(barrier);
1369     if (!parent) {
1370       return false;
1371     }
1372     opcode = parent->Opcode();
1373     return opcode == Op_MemBarRelease;
1374   }
1375 
1376   // 2) card mark detection helper
1377 
1378   // helper predicate which can be used to detect a volatile membar
1379   // introduced as part of a conditional card mark sequence either by
1380   // G1 or by CMS when UseCondCardMark is true.
1381   //
1382   // membar can be definitively determined to be part of a card mark
1383   // sequence if and only if all the following hold
1384   //
1385   // i) it is a MemBarVolatile
1386   //
1387   // ii) either UseG1GC or (UseConcMarkSweepGC && UseCondCardMark) is
1388   // true
1389   //
1390   // iii) the node's Mem projection feeds a StoreCM node.
1391 
1392   bool is_card_mark_membar(const MemBarNode *barrier)
1393   {
1394     if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) {
1395       return false;
1396     }
1397 
1398     if (barrier->Opcode() != Op_MemBarVolatile) {
1399       return false;
1400     }
1401 
1402     ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
1403 
1404     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax ; i++) {
1405       Node *y = mem->fast_out(i);
1406       if (y->Opcode() == Op_StoreCM) {
1407         return true;
1408       }
1409     }
1410 
1411     return false;
1412   }
1413 
1414 
1415   // 3) helper predicates to traverse volatile put or CAS graphs which
1416   // may contain GC barrier subgraphs
1417 
1418   // Preamble
1419   // --------
1420   //
1421   // for volatile writes we can omit generating barriers and employ a
  // releasing store when we see a node sequence with a
1423   // leading MemBarRelease and a trailing MemBarVolatile as follows
1424   //
1425   //   MemBarRelease
1426   //  {      ||      } -- optional
1427   //  {MemBarCPUOrder}
1428   //         ||     \\
1429   //         ||     StoreX[mo_release]
1430   //         | \     /
1431   //         | MergeMem
1432   //         | /
1433   //   MemBarVolatile
1434   //
1435   // where
1436   //  || and \\ represent Ctl and Mem feeds via Proj nodes
1437   //  | \ and / indicate further routing of the Ctl and Mem feeds
1438   //
1439   // this is the graph we see for non-object stores. however, for a
1440   // volatile Object store (StoreN/P) we may see other nodes below the
1441   // leading membar because of the need for a GC pre- or post-write
1442   // barrier.
1443   //
  // with most GC configurations we will see this simple variant which
1445   // includes a post-write barrier card mark.
1446   //
1447   //   MemBarRelease______________________________
1448   //         ||    \\               Ctl \        \\
1449   //         ||    StoreN/P[mo_release] CastP2X  StoreB/CM
1450   //         | \     /                       . . .  /
1451   //         | MergeMem
1452   //         | /
1453   //         ||      /
1454   //   MemBarVolatile
1455   //
1456   // i.e. the leading membar feeds Ctl to a CastP2X (which converts
1457   // the object address to an int used to compute the card offset) and
1458   // Ctl+Mem to a StoreB node (which does the actual card mark).
1459   //
1460   // n.b. a StoreCM node will only appear in this configuration when
1461   // using CMS. StoreCM differs from a normal card mark write (StoreB)
1462   // because it implies a requirement to order visibility of the card
1463   // mark (StoreCM) relative to the object put (StoreP/N) using a
1464   // StoreStore memory barrier (arguably this ought to be represented
1465   // explicitly in the ideal graph but that is not how it works). This
1466   // ordering is required for both non-volatile and volatile
1467   // puts. Normally that means we need to translate a StoreCM using
1468   // the sequence
1469   //
1470   //   dmb ishst
1471   //   stlrb
1472   //
1473   // However, in the case of a volatile put if we can recognise this
1474   // configuration and plant an stlr for the object write then we can
1475   // omit the dmb and just plant an strb since visibility of the stlr
1476   // is ordered before visibility of subsequent stores. StoreCM nodes
1477   // also arise when using G1 or using CMS with conditional card
1478   // marking. In these cases (as we shall see) we don't need to insert
1479   // the dmb when translating StoreCM because there is already an
1480   // intervening StoreLoad barrier between it and the StoreP/N.
1481   //
1482   // It is also possible to perform the card mark conditionally on it
1483   // currently being unmarked in which case the volatile put graph
1484   // will look slightly different
1485   //
1486   //   MemBarRelease____________________________________________
1487   //         ||    \\               Ctl \     Ctl \     \\  Mem \
1488   //         ||    StoreN/P[mo_release] CastP2X   If   LoadB     |
1489   //         | \     /                              \            |
1490   //         | MergeMem                            . . .      StoreB
1491   //         | /                                                /
1492   //         ||     /
1493   //   MemBarVolatile
1494   //
1495   // It is worth noting at this stage that both the above
1496   // configurations can be uniquely identified by checking that the
1497   // memory flow includes the following subgraph:
1498   //
1499   //   MemBarRelease
1500   //  {MemBarCPUOrder}
1501   //          |  \      . . .
1502   //          |  StoreX[mo_release]  . . .
1503   //          |   /
1504   //         MergeMem
1505   //          |
1506   //   MemBarVolatile
1507   //
1508   // This is referred to as a *normal* subgraph. It can easily be
1509   // detected starting from any candidate MemBarRelease,
1510   // StoreX[mo_release] or MemBarVolatile.
1511   //
1512   // A simple variation on this normal case occurs for an unsafe CAS
1513   // operation. The basic graph for a non-object CAS is
1514   //
1515   //   MemBarRelease
1516   //         ||
1517   //   MemBarCPUOrder
1518   //         ||     \\   . . .
1519   //         ||     CompareAndSwapX
1520   //         ||       |
1521   //         ||     SCMemProj
1522   //         | \     /
1523   //         | MergeMem
1524   //         | /
1525   //   MemBarCPUOrder
1526   //         ||
1527   //   MemBarAcquire
1528   //
1529   // The same basic variations on this arrangement (mutatis mutandis)
  // occur when a card mark is introduced. i.e. we see the same basic
  // shape but the StoreP/N is replaced with CompareAndSwapP/N and the
1532   // tail of the graph is a pair comprising a MemBarCPUOrder +
1533   // MemBarAcquire.
1534   //
1535   // So, in the case of a CAS the normal graph has the variant form
1536   //
1537   //   MemBarRelease
1538   //   MemBarCPUOrder
1539   //          |   \      . . .
1540   //          |  CompareAndSwapX  . . .
1541   //          |    |
1542   //          |   SCMemProj
1543   //          |   /  . . .
1544   //         MergeMem
1545   //          |
1546   //   MemBarCPUOrder
1547   //   MemBarAcquire
1548   //
1549   // This graph can also easily be detected starting from any
1550   // candidate MemBarRelease, CompareAndSwapX or MemBarAcquire.
1551   //
1552   // the code below uses two helper predicates, leading_to_normal and
1553   // normal_to_leading to identify these normal graphs, one validating
1554   // the layout starting from the top membar and searching down and
1555   // the other validating the layout starting from the lower membar
1556   // and searching up.
1557   //
1558   // There are two special case GC configurations when a normal graph
1559   // may not be generated: when using G1 (which always employs a
1560   // conditional card mark); and when using CMS with conditional card
1561   // marking configured. These GCs are both concurrent rather than
  // stop-the-world GCs. So they introduce extra Ctl+Mem flow into the
1563   // graph between the leading and trailing membar nodes, in
  // particular enforcing stronger memory serialisation between the
1565   // object put and the corresponding conditional card mark. CMS
1566   // employs a post-write GC barrier while G1 employs both a pre- and
1567   // post-write GC barrier. Of course the extra nodes may be absent --
1568   // they are only inserted for object puts. This significantly
1569   // complicates the task of identifying whether a MemBarRelease,
1570   // StoreX[mo_release] or MemBarVolatile forms part of a volatile put
1571   // when using these GC configurations (see below). It adds similar
1572   // complexity to the task of identifying whether a MemBarRelease,
1573   // CompareAndSwapX or MemBarAcquire forms part of a CAS.
1574   //
1575   // In both cases the post-write subtree includes an auxiliary
1576   // MemBarVolatile (StoreLoad barrier) separating the object put and
1577   // the read of the corresponding card. This poses two additional
1578   // problems.
1579   //
1580   // Firstly, a card mark MemBarVolatile needs to be distinguished
1581   // from a normal trailing MemBarVolatile. Resolving this first
1582   // problem is straightforward: a card mark MemBarVolatile always
1583   // projects a Mem feed to a StoreCM node and that is a unique marker
1584   //
1585   //      MemBarVolatile (card mark)
1586   //       C |    \     . . .
1587   //         |   StoreCM   . . .
1588   //       . . .
1589   //
1590   // The second problem is how the code generator is to translate the
1591   // card mark barrier? It always needs to be translated to a "dmb
1592   // ish" instruction whether or not it occurs as part of a volatile
1593   // put. A StoreLoad barrier is needed after the object put to ensure
1594   // i) visibility to GC threads of the object put and ii) visibility
1595   // to the mutator thread of any card clearing write by a GC
1596   // thread. Clearly a normal store (str) will not guarantee this
1597   // ordering but neither will a releasing store (stlr). The latter
1598   // guarantees that the object put is visible but does not guarantee
1599   // that writes by other threads have also been observed.
1600   //
1601   // So, returning to the task of translating the object put and the
1602   // leading/trailing membar nodes: what do the non-normal node graph
1603   // look like for these 2 special cases? and how can we determine the
1604   // status of a MemBarRelease, StoreX[mo_release] or MemBarVolatile
1605   // in both normal and non-normal cases?
1606   //
1607   // A CMS GC post-barrier wraps its card write (StoreCM) inside an If
  // which selects conditional execution based on the value loaded
1609   // (LoadB) from the card. Ctl and Mem are fed to the If via an
1610   // intervening StoreLoad barrier (MemBarVolatile).
1611   //
1612   // So, with CMS we may see a node graph for a volatile object store
1613   // which looks like this
1614   //
1615   //   MemBarRelease
1616   //   MemBarCPUOrder_(leading)__________________
1617   //     C |    M \       \\                   C \
1618   //       |       \    StoreN/P[mo_release]  CastP2X
1619   //       |    Bot \    /
1620   //       |       MergeMem
1621   //       |         /
1622   //      MemBarVolatile (card mark)
1623   //     C |  ||    M |
1624   //       | LoadB    |
1625   //       |   |      |
1626   //       | Cmp      |\
1627   //       | /        | \
1628   //       If         |  \
1629   //       | \        |   \
1630   // IfFalse  IfTrue  |    \
1631   //       \     / \  |     \
1632   //        \   / StoreCM    |
1633   //         \ /      |      |
1634   //        Region   . . .   |
1635   //          | \           /
1636   //          |  . . .  \  / Bot
1637   //          |       MergeMem
1638   //          |          |
1639   //        MemBarVolatile (trailing)
1640   //
1641   // The first MergeMem merges the AliasIdxBot Mem slice from the
1642   // leading membar and the oopptr Mem slice from the Store into the
1643   // card mark membar. The trailing MergeMem merges the AliasIdxBot
1644   // Mem slice from the card mark membar and the AliasIdxRaw slice
1645   // from the StoreCM into the trailing membar (n.b. the latter
1646   // proceeds via a Phi associated with the If region).
1647   //
1648   // The graph for a CAS varies slightly, the obvious difference being
1649   // that the StoreN/P node is replaced by a CompareAndSwapP/N node
1650   // and the trailing MemBarVolatile by a MemBarCPUOrder +
1651   // MemBarAcquire pair. The other important difference is that the
1652   // CompareAndSwap node's SCMemProj is not merged into the card mark
1653   // membar - it still feeds the trailing MergeMem. This also means
1654   // that the card mark membar receives its Mem feed directly from the
1655   // leading membar rather than via a MergeMem.
1656   //
1657   //   MemBarRelease
1658   //   MemBarCPUOrder__(leading)_________________________
1659   //       ||                       \\                 C \
1660   //   MemBarVolatile (card mark)  CompareAndSwapN/P  CastP2X
1661   //     C |  ||    M |              |
1662   //       | LoadB    |       ______/|
1663   //       |   |      |      /       |
1664   //       | Cmp      |     /      SCMemProj
1665   //       | /        |    /         |
1666   //       If         |   /         /
1667   //       | \        |  /         /
1668   // IfFalse  IfTrue  | /         /
1669   //       \     / \  |/ prec    /
1670   //        \   / StoreCM       /
1671   //         \ /      |        /
1672   //        Region   . . .    /
1673   //          | \            /
1674   //          |  . . .  \   / Bot
1675   //          |       MergeMem
1676   //          |          |
1677   //        MemBarCPUOrder
1678   //        MemBarAcquire (trailing)
1679   //
1680   // This has a slightly different memory subgraph to the one seen
1681   // previously but the core of it is the same as for the CAS normal
  // subgraph
1683   //
1684   //   MemBarRelease
1685   //   MemBarCPUOrder____
1686   //      ||             \      . . .
1687   //   MemBarVolatile  CompareAndSwapX  . . .
1688   //      |  \            |
1689   //        . . .   SCMemProj
1690   //          |     /  . . .
1691   //         MergeMem
1692   //          |
1693   //   MemBarCPUOrder
1694   //   MemBarAcquire
1695   //
1696   //
1697   // G1 is quite a lot more complicated. The nodes inserted on behalf
1698   // of G1 may comprise: a pre-write graph which adds the old value to
1699   // the SATB queue; the releasing store itself; and, finally, a
1700   // post-write graph which performs a card mark.
1701   //
1702   // The pre-write graph may be omitted, but only when the put is
1703   // writing to a newly allocated (young gen) object and then only if
1704   // there is a direct memory chain to the Initialize node for the
1705   // object allocation. This will not happen for a volatile put since
1706   // any memory chain passes through the leading membar.
1707   //
1708   // The pre-write graph includes a series of 3 If tests. The outermost
1709   // If tests whether SATB is enabled (no else case). The next If tests
1710   // whether the old value is non-NULL (no else case). The third tests
1711   // whether the SATB queue index is > 0, if so updating the queue. The
1712   // else case for this third If calls out to the runtime to allocate a
1713   // new queue buffer.
1714   //
1715   // So with G1 the pre-write and releasing store subgraph looks like
1716   // this (the nested Ifs are omitted).
1717   //
1718   //  MemBarRelease (leading)____________
1719   //     C |  ||  M \   M \    M \  M \ . . .
1720   //       | LoadB   \  LoadL  LoadN   \
1721   //       | /        \                 \
1722   //       If         |\                 \
1723   //       | \        | \                 \
1724   //  IfFalse  IfTrue |  \                 \
1725   //       |     |    |   \                 |
1726   //       |     If   |   /\                |
1727   //       |     |          \               |
1728   //       |                 \              |
1729   //       |    . . .         \             |
1730   //       | /       | /       |            |
1731   //      Region  Phi[M]       |            |
1732   //       | \       |         |            |
1733   //       |  \_____ | ___     |            |
1734   //     C | C \     |   C \ M |            |
1735   //       | CastP2X | StoreN/P[mo_release] |
1736   //       |         |         |            |
1737   //     C |       M |       M |          M |
1738   //        \        |         |           /
1739   //                  . . .
1740   //          (post write subtree elided)
1741   //                    . . .
1742   //             C \         M /
1743   //         MemBarVolatile (trailing)
1744   //
1745   // n.b. the LoadB in this subgraph is not the card read -- it's a
1746   // read of the SATB queue active flag.
1747   //
1748   // Once again the CAS graph is a minor variant on the above with the
1749   // expected substitutions of CompareAndSwapX for StoreN/P and
1750   // MemBarCPUOrder + MemBarAcquire for trailing MemBarVolatile.
1751   //
1752   // The G1 post-write subtree is also optional, this time when the
1753   // new value being written is either null or can be identified as a
1754   // newly allocated (young gen) object with no intervening control
1755   // flow. The latter cannot happen but the former may, in which case
1756   // the card mark membar is omitted and the memory feeds from the
1757   // leading membar and the StoreN/P are merged direct into the
1758   // trailing membar as per the normal subgraph. So, the only special
1759   // case which arises is when the post-write subgraph is generated.
1760   //
1761   // The kernel of the post-write G1 subgraph is the card mark itself
1762   // which includes a card mark memory barrier (MemBarVolatile), a
1763   // card test (LoadB), and a conditional update (If feeding a
1764   // StoreCM). These nodes are surrounded by a series of nested Ifs
1765   // which try to avoid doing the card mark. The top level If skips if
1766   // the object reference does not cross regions (i.e. it tests if
1767   // (adr ^ val) >> log2(regsize) != 0) -- intra-region references
1768   // need not be recorded. The next If, which skips on a NULL value,
1769   // may be absent (it is not generated if the type of value is >=
1770   // OopPtr::NotNull). The 3rd If skips writes to young regions (by
1771   // checking if card_val != young).  n.b. although this test requires
1772   // a pre-read of the card it can safely be done before the StoreLoad
1773   // barrier. However that does not bypass the need to reread the card
1774   // after the barrier.
1775   //
1776   //                (pre-write subtree elided)
1777   //        . . .                  . . .    . . .  . . .
1778   //        C |                    M |     M |    M |
1779   //       Region                  Phi[M] StoreN    |
1780   //          |                     / \      |      |
1781   //         / \_______            /   \     |      |
1782   //      C / C \      . . .            \    |      |
1783   //       If   CastP2X . . .            |   |      |
1784   //       / \                           |   |      |
1785   //      /   \                          |   |      |
1786   // IfFalse IfTrue                      |   |      |
1787   //   |       |                         |   |     /|
1788   //   |       If                        |   |    / |
1789   //   |      / \                        |   |   /  |
1790   //   |     /   \                        \  |  /   |
1791   //   | IfFalse IfTrue                   MergeMem  |
1792   //   |  . . .    / \                       /      |
1793   //   |          /   \                     /       |
1794   //   |     IfFalse IfTrue                /        |
1795   //   |      . . .    |                  /         |
1796   //   |               If                /          |
1797   //   |               / \              /           |
1798   //   |              /   \            /            |
1799   //   |         IfFalse IfTrue       /             |
1800   //   |           . . .   |         /              |
1801   //   |                    \       /               |
1802   //   |                     \     /                |
1803   //   |             MemBarVolatile__(card mark)    |
1804   //   |                ||   C |  M \  M \          |
1805   //   |               LoadB   If    |    |         |
1806   //   |                      / \    |    |         |
1807   //   |                     . . .   |    |         |
1808   //   |                          \  |    |        /
1809   //   |                        StoreCM   |       /
1810   //   |                          . . .   |      /
1811   //   |                        _________/      /
1812   //   |                       /  _____________/
1813   //   |   . . .       . . .  |  /            /
1814   //   |    |                 | /   _________/
1815   //   |    |               Phi[M] /        /
1816   //   |    |                 |   /        /
1817   //   |    |                 |  /        /
1818   //   |  Region  . . .     Phi[M]  _____/
1819   //   |    /                 |    /
1820   //   |                      |   /
1821   //   | . . .   . . .        |  /
1822   //   | /                    | /
1823   // Region           |  |  Phi[M]
1824   //   |              |  |  / Bot
1825   //    \            MergeMem
1826   //     \            /
1827   //     MemBarVolatile
1828   //
1829   // As with CMS the initial MergeMem merges the AliasIdxBot Mem slice
1830   // from the leading membar and the oopptr Mem slice from the Store
1831   // into the card mark membar i.e. the memory flow to the card mark
1832   // membar still looks like a normal graph.
1833   //
1834   // The trailing MergeMem merges an AliasIdxBot Mem slice with other
1835   // Mem slices (from the StoreCM and other card mark queue stores).
1836   // However in this case the AliasIdxBot Mem slice does not come
1837   // direct from the card mark membar. It is merged through a series
1838   // of Phi nodes. These are needed to merge the AliasIdxBot Mem flow
1839   // from the leading membar with the Mem feed from the card mark
1840   // membar. Each Phi corresponds to one of the Ifs which may skip
1841   // around the card mark membar. So when the If implementing the NULL
1842   // value check has been elided the total number of Phis is 2
1843   // otherwise it is 3.
1844   //
1845   // The CAS graph when using G1GC also includes a pre-write subgraph
1846   // and an optional post-write subgraph. The same variations are
1847   // introduced as for CMS with conditional card marking i.e. the
1848   // StoreP/N is swapped for a CompareAndSwapP/N, the trailing
1849   // MemBarVolatile for a MemBarCPUOrder + MemBarAcquire pair and the
1850   // Mem feed from the CompareAndSwapP/N includes a precedence
1851   // dependency feed to the StoreCM and a feed via an SCMemProj to the
1852   // trailing membar. So, as before the configuration includes the
1853   // normal CAS graph as a subgraph of the memory flow.
1854   //
1855   // So, the upshot is that in all cases the volatile put graph will
1856   // include a *normal* memory subgraph between the leading membar and
1857   // its child membar, either a volatile put graph (including a
1858   // releasing StoreX) or a CAS graph (including a CompareAndSwapX).
1859   // When that child is not a card mark membar then it marks the end
1860   // of the volatile put or CAS subgraph. If the child is a card mark
1861   // membar then the normal subgraph will form part of a volatile put
1862   // subgraph if and only if the child feeds an AliasIdxBot Mem feed
1863   // to a trailing barrier via a MergeMem. That feed is either direct
1864   // (for CMS) or via 2 or 3 Phi nodes merging the leading barrier
1865   // memory flow (for G1).
1866   //
1867   // The predicates controlling generation of instructions for store
1868   // and barrier nodes employ a few simple helper functions (described
1869   // below) which identify the presence or absence of all these
1870   // subgraph configurations and provide a means of traversing from
1871   // one node in the subgraph to another.
1872 
1873   // is_CAS(int opcode)
1874   //
1875   // return true if opcode is one of the possible CompareAndSwapX
1876   // values otherwise false.
1877 
1878   bool is_CAS(int opcode)
1879   {
1880     switch(opcode) {
1881       // We handle these
1882     case Op_CompareAndSwapI:
1883     case Op_CompareAndSwapL:
1884     case Op_CompareAndSwapP:
1885     case Op_CompareAndSwapN:
1886  // case Op_CompareAndSwapB:
1887  // case Op_CompareAndSwapS:
1888       return true;
1889       // These are TBD
1890     case Op_WeakCompareAndSwapB:
1891     case Op_WeakCompareAndSwapS:
1892     case Op_WeakCompareAndSwapI:
1893     case Op_WeakCompareAndSwapL:
1894     case Op_WeakCompareAndSwapP:
1895     case Op_WeakCompareAndSwapN:
1896     case Op_CompareAndExchangeB:
1897     case Op_CompareAndExchangeS:
1898     case Op_CompareAndExchangeI:
1899     case Op_CompareAndExchangeL:
1900     case Op_CompareAndExchangeP:
1901     case Op_CompareAndExchangeN:
1902       return false;
1903     default:
1904       return false;
1905     }
1906   }
1907 
1908 
1909   // leading_to_normal
1910   //
1911   // graph traversal helper which detects the normal case Mem feed from
1912   // a release membar (or, optionally, its cpuorder child) to a
1913   // dependent volatile membar i.e. it ensures that one or other of
1914   // the following Mem flow subgraph is present.
1915   //
1916   //   MemBarRelease
1917   //   MemBarCPUOrder {leading}
1918   //          |  \      . . .
1919   //          |  StoreN/P[mo_release]  . . .
1920   //          |   /
1921   //         MergeMem
1922   //          |
1923   //   MemBarVolatile {trailing or card mark}
1924   //
1925   //   MemBarRelease
1926   //   MemBarCPUOrder {leading}
1927   //      |       \      . . .
1928   //      |     CompareAndSwapX  . . .
1929   //               |
1930   //     . . .    SCMemProj
1931   //           \   |
1932   //      |    MergeMem
1933   //      |       /
1934   //    MemBarCPUOrder
1935   //    MemBarAcquire {trailing}
1936   //
1937   // if the correct configuration is present returns the trailing
1938   // membar otherwise NULL.
1939   //
1940   // the input membar is expected to be either a cpuorder membar or a
1941   // release membar. in the latter case it should not have a cpu membar
1942   // child.
1943   //
1944   // the returned value may be a card mark or trailing membar
1945   //
1946 
1947   MemBarNode *leading_to_normal(MemBarNode *leading)
1948   {
1949     assert((leading->Opcode() == Op_MemBarRelease ||
1950             leading->Opcode() == Op_MemBarCPUOrder),
1951            "expecting a volatile or cpuroder membar!");
1952 
1953     // check the mem flow
1954     ProjNode *mem = leading->proj_out(TypeFunc::Memory);
1955 
1956     if (!mem) {
1957       return NULL;
1958     }
1959 
1960     Node *x = NULL;
1961     StoreNode * st = NULL;
1962     LoadStoreNode *cas = NULL;
1963     MergeMemNode *mm = NULL;
1964 
1965     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1966       x = mem->fast_out(i);
1967       if (x->is_MergeMem()) {
1968         if (mm != NULL) {
1969           return NULL;
1970         }
1971         // two merge mems is one too many
1972         mm = x->as_MergeMem();
1973       } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
1974         // two releasing stores/CAS nodes is one too many
1975         if (st != NULL || cas != NULL) {
1976           return NULL;
1977         }
1978         st = x->as_Store();
1979       } else if (is_CAS(x->Opcode())) {
1980         if (st != NULL || cas != NULL) {
1981           return NULL;
1982         }
1983         cas = x->as_LoadStore();
1984       }
1985     }
1986 
1987     // must have a store or a cas
1988     if (!st && !cas) {
1989       return NULL;
1990     }
1991 
1992     // must have a merge if we also have st
1993     if (st && !mm) {
1994       return NULL;
1995     }
1996 
1997     Node *y = NULL;
1998     if (cas) {
1999       // look for an SCMemProj
2000       for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
2001         x = cas->fast_out(i);
2002         if (x->is_Proj()) {
2003           y = x;
2004           break;
2005         }
2006       }
2007       if (y == NULL) {
2008         return NULL;
2009       }
2010       // the proj must feed a MergeMem
2011       for (DUIterator_Fast imax, i = y->fast_outs(imax); i < imax; i++) {
2012         x = y->fast_out(i);
2013         if (x->is_MergeMem()) {
2014           mm = x->as_MergeMem();
2015           break;
2016         }
2017       }
2018       if (mm == NULL)
2019         return NULL;
2020     } else {
2021       // ensure the store feeds the existing mergemem;
2022       for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2023         if (st->fast_out(i) == mm) {
2024           y = st;
2025           break;
2026         }
2027       }
2028       if (y == NULL) {
2029         return NULL;
2030       }
2031     }
2032 
2033     MemBarNode *mbar = NULL;
2034     // ensure the merge feeds to the expected type of membar
2035     for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2036       x = mm->fast_out(i);
2037       if (x->is_MemBar()) {
2038         int opcode = x->Opcode();
2039         if (opcode == Op_MemBarVolatile && st) {
2040           mbar = x->as_MemBar();
2041         } else if (cas && opcode == Op_MemBarCPUOrder) {
2042           MemBarNode *y =  x->as_MemBar();
2043           y = child_membar(y);
2044           if (y != NULL && y->Opcode() == Op_MemBarAcquire) {
2045             mbar = y;
2046           }
2047         }
2048         break;
2049       }
2050     }
2051 
2052     return mbar;
2053   }
2054 
2055   // normal_to_leading
2056   //
2057   // graph traversal helper which detects the normal case Mem feed
2058   // from either a card mark or a trailing membar to a preceding
2059   // release membar (optionally its cpuorder child) i.e. it ensures
2060   // that one or other of the following Mem flow subgraphs is present.
2061   //
2062   //   MemBarRelease
2063   //   MemBarCPUOrder {leading}
2064   //          |  \      . . .
2065   //          |  StoreN/P[mo_release]  . . .
2066   //          |   /
2067   //         MergeMem
2068   //          |
2069   //   MemBarVolatile {card mark or trailing}
2070   //
2071   //   MemBarRelease
2072   //   MemBarCPUOrder {leading}
2073   //      |       \      . . .
2074   //      |     CompareAndSwapX  . . .
2075   //               |
2076   //     . . .    SCMemProj
2077   //           \   |
2078   //      |    MergeMem
2079   //      |        /
2080   //    MemBarCPUOrder
2081   //    MemBarAcquire {trailing}
2082   //
2083   // this predicate checks for the same flow as the previous predicate
2084   // but starting from the bottom rather than the top.
2085   //
2086   // if the configuration is present returns the cpuorder membar for
2087   // preference or when absent the release membar otherwise NULL.
2088   //
2089   // n.b. the input membar is expected to be a MemBarVolatile but
2090   // need not be a card mark membar.
2091 
2092   MemBarNode *normal_to_leading(const MemBarNode *barrier)
2093   {
2094     // input must be a volatile membar
2095     assert((barrier->Opcode() == Op_MemBarVolatile ||
2096             barrier->Opcode() == Op_MemBarAcquire),
2097            "expecting a volatile or an acquire membar");
2098     Node *x;
2099     bool is_cas = barrier->Opcode() == Op_MemBarAcquire;
2100 
2101     // if we have an acquire membar then it must be fed via a CPUOrder
2102     // membar
2103 
2104     if (is_cas) {
2105       // skip to parent barrier which must be a cpuorder
2106       x = parent_membar(barrier);
2107       if (x->Opcode() != Op_MemBarCPUOrder)
2108         return NULL;
2109     } else {
2110       // start from the supplied barrier
2111       x = (Node *)barrier;
2112     }
2113 
2114     // the Mem feed to the membar should be a merge
2115     x = x ->in(TypeFunc::Memory);
2116     if (!x->is_MergeMem())
2117       return NULL;
2118 
2119     MergeMemNode *mm = x->as_MergeMem();
2120 
2121     if (is_cas) {
2122       // the merge should be fed from the CAS via an SCMemProj node
2123       x = NULL;
2124       for (uint idx = 1; idx < mm->req(); idx++) {
2125         if (mm->in(idx)->Opcode() == Op_SCMemProj) {
2126           x = mm->in(idx);
2127           break;
2128         }
2129       }
2130       if (x == NULL) {
2131         return NULL;
2132       }
2133       // check for a CAS feeding this proj
2134       x = x->in(0);
2135       int opcode = x->Opcode();
2136       if (!is_CAS(opcode)) {
2137         return NULL;
2138       }
2139       // the CAS should get its mem feed from the leading membar
2140       x = x->in(MemNode::Memory);
2141     } else {
2142       // the merge should get its Bottom mem feed from the leading membar
2143       x = mm->in(Compile::AliasIdxBot);
2144     }
2145 
2146     // ensure this is a non control projection
2147     if (!x->is_Proj() || x->is_CFG()) {
2148       return NULL;
2149     }
2150     // if it is fed by a membar that's the one we want
2151     x = x->in(0);
2152 
2153     if (!x->is_MemBar()) {
2154       return NULL;
2155     }
2156 
2157     MemBarNode *leading = x->as_MemBar();
2158     // reject invalid candidates
2159     if (!leading_membar(leading)) {
2160       return NULL;
2161     }
2162 
2163     // ok, we have a leading membar, now for the sanity clauses
2164 
2165     // the leading membar must feed Mem to a releasing store or CAS
2166     ProjNode *mem = leading->proj_out(TypeFunc::Memory);
2167     StoreNode *st = NULL;
2168     LoadStoreNode *cas = NULL;
2169     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2170       x = mem->fast_out(i);
2171       if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
2172         // two stores or CASes is one too many
2173         if (st != NULL || cas != NULL) {
2174           return NULL;
2175         }
2176         st = x->as_Store();
2177       } else if (is_CAS(x->Opcode())) {
2178         if (st != NULL || cas != NULL) {
2179           return NULL;
2180         }
2181         cas = x->as_LoadStore();
2182       }
2183     }
2184 
2185     // we should not have both a store and a cas
2186     if (st == NULL & cas == NULL) {
2187       return NULL;
2188     }
2189 
2190     if (st == NULL) {
2191       // nothing more to check
2192       return leading;
2193     } else {
2194       // we should not have a store if we started from an acquire
2195       if (is_cas) {
2196         return NULL;
2197       }
2198 
2199       // the store should feed the merge we used to get here
2200       for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2201         if (st->fast_out(i) == mm) {
2202           return leading;
2203         }
2204       }
2205     }
2206 
2207     return NULL;
2208   }
2209 
2210   // card_mark_to_trailing
2211   //
2212   // graph traversal helper which detects extra, non-normal Mem feed
2213   // from a card mark volatile membar to a trailing membar i.e. it
2214   // ensures that one of the following three GC post-write Mem flow
2215   // subgraphs is present.
2216   //
2217   // 1)
2218   //     . . .
2219   //       |
2220   //   MemBarVolatile (card mark)
2221   //      |          |
2222   //      |        StoreCM
2223   //      |          |
2224   //      |        . . .
2225   //  Bot |  /
2226   //   MergeMem
2227   //      |
2228   //      |
2229   //    MemBarVolatile {trailing}
2230   //
2231   // 2)
2232   //   MemBarRelease/CPUOrder (leading)
2233   //    |
2234   //    |
2235   //    |\       . . .
2236   //    | \        |
2237   //    |  \  MemBarVolatile (card mark)
2238   //    |   \   |     |
2239   //     \   \  |   StoreCM    . . .
2240   //      \   \ |
2241   //       \  Phi
2242   //        \ /
2243   //        Phi  . . .
2244   //     Bot |   /
2245   //       MergeMem
2246   //         |
2247   //    MemBarVolatile {trailing}
2248   //
2249   //
2250   // 3)
2251   //   MemBarRelease/CPUOrder (leading)
2252   //    |
2253   //    |\
2254   //    | \
2255   //    |  \      . . .
2256   //    |   \       |
2257   //    |\   \  MemBarVolatile (card mark)
2258   //    | \   \   |     |
2259   //    |  \   \  |   StoreCM    . . .
2260   //    |   \   \ |
2261   //     \   \  Phi
2262   //      \   \ /
2263   //       \  Phi
2264   //        \ /
2265   //        Phi  . . .
2266   //     Bot |   /
2267   //       MergeMem
2268   //         |
2269   //         |
2270   //    MemBarVolatile {trailing}
2271   //
2272   // configuration 1 is only valid if UseConcMarkSweepGC &&
2273   // UseCondCardMark
2274   //
2275   // configurations 2 and 3 are only valid if UseG1GC.
2276   //
2277   // if a valid configuration is present returns the trailing membar
2278   // otherwise NULL.
2279   //
2280   // n.b. the supplied membar is expected to be a card mark
2281   // MemBarVolatile i.e. the caller must ensure the input node has the
2282   // correct operand and feeds Mem to a StoreCM node
2283 
2284   MemBarNode *card_mark_to_trailing(const MemBarNode *barrier)
2285   {
2286     // input must be a card mark volatile membar
2287     assert(is_card_mark_membar(barrier), "expecting a card mark membar");
2288 
2289     Node *feed = barrier->proj_out(TypeFunc::Memory);
2290     Node *x;
2291     MergeMemNode *mm = NULL;
2292 
2293     const int MAX_PHIS = 3;     // max phis we will search through
2294     int phicount = 0;           // current search count
2295 
2296     bool retry_feed = true;
2297     while (retry_feed) {
2298       // see if we have a direct MergeMem feed
2299       for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
2300         x = feed->fast_out(i);
2301         // the correct Phi will be merging a Bot memory slice
2302         if (x->is_MergeMem()) {
2303           mm = x->as_MergeMem();
2304           break;
2305         }
2306       }
2307       if (mm) {
2308         retry_feed = false;
2309       } else if (UseG1GC & phicount++ < MAX_PHIS) {
2310         // the barrier may feed indirectly via one or two Phi nodes
2311         PhiNode *phi = NULL;
2312         for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
2313           x = feed->fast_out(i);
2314           // the correct Phi will be merging a Bot memory slice
2315           if (x->is_Phi() && x->adr_type() == TypePtr::BOTTOM) {
2316             phi = x->as_Phi();
2317             break;
2318           }
2319         }
2320         if (!phi) {
2321           return NULL;
2322         }
2323         // look for another merge below this phi
2324         feed = phi;
2325       } else {
2326         // couldn't find a merge
2327         return NULL;
2328       }
2329     }
2330 
2331     // sanity check this feed turns up as the expected slice
2332     assert(mm->as_MergeMem()->in(Compile::AliasIdxBot) == feed, "expecting membar to feed AliasIdxBot slice to Merge");
2333 
2334     MemBarNode *trailing = NULL;
2335     // be sure we have a trailing membar the merge
2336     for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
2337       x = mm->fast_out(i);
2338       if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
2339         trailing = x->as_MemBar();
2340         break;
2341       }
2342     }
2343 
2344     return trailing;
2345   }
2346 
2347   // trailing_to_card_mark
2348   //
2349   // graph traversal helper which detects extra, non-normal Mem feed
2350   // from a trailing volatile membar to a preceding card mark volatile
2351   // membar i.e. it identifies whether one of the three possible extra
2352   // GC post-write Mem flow subgraphs is present
2353   //
2354   // this predicate checks for the same flow as the previous predicate
2355   // but starting from the bottom rather than the top.
2356   //
2357   // if the configuration is present returns the card mark membar
2358   // otherwise NULL
2359   //
2360   // n.b. the supplied membar is expected to be a trailing
2361   // MemBarVolatile i.e. the caller must ensure the input node has the
2362   // correct opcode
2363 
  MemBarNode *trailing_to_card_mark(const MemBarNode *trailing)
  {
    assert(trailing->Opcode() == Op_MemBarVolatile,
           "expecting a volatile membar");
    assert(!is_card_mark_membar(trailing),
           "not expecting a card mark membar");

    // the Mem feed to the membar should be a merge
    Node *x = trailing->in(TypeFunc::Memory);
    if (!x->is_MergeMem()) {
      return NULL;
    }

    MergeMemNode *mm = x->as_MergeMem();

    // chase the merge's AliasIdxBot slice back up the graph
    x = mm->in(Compile::AliasIdxBot);
    // with G1 we may possibly see a Phi or two before we see a Memory
    // Proj from the card mark membar

    const int MAX_PHIS = 3;     // max phis we will search through
    int phicount = 0;           // current search count

    // if x is already a Proj we can skip the Phi search entirely
    bool retry_feed = !x->is_Proj();

    while (retry_feed) {
      // only G1 generates the Phi-merged configurations (cases 2 and 3
      // of the post-write subgraphs described above)
      if (UseG1GC && x->is_Phi() && phicount++ < MAX_PHIS) {
        PhiNode *phi = x->as_Phi();
        ProjNode *proj = NULL;
        PhiNode *nextphi = NULL;
        bool found_leading = false;
        // scan the Phi's inputs looking for the card mark membar's Mem
        // proj, a further Phi to descend into, or a feed from a
        // leading membar
        for (uint i = 1; i < phi->req(); i++) {
          x = phi->in(i);
          if (x->is_Phi()) {
            nextphi = x->as_Phi();
          } else if (x->is_Proj()) {
            int opcode = x->in(0)->Opcode();
            if (opcode == Op_MemBarVolatile) {
              // candidate feed from the card mark membar
              proj = x->as_Proj();
            } else if (opcode == Op_MemBarRelease ||
                       opcode == Op_MemBarCPUOrder) {
              // probably a leading membar
              found_leading = true;
            }
          }
        }
        // if we found a correct looking proj then retry from there
        // otherwise we must see a leading and a phi or this is the
        // wrong config
        if (proj != NULL) {
          x = proj;
          retry_feed = false;
        } else if (found_leading && nextphi != NULL) {
          // retry from this phi to check phi2
          x = nextphi;
        } else {
          // not what we were looking for
          return NULL;
        }
      } else {
        // neither a Proj nor a (further) G1 Phi chain -- wrong shape
        return NULL;
      }
    }
    // the proj has to come from the card mark membar
    x = x->in(0);
    if (!x->is_MemBar()) {
      return NULL;
    }

    MemBarNode *card_mark_membar = x->as_MemBar();

    if (!is_card_mark_membar(card_mark_membar)) {
      return NULL;
    }

    return card_mark_membar;
  }
2440 
2441   // trailing_to_leading
2442   //
2443   // graph traversal helper which checks the Mem flow up the graph
2444   // from a (non-card mark) trailing membar attempting to locate and
2445   // return an associated leading membar. it first looks for a
2446   // subgraph in the normal configuration (relying on helper
2447   // normal_to_leading). failing that it then looks for one of the
2448   // possible post-write card mark subgraphs linking the trailing node
2449   // to the card mark membar (relying on helper
2450   // trailing_to_card_mark), and then checks that the card mark membar
2451   // is fed by a leading membar (once again relying on auxiliary
2452   // predicate normal_to_leading).
2453   //
2454   // if the configuration is valid returns the cpuorder membar for
2455   // preference or when absent the release membar otherwise NULL.
2456   //
2457   // n.b. the input membar is expected to be either a volatile or
2458   // acquire membar but in the former case must *not* be a card mark
2459   // membar.
2460 
2461   MemBarNode *trailing_to_leading(const MemBarNode *trailing)
2462   {
2463     assert((trailing->Opcode() == Op_MemBarAcquire ||
2464             trailing->Opcode() == Op_MemBarVolatile),
2465            "expecting an acquire or volatile membar");
2466     assert((trailing->Opcode() != Op_MemBarVolatile ||
2467             !is_card_mark_membar(trailing)),
2468            "not expecting a card mark membar");
2469 
2470     MemBarNode *leading = normal_to_leading(trailing);
2471 
2472     if (leading) {
2473       return leading;
2474     }
2475 
2476     // nothing more to do if this is an acquire
2477     if (trailing->Opcode() == Op_MemBarAcquire) {
2478       return NULL;
2479     }
2480 
2481     MemBarNode *card_mark_membar = trailing_to_card_mark(trailing);
2482 
2483     if (!card_mark_membar) {
2484       return NULL;
2485     }
2486 
2487     return normal_to_leading(card_mark_membar);
2488   }
2489 
2490   // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
2491 
// Returns true when the acquire membar is redundant because the
// ordering it provides will be supplied some other way (an ldar load
// or a CAS-generated membar chain).
bool unnecessary_acquire(const Node *barrier)
{
  assert(barrier->is_MemBar(), "expecting a membar");

  if (UseBarriersForVolatile) {
    // we need to plant a dmb
    return false;
  }

  // a volatile read derived from bytecode (or also from an inlined
  // SHA field read via LibraryCallKit::load_field_from_object)
  // manifests as a LoadX[mo_acquire] followed by an acquire membar
  // with a bogus read dependency on its preceding load. so in those
  // cases we will find the load node at the PARMS offset of the
  // acquire membar.  n.b. there may be an intervening DecodeN node.
  //
  // a volatile load derived from an inlined unsafe field access
  // manifests as a cpuorder membar with Ctl and Mem projections
  // feeding both an acquire membar and a LoadX[mo_acquire]. The
  // acquire then feeds another cpuorder membar via Ctl and Mem
  // projections. The load has no output dependency on these trailing
  // membars because subsequent nodes inserted into the graph take
  // their control feed from the final membar cpuorder meaning they
  // are all ordered after the load.

  Node *x = barrier->lookup(TypeFunc::Parms);
  if (x) {
    // we are starting from an acquire and it has a fake dependency
    //
    // need to check for
    //
    //   LoadX[mo_acquire]
    //   {  |1   }
    //   {DecodeN}
    //      |Parms
    //   MemBarAcquire*
    //
    // where * tags node we were passed
    // and |k means input k
    if (x->is_DecodeNarrowPtr()) {
      x = x->in(1);
    }

    return (x->is_Load() && x->as_Load()->is_acquire());
  }

  // now check for an unsafe volatile get

  // need to check for
  //
  //   MemBarCPUOrder
  //        ||       \\
  //   MemBarAcquire* LoadX[mo_acquire]
  //        ||
  //   MemBarCPUOrder
  //
  // where * tags node we were passed
  // and || or \\ are Ctl+Mem feeds via intermediate Proj Nodes

  // check for a parent MemBarCPUOrder
  ProjNode *ctl;
  ProjNode *mem;
  MemBarNode *parent = parent_membar(barrier);
  if (!parent || parent->Opcode() != Op_MemBarCPUOrder)
    return false;
  ctl = parent->proj_out(TypeFunc::Control);
  mem = parent->proj_out(TypeFunc::Memory);
  if (!ctl || !mem) {
    return false;
  }
  // ensure the proj nodes both feed a LoadX[mo_acquire]
  LoadNode *ld = NULL;
  for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
    x = ctl->fast_out(i);
    // if we see a load we keep hold of it and stop searching
    if (x->is_Load()) {
      ld = x->as_Load();
      break;
    }
  }
  // it must be an acquiring load
  if (ld && ld->is_acquire()) {

    for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
      x = mem->fast_out(i);
      // if we see the same load we drop it and stop searching
      if (x == ld) {
        ld = NULL;
        break;
      }
    }
    // we must have dropped the load
    if (ld == NULL) {
      // check for a child cpuorder membar
      MemBarNode *child  = child_membar(barrier->as_MemBar());
      if (child && child->Opcode() == Op_MemBarCPUOrder)
        return true;
    }
  }

  // final option for unnecessary membar is that it is a trailing node
  // belonging to a CAS

  MemBarNode *leading = trailing_to_leading(barrier->as_MemBar());

  return leading != NULL;
}
2599 
2600 bool needs_acquiring_load(const Node *n)
2601 {
2602   assert(n->is_Load(), "expecting a load");
2603   if (UseBarriersForVolatile) {
2604     // we use a normal load and a dmb
2605     return false;
2606   }
2607 
2608   LoadNode *ld = n->as_Load();
2609 
2610   if (!ld->is_acquire()) {
2611     return false;
2612   }
2613 
2614   // check if this load is feeding an acquire membar
2615   //
2616   //   LoadX[mo_acquire]
2617   //   {  |1   }
2618   //   {DecodeN}
2619   //      |Parms
2620   //   MemBarAcquire*
2621   //
2622   // where * tags node we were passed
2623   // and |k means input k
2624 
2625   Node *start = ld;
2626   Node *mbacq = NULL;
2627 
2628   // if we hit a DecodeNarrowPtr we reset the start node and restart
2629   // the search through the outputs
2630  restart:
2631 
2632   for (DUIterator_Fast imax, i = start->fast_outs(imax); i < imax; i++) {
2633     Node *x = start->fast_out(i);
2634     if (x->is_MemBar() && x->Opcode() == Op_MemBarAcquire) {
2635       mbacq = x;
2636     } else if (!mbacq &&
2637                (x->is_DecodeNarrowPtr() ||
2638                 (x->is_Mach() && x->Opcode() == Op_DecodeN))) {
2639       start = x;
2640       goto restart;
2641     }
2642   }
2643 
2644   if (mbacq) {
2645     return true;
2646   }
2647 
2648   // now check for an unsafe volatile get
2649 
2650   // check if Ctl and Proj feed comes from a MemBarCPUOrder
2651   //
2652   //     MemBarCPUOrder
2653   //        ||       \\
2654   //   MemBarAcquire* LoadX[mo_acquire]
2655   //        ||
2656   //   MemBarCPUOrder
2657 
2658   MemBarNode *membar;
2659 
2660   membar = parent_membar(ld);
2661 
2662   if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) {
2663     return false;
2664   }
2665 
2666   // ensure that there is a CPUOrder->Acquire->CPUOrder membar chain
2667 
2668   membar = child_membar(membar);
2669 
2670   if (!membar || !membar->Opcode() == Op_MemBarAcquire) {
2671     return false;
2672   }
2673 
2674   membar = child_membar(membar);
2675 
2676   if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) {
2677     return false;
2678   }
2679 
2680   return true;
2681 }
2682 
2683 bool unnecessary_release(const Node *n)
2684 {
2685   assert((n->is_MemBar() &&
2686           n->Opcode() == Op_MemBarRelease),
2687          "expecting a release membar");
2688 
2689   if (UseBarriersForVolatile) {
2690     // we need to plant a dmb
2691     return false;
2692   }
2693 
2694   // if there is a dependent CPUOrder barrier then use that as the
2695   // leading
2696 
2697   MemBarNode *barrier = n->as_MemBar();
2698   // check for an intervening cpuorder membar
2699   MemBarNode *b = child_membar(barrier);
2700   if (b && b->Opcode() == Op_MemBarCPUOrder) {
2701     // ok, so start the check from the dependent cpuorder barrier
2702     barrier = b;
2703   }
2704 
2705   // must start with a normal feed
2706   MemBarNode *child_barrier = leading_to_normal(barrier);
2707 
2708   if (!child_barrier) {
2709     return false;
2710   }
2711 
2712   if (!is_card_mark_membar(child_barrier)) {
2713     // this is the trailing membar and we are done
2714     return true;
2715   }
2716 
2717   // must be sure this card mark feeds a trailing membar
2718   MemBarNode *trailing = card_mark_to_trailing(child_barrier);
2719   return (trailing != NULL);
2720 }
2721 
2722 bool unnecessary_volatile(const Node *n)
2723 {
2724   // assert n->is_MemBar();
2725   if (UseBarriersForVolatile) {
2726     // we need to plant a dmb
2727     return false;
2728   }
2729 
2730   MemBarNode *mbvol = n->as_MemBar();
2731 
2732   // first we check if this is part of a card mark. if so then we have
2733   // to generate a StoreLoad barrier
2734 
2735   if (is_card_mark_membar(mbvol)) {
2736       return false;
2737   }
2738 
2739   // ok, if it's not a card mark then we still need to check if it is
2740   // a trailing membar of a volatile put hgraph.
2741 
2742   return (trailing_to_leading(mbvol) != NULL);
2743 }
2744 
2745 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
2746 
// Returns true when a releasing store should be translated as stlr,
// i.e. when it sits inside a recognized volatile put graph.
bool needs_releasing_store(const Node *n)
{
  // assert n->is_Store();
  if (UseBarriersForVolatile) {
    // we use a normal store and dmb combination
    return false;
  }

  StoreNode *st = n->as_Store();

  // the store must be marked as releasing
  if (!st->is_release()) {
    return false;
  }

  // the store must be fed by a membar

  Node *x = st->lookup(StoreNode::Memory);

  if (! x || !x->is_Proj()) {
    return false;
  }

  ProjNode *proj = x->as_Proj();

  x = proj->lookup(0);

  if (!x || !x->is_MemBar()) {
    return false;
  }

  MemBarNode *barrier = x->as_MemBar();

  // if the barrier is a release membar or a cpuorder membar fed by a
  // release membar then we need to check whether that forms part of a
  // volatile put graph.

  // reject invalid candidates
  if (!leading_membar(barrier)) {
    return false;
  }

  // does this lead a normal subgraph?
  MemBarNode *mbvol = leading_to_normal(barrier);

  if (!mbvol) {
    return false;
  }

  // all done unless this is a card mark
  if (!is_card_mark_membar(mbvol)) {
    return true;
  }

  // we found a card mark -- just make sure we have a trailing barrier

  return (card_mark_to_trailing(mbvol) != NULL);
}
2805 
2806 // predicate controlling translation of CAS
2807 //
2808 // returns true if CAS needs to use an acquiring load otherwise false
2809 
bool needs_acquiring_load_exclusive(const Node *n)
{
  assert(is_CAS(n->Opcode()), "expecting a compare and swap");
  if (UseBarriersForVolatile) {
    // ordering will be provided by explicit dmb instructions
    return false;
  }

  // CAS nodes only ought to turn up in inlined unsafe CAS operations
#ifdef ASSERT
  // verify, in debug builds only, that the CAS sits in the expected
  // release/cpuorder ... acquire graph shape
  LoadStoreNode *st = n->as_LoadStore();

  // the store must be fed by a membar

  Node *x = st->lookup(StoreNode::Memory);

  assert (x && x->is_Proj(), "CAS not fed by memory proj!");

  ProjNode *proj = x->as_Proj();

  x = proj->lookup(0);

  assert (x && x->is_MemBar(), "CAS not fed by membar!");

  MemBarNode *barrier = x->as_MemBar();

  // the barrier must be a cpuorder membar fed by a release membar

  assert(barrier->Opcode() == Op_MemBarCPUOrder,
         "CAS not fed by cpuorder membar!");

  MemBarNode *b = parent_membar(barrier);
  assert ((b != NULL && b->Opcode() == Op_MemBarRelease),
          "CAS not fed by cpuorder+release membar pair!");

  // does this lead a normal subgraph?
  MemBarNode *mbar = leading_to_normal(barrier);

  assert(mbar != NULL, "CAS not embedded in normal graph!");

  assert(mbar->Opcode() == Op_MemBarAcquire, "trailing membar should be an acquire");
#endif // ASSERT
  // so we can just return true here
  return true;
}
2854 
2855 // predicate controlling translation of StoreCM
2856 //
2857 // returns true if a StoreStore must precede the card write otherwise
2858 // false
2859 
bool unnecessary_storestore(const Node *storecm)
{
  assert(storecm->Opcode()  == Op_StoreCM, "expecting a StoreCM");

  // we only ever need to generate a dmb ishst between an object put
  // and the associated card mark when we are using CMS without
  // conditional card marking

  if (!UseConcMarkSweepGC || UseCondCardMark) {
    return true;
  }

  // if we are implementing volatile puts using barriers then the
  // object put is implemented as an str so we must insert the dmb
  // ishst

  if (UseBarriersForVolatile) {
    return false;
  }

  // we can omit the dmb ishst if this StoreCM is part of a volatile
  // put because in that case the put will be implemented by stlr
  //
  // we need to check for a normal subgraph feeding this StoreCM.
  // that means the StoreCM must be fed Memory from a leading membar,
  // either a MemBarRelease or its dependent MemBarCPUOrder, and the
  // leading membar must be part of a normal subgraph

  Node *x = storecm->in(StoreNode::Memory);

  // the memory feed must arrive via a projection from a membar
  if (!x->is_Proj()) {
    return false;
  }

  x = x->in(0);

  if (!x->is_MemBar()) {
    return false;
  }

  MemBarNode *leading = x->as_MemBar();

  // reject invalid candidates
  if (!leading_membar(leading)) {
    return false;
  }

  // we can omit the StoreStore if it is the head of a normal subgraph
  return (leading_to_normal(leading) != NULL);
}
2909 
2910 
2911 #define __ _masm.
2912 
2913 // advance declarations for helper functions to convert register
2914 // indices to register objects
2915 
2916 // the ad file has to provide implementations of certain methods
2917 // expected by the generic code
2918 //
2919 // REQUIRED FUNCTIONALITY
2920 
2921 //=============================================================================
2922 
2923 // !!!!! Special hack to get all types of calls to specify the byte offset
2924 //       from the start of the call to the point where the return address
2925 //       will point.
2926 
2927 int MachCallStaticJavaNode::ret_addr_offset()
2928 {
2929   // call should be a simple bl
2930   int off = 4;
2931   return off;
2932 }
2933 
2934 int MachCallDynamicJavaNode::ret_addr_offset()
2935 {
2936   return 16; // movz, movk, movk, bl
2937 }
2938 
2939 int MachCallRuntimeNode::ret_addr_offset() {
2940   // for generated stubs the call will be
2941   //   far_call(addr)
2942   // for real runtime callouts it will be six instructions
2943   // see aarch64_enc_java_to_runtime
2944   //   adr(rscratch2, retaddr)
2945   //   lea(rscratch1, RuntimeAddress(addr)
2946   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
2947   //   blrt rscratch1
2948   CodeBlob *cb = CodeCache::find_blob(_entry_point);
2949   if (cb) {
2950     return MacroAssembler::far_branch_size();
2951   } else {
2952     return 6 * NativeInstruction::instruction_size;
2953   }
2954 }
2955 
2956 // Indicate if the safepoint node needs the polling page as an input
2957 
2958 // the shared code plants the oop data at the start of the generated
// code for the safepoint node and that needs to be at the load
2960 // instruction itself. so we cannot plant a mov of the safepoint poll
2961 // address followed by a load. setting this to true means the mov is
2962 // scheduled as a prior instruction. that's better for scheduling
2963 // anyway.
2964 
2965 bool SafePointNode::needs_polling_address_input()
2966 {
2967   return true;
2968 }
2969 
2970 //=============================================================================
2971 
2972 #ifndef PRODUCT
2973 void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
2974   st->print("BREAKPOINT");
2975 }
2976 #endif
2977 
2978 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
2979   MacroAssembler _masm(&cbuf);
2980   __ brk(0);
2981 }
2982 
2983 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
2984   return MachNode::size(ra_);
2985 }
2986 
2987 //=============================================================================
2988 
2989 #ifndef PRODUCT
2990   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
2991     st->print("nop \t# %d bytes pad for loops and calls", _count);
2992   }
2993 #endif
2994 
2995   void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
2996     MacroAssembler _masm(&cbuf);
2997     for (int i = 0; i < _count; i++) {
2998       __ nop();
2999     }
3000   }
3001 
3002   uint MachNopNode::size(PhaseRegAlloc*) const {
3003     return _count * NativeInstruction::instruction_size;
3004   }
3005 
3006 //=============================================================================
3007 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
3008 
3009 int Compile::ConstantTable::calculate_table_base_offset() const {
3010   return 0;  // absolute addressing, no offset
3011 }
3012 
3013 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
3014 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
3015   ShouldNotReachHere();
3016 }
3017 
3018 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
3019   // Empty encoding
3020 }
3021 
3022 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
3023   return 0;
3024 }
3025 
3026 #ifndef PRODUCT
3027 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
3028   st->print("-- \t// MachConstantBaseNode (empty encoding)");
3029 }
3030 #endif
3031 
3032 #ifndef PRODUCT
// Pretty print the frame-building prolog.
void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_slots() << LogBytesPerInt;

  if (C->need_stack_bang(framesize))
    st->print("# stack bang size=%d\n\t", framesize);

  // small frames fit the sub immediate field; larger ones must go via
  // rscratch1
  if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("sub  sp, sp, #%d\n\t", framesize);
    st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
    if (PreserveFramePointer) st->print("\n\tadd  rfp, sp, #%d", framesize - 2 * wordSize);
  } else {
    st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
    if (PreserveFramePointer) st->print("mov  rfp, sp\n\t");
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("sub  sp, sp, rscratch1");
  }
}
3052 #endif
3053 
// Emit the method prolog: patchable nop, optional stack bang, frame
// build, simulator notification and constant table base setup.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  // n.b. frame size includes space for return pc and rfp
  const long framesize = C->frame_size_in_bytes();
  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  int bangsize = C->bang_size_in_bytes();
  if (C->need_stack_bang(bangsize) && UseStackBanging)
    __ generate_stack_overflow_check(bangsize);

  __ build_frame(framesize);

  if (NotifySimulator) {
    __ notify(Assembler::method_entry);
  }

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  // from here on the frame is fully built
  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
3089 
3090 uint MachPrologNode::size(PhaseRegAlloc* ra_) const
3091 {
3092   return MachNode::size(ra_); // too many variables; just compute it
3093                               // the hard way
3094 }
3095 
3096 int MachPrologNode::reloc() const
3097 {
3098   return 0;
3099 }
3100 
3101 //=============================================================================
3102 
3103 #ifndef PRODUCT
// Pretty print the frame-popping epilog and optional safepoint poll.
void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;
  int framesize = C->frame_slots() << LogBytesPerInt;

  st->print("# pop frame %d\n\t",framesize);

  // mirror the three frame-size cases used when emitting
  if (framesize == 0) {
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, #%d\n\t", framesize);
  } else {
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, rscratch1\n\t");
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  }

  if (do_polling() && C->is_method_compilation()) {
    st->print("# touch polling page\n\t");
    st->print("mov  rscratch1, #0x%lx\n\t", p2i(os::get_polling_page()));
    st->print("ldr zr, [rscratch1]");
  }
}
3127 #endif
3128 
// Emit the method epilog: pop the frame, notify the simulator,
// optional reserved stack check and return-point safepoint poll.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);
  int framesize = C->frame_slots() << LogBytesPerInt;

  __ remove_frame(framesize);

  if (NotifySimulator) {
    __ notify(Assembler::method_reentry);
  }

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  // read the polling page at return so the runtime can bring this
  // thread to a safepoint
  if (do_polling() && C->is_method_compilation()) {
    __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
  }
}
3148 
3149 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
3150   // Variable size. Determine dynamically.
3151   return MachNode::size(ra_);
3152 }
3153 
3154 int MachEpilogNode::reloc() const {
3155   // Return number of relocatable values contained in this instruction.
3156   return 1; // 1 for polling page.
3157 }
3158 
3159 const Pipeline * MachEpilogNode::pipeline() const {
3160   return MachNode::pipeline_class();
3161 }
3162 
3163 // This method seems to be obsolete. It is declared in machnode.hpp
3164 // and defined in all *.ad files, but it is never called. Should we
3165 // get rid of it?
3166 int MachEpilogNode::safepoint_offset() const {
3167   assert(do_polling(), "no return for this epilog node");
3168   return 4;
3169 }
3170 
3171 //=============================================================================
3172 
3173 // Figure out which register class each belongs in: rc_int, rc_float or
3174 // rc_stack.
// Register classes used to choose a spill/copy strategy.
enum RC { rc_bad, rc_int, rc_float, rc_stack };
3176 
3177 static enum RC rc_class(OptoReg::Name reg) {
3178 
3179   if (reg == OptoReg::Bad) {
3180     return rc_bad;
3181   }
3182 
3183   // we have 30 int registers * 2 halves
3184   // (rscratch1 and rscratch2 are omitted)
3185 
3186   if (reg < 60) {
3187     return rc_int;
3188   }
3189 
3190   // we have 32 float register * 2 halves
3191   if (reg < 60 + 128) {
3192     return rc_float;
3193   }
3194 
3195   // Between float regs & stack is the flags regs.
3196   assert(OptoReg::is_stack(reg), "blow up if spilling flags");
3197 
3198   return rc_stack;
3199 }
3200 
3201 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
3202   Compile* C = ra_->C;
3203 
3204   // Get registers to move.
3205   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
3206   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
3207   OptoReg::Name dst_hi = ra_->get_reg_second(this);
3208   OptoReg::Name dst_lo = ra_->get_reg_first(this);
3209 
3210   enum RC src_hi_rc = rc_class(src_hi);
3211   enum RC src_lo_rc = rc_class(src_lo);
3212   enum RC dst_hi_rc = rc_class(dst_hi);
3213   enum RC dst_lo_rc = rc_class(dst_lo);
3214 
3215   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
3216 
3217   if (src_hi != OptoReg::Bad) {
3218     assert((src_lo&1)==0 && src_lo+1==src_hi &&
3219            (dst_lo&1)==0 && dst_lo+1==dst_hi,
3220            "expected aligned-adjacent pairs");
3221   }
3222 
3223   if (src_lo == dst_lo && src_hi == dst_hi) {
3224     return 0;            // Self copy, no move.
3225   }
3226 
3227   bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
3228               (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
3229   int src_offset = ra_->reg2offset(src_lo);
3230   int dst_offset = ra_->reg2offset(dst_lo);
3231 
3232   if (bottom_type()->isa_vect() != NULL) {
3233     uint ireg = ideal_reg();
3234     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
3235     if (cbuf) {
3236       MacroAssembler _masm(cbuf);
3237       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
3238       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
3239         // stack->stack
3240         assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
3241         if (ireg == Op_VecD) {
3242           __ unspill(rscratch1, true, src_offset);
3243           __ spill(rscratch1, true, dst_offset);
3244         } else {
3245           __ spill_copy128(src_offset, dst_offset);
3246         }
3247       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
3248         __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3249                ireg == Op_VecD ? __ T8B : __ T16B,
3250                as_FloatRegister(Matcher::_regEncode[src_lo]));
3251       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
3252         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3253                        ireg == Op_VecD ? __ D : __ Q,
3254                        ra_->reg2offset(dst_lo));
3255       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
3256         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3257                        ireg == Op_VecD ? __ D : __ Q,
3258                        ra_->reg2offset(src_lo));
3259       } else {
3260         ShouldNotReachHere();
3261       }
3262     }
3263   } else if (cbuf) {
3264     MacroAssembler _masm(cbuf);
3265     switch (src_lo_rc) {
3266     case rc_int:
3267       if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
3268         if (is64) {
3269             __ mov(as_Register(Matcher::_regEncode[dst_lo]),
3270                    as_Register(Matcher::_regEncode[src_lo]));
3271         } else {
3272             MacroAssembler _masm(cbuf);
3273             __ movw(as_Register(Matcher::_regEncode[dst_lo]),
3274                     as_Register(Matcher::_regEncode[src_lo]));
3275         }
3276       } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
3277         if (is64) {
3278             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3279                      as_Register(Matcher::_regEncode[src_lo]));
3280         } else {
3281             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3282                      as_Register(Matcher::_regEncode[src_lo]));
3283         }
3284       } else {                    // gpr --> stack spill
3285         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3286         __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
3287       }
3288       break;
3289     case rc_float:
3290       if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
3291         if (is64) {
3292             __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
3293                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3294         } else {
3295             __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
3296                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3297         }
3298       } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
3299           if (cbuf) {
3300             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3301                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3302         } else {
3303             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3304                      as_FloatRegister(Matcher::_regEncode[src_lo]));
3305         }
3306       } else {                    // fpr --> stack spill
3307         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3308         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3309                  is64 ? __ D : __ S, dst_offset);
3310       }
3311       break;
3312     case rc_stack:
3313       if (dst_lo_rc == rc_int) {  // stack --> gpr load
3314         __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
3315       } else if (dst_lo_rc == rc_float) { // stack --> fpr load
3316         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3317                    is64 ? __ D : __ S, src_offset);
3318       } else {                    // stack --> stack copy
3319         assert(dst_lo_rc == rc_stack, "spill to bad register class");
3320         __ unspill(rscratch1, is64, src_offset);
3321         __ spill(rscratch1, is64, dst_offset);
3322       }
3323       break;
3324     default:
3325       assert(false, "bad rc_class for spill");
3326       ShouldNotReachHere();
3327     }
3328   }
3329 
3330   if (st) {
3331     st->print("spill ");
3332     if (src_lo_rc == rc_stack) {
3333       st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
3334     } else {
3335       st->print("%s -> ", Matcher::regName[src_lo]);
3336     }
3337     if (dst_lo_rc == rc_stack) {
3338       st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
3339     } else {
3340       st->print("%s", Matcher::regName[dst_lo]);
3341     }
3342     if (bottom_type()->isa_vect() != NULL) {
3343       st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
3344     } else {
3345       st->print("\t# spill size = %d", is64 ? 64:32);
3346     }
3347   }
3348 
3349   return 0;
3350 
3351 }
3352 
3353 #ifndef PRODUCT
3354 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3355   if (!ra_)
3356     st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
3357   else
3358     implementation(NULL, ra_, false, st);
3359 }
3360 #endif
3361 
3362 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
3363   implementation(&cbuf, ra_, false, NULL);
3364 }
3365 
3366 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
3367   return MachNode::size(ra_);
3368 }
3369 
3370 //=============================================================================
3371 
3372 #ifndef PRODUCT
3373 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3374   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
3375   int reg = ra_->get_reg_first(this);
3376   st->print("add %s, rsp, #%d]\t# box lock",
3377             Matcher::regName[reg], offset);
3378 }
3379 #endif
3380 
// Materialize the address of the box's stack slot into the allocated
// register with a single add immediate.
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);

  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg    = ra_->get_encode(this);

  // box offsets are expected always to fit an add immediate
  if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
    __ add(as_Register(reg), sp, offset);
  } else {
    ShouldNotReachHere();
  }
}
3393 
3394 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
3395   // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
3396   return 4;
3397 }
3398 
3399 //=============================================================================
3400 
3401 #ifndef PRODUCT
// Pretty print the unverified entry point: load the receiver klass,
// compare it with the inline cache and branch to the miss stub on
// mismatch.
// NOTE(review): the printed addressing modes are missing their
// opening '[' and the non-compressed branch is labelled
// "compressed klass" -- cosmetic only, debug output not code.
void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("# MachUEPNode");
  if (UseCompressedClassPointers) {
    st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
    if (Universe::narrow_klass_shift() != 0) {
      st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
    }
  } else {
   st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
  }
  st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
  st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
}
3416 #endif
3417 
void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  // This is the unverified entry point.
  MacroAssembler _masm(&cbuf);

  // compare klass info for j_rarg0 -- presumably the receiver oop and
  // the expected klass, with rscratch1/rscratch2 as temps; confirm
  // against MacroAssembler::cmp_klass
  __ cmp_klass(j_rarg0, rscratch2, rscratch1);
  Label skip;
  // TODO
  // can we avoid this skip and still use a reloc?
  __ br(Assembler::EQ, skip);
  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  __ bind(skip);
}
3431 
3432 uint MachUEPNode::size(PhaseRegAlloc* ra_) const
3433 {
3434   return MachNode::size(ra_);
3435 }
3436 
3437 // REQUIRED EMIT CODE
3438 
3439 //=============================================================================
3440 
3441 // Emit exception handler code.
// Emit a stub that jumps to the shared exception blob; returns the
// offset of the handler within the stub section, or 0 on failure.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
{
  // the handler is just:
  //   mov rscratch1 #exception_blob_entry_point
  //   br rscratch1
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
3460 
// Emit deopt handler code.
// Returns the offset of the handler within the stub section, or 0 when
// the code cache is full.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
{
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

  // Materialize the current code address in lr so the deopt blob sees
  // this deopt site as its return address, then jump to the unpacker.
  __ adr(lr, __ pc());
  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));

  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
3481 
3482 // REQUIRED MATCHER CODE
3483 
3484 //=============================================================================
3485 
3486 const bool Matcher::match_rule_supported(int opcode) {
3487 
3488   switch (opcode) {
3489   default:
3490     break;
3491   }
3492 
3493   if (!has_match_rule(opcode)) {
3494     return false;
3495   }
3496 
3497   return true;  // Per default match rules are supported.
3498 }
3499 
3500 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
3501 
3502   // TODO
3503   // identify extra cases that we might want to provide match rules for
3504   // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
3505   bool ret_value = match_rule_supported(opcode);
3506   // Add rules here.
3507 
3508   return ret_value;  // Per default match rules are supported.
3509 }
3510 
// No predicated (masked) vector operations are provided by this port.
const bool Matcher::has_predicated_vectors(void) {
  return false;
}

// Use the register-allocator's default float pressure threshold unchanged.
const int Matcher::float_pressure(int default_pressure_threshold) {
  return default_pressure_threshold;
}

// Not used on this port; asserts via Unimplemented() if ever reached.
int Matcher::regnum_to_fpu_offset(int regnum)
{
  Unimplemented();
  return 0;
}
3524 
3525 // Is this branch offset short enough that a short branch can be used?
3526 //
3527 // NOTE: If the platform does not provide any short branch variants, then
3528 //       this method should return false for offset 0.
3529 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
3530   // The passed offset is relative to address of the branch.
3531 
3532   return (-32768 <= offset && offset < 32768);
3533 }
3534 
// Can a 64-bit constant be materialized cheaply enough to treat as simple?
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  // Probably always true, even if a temp register is required.
  return true;
}

// true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}
3545 
3546 // Vector width in bytes.
3547 const int Matcher::vector_width_in_bytes(BasicType bt) {
3548   int size = MIN2(16,(int)MaxVectorSize);
3549   // Minimum 2 values in vector
3550   if (size < 2*type2aelembytes(bt)) size = 0;
3551   // But never < 4
3552   if (size < 4) size = 0;
3553   return size;
3554 }
3555 
3556 // Limits on vector size (number of elements) loaded into vector.
3557 const int Matcher::max_vector_size(const BasicType bt) {
3558   return vector_width_in_bytes(bt)/type2aelembytes(bt);
3559 }
3560 const int Matcher::min_vector_size(const BasicType bt) {
3561 //  For the moment limit the vector size to 8 bytes
3562     int size = 8 / type2aelembytes(bt);
3563     if (size < 2) size = 2;
3564     return size;
3565 }
3566 
3567 // Vector ideal reg.
3568 const uint Matcher::vector_ideal_reg(int len) {
3569   switch(len) {
3570     case  8: return Op_VecD;
3571     case 16: return Op_VecX;
3572   }
3573   ShouldNotReachHere();
3574   return 0;
3575 }
3576 
// Ideal register class for a vector shift count: always a full 128-bit
// vector register, regardless of the requested size.
const uint Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecX;
}

// AES support not yet implemented
const bool Matcher::pass_original_key_for_aes() {
  return false;
}
3585 
// This port supports misaligned vector store/load (the old comment said
// "x86", inherited from that port; the rule applies here as well).
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}
3590 
// false => size gets scaled to BytesPerLong, ok.
const bool Matcher::init_array_count_is_in_bytes = false;

// Use conditional move (CMOVL)
// Extra cost of a long conditional move over an int one; zero here.
const int Matcher::long_cmove_cost() {
  // long cmoves are no more expensive than int cmoves
  return 0;
}

// Extra cost of a float conditional move over an int one; zero here.
const int Matcher::float_cmove_cost() {
  // float cmoves are no more expensive than int cmoves
  return 0;
}

// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
// false here => no explicit masking is emitted.
const bool Matcher::need_masked_shift_count = false;
3611 
// This affects two different things:
//  - how Decode nodes are matched
//  - how ImplicitNullCheck opportunities are recognized
// If true, the matcher will try to remove all Decodes and match them
// (as operands) into nodes. NullChecks are not prepared to deal with
// Decodes by final_graph_reshaping().
// If false, final_graph_reshaping() forces the decode behind the Cmp
// for a NullCheck. The matcher matches the Decode node into a register.
// Implicit_null_check optimization moves the Decode along with the
// memory operation back up before the NullCheck.
bool Matcher::narrow_oop_use_complex_address() {
  // Only fold the decode into the addressing expression when compressed
  // oops use a zero shift.
  return Universe::narrow_oop_shift() == 0;
}

bool Matcher::narrow_klass_use_complex_address() {
// TODO
// decide whether we need to set this to true
  return false;
}

bool Matcher::const_oop_prefer_decode() {
  // Prefer ConN+DecodeN over ConP in simple compressed oops mode.
  return Universe::narrow_oop_base() == NULL;
}

bool Matcher::const_klass_prefer_decode() {
  // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode.
  return Universe::narrow_klass_base() == NULL;
}
3641 
// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
const bool Matcher::rematerialize_float_constants = false;

// If CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;

// Not used on AArch64 (the old "No-op on amd64" comment was inherited
// from the x86 port); asserts via Unimplemented() if ever called.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  Unimplemented();
}

// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Are floats converted to double when stored to stack during
// deoptimization?
bool Matcher::float_in_double() { return true; }

// Do ints take an entire long register or just half?
// The relevant question is how the int is callee-saved:
// the whole long is written but de-opt'ing will have to extract
// the relevant 32 bits.
const bool Matcher::int_in_long = true;
3673 
3674 // Return whether or not this register is ever used as an argument.
3675 // This function is used on startup to build the trampoline stubs in
3676 // generateOptoStub.  Registers not mentioned will be killed by the VM
3677 // call in the trampoline, and arguments in those registers not be
3678 // available to the callee.
3679 bool Matcher::can_be_java_arg(int reg)
3680 {
3681   return
3682     reg ==  R0_num || reg == R0_H_num ||
3683     reg ==  R1_num || reg == R1_H_num ||
3684     reg ==  R2_num || reg == R2_H_num ||
3685     reg ==  R3_num || reg == R3_H_num ||
3686     reg ==  R4_num || reg == R4_H_num ||
3687     reg ==  R5_num || reg == R5_H_num ||
3688     reg ==  R6_num || reg == R6_H_num ||
3689     reg ==  R7_num || reg == R7_H_num ||
3690     reg ==  V0_num || reg == V0_H_num ||
3691     reg ==  V1_num || reg == V1_H_num ||
3692     reg ==  V2_num || reg == V2_H_num ||
3693     reg ==  V3_num || reg == V3_H_num ||
3694     reg ==  V4_num || reg == V4_H_num ||
3695     reg ==  V5_num || reg == V5_H_num ||
3696     reg ==  V6_num || reg == V6_H_num ||
3697     reg ==  V7_num || reg == V7_H_num;
3698 }
3699 
// Any register that can carry an incoming Java argument may also be
// used to spill values.
bool Matcher::is_spillable_arg(int reg)
{
  return can_be_java_arg(reg);
}

// Never use a special assembler routine for long division by a constant.
bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
  return false;
}
3708 
// The div/mod projection masks below are never queried on this port;
// each asserts via ShouldNotReachHere() if it is.

// Register for DIVI projection of divmodI.
RegMask Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODI projection of divmodI.
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for DIVL projection of divmodL.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// SP is preserved in FP across a method-handle invoke.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return FP_REG_mask();
}
3735 
3736 bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
3737   for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
3738     Node* u = addp->fast_out(i);
3739     if (u->is_Mem()) {
3740       int opsize = u->as_Mem()->memory_size();
3741       assert(opsize > 0, "unexpected memory operand size");
3742       if (u->as_Mem()->memory_size() != (1<<shift)) {
3743         return false;
3744       }
3745     }
3746   }
3747   return true;
3748 }
3749 
const bool Matcher::convi2l_type_required = false;

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
// Returns true when the AddP's inputs were pushed for in-place matching
// as part of an address expression; false to let the caller handle it.
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  // First try the simple (base + constant offset) form.
  if (clone_base_plus_offset_address(m, mstack, address_visited)) {
    return true;
  }

  Node *off = m->in(AddPNode::Offset);
  if (off->Opcode() == Op_LShiftL && off->in(2)->is_Con() &&
      size_fits_all_mem_uses(m, off->in(2)->get_int()) &&
      // Are there other uses besides address expressions?
      !is_visited(off)) {
    // (base + (index << con)) where the scale matches every memory use:
    // fold the shift -- and a ConvI2L beneath it, if present -- into the
    // address expression.
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(2), Visit);
    Node *conv = off->in(1);
    if (conv->Opcode() == Op_ConvI2L &&
        // Are there other uses besides address expressions?
        !is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Pre_Visit);
    } else {
      mstack.push(conv, Pre_Visit);
    }
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  } else if (off->Opcode() == Op_ConvI2L &&
             // Are there other uses besides address expressions?
             !is_visited(off)) {
    // (base + ConvI2L(index)): fold just the conversion into the address.
    address_visited.test_set(m->_idx); // Flag as address_visited
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(1), Pre_Visit);
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}
3792 
// Transform:
// (AddP base (AddP base address (LShiftL index con)) offset)
// into:
// (AddP base (AddP base offset) (LShiftL index con))
// to take full advantage of ARM's addressing modes
void Compile::reshape_address(AddPNode* addp) {
  Node *addr = addp->in(AddPNode::Address);
  if (addr->is_AddP() && addr->in(AddPNode::Base) == addp->in(AddPNode::Base)) {
    const AddPNode *addp2 = addr->as_AddP();
    // Only reshape when the inner offset is a scaled index whose scale
    // matches every memory use, or a plain ConvI2L index.
    if ((addp2->in(AddPNode::Offset)->Opcode() == Op_LShiftL &&
         addp2->in(AddPNode::Offset)->in(2)->is_Con() &&
         size_fits_all_mem_uses(addp, addp2->in(AddPNode::Offset)->in(2)->get_int())) ||
        addp2->in(AddPNode::Offset)->Opcode() == Op_ConvI2L) {

      // Any use that can't embed the address computation?
      for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
        Node* u = addp->fast_out(i);
        if (!u->is_Mem()) {
          return;
        }
        // Vector accesses and card-mark stores cannot use the scaled mode.
        if (u->is_LoadVector() || u->is_StoreVector() || u->Opcode() == Op_StoreCM) {
          return;
        }
        if (addp2->in(AddPNode::Offset)->Opcode() != Op_ConvI2L) {
          int scale = 1 << addp2->in(AddPNode::Offset)->in(2)->get_int();
          // Skip the transform when the scaled load is expensive on this CPU.
          if (VM_Version::expensive_load(u->as_Mem()->memory_size(), scale)) {
            return;
          }
        }
      }

      Node* off = addp->in(AddPNode::Offset);
      Node* addr2 = addp2->in(AddPNode::Address);
      Node* base = addp->in(AddPNode::Base);

      Node* new_addr = NULL;
      // Check whether the graph already has the new AddP we need
      // before we create one (no GVN available here).
      for (DUIterator_Fast imax, i = addr2->fast_outs(imax); i < imax; i++) {
        Node* u = addr2->fast_out(i);
        if (u->is_AddP() &&
            u->in(AddPNode::Base) == base &&
            u->in(AddPNode::Address) == addr2 &&
            u->in(AddPNode::Offset) == off) {
          new_addr = u;
          break;
        }
      }

      if (new_addr == NULL) {
        new_addr = new AddPNode(base, addr2, off);
      }
      // Rewire: addp now uses (base + off) as address and the scaled
      // index as offset; drop the old inner nodes if they became dead.
      Node* new_off = addp2->in(AddPNode::Offset);
      addp->set_req(AddPNode::Address, new_addr);
      if (addr->outcnt() == 0) {
        addr->disconnect_inputs(NULL, this);
      }
      addp->set_req(AddPNode::Offset, new_off);
      if (off->outcnt() == 0) {
        off->disconnect_inputs(NULL, this);
      }
    }
  }
}
3857 
// helper for encoding java_to_runtime calls on sim
//
// this is needed to compute the extra arguments required when
// planting a call to the simulator blrt instruction. the TypeFunc
// can be queried to identify the counts for integral, and floating
// arguments and the return type

// Outputs: gpcnt = count of argument slots (see fallthrough note below),
//          fpcnt = count of float/double arguments,
//          rtype = MacroAssembler::ret_type_* classification of the return.
static void getCallInfo(const TypeFunc *tf, int &gpcnt, int &fpcnt, int &rtype)
{
  int gps = 0;
  int fps = 0;
  const TypeTuple *domain = tf->domain();
  int max = domain->cnt();
  for (int i = TypeFunc::Parms; i < max; i++) {
    const Type *t = domain->field_at(i);
    switch(t->basic_type()) {
    case T_FLOAT:
    case T_DOUBLE:
      fps++;
      // NOTE(review): no break here, so float/double arguments are also
      // counted in gps -- presumably intentional for the simulator's blrt
      // calling convention, but confirm before relying on gpcnt.
    default:
      gps++;
    }
  }
  gpcnt = gps;
  fpcnt = fps;
  BasicType rt = tf->return_type();
  // The default label sits mid-switch (legal C++): any return type that
  // is not void/float/double is classified as integral.
  switch (rt) {
  case T_VOID:
    rtype = MacroAssembler::ret_type_void;
    break;
  default:
    rtype = MacroAssembler::ret_type_integral;
    break;
  case T_FLOAT:
    rtype = MacroAssembler::ret_type_float;
    break;
  case T_DOUBLE:
    rtype = MacroAssembler::ret_type_double;
    break;
  }
}
// Tag distinguishing loads from stores in the emit helpers below; stores
// additionally get a Shenandoah store-address check.
enum mem_op { is_load, is_store };

// Emit a volatile access.  Only the bare [base] addressing mode is legal
// here (the guarantees enforce index == -1, disp == 0, scale == 0).
// Note: deliberately NOT wrapped in do { } while (0) -- the MacroAssembler
// declaration must land in the enclosing scope so that enc_classes using
// this macro can issue further `__` instructions afterwards.
#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN, MEM_OP) \
  MacroAssembler _masm(&cbuf);                                          \
  {                                                                     \
    guarantee(INDEX == -1, "mode not permitted for volatile");          \
    guarantee(DISP == 0, "mode not permitted for volatile");            \
    guarantee(SCALE == 0, "mode not permitted for volatile");           \
    if (MEM_OP == is_store) { __ shenandoah_store_addr_check(as_Register(BASE)); } \
    __ INSN(REG, as_Register(BASE));                                    \
  }

// Pointer-to-member types for the three flavours of load/store emitters.
typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
                                  MacroAssembler::SIMD_RegVariant T, const Address &adr);
3915 
  // Used for all non-volatile memory accesses.  The use of
  // $mem->opcode() to discover whether this pattern uses sign-extended
  // offsets is something of a kludge.
  static void loadStore(MacroAssembler masm, mem_insn insn, mem_op mo,
                         Register reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    // Hooboy, this is fugly.  We need a way to communicate to the
    // encoder that the index needs to be sign extended, so we have to
    // enumerate all the cases.
    switch (opcode) {
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDI2LN:
    case INDINDEXI2L:
    case INDINDEXI2LN:
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

    // Shenandoah store-address check before any store.
    if (mo == is_store) masm.shenandoah_store_addr_check(base);
    if (index == -1) {
      // [base, #disp] addressing.
      (masm.*insn)(reg, Address(base, disp));
    } else {
      // [base, index, extend] addressing; displacement must be zero.
      assert(disp == 0, "unsupported address mode: disp = %d", disp);
      (masm.*insn)(reg, Address(base, as_Register(index), scale));
    }
  }
3947 
3948   static void loadStore(MacroAssembler masm, mem_float_insn insn, mem_op mo,
3949                          FloatRegister reg, int opcode,
3950                          Register base, int index, int size, int disp)
3951   {
3952     Address::extend scale;
3953 
3954     switch (opcode) {
3955     case INDINDEXSCALEDI2L:
3956     case INDINDEXSCALEDI2LN:
3957       scale = Address::sxtw(size);
3958       break;
3959     default:
3960       scale = Address::lsl(size);
3961     }
3962 
3963     if (mo == is_store) masm.shenandoah_store_addr_check(base);
3964      if (index == -1) {
3965       (masm.*insn)(reg, Address(base, disp));
3966     } else {
3967       assert(disp == 0, "unsupported address mode: disp = %d", disp);
3968       (masm.*insn)(reg, Address(base, as_Register(index), scale));
3969     }
3970   }
3971 
3972   static void loadStore(MacroAssembler masm, mem_vector_insn insn, mem_op mo,
3973                          FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
3974                          int opcode, Register base, int index, int size, int disp)
3975   {
3976     if (mo == is_store) masm.shenandoah_store_addr_check(base);
3977     if (index == -1) {
3978       (masm.*insn)(reg, T, Address(base, disp));
3979     } else {
3980       assert(disp == 0, "unsupported address mode");
3981       (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
3982     }
3983   }
3984 
3985 %}
3986 
3987 
3988 
3989 //----------ENCODING BLOCK-----------------------------------------------------
3990 // This block specifies the encoding classes used by the compiler to
3991 // output byte streams.  Encoding classes are parameterized macros
3992 // used by Machine Instruction Nodes in order to generate the bit
3993 // encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  Four interfaces are
// currently supported: REG_INTER, CONST_INTER, MEMORY_INTER, and
// COND_INTER.  REG_INTER causes an operand to generate a function
3997 // which returns its register number when queried.  CONST_INTER causes
3998 // an operand to generate a function which returns the value of the
3999 // constant when queried.  MEMORY_INTER causes an operand to generate
4000 // four functions which return the Base Register, the Index Register,
4001 // the Scale Value, and the Offset Value of the operand when queried.
4002 // COND_INTER causes an operand to generate six functions which return
4003 // the encoding code (ie - encoding bits for the instruction)
4004 // associated with each basic boolean condition for a conditional
4005 // instruction.
4006 //
4007 // Instructions specify two basic values for encoding.  Again, a
4008 // function is available to check if the constant displacement is an
4009 // oop. They use the ins_encode keyword to specify their encoding
4010 // classes (which must be a sequence of enc_class names, and their
4011 // parameters, specified in the encoding block), and they use the
4012 // opcode keyword to specify, in order, their primary, secondary, and
4013 // tertiary opcode.  Only the opcode sections which a particular
4014 // instruction needs for encoding need to be specified.
4015 encode %{
4016   // Build emit functions for each basic byte or larger field in the
4017   // intel encoding scheme (opcode, rm, sib, immediate), and call them
4018   // from C++ code in the enc_class source block.  Emit functions will
4019   // live in the main source block for now.  In future, we can
4020   // generalize this by adding a syntax that specifies the sizes of
4021   // fields in an order, so that the adlc can build the emit functions
4022   // automagically
4023 
  // catch all for unimplemented encodings
  enc_class enc_unimplemented %{
    // Emits a trap so any instruction lacking a real encoding fails
    // loudly at run time instead of silently emitting nothing.
    MacroAssembler _masm(&cbuf);
    __ unimplemented("C2 catch all");
  %}
4029 
  // BEGIN Non-volatile memory access
  //
  // Each enc_class below forwards to the loadStore() helper with the
  // MacroAssembler instruction matching the access width and signedness.
  // Some names appear twice with different destination operand classes
  // (iRegI vs iRegL); the operand types distinguish them.

  // Load signed byte into int register.
  enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, is_load, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load signed byte into long register.
  enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load unsigned byte into int register.
  enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load unsigned byte into long register.
  enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load signed halfword into int register.
  enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load signed halfword, sign-extended to 64 bits.
  enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load unsigned halfword into int register.
  enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load unsigned halfword into long register.
  enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load 32-bit word into int register.
  enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load 32-bit word into long register (zero-extended).
  enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load 32-bit word, sign-extended to 64 bits.
  enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load 64-bit doubleword.
  enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load single-precision float.
  enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Load double-precision float.
  enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, is_load,  dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector loads: S = 32-bit, D = 64-bit, Q = 128-bit register variant.
  enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, is_load,  dst_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, is_load,  dst_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, is_load,  dst_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
4133 
4134   enc_class aarch64_enc_strb(iRegI src, memory mem) %{
4135     Register src_reg = as_Register($src$$reg);
4136     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, is_store, src_reg, $mem->opcode(),
4137                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4138   %}
4139 
4140   enc_class aarch64_enc_strb0(memory mem) %{
4141     MacroAssembler _masm(&cbuf);
4142     loadStore(_masm, &MacroAssembler::strb, is_store, zr, $mem->opcode(),
4143                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4144   %}
4145 
4146   enc_class aarch64_enc_strb0_ordered(memory mem) %{
4147     MacroAssembler _masm(&cbuf);
4148     __ membar(Assembler::StoreStore);
4149     loadStore(_masm, &MacroAssembler::strb, is_store, zr, $mem->opcode(),
4150                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4151   %}
4152 
4153   enc_class aarch64_enc_strh(iRegI src, memory mem) %{
4154     Register src_reg = as_Register($src$$reg);
4155     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, is_store, src_reg, $mem->opcode(),
4156                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4157   %}
4158 
4159   enc_class aarch64_enc_strh0(memory mem) %{
4160     MacroAssembler _masm(&cbuf);
4161     loadStore(_masm, &MacroAssembler::strh, is_store, zr, $mem->opcode(),
4162                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4163   %}
4164 
4165   enc_class aarch64_enc_strw(iRegI src, memory mem) %{
4166     Register src_reg = as_Register($src$$reg);
4167     loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, is_store, src_reg, $mem->opcode(),
4168                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4169   %}
4170 
4171   enc_class aarch64_enc_strw0(memory mem) %{
4172     MacroAssembler _masm(&cbuf);
4173     loadStore(_masm, &MacroAssembler::strw, is_store, zr, $mem->opcode(),
4174                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4175   %}
4176 
4177   enc_class aarch64_enc_str(iRegL src, memory mem) %{
4178     Register src_reg = as_Register($src$$reg);
4179     // we sometimes get asked to store the stack pointer into the
4180     // current thread -- we cannot do that directly on AArch64
4181     if (src_reg == r31_sp) {
4182       MacroAssembler _masm(&cbuf);
4183       assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
4184       __ mov(rscratch2, sp);
4185       src_reg = rscratch2;
4186     }
4187     loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, is_store, src_reg, $mem->opcode(),
4188                as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
4189   %}
4190 
  // Store of constant zero: emit str of zr so no register has to be
  // loaded with 0 first.
  enc_class aarch64_enc_str0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::str, is_store, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store of a single-precision float register.
  enc_class aarch64_enc_strs(vRegF src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, is_store, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store of a double-precision float register.
  enc_class aarch64_enc_strd(vRegD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, is_store, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Vector stores: the extra S/D/Q argument selects the 32/64/128-bit
  // SIMD access size passed to the loadStore helper.
  enc_class aarch64_enc_strvS(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, is_store, src_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvD(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, is_store, src_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, is_store, src_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
4226 
  // END Non-volatile memory access

  // volatile loads and stores
  //
  // These encodings use the AArch64 load-acquire / store-release
  // instructions.  The MOV_VOLATILE helper macro (defined elsewhere in
  // this file, not visible here) is assumed to form the effective
  // address — using rscratch1 as a temporary when needed — and emit
  // the named instruction.  TODO confirm against the macro definition.

  // store-release of the low 8 bits of src
  enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrb, is_store);
  %}

  // store-release of the low 16 bits of src
  enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrh, is_store);
  %}

  // store-release of the low 32 bits of src
  enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw, is_store);
  %}
4245 
4246 
  // Load-acquire encodings.  There is no acquiring signed-load
  // instruction, so the signed variants (ldarsbw/ldarsb/ldarshw/ldarsh)
  // emit an unsigned ldarb/ldarh followed by an explicit sign-extend.
  // NOTE(review): the bare use of __ after MOV_VOLATILE relies on the
  // macro introducing a MacroAssembler named _masm into the enclosing
  // scope — confirm against the macro definition.

  // byte load-acquire, sign-extended to 32 bits
  enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb, is_load);
    __ sxtbw(dst_reg, dst_reg);
  %}

  // byte load-acquire, sign-extended to 64 bits
  enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb, is_load);
    __ sxtb(dst_reg, dst_reg);
  %}

  // zero-extending byte load-acquire into a 32-bit dst
  enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb, is_load);
  %}

  // zero-extending byte load-acquire into a 64-bit dst
  enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb, is_load);
  %}

  // halfword load-acquire, sign-extended to 32 bits
  enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh, is_load);
    __ sxthw(dst_reg, dst_reg);
  %}

  // halfword load-acquire, sign-extended to 64 bits
  enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh, is_load);
    __ sxth(dst_reg, dst_reg);
  %}

  // zero-extending halfword load-acquire into a 32-bit dst
  enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh, is_load);
  %}

  // zero-extending halfword load-acquire into a 64-bit dst
  enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh, is_load);
  %}

  // 32-bit load-acquire
  enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw, is_load);
  %}

  // NOTE(review): same enc_class name as above, distinguished only by
  // the iRegL operand type; ADLC appears to tolerate the duplicate —
  // confirm this is intended.
  enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw, is_load);
  %}

  // 64-bit load-acquire
  enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar, is_load);
  %}

  // Acquiring float/double loads: load-acquire the bits into rscratch1,
  // then move them into the FP register with fmov.
  enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw, is_load);
    __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
  %}

  enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar, is_load);
    __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
  %}
4321 
4322   enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
4323     Register src_reg = as_Register($src$$reg);
4324     // we sometimes get asked to store the stack pointer into the
4325     // current thread -- we cannot do that directly on AArch64
4326     if (src_reg == r31_sp) {
4327         MacroAssembler _masm(&cbuf);
4328       assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
4329       __ mov(rscratch2, sp);
4330       src_reg = rscratch2;
4331     }
4332     MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
4333                  rscratch1, stlr, is_store);
4334   %}
4335 
  // Releasing float/double stores: fmov the FP bits into rscratch2,
  // then store-release rscratch2.  The fmov is wrapped in its own
  // braces so its local _masm cannot clash with the one MOV_VOLATILE
  // introduces (presumably — the macro definition is not in view).
  enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovs(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw, is_store);
  %}

  enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovd(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr, is_store);
  %}
4355 
  // synchronized read/update encodings

  // Load-exclusive-acquire of a 64-bit value.  ldaxr accepts only a
  // bare base register, so any displacement and/or scaled index is
  // first folded into rscratch1 with lea.
  enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch1, Address(base, disp));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ ldaxr(dst_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // base + disp + (index << scale) needs two lea steps
        __ lea(rscratch1, Address(base, disp));
        __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      }
    }
  %}
4386 
  // Store-exclusive-release of a 64-bit value.  The address is formed
  // in rscratch2 (rscratch1 receives the stlxr status result).  The
  // trailing cmpw sets the condition flags from that status: EQ iff
  // the store succeeded (status == 0), which callers branch on.
  enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register src_reg = as_Register($src$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch2, Address(base, disp));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ stlxr(rscratch1, src_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // base + disp + (index << scale) needs two lea steps
        __ lea(rscratch2, Address(base, disp));
        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      }
    }
    __ cmpw(rscratch1, zr);
  %}
4416 
  // Compare-and-swap of a 64-bit value.  The memory operand must be a
  // bare base register (no index, no displacement) -- guaranteed below.
  // The acquire=false/release=true ordering and the flags/result
  // behavior are implemented by MacroAssembler::cmpxchg.
  enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    // Shenandoah: check the store target address (presumably a sanity/
    // barrier check -- see shenandoah_store_addr_check definition).
    __ shenandoah_store_addr_check($mem$$base$$Register);
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 32-bit variant of the above.
  enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ shenandoah_store_addr_check($mem$$base$$Register);
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}
4434 
4435   enc_class aarch64_enc_cmpxchgs(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
4436     MacroAssembler _masm(&cbuf);
4437     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4438     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4439                Assembler::halfword, /*acquire*/ false, /*release*/ true,
4440                /*weak*/ false, noreg);
4441   %}
4442 
4443   enc_class aarch64_enc_cmpxchgb(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
4444     MacroAssembler _masm(&cbuf);
4445     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
4446     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
4447                Assembler::byte, /*acquire*/ false, /*release*/ true,
4448                /*weak*/ false, noreg);
4449   %}  
4450     
4451 
  // Shenandoah oop CAS: the expected value must not be clobbered by
  // the CAS, so it is copied into tmp and the CAS works on tmp.
  // NOTE(review): other encodings pass $mem$$base$$Register; with
  // index == -1 and disp == 0 this $mem$$Register should resolve to
  // the same base register -- confirm against the ADLC expansion.
  enc_class aarch64_enc_cmpxchg_oop_shenandoah(memory mem, iRegP oldval, iRegP newval, iRegPNoSp tmp) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    __ cmpxchg_oop_shenandoah($mem$$Register, tmp, $newval$$Register,
                              Assembler::xword, /*acquire*/ false, /*release*/ true, /*weak*/ false);
  %}
4460 
  // The only difference between aarch64_enc_cmpxchg and
  // aarch64_enc_cmpxchg_acq is that we use load-acquire in the
  // CompareAndSwap sequence to serve as a barrier on acquiring a
  // lock.
  enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ shenandoah_store_addr_check($mem$$base$$Register);
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 32-bit acquiring variant.
  enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ shenandoah_store_addr_check($mem$$base$$Register);
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}


  // Acquiring variant of the Shenandoah oop CAS above; oldval is
  // preserved by copying it into tmp first.
  enc_class aarch64_enc_cmpxchg_acq_oop_shenandoah(memory mem, iRegP oldval, iRegP newval, iRegPNoSp tmp) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    __ cmpxchg_oop_shenandoah($mem$$Register, tmp, $newval$$Register,
                              Assembler::xword, /*acquire*/ true, /*release*/ true, /*weak*/ false);
  %}
4492 
  // auxiliary used for CompareAndSwapX to set result register
  // (res = 1 if the preceding CAS left EQ, else 0)
  enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
    MacroAssembler _masm(&cbuf);
    Register res_reg = as_Register($res$$reg);
    __ cset(res_reg, Assembler::EQ);
  %}
4499 
  // prefetch encodings

  // Prefetch for store (PSTL1KEEP: L1, temporal).  prfm handles
  // base+disp and base+index addressing directly; the combined
  // base+disp+index form needs a lea into rscratch1 first.
  enc_class aarch64_enc_prefetchw(memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      __ prfm(Address(base, disp), PSTL1KEEP);
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
      } else {
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
      }
    }
  %}
4520 
  // mov encodings

  // 32-bit immediate move; a zero immediate is encoded as a move
  // from zr.
  enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
    MacroAssembler _masm(&cbuf);
    u_int32_t con = (u_int32_t)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    if (con == 0) {
      __ movw(dst_reg, zr);
    } else {
      __ movw(dst_reg, con);
    }
  %}
4533 
  // 64-bit immediate move; a zero immediate is encoded as a move
  // from zr.
  enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    u_int64_t con = (u_int64_t)$src$$constant;
    if (con == 0) {
      __ mov(dst_reg, zr);
    } else {
      __ mov(dst_reg, con);
    }
  %}
4544 
  // Pointer immediate move.  NULL and the special value 1 have their
  // own encodings (aarch64_enc_mov_p0 / _p1 below), so they must never
  // reach this one.
  enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL || con == (address)1) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      if (rtype == relocInfo::oop_type) {
        __ movoop(dst_reg, (jobject)con, /*immediate*/true);
      } else if (rtype == relocInfo::metadata_type) {
        __ mov_metadata(dst_reg, (Metadata*)con);
      } else {
        assert(rtype == relocInfo::none, "unexpected reloc type");
        // Small constants (below the VM page size) are emitted as a
        // plain immediate; larger addresses are built page-relative
        // with adrp + add.
        if (con < (address)(uintptr_t)os::vm_page_size()) {
          __ mov(dst_reg, con);
        } else {
          unsigned long offset;
          __ adrp(dst_reg, con, offset);
          __ add(dst_reg, dst_reg, offset);
        }
      }
    }
  %}
4569 
  // Null-pointer immediate: move from zr.
  enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Pointer immediate 1 (used as a marker value by callers matching
  // immP_1 -- see the operand definition).
  enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, (u_int64_t)1);
  %}

  // Load the safepoint polling page address with a poll_type reloc.
  // The page is page-aligned, so the adrp residual offset must be 0.
  enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
    MacroAssembler _masm(&cbuf);
    address page = (address)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    unsigned long off;
    __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
    assert(off == 0, "assumed offset == 0");
  %}
4590 
  // Load the card-table byte map base (delegated to the macro
  // assembler helper).
  enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
    MacroAssembler _masm(&cbuf);
    __ load_byte_map_base($dst$$Register);
  %}

  // Narrow (compressed) oop immediate; must carry an oop reloc.
  enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::oop_type, "unexpected reloc type");
      __ set_narrow_oop(dst_reg, (jobject)con);
    }
  %}

  // Narrow null: move from zr.
  enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Narrow (compressed) klass immediate; must carry a metadata reloc.
  enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
      __ set_narrow_klass(dst_reg, (Klass *)con);
    }
  %}
4627 
  // arithmetic encodings

  // Shared add/sub-immediate encoding: the instruct's primary opcode
  // bit selects subtract (the immediate is negated), and a negative
  // effective immediate is emitted as the opposite operation so the
  // encoded immediate is always non-negative.
  enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ subw(dst_reg, src_reg, -con);
    } else {
      __ addw(dst_reg, src_reg, con);
    }
  %}

  // 64-bit variant.  The immLAddSub operand is a small add/sub
  // immediate, so narrowing to int32_t is presumably safe -- confirm
  // against the operand definition.
  enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ sub(dst_reg, src_reg, -con);
    } else {
      __ add(dst_reg, src_reg, con);
    }
  %}
4657 
4658   enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
4659     MacroAssembler _masm(&cbuf);
4660    Register dst_reg = as_Register($dst$$reg);
4661    Register src1_reg = as_Register($src1$$reg);
4662    Register src2_reg = as_Register($src2$$reg);
4663     __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
4664   %}
4665 
4666   enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
4667     MacroAssembler _masm(&cbuf);
4668    Register dst_reg = as_Register($dst$$reg);
4669    Register src1_reg = as_Register($src1$$reg);
4670    Register src2_reg = as_Register($src2$$reg);
4671     __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
4672   %}
4673 
4674   enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
4675     MacroAssembler _masm(&cbuf);
4676    Register dst_reg = as_Register($dst$$reg);
4677    Register src1_reg = as_Register($src1$$reg);
4678    Register src2_reg = as_Register($src2$$reg);
4679     __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
4680   %}
4681 
4682   enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
4683     MacroAssembler _masm(&cbuf);
4684    Register dst_reg = as_Register($dst$$reg);
4685    Register src1_reg = as_Register($src1$$reg);
4686    Register src2_reg = as_Register($src2$$reg);
4687     __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
4688   %}
4689 
  // compare instruction encodings

  // 32-bit register-register compare.
  enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // Compare against an add/sub-range immediate: a flag-setting
  // subtract against zr, or a flag-setting add when the immediate is
  // negative (so the encoded immediate stays non-negative).
  enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int32_t val = $src2$$constant;
    if (val >= 0) {
      __ subsw(zr, reg, val);
    } else {
      __ addsw(zr, reg, -val);
    }
  %}

  // General 32-bit immediate compare: materialize the constant in
  // rscratch1 first.
  enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int32_t val = (u_int32_t)$src2$$constant;
    __ movw(rscratch1, val);
    __ cmpw(reg1, rscratch1);
  %}

  // 64-bit register-register compare.
  enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // 64-bit compare against a 12-bit add/sub immediate.  Long.MIN_VALUE
  // is the one negative value with val == -val, so it cannot be
  // negated for the adds path and is materialized in rscratch1 instead
  // (0 also has val == -val but takes the val >= 0 branch).
  enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int64_t val = $src2$$constant;
    if (val >= 0) {
      __ subs(zr, reg, val);
    } else if (val != -val) {
      __ adds(zr, reg, -val);
    } else {
    // aargh, Long.MIN_VALUE is a special case
      __ orr(rscratch1, zr, (u_int64_t)val);
      __ subs(zr, reg, rscratch1);
    }
  %}

  // General 64-bit immediate compare via rscratch1.
  enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int64_t val = (u_int64_t)$src2$$constant;
    __ mov(rscratch1, val);
    __ cmp(reg1, rscratch1);
  %}

  // Pointer compare (64-bit).
  enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // Narrow-oop compare (32-bit).
  enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // Pointer test against null.
  enc_class aarch64_enc_testp(iRegP src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmp(reg, zr);
  %}

  // Narrow-oop test against null.
  enc_class aarch64_enc_testn(iRegN src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmpw(reg, zr);
  %}
4773 
  // Unconditional branch to a label.
  enc_class aarch64_enc_b(label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ b(*L);
  %}

  // Conditional branch; the condition code comes from the cmpOp
  // operand's cmpcode.
  enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}

  // Unsigned-condition variant; the emitted sequence is identical, the
  // unsigned condition is encoded in the cmpOpU operand's cmpcode.
  enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}
4791 
  // Slow-path partial subtype check.  The helper branches to `miss` on
  // failure and falls through on success (success label is NULL).
  // When $primary is set, the success path additionally zeroes
  // result_reg before reaching the miss label -- presumably so callers
  // can test result_reg == 0 for a hit; confirm against the matching
  // instructs.
  enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
  %{
     Register sub_reg = as_Register($sub$$reg);
     Register super_reg = as_Register($super$$reg);
     Register temp_reg = as_Register($temp$$reg);
     Register result_reg = as_Register($result$$reg);

     Label miss;
     MacroAssembler _masm(&cbuf);
     __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
     if ($primary) {
       __ mov(result_reg, zr);
     }
     __ bind(miss);
  %}
4809 
  // Static Java call.  _method == NULL means a call to a runtime
  // wrapper; otherwise an optimized-virtual or static call relocation
  // is chosen and a to-interpreter stub is emitted.  Either the stub
  // or the trampoline emission can fail when the code cache is full,
  // in which case the compilation is bailed out via record_failure.
  enc_class aarch64_enc_java_static_call(method meth) %{
    MacroAssembler _masm(&cbuf);

    address addr = (address)$meth$$method;
    address call;
    if (!_method) {
      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
      call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
    } else {
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      call = __ trampoline_call(Address(addr, rspec), &cbuf);

      // Emit stub for static call
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}
4836 
  // Dynamic (inline-cache) Java call; bails out the compilation if the
  // code cache is full.
  enc_class aarch64_enc_java_dynamic_call(method meth) %{
    MacroAssembler _masm(&cbuf);
    int method_index = resolved_method_index(cbuf);
    address call = __ ic_call((address)$meth$$method, method_index);
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  // Post-call code.  The VerifyStackAtCalls check is not implemented
  // on AArch64 (call_Unimplemented traps if the flag is enabled).
  enc_class aarch64_enc_call_epilog() %{
    MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      __ call_Unimplemented();
    }
  %}
4854 
  // Call from compiled Java code to the runtime.
  enc_class aarch64_enc_java_to_runtime(method meth) %{
    MacroAssembler _masm(&cbuf);

    // some calls to generated routines (arraycopy code) are scheduled
    // by C2 as runtime calls. if so we can call them using a br (they
    // will be in a reachable segment) otherwise we have to use a blrt
    // which loads the absolute address into a register.
    address entry = (address)$meth$$method;
    CodeBlob *cb = CodeCache::find_blob(entry);
    if (cb) {
      // Target is inside the code cache: reachable via trampoline call;
      // bail out the compile if the code cache is full.
      address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
      if (call == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    } else {
      // Target is outside the code cache: call through an absolute
      // address in rscratch1 with blrt, passing the argument counts /
      // return type obtained from the call's type function tf().
      int gpcnt;
      int fpcnt;
      int rtype;
      getCallInfo(tf(), gpcnt, fpcnt, rtype);
      Label retaddr;
      __ adr(rscratch2, retaddr);
      __ lea(rscratch1, RuntimeAddress(entry));
      // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
      __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
      __ blrt(rscratch1, gpcnt, fpcnt, rtype);
      __ bind(retaddr);
      // Pop the breadcrumb slots pushed above.
      __ add(sp, sp, 2 * wordSize);
    }
  %}
4885 
  // Jump to the shared rethrow stub (far_jump: may be out of branch
  // range of this nmethod).
  enc_class aarch64_enc_rethrow() %{
    MacroAssembler _masm(&cbuf);
    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
  %}

  // Method return.
  enc_class aarch64_enc_ret() %{
    MacroAssembler _masm(&cbuf);
    __ ret(lr);
  %}

  // Tail call: indirect jump to the target register.
  enc_class aarch64_enc_tail_call(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    __ br(target_reg);
  %}

  // Tail jump (exception forwarding): pass the popped return address
  // to the callee in r3, then jump.
  enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    // exception oop should be in r0
    // ret addr has been popped into lr
    // callee expects it in r3
    __ mov(r3, lr);
    __ br(target_reg);
  %}
4911 
4912   enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
4913     MacroAssembler _masm(&cbuf);
4914     Register oop = as_Register($object$$reg);
4915     Register box = as_Register($box$$reg);
4916     Register disp_hdr = as_Register($tmp$$reg);
4917     Register tmp = as_Register($tmp2$$reg);
4918     Label cont;
4919     Label object_has_monitor;
4920     Label cas_failed;
4921 
4922     assert_different_registers(oop, box, tmp, disp_hdr);
4923 
4924     __ shenandoah_store_addr_check(oop);
4925 
4926     // Load markOop from object into displaced_header.
4927     __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
4928 
4929     // Always do locking in runtime.
4930     if (EmitSync & 0x01) {
4931       __ cmp(oop, zr);
4932       return;
4933     }
4934 
4935     if (UseBiasedLocking && !UseOptoBiasInlining) {
4936       __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
4937     }
4938 
4939     // Handle existing monitor
4940     if ((EmitSync & 0x02) == 0) {
4941       // we can use AArch64's bit test and branch here but
4942       // markoopDesc does not define a bit index just the bit value
4943       // so assert in case the bit pos changes
4944 #     define __monitor_value_log2 1
4945       assert(markOopDesc::monitor_value == (1 << __monitor_value_log2), "incorrect bit position");
4946       __ tbnz(disp_hdr, __monitor_value_log2, object_has_monitor);
4947 #     undef __monitor_value_log2
4948     }
4949 
4950     // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
4951     __ orr(disp_hdr, disp_hdr, markOopDesc::unlocked_value);
4952 
4953     // Load Compare Value application register.
4954 
4955     // Initialize the box. (Must happen before we update the object mark!)
4956     __ str(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));
4957 
4958     // Compare object markOop with mark and if equal exchange scratch1
4959     // with object markOop.
4960     if (UseLSE) {
4961       __ mov(tmp, disp_hdr);
4962       __ casal(Assembler::xword, tmp, box, oop);
4963       __ cmp(tmp, disp_hdr);
4964       __ br(Assembler::EQ, cont);
4965     } else {
4966       Label retry_load;
4967       if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
4968         __ prfm(Address(oop), PSTL1STRM);
4969       __ bind(retry_load);
4970       __ ldaxr(tmp, oop);
4971       __ cmp(tmp, disp_hdr);
4972       __ br(Assembler::NE, cas_failed);
4973       // use stlxr to ensure update is immediately visible
4974       __ stlxr(tmp, box, oop);
4975       __ cbzw(tmp, cont);
4976       __ b(retry_load);
4977     }
4978 
4979     // Formerly:
4980     // __ cmpxchgptr(/*oldv=*/disp_hdr,
4981     //               /*newv=*/box,
4982     //               /*addr=*/oop,
4983     //               /*tmp=*/tmp,
4984     //               cont,
4985     //               /*fail*/NULL);
4986 
4987     assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
4988 
4989     // If the compare-and-exchange succeeded, then we found an unlocked
4990     // object, will have now locked it will continue at label cont
4991 
4992     __ bind(cas_failed);
4993     // We did not see an unlocked object so try the fast recursive case.
4994 
4995     // Check if the owner is self by comparing the value in the
4996     // markOop of object (disp_hdr) with the stack pointer.
4997     __ mov(rscratch1, sp);
4998     __ sub(disp_hdr, disp_hdr, rscratch1);
4999     __ mov(tmp, (address) (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
5000     // If condition is true we are cont and hence we can store 0 as the
5001     // displaced header in the box, which indicates that it is a recursive lock.
5002     __ ands(tmp/*==0?*/, disp_hdr, tmp);
5003     __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));
5004 
5005     // Handle existing monitor.
5006     if ((EmitSync & 0x02) == 0) {
5007       __ b(cont);
5008 
5009       __ bind(object_has_monitor);
5010       // The object's monitor m is unlocked iff m->owner == NULL,
5011       // otherwise m->owner may contain a thread or a stack address.
5012       //
5013       // Try to CAS m->owner from NULL to current thread.
5014       __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
5015       __ mov(disp_hdr, zr);
5016 
5017       if (UseLSE) {
5018         __ mov(rscratch1, disp_hdr);
5019         __ casal(Assembler::xword, rscratch1, rthread, tmp);
5020         __ cmp(rscratch1, disp_hdr);
5021       } else {
5022         Label retry_load, fail;
5023         if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
5024           __ prfm(Address(tmp), PSTL1STRM);
5025         __ bind(retry_load);
5026         __ ldaxr(rscratch1, tmp);
5027         __ cmp(disp_hdr, rscratch1);
5028         __ br(Assembler::NE, fail);
5029         // use stlxr to ensure update is immediately visible
5030         __ stlxr(rscratch1, rthread, tmp);
5031         __ cbnzw(rscratch1, retry_load);
5032         __ bind(fail);
5033       }
5034 
5035       // Label next;
5036       // __ cmpxchgptr(/*oldv=*/disp_hdr,
5037       //               /*newv=*/rthread,
5038       //               /*addr=*/tmp,
5039       //               /*tmp=*/rscratch1,
5040       //               /*succeed*/next,
5041       //               /*fail*/NULL);
5042       // __ bind(next);
5043 
5044       // store a non-null value into the box.
5045       __ str(box, Address(box, BasicLock::displaced_header_offset_in_bytes()));
5046 
5047       // PPC port checks the following invariants
5048       // #ifdef ASSERT
5049       // bne(flag, cont);
5050       // We have acquired the monitor, check some invariants.
5051       // addw(/*monitor=*/tmp, tmp, -ObjectMonitor::owner_offset_in_bytes());
5052       // Invariant 1: _recursions should be 0.
5053       // assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
5054       // assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), tmp,
5055       //                        "monitor->_recursions should be 0", -1);
5056       // Invariant 2: OwnerIsThread shouldn't be 0.
5057       // assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
5058       //assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), tmp,
5059       //                           "monitor->OwnerIsThread shouldn't be 0", -1);
5060       // #endif
5061     }
5062 
5063     __ bind(cont);
5064     // flag == EQ indicates success
5065     // flag == NE indicates failure
5066 
5067   %}
5068 
5069   // TODO
5070   // reimplement this with custom cmpxchgptr code
5071   // which avoids some of the unnecessary branching
  // Fast-path monitor exit.
  //   object    : oop being unlocked
  //   box       : on-stack BasicLock holding the displaced header
  //   tmp/tmp2  : scratch registers (bound below as disp_hdr and tmp)
  // On exit the condition flags encode the outcome:
  //   EQ => unlocked in the fast path, NE => caller must enter the runtime
  // (see the comments at label cont at the end of this encoding).
  enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // NOTE(review): Shenandoah GC sanity check on the oop address;
    // implemented in the Shenandoah-aware MacroAssembler — confirm
    // semantics there.
    __ shenandoah_store_addr_check(oop);

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      __ cmp(oop, zr); // Oop can't be 0 here => always false.
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      // May branch directly to cont when the lock was biased.
      __ biased_locking_exit(oop, tmp, cont);
    }

    // Find the lock address and load the displaced header from the stack.
    __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // If the displaced header is 0, we have a recursive unlock.
    __ cmp(disp_hdr, zr);    // EQ here also means success at label cont
    __ br(Assembler::EQ, cont);


    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
      // Inflated lock: monitor bit set in the mark word => take monitor path.
      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
    }

    // Check if it is still a light weight lock, this is true if we
    // see the stack address of the basicLock in the markOop of the
    // object.

      if (UseLSE) {
        // LSE CAS with release semantics only (casl): expect box in the
        // mark word, install the displaced header.  cmp sets EQ on success.
        __ mov(tmp, box);
        __ casl(Assembler::xword, tmp, disp_hdr, oop);
        __ cmp(tmp, box);
      } else {
        // Exclusive-monitor loop: ldxr/stlxr retry until the store-exclusive
        // succeeds or the mark word no longer holds our box.
        Label retry_load;
        if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
          __ prfm(Address(oop), PSTL1STRM);
        __ bind(retry_load);
        __ ldxr(tmp, oop);
        __ cmp(box, tmp);
        __ br(Assembler::NE, cas_failed);
        // use stlxr to ensure update is immediately visible
        __ stlxr(tmp, disp_hdr, oop);
        __ cbzw(tmp, cont);   // store-exclusive succeeded; EQ still set from cmp
        __ b(retry_load);
      }

    // __ cmpxchgptr(/*compare_value=*/box,
    //               /*exchange_value=*/disp_hdr,
    //               /*where=*/oop,
    //               /*result=*/tmp,
    //               cont,
    //               /*cas_failed*/NULL);
    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    __ bind(cas_failed);

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ b(cont);

      __ bind(object_has_monitor);
      __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
      __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
      __ cmp(rscratch1, zr);
      __ br(Assembler::NE, cont);   // not owner, or recursions != 0: slow path

      __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
      // The cmp establishes the flags consumed at cont; cbnz then routes the
      // waiters-present (NE) case to the slow path without disturbing them.
      __ cmp(rscratch1, zr);
      __ cbnz(rscratch1, cont);
      // need a release store here
      __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ stlr(rscratch1, tmp); // rscratch1 is zero
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
5169 
5170 %}
5171 
5172 //----------FRAME--------------------------------------------------------------
5173 // Definition of frame structure and management information.
5174 //
5175 //  S T A C K   L A Y O U T    Allocators stack-slot number
5176 //                             |   (to get allocators register number
5177 //  G  Owned by    |        |  v    add OptoReg::stack0())
5178 //  r   CALLER     |        |
5179 //  o     |        +--------+      pad to even-align allocators stack-slot
5180 //  w     V        |  pad0  |        numbers; owned by CALLER
5181 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
5182 //  h     ^        |   in   |  5
5183 //        |        |  args  |  4   Holes in incoming args owned by SELF
5184 //  |     |        |        |  3
5185 //  |     |        +--------+
5186 //  V     |        | old out|      Empty on Intel, window on Sparc
5187 //        |    old |preserve|      Must be even aligned.
5188 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
5189 //        |        |   in   |  3   area for Intel ret address
5190 //     Owned by    |preserve|      Empty on Sparc.
5191 //       SELF      +--------+
5192 //        |        |  pad2  |  2   pad to align old SP
5193 //        |        +--------+  1
5194 //        |        | locks  |  0
5195 //        |        +--------+----> OptoReg::stack0(), even aligned
5196 //        |        |  pad1  | 11   pad to align new SP
5197 //        |        +--------+
5198 //        |        |        | 10
5199 //        |        | spills |  9   spills
5200 //        V        |        |  8   (pad0 slot for callee)
5201 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
5202 //        ^        |  out   |  7
5203 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
5204 //     Owned by    +--------+
5205 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
5206 //        |    new |preserve|      Must be even-aligned.
5207 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
5208 //        |        |        |
5209 //
5210 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
5211 //         known from SELF's arguments and the Java calling convention.
5212 //         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
5220 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
5221 //         even aligned with pad0 as needed.
5222 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
5223 //           (the latter is true on Intel but is it false on AArch64?)
5224 //         region 6-11 is even aligned; it may be padded out more so that
5225 //         the region from SP to FP meets the minimum stack alignment.
5226 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
5227 //         alignment.  Region 11, pad1, may be dynamically extended so that
5228 //         SP meets the minimum alignment.
5229 
frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.

  // Inline Cache Register or methodOop for I2C.
  inline_cache_reg(R12);

  // Method Oop Register when calling interpreter.
  interpreter_method_oop_reg(R12);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(R31);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(R29);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots. aarch64 needs two slots for
  // return address and fp.
  // TODO think this is correct but check
  in_preserve_stack_slots(4);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // TODO this may well be correct but need to check why that - 2 is there
  // ppc port uses 0 but we definitely need to allow for fixed_slots
  // which folds in the space used for monitors
  return_addr(STACK - 2 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    // TODO do we allow ideal_reg == Op_RegN???
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // Low half of the return register for each ideal register type,
    // indexed by ideal register number (R0 for integer/pointer, V0 for FP).
    static const int lo[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      R0_num,                            // Op_RegN
      R0_num,                            // Op_RegI
      R0_num,                            // Op_RegP
      V0_num,                            // Op_RegF
      V0_num,                            // Op_RegD
      R0_num                             // Op_RegL
    };

    // High half; OptoReg::Bad marks 32-bit types with no high half.
    static const int hi[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      OptoReg::Bad,                       // Op_RegN
      OptoReg::Bad,                      // Op_RegI
      R0_H_num,                          // Op_RegP
      OptoReg::Bad,                      // Op_RegF
      V0_H_num,                          // Op_RegD
      R0_H_num                           // Op_RegL
    };

    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
5333 
5334 //----------ATTRIBUTES---------------------------------------------------------
5335 //----------Operand Attributes-------------------------------------------------
5336 op_attrib op_cost(1);        // Required cost attribute
5337 
5338 //----------Instruction Attributes---------------------------------------------
5339 ins_attrib ins_cost(INSN_COST); // Required cost attribute
5340 ins_attrib ins_size(32);        // Required size attribute (in bits)
5341 ins_attrib ins_short_branch(0); // Required flag: is this instruction
5342                                 // a non-matching short branch variant
5343                                 // of some long branch?
5344 ins_attrib ins_alignment(4);    // Required alignment attribute (must
5345                                 // be a power of 2) specifies the
5346                                 // alignment that some part of the
5347                                 // instruction (not necessarily the
5348                                 // start) requires.  If > 1, a
5349                                 // compute_padding() function must be
5350                                 // provided for the instruction
5351 
5352 //----------OPERANDS-----------------------------------------------------------
5353 // Operand definitions must precede instruction definitions for correct parsing
5354 // in the ADLC because operands constitute user defined types which are used in
5355 // instruction definitions.
5356 
5357 //----------Simple Operands----------------------------------------------------
5358 
// Integer operands 32 bit
// 32 bit immediate
operand immI()
%{
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit zero
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit increment
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit decrement
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift values for add/sub extension shift
// (extended-register forms accept a shift amount of 0..4)
operand immIExt()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5413 
// 32 bit integer <= 4 (note: no lower bound, unlike immIExt)
operand immI_le_4()
%{
  predicate(n->get_int() <= 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 31
operand immI_31()
%{
  predicate(n->get_int() == 31);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 8
operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 16
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 24
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 32
operand immI_32()
%{
  predicate(n->get_int() == 32);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 48
operand immI_48()
%{
  predicate(n->get_int() == 48);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 56
operand immI_56()
%{
  predicate(n->get_int() == 56);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 63
operand immI_63()
%{
  predicate(n->get_int() == 63);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 64
operand immI_64()
%{
  predicate(n->get_int() == 64);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 255 (0xFF)
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 65535 (0xFFFF)
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5533 
// 64 bit constant 255 (0xFF)
operand immL_255()
%{
  predicate(n->get_long() == 255L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 65535 (0xFFFF)
operand immL_65535()
%{
  predicate(n->get_long() == 65535L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 4294967295 (0xFFFFFFFF)
operand immL_4294967295()
%{
  predicate(n->get_long() == 4294967295L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit mask of contiguous low-order ones (value + 1 is a power of 2),
// with the top two bits clear
operand immL_bitmask()
%{
  predicate(((n->get_long() & 0xc000000000000000l) == 0)
            && is_power_of_2(n->get_long() + 1));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit mask of contiguous low-order ones (value + 1 is a power of 2),
// with the top two bits clear
operand immI_bitmask()
%{
  predicate(((n->get_int() & 0xc0000000) == 0)
            && is_power_of_2(n->get_int() + 1));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5585 
// Scale values for scaled offset addressing modes (up to long but not quad)
operand immIScale()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 26 bit signed offset -- for pc-relative branches
operand immI26()
%{
  predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 19 bit signed offset -- for pc-relative loads
operand immI19()
%{
  predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset -- for base plus immediate loads
operand immIU12()
%{
  predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset (long variant of immIU12)
operand immLU12()
%{
  predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5639 
// Offset for scaled or unscaled immediate loads and stores
operand immIOffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// As above but for 4-byte accesses (size shift 2)
operand immIOffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 2));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// As above but for 8-byte accesses (size shift 3)
operand immIOffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// As above but for 16-byte accesses (size shift 4)
operand immIOffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long variants of the load/store offset operands above
operand immLoffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_long()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset for 4-byte accesses (size shift 2)
operand immLoffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 2));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset for 8-byte accesses (size shift 3)
operand immLoffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 3));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long offset for 16-byte accesses (size shift 4)
operand immLoffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 4));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5720 
// 32 bit integer valid for add sub immediate
operand immIAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unsigned integer valid for logical immediate
// TODO -- check this is right when e.g the mask is 0x80000000
operand immILog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer operands 64 bit
// 64 bit immediate
operand immL()
%{
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit zero
operand immL0()
%{
  predicate(n->get_long() == 0);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit increment
operand immL_1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit decrement
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit offset of pc in thread anchor
// (matches only the byte offset of last_Java_pc within JavaThread)

operand immL_pc_off()
%{
  predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
                             in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for add sub immediate
operand immLAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for logical immediate
operand immLLog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5829 
// Pointer operands
// Pointer Immediate
operand immP()
%{
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate One
// this is used in object initialization (initial object header)
operand immP_1()
%{
  predicate(n->get_ptr() == 1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Polling Page Pointer Immediate
// (matches only the address of the VM's safepoint polling page)
operand immPollPage()
%{
  predicate((address)n->get_ptr() == os::get_polling_page());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Card Table Byte Map Base
operand immByteMapBase()
%{
  // Get base of card map
  predicate((jbyte*)n->get_ptr() ==
        ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus One
// this is used when we want to write the current PC to the thread anchor
operand immP_M1()
%{
  predicate(n->get_ptr() == -1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus Two
// this is used when we want to write the current PC to the thread anchor
operand immP_M2()
%{
  predicate(n->get_ptr() == -2);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5911 
// Float and Double operands
// Double Immediate
operand immD()
%{
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: +0.0d
operand immD0()
%{
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double constant encodable as an FP immediate (FMOV packed form);
// see Assembler::operand_valid_for_float_immediate.
operand immDPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF()
%{
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: +0.0f.
operand immF0()
%{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float constant encodable as an FP immediate (FMOV packed form)
operand immFPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow pointer operands
// Narrow Pointer Immediate
operand immN()
%{
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow NULL Pointer Immediate
operand immN0()
%{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow Klass Pointer Immediate
operand immNKlass()
%{
  match(ConNKlass);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
6003 
// Integer 32 bit Register Operands
// Integer 32 bit Register (excludes SP)
operand iRegI()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register not Special
operand iRegINoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register Operands
// Integer 64 bit Register (includes SP)
operand iRegL()
%{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register not Special
// NOTE(review): unlike the sibling operands this omits op_cost —
// TODO confirm the default cost is intended here.
operand iRegLNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg));
  match(RegL);
  match(iRegL_R0);
  format %{ %}
  interface(REG_INTER);
%}
6047 
// Pointer Register Operands
// Pointer Register
operand iRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(iRegPNoSp);
  match(iRegP_R0);
  //match(iRegP_R2);
  //match(iRegP_R4);
  //match(iRegP_R5);
  match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register not Special
operand iRegPNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_ptr_reg));
  match(RegP);
  // match(iRegP);
  // match(iRegP_R0);
  // match(iRegP_R2);
  // match(iRegP_R4);
  // match(iRegP_R5);
  // match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R0 only
operand iRegP_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R1 only
operand iRegP_R1()
%{
  constraint(ALLOC_IN_RC(r1_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R2 only
operand iRegP_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R3 only
operand iRegP_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R4 only
operand iRegP_R4()
%{
  constraint(ALLOC_IN_RC(r4_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R5 only
operand iRegP_R5()
%{
  constraint(ALLOC_IN_RC(r5_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R10 only
operand iRegP_R10()
%{
  constraint(ALLOC_IN_RC(r10_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6164 
// Fixed-register long operands, analogous to the iRegP_RN operands
// above but matching RegL.

// Long 64 bit Register R0 only
operand iRegL_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R2 only
operand iRegL_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R3 only
operand iRegL_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R11 only
operand iRegL_R11()
%{
  constraint(ALLOC_IN_RC(r11_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6208 
// Pointer 64 bit Register FP only
// Pins allocation to the frame pointer register class.
operand iRegP_FP()
%{
  constraint(ALLOC_IN_RC(fp_reg));
  match(RegP);
  // match(iRegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6219 
// Fixed-register 32-bit int operands (int_rN_reg classes).

// Register R0 only
operand iRegI_R0()
%{
  constraint(ALLOC_IN_RC(int_r0_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R2 only
operand iRegI_R2()
%{
  constraint(ALLOC_IN_RC(int_r2_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R3 only
operand iRegI_R3()
%{
  constraint(ALLOC_IN_RC(int_r3_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}


// Register R4 only
operand iRegI_R4()
%{
  constraint(ALLOC_IN_RC(int_r4_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6264 
6265 
// Pointer Register Operands
// Narrow Pointer Register (32-bit compressed oop, RegN)
operand iRegN()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegN);
  match(iRegNNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6277 
// Fixed-register narrow-pointer operands.
// NOTE(review): these constrain to the rN_reg classes (not intN_rN_reg
// as the iRegI_RN operands do) -- presumably intentional since a narrow
// oop lives in the low half of the 64-bit register; confirm against the
// register block definitions.
operand iRegN_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand iRegN_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand iRegN_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6304 
// Narrow Pointer Register not Special
// (the previous header said "Integer 64 bit" -- a copy/paste slip; this
// operand matches 32-bit narrow pointers, RegN)
operand iRegNNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6314 
// heap base register -- used for encoding immN0
// Allocates only from the heapbase register class.

operand iRegIHeapbase()
%{
  constraint(ALLOC_IN_RC(heapbase_reg));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6325 
// Float Register
// Float register operands
operand vRegF()
%{
  constraint(ALLOC_IN_RC(float_reg));
  match(RegF);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double Register
// Double register operands
operand vRegD()
%{
  constraint(ALLOC_IN_RC(double_reg));
  match(RegD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6349 
// Vector register operands: vecD for 64-bit (D-sized) vectors, vecX for
// 128-bit (Q-sized) vectors.
operand vecD()
%{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vecX()
%{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6369 
// Fixed-register double operands pinned to SIMD registers V0..V3.

operand vRegD_V0()
%{
  constraint(ALLOC_IN_RC(v0_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V1()
%{
  constraint(ALLOC_IN_RC(v1_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V2()
%{
  constraint(ALLOC_IN_RC(v2_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V3()
%{
  constraint(ALLOC_IN_RC(v3_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6405 
6406 // Flags register, used as output of signed compare instructions
6407 
// note that on AArch64 we also use this register as the output for
// floating point compare instructions (CmpF CmpD). this ensures
6410 // that ordered inequality tests use GT, GE, LT or LE none of which
6411 // pass through cases where the result is unordered i.e. one or both
6412 // inputs to the compare is a NaN. this means that the ideal code can
6413 // replace e.g. a GT with an LE and not end up capturing the NaN case
6414 // (where the comparison should always fail). EQ and NE tests are
6415 // always generated in ideal code so that unordered folds into the NE
6416 // case, matching the behaviour of AArch64 NE.
6417 //
6418 // This differs from x86 where the outputs of FP compares use a
6419 // special FP flags registers and where compares based on this
6420 // register are distinguished into ordered inequalities (cmpOpUCF) and
6421 // EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
6422 // to explicitly handle the unordered case in branches. x86 also has
6423 // to include extra CMoveX rules to accept a cmpOpUCF input.
6424 
// Condition flags operand produced by signed (and, per the comment
// above, floating point) compares.
operand rFlagsReg()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGS" %}
  interface(REG_INTER);
%}
6434 
// Flags register, used as output of unsigned compare instructions
// Same int_flags class as rFlagsReg; the distinct operand type lets
// rules select unsigned condition codes (see cmpOpU below).
operand rFlagsRegU()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGSU" %}
  interface(REG_INTER);
%}
6445 
// Special Registers

// Method Register
// Pointer operand pinned to the inline cache register (method_reg class).
operand inline_cache_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6458 
// Pointer operand pinned to the interpreter's method oop register
// (shares the method_reg class with inline_cache_RegP above).
operand interpreter_method_oop_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6468 
// Thread Register
// Pointer operand pinned to the current-thread register.
operand thread_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(thread_reg)); // thread_reg (old comment said "link_reg" -- copy/paste slip from lr_RegP)
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6478 
// Pointer operand pinned to the link register (LR).
operand lr_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(lr_reg)); // link_reg
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6487 
//----------Memory Operands----------------------------------------------------

// Simple register-indirect addressing: [reg].
operand indirect(iRegP reg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);
  op_cost(0);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);   // 0xffffffff encodes "no index register"
    scale(0x0);
    disp(0x0);
  %}
%}
6503 
// Base + sign-extended (sxtw) 32-bit index, scaled: matches
// AddP reg ((ConvI2L ireg) << scale). The predicate requires that every
// memory use of the address can accommodate the scaled offset.
operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}
6518 
// Base + 64-bit index, scaled (lsl): matches AddP reg (lreg << scale).
operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}
6533 
// Base + sign-extended 32-bit index, unscaled.
operand indIndexI2L(iRegP reg, iRegI ireg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}
6547 
// Base + 64-bit index, unscaled.
operand indIndex(iRegP reg, iRegL lreg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg lreg);
  op_cost(0);
  format %{ "$reg, $lreg" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}
6561 
// Base + immediate int offset operands. The numeric suffix selects a
// more restricted immediate operand (immIOffset4/8/16) -- presumably
// offsets aligned to that access size, suitable for scaled ldr/str
// immediate encodings; the imm operand definitions are earlier in the
// file.

operand indOffI(iRegP reg, immIOffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);   // no index register
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI4(iRegP reg, immIOffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI8(iRegP reg, immIOffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI16(iRegP reg, immIOffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6617 
// Base + immediate long offset operands; the long-offset twins of the
// indOffI family above.

operand indOffL(iRegP reg, immLoffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);   // no index register
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL4(iRegP reg, immLoffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL8(iRegP reg, immLoffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL16(iRegP reg, immLoffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6673 
// Indirect addressing through a narrow (compressed) oop. Only legal
// when narrow_oop_shift() == 0, so DecodeN needs no shift and the
// narrow register can serve directly as the base.
operand indirectN(iRegN reg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);
  op_cost(0);
  format %{ "[$reg]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp(0x0);
  %}
%}
6688 
// Narrow-oop-based twins of indIndexScaledI2L / indIndexScaled: the
// base is a DecodeN of a narrow register, legal only when
// narrow_oop_shift() == 0.

operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}
6718 
// Narrow-oop-based twins of indIndexI2L / indIndex (unscaled index).

operand indIndexI2LN(iRegN reg, iRegI ireg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

operand indIndexN(iRegN reg, iRegL lreg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) lreg);
  op_cost(0);
  format %{ "$reg, $lreg\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}
6748 
// Narrow-oop-based twins of indOffI / indOffL (base + immediate offset).

operand indOffIN(iRegN reg, immIOffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffLN(iRegN reg, immLoffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6778 
6779 
6780 
// AArch64 opto stubs need to write to the pc slot in the thread anchor
// Address = thread register + fixed immL_pc_off displacement.
operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6795 
//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
// NOTE(review): only stackSlotP declares an op_cost (100); the other
// stack slot operands rely on the default -- confirm this asymmetry is
// intentional.
operand stackSlotP(sRegP reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  // No match rule because this operand is only generated in matching
  // match(RegP);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegI);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegF);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegD);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegL);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
6870 
6871 // Operands for expressing Control Flow
6872 // NOTE: Label is a predefined operand which should not be redefined in
6873 //       the AD file. It is generically handled within the ADLC.
6874 
6875 //----------Conditional Branch Operands----------------------------------------
6876 // Comparison Op  - This is the operation of the comparison, and is limited to
6877 //                  the following set of codes:
6878 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
6879 //
6880 // Other attributes of the comparison, such as unsignedness, are specified
6881 // by the comparison instruction that sets a condition code flags register.
6882 // That result is represented by a flags operand whose subtype is appropriate
6883 // to the unsignedness (etc.) of the comparison.
6884 //
6885 // Later, the instruction which matches both the Comparison Op (a Bool) and
6886 // the flags (produced by the Cmp) specifies the coding of the comparison op
6887 // by matching a specific subtype of Bool operand below, such as cmpOpU.
6888 
// used for signed integral comparisons and fp comparisons

// Each entry pairs the AArch64 condition-code encoding with its
// assembler mnemonic.
operand cmpOp()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6907 
// used for unsigned integral comparisons

// Same shape as cmpOp, but the ordered tests use the unsigned
// condition codes (lo/hs/ls/hi).
operand cmpOpU()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0x3, "lo");
    greater_equal(0x2, "hs");
    less_equal(0x9, "ls");
    greater(0x8, "hi");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6926 
// used for certain integral comparisons which can be
// converted to cbxx or tbxx instructions

// Restricted to eq/ne tests via the predicate; the full condition
// table is still listed as COND_INTER requires all entries.
operand cmpOpEqNe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);
  predicate(n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::eq);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6950 
// used for certain integral comparisons which can be
// converted to cbxx or tbxx instructions

// Restricted to lt/ge tests via the predicate.
operand cmpOpLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  predicate(n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6975 
// used for certain unsigned integral comparisons which can be
// converted to cbxx or tbxx instructions

// Restricted to eq/ne/lt/ge tests via the predicate.
operand cmpOpUEqNeLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  predicate(n->as_Bool()->_test._test == BoolTest::eq
            || n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
7002 
// Special operand allowing long args to int ops to be truncated for free
//
// Matches a ConvL2I of a long register: 32-bit instructions only read
// the low 32 bits of the source register, so no explicit truncation
// instruction is needed.
operand iRegL2I(iRegL reg) %{

  op_cost(0);

  match(ConvL2I reg);

  format %{ "l2i($reg)" %}

  interface(REG_INTER);  // terminating ';' added for consistency with every other operand
%}
7015 
// Memory operand classes accepted by vector load/store rules; the
// numeric suffix is the access size in bytes, matching the restricted
// immediate-offset operands (indOffI4/indOffL4 etc.).
opclass vmem4(indirect, indIndex, indOffI4, indOffL4);
opclass vmem8(indirect, indIndex, indOffI8, indOffL8);
opclass vmem16(indirect, indIndex, indOffI16, indOffL16);
7019 
7020 //----------OPERAND CLASSES----------------------------------------------------
7021 // Operand Classes are groups of operands that are used as to simplify
7022 // instruction definitions by not requiring the AD writer to specify
7023 // separate instructions for every form of operand when the
7024 // instruction accepts multiple operand types with the same basic
7025 // encoding and format. The classic case of this is memory operands.
7026 
// memory is used to define read/write location for load/store
// instruction defs. we can turn a memory op into an Address
// (covers every addressing-mode operand defined above, both the plain
// and narrow-oop variants)
opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI, indOffL,
               indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);
7032 
7033 // iRegIorL2I is used for src inputs in rules for 32 bit int (I)
7034 // operations. it allows the src to be either an iRegI or a (ConvL2I
7035 // iRegL). in the latter case the l2i normally planted for a ConvL2I
7036 // can be elided because the 32-bit instruction will just employ the
7037 // lower 32 bits anyway.
7038 //
7039 // n.b. this does not elide all L2I conversions. if the truncated
7040 // value is consumed by more than one operation then the ConvL2I
7041 // cannot be bundled into the consuming nodes so an l2i gets planted
7042 // (actually a movw $dst $src) and the downstream instructions consume
7043 // the result of the l2i as an iRegI input. That's a shame since the
7044 // movw is actually redundant but its not too costly.
7045 
opclass iRegIorL2I(iRegI, iRegL2I);  // see the comment block above: allows free truncation of long sources
7047 
7048 //----------PIPELINE-----------------------------------------------------------
7049 // Rules which define the behavior of the target architectures pipeline.
7050 
// For specific pipelines, eg A53, define the stages of that pipeline
//pipe_desc(ISS, EX1, EX2, WR);
// Map the A53-style named stages onto the generic S0..S5 stages
// declared by pipe_desc() below.
#define ISS S0
#define EX1 S1
#define EX2 S2
#define WR  S3
7057 
7058 // Integer ALU reg operation
7059 pipeline %{
7060 
attributes %{
  // ARM instructions are of fixed length
  fixed_size_instructions;        // Fixed size instructions
  max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
  // ARM instructions come in 32-bit word units
  instruction_unit_size = 4;         // An instruction is 4 bytes long
  instruction_fetch_unit_size = 64;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 64 bytes

  // List of nop instructions
  nops( MachNop );
%}
7073 
7074 // We don't use an actual pipeline model so don't care about resources
7075 // or description. we do use pipeline classes to introduce fixed
7076 // latencies
7077 
//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine
// INS01 / ALU are composite resources: an instruction needing INS01 may
// use either issue slot, one needing INS0 only slot 0.

resources( INS0, INS1, INS01 = INS0 | INS1,
           ALU0, ALU1, ALU = ALU0 | ALU1,
           MAC,
           DIV,
           BRANCH,
           LDST,
           NEON_FP);
7088 
//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Define the pipeline as a generic 6 stage pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);
7094 
//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.
// These classes encode fixed operand read / result write stages only;
// no full pipeline model is used (see comment above).

// FP two-operand ops, single precision: reads in S1/S2, result in S5.
pipe_class fp_dop_reg_reg_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP two-operand ops, double precision.
pipe_class fp_dop_reg_reg_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP unary ops, single precision.
pipe_class fp_uop_s(vRegF dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// FP unary ops, double precision.
pipe_class fp_uop_d(vRegD dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// double -> float conversion.
pipe_class fp_d2f(vRegF dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// float -> double conversion.
pipe_class fp_f2d(vRegD dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}
7154 
// FP <-> integer conversion pipe classes: all read src in S1 and write
// dst in S5 on issue slot INS01.

// float -> int.
pipe_class fp_f2i(iRegINoSp dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// float -> long.
pipe_class fp_f2l(iRegLNoSp dst, vRegF src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// int -> float.
pipe_class fp_i2f(vRegF dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// long -> float.
pipe_class fp_l2f(vRegF dst, iRegL src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// double -> int.
pipe_class fp_d2i(iRegINoSp dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// double -> long.
pipe_class fp_d2l(iRegLNoSp dst, vRegD src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// int -> double.
pipe_class fp_i2d(vRegD dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}

// long -> double.
// NOTE(review): src is iRegIorL2I here while fp_l2f above uses iRegL --
// verify this asymmetry is intentional.
pipe_class fp_l2d(vRegD dst, iRegIorL2I src)
%{
  single_instruction;
  src    : S1(read);
  dst    : S5(write);
  INS01  : ISS;
  NEON_FP : S5;
%}
7226 
// FP divide, single precision; restricted to issue slot 0 (INS0).
pipe_class fp_div_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS0   : ISS;
  NEON_FP : S5;
%}

// FP divide, double precision; restricted to issue slot 0 (INS0).
pipe_class fp_div_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1   : S1(read);
  src2   : S2(read);
  dst    : S5(write);
  INS0   : ISS;
  NEON_FP : S5;
%}

// FP conditional select, single precision; also reads the flags (cr).
pipe_class fp_cond_reg_reg_s(vRegF dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : S1(read);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP conditional select, double precision.
pipe_class fp_cond_reg_reg_d(vRegD dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : S1(read);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP immediate move, single precision.
pipe_class fp_imm_s(vRegF dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP immediate move, double precision.
pipe_class fp_imm_d(vRegD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// FP constant load, single precision.
pipe_class fp_load_constant_s(vRegF dst)
%{
  single_instruction;
  dst    : S4(write);
  INS01  : ISS;
  NEON_FP : S4;
%}

// FP constant load, double precision.
pipe_class fp_load_constant_d(vRegD dst)
%{
  single_instruction;
  dst    : S4(write);
  INS01  : ISS;
  NEON_FP : S4;
%}
7300 
// Vector multiply, 64-bit vectors; either issue slot.
pipe_class vmul64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// Vector multiply, 128-bit vectors; restricted to issue slot 0.
pipe_class vmul128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// Vector multiply-accumulate, 64-bit vectors. dst appears twice:
// written in S5 and also read in S1 (the accumulator input).
pipe_class vmla64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// Vector multiply-accumulate, 128-bit vectors; issue slot 0 only.
pipe_class vmla128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}
7342 
// 64-bit vector two-operand integer op; sources at S2, result at S4.
pipe_class vdop64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;
  NEON_FP : S4;
%}

// 128-bit vector two-operand integer op; issue slot 0 only.
pipe_class vdop128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S4(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;
  NEON_FP : S4;
%}

// 64-bit vector logical op; shorter latency than vdop (result at S3).
pipe_class vlogical64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// 128-bit vector logical op; issue slot 0 only.
pipe_class vlogical128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S3(write);
  src1   : S2(read);
  src2   : S2(read);
  INS0   : ISS;
  NEON_FP : S3;
%}

// 64-bit vector shift by a (vector) shift-amount register.
pipe_class vshift64(vecD dst, vecD src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// 128-bit vector shift by register; issue slot 0 only.
pipe_class vshift128(vecX dst, vecX src, vecX shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  shift  : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}

// 64-bit vector shift by immediate; the immI shift amount is encoded
// in the instruction, so it needs no pipeline read stage.
pipe_class vshift64_imm(vecD dst, vecD src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// 128-bit vector shift by immediate; issue slot 0 only.
pipe_class vshift128_imm(vecX dst, vecX src, immI shift)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S3;
%}
7420 
// 64-bit vector FP two-operand op; sources at S1, result at S5.
pipe_class vdop_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// 128-bit vector FP two-operand op; issue slot 0 only.
pipe_class vdop_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// 64-bit vector FP multiply/divide; issue slot 0 only.
pipe_class vmuldiv_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// 128-bit vector FP multiply/divide; issue slot 0 only.
pipe_class vmuldiv_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// 128-bit vector FP square root; issue slot 0 only.
pipe_class vsqrt_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}

// 64-bit vector FP unary op (single source).
pipe_class vunop_fp64(vecD dst, vecD src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S5;
%}

// 128-bit vector FP unary op; issue slot 0 only.
pipe_class vunop_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst    : S5(write);
  src    : S1(read);
  INS0   : ISS;
  NEON_FP : S5;
%}
7487 
// Duplicate a GPR lane value into a 64-bit vector register.
pipe_class vdup_reg_reg64(vecD dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate a GPR lane value into a 128-bit vector register.
pipe_class vdup_reg_reg128(vecX dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate a single-precision FP register into a 64-bit vector.
pipe_class vdup_reg_freg64(vecD dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate a single-precision FP register into a 128-bit vector.
pipe_class vdup_reg_freg128(vecX dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate a double-precision FP register into a 128-bit vector.
pipe_class vdup_reg_dreg128(vecX dst, vRegD src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// 64-bit vector move-immediate (MOVI-style, no source register).
pipe_class vmovi_reg_imm64(vecD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// 128-bit vector move-immediate; issue slot 0 only.
pipe_class vmovi_reg_imm128(vecX dst)
%{
  single_instruction;
  dst    : S3(write);
  INS0   : ISS;
  NEON_FP : S3;
%}
7548 
// 64-bit vector load: address operand consumed at issue, result at S5.
// NOTE(review): NEON_FP is booked at S3 here while dst completes at S5,
// unlike the arithmetic classes where the two stages match — presumably
// deliberate modelling of the load pipe; confirm against the core's
// optimization guide.
pipe_class vload_reg_mem64(vecD dst, vmem8 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// 128-bit vector load.
pipe_class vload_reg_mem128(vecX dst, vmem16 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// 64-bit vector store: address at issue, data register read at S2.
pipe_class vstore_reg_mem64(vecD src, vmem8 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
7575 
// 128-bit vector store: address at issue, data register read at S2.
// The src operand is corrected from vecD to vecX to match the 128-bit
// (vmem16) access, consistent with vload_reg_mem128 above; the vecD in
// the original signature was a copy-paste from the 64-bit class
// (pipe_class operand types are descriptive, so this is cosmetic).
pipe_class vstore_reg_mem128(vecX src, vmem16 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
7584 
//------- Integer ALU operations --------------------------

// Integer ALU reg-reg operation
// Operands needed in EX1, result generated in EX2
// Eg.  ADD     x0, x1, x2
pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : EX1(read);
  INS01  : ISS; // Dual issue as instruction 0 or 1
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with constant shift
// Shifted register must be available in LATE_ISS instead of EX1
// Eg.  ADD     x0, x1, x2, LSL #2
pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg operation with constant shift
// Eg.  LSL     x0, x1, #shift
pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with variable shift
// Both operands must be available in LATE_ISS instead of EX1
// Result is available in EX1 instead of EX2
// Eg.  LSLV    x0, x1, x2
pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX1(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX1;
%}

// Integer ALU reg-reg operation with extract
// As for _vshift above, but result generated in EX2
// Eg.  EXTR    x0, x1, x2, #N
// NOTE(review): dst is written at EX2 but the ALU resource is booked
// at EX1, unlike the sibling classes where the ALU stage matches the
// write stage — confirm whether EX1 here is intentional.
pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS1   : ISS; // Can only dual issue as Instruction 1
  ALU    : EX1;
%}

// Integer ALU reg operation
// Eg.  NEG     x0, x1
pipe_class ialu_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : EX2(write);
  src    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg immediate operation
// Eg.  ADD     x0, x1, #N
pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU immediate operation (no source operands)
// Eg.  MOV     x0, #N
pipe_class ialu_imm(iRegI dst)
%{
  single_instruction;
  dst    : EX1(write);
  INS01  : ISS;
  ALU    : EX1;
%}
7682 
//------- Compare operation -------------------------------

// Compare reg-reg
// Eg.  CMP     x0, x1
pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  op2    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Compare reg-imm
// Eg.  CMP     x0, #N
pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

//------- Conditional instructions ------------------------

// Conditional no operands
// Eg.  CSINC   x0, zr, zr, <cond>
pipe_class icond_none(iRegI dst, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 2 operand
// EG.  CSEL    X0, X1, X2, <cond>
pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src1   : EX1(read);
  src2   : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 1 operand
// EG.  CSEL    X0, X1, zr, <cond>
pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src    : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}
7747 
//------- Multiply pipeline operations --------------------

// Multiply reg-reg
// Eg.  MUL     w0, w1, w2
pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Multiply accumulate
// Eg.  MADD    w0, w1, w2, w3
pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Long (64 bit) multiply reg-reg
// Eg.  MUL     x0, x1, x2
pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Long (64 bit) multiply accumulate
// Eg.  MADD    x0, x1, x2, x3
pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

//------- Divide pipeline operations --------------------

// Eg.  SDIV    w0, w1, w2
pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(8); // Maximum latency for 32 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}

// Eg.  SDIV    x0, x1, x2
pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(16); // Maximum latency for 64 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}
7826 
//------- Load pipeline operations ------------------------

// Load - prefetch
// Eg.  PFRM    <mem>
pipe_class iload_prefetch(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, mem
// Eg.  LDR     x0, <mem>
pipe_class iload_reg_mem(iRegI dst, memory mem)
%{
  single_instruction;
  dst    : WR(write);
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, reg
// Eg.  LDR     x0, [sp, x1]
// Here src is the register offset, consumed at issue.
pipe_class iload_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : WR(write);
  src    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

//------- Store pipeline operations -----------------------

// Store - zr, mem
// Eg.  STR     zr, <mem>
pipe_class istore_mem(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, mem
// Eg.  STR     x0, <mem>
// Address consumed at issue; data register read late, at EX2.
pipe_class istore_reg_mem(iRegI src, memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, reg
// Eg. STR      x0, [sp, x1]
// Here dst is the register offset of the address (read at issue);
// src is the data register, read at EX2.
pipe_class istore_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : ISS(read);
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}
7894 
//------- Branch pipeline operations ----------------------
7896 
// Unconditional branch: no register operands, branch unit at EX1.
pipe_class pipe_branch()
%{
  single_instruction;
  INS01  : ISS;
  BRANCH : EX1;
%}

// Conditional branch: reads the flags at EX1.
pipe_class pipe_branch_cond(rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}

// Compare & Branch
// EG.  CBZ/CBNZ
pipe_class pipe_cmp_branch(iRegI op1)
%{
  single_instruction;
  op1    : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}
7923 
//------- Synchronisation operations ----------------------

// Any operation requiring serialization.
// EG.  DMB/Atomic Ops/Load Acquire/Str Release
pipe_class pipe_serial()
%{
  single_instruction;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}

// Generic big/slow expanded idiom - also serialized
pipe_class pipe_slow()
%{
  instruction_count(10);
  multiple_bundles;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}

// Empty pipeline class
pipe_class pipe_class_empty()
%{
  single_instruction;
  fixed_latency(0);
%}

// Default pipeline class.
pipe_class pipe_class_default()
%{
  single_instruction;
  fixed_latency(2);
%}

// Pipeline class for compares.
pipe_class pipe_class_compare()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for memory operations.
pipe_class pipe_class_memory()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for call.
pipe_class pipe_class_call()
%{
  single_instruction;
  fixed_latency(100);
%}

// Define the class for the Nop node.
define %{
   MachNop = pipe_class_empty;
%}
7987 
7988 %}
7989 //----------INSTRUCTIONS-------------------------------------------------------
7990 //
7991 // match      -- States which machine-independent subtree may be replaced
7992 //               by this instruction.
7993 // ins_cost   -- The estimated cost of this instruction is used by instruction
7994 //               selection to identify a minimum cost tree of machine
7995 //               instructions that matches a tree of machine-independent
7996 //               instructions.
7997 // format     -- A string providing the disassembly for this instruction.
7998 //               The value of an instruction's operand may be inserted
7999 //               by referring to it with a '$' prefix.
8000 // opcode     -- Three instruction opcodes may be provided.  These are referred
8001 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
8003 //               indicate the type of machine instruction, while secondary
8004 //               and tertiary are often used for prefix options or addressing
8005 //               modes.
8006 // ins_encode -- A list of encode classes with parameters. The encode class
8007 //               name must have been defined in an 'enc_class' specification
8008 //               in the encode section of the architecture description.
8009 
8010 // ============================================================================
8011 // Memory (Load/Store) Instructions
8012 
8013 // Load Instructions
8014 
// Load Byte (8 bit signed)
// All of the plain load forms below are guarded by a
// !needs_acquiring_load predicate so that loads which require acquire
// semantics do not match them.
instruct loadB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadB mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsbw  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsbw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit signed) into long
// The ConvI2L is folded into the sign-extending load; the predicate
// inspects the underlying LoadB (n->in(1)).
instruct loadB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit unsigned)
instruct loadUB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUB mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrbw  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit unsigned) into long
// Zero-extending load already clears the upper bits, so the plain
// ldrb encoding serves for the widened form too.
instruct loadUB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short (16 bit signed)
instruct loadS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrshw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short (16 bit signed) into long
instruct loadS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrsh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Char (16 bit unsigned)
instruct loadUS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short/Char (16 bit unsigned) into long
instruct loadUS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit signed)
instruct loadI(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadI mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit signed) into long
instruct loadI2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadI mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrsw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit unsigned) into long
// Matches (AndL (ConvI2L (LoadI ...)) 0xFFFFFFFF): the mask is
// absorbed because ldrw zero-extends; the predicate digs two levels
// down to the LoadI itself.
instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
8168 
// Load Long (64 bit signed)
instruct loadL(iRegLNoSp dst, memory mem)
%{
  match(Set dst (LoadL mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  // Disassembly annotation fixed: this is a 64-bit long load ("# long"),
  // not "# int" as previously (copy-paste from loadI).
  format %{ "ldr  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}
8182 
// Load Range (array length); no acquiring predicate needed — range
// loads are never acquiring.
instruct loadRange(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadRange mem));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# range" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Pointer
instruct loadP(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadP mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Compressed Pointer (32-bit narrow oop)
instruct loadN(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadN mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Klass Pointer
instruct loadKlass(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr  $dst, $mem\t# class" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Narrow Klass Pointer
instruct loadNKlass(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadNKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
8251 
// Load Float
// Note: unlike the integer loads above these use pipe_class_memory
// rather than iload_reg_mem.
instruct loadF(vRegF dst, memory mem)
%{
  match(Set dst (LoadF mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrs  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_ldrs(dst, mem) );

  ins_pipe(pipe_class_memory);
%}

// Load Double
instruct loadD(vRegD dst, memory mem)
%{
  match(Set dst (LoadD mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrd  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_ldrd(dst, mem) );

  ins_pipe(pipe_class_memory);
%}
8279 
8280 
// Load Int Constant
instruct loadConI(iRegINoSp dst, immI src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# int" %}

  ins_encode( aarch64_enc_movw_imm(dst, src) );

  ins_pipe(ialu_imm);
%}

// Load Long Constant
instruct loadConL(iRegLNoSp dst, immL src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long" %}

  ins_encode( aarch64_enc_mov_imm(dst, src) );

  ins_pipe(ialu_imm);
%}

// Load Pointer Constant
// Cost is 4 * INSN_COST: materializing an arbitrary 64-bit pointer
// may take a multi-instruction mov sequence.
instruct loadConP(iRegPNoSp dst, immP con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{
    "mov  $dst, $con\t# ptr\n\t"
  %}

  ins_encode(aarch64_enc_mov_p(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Null Pointer Constant
instruct loadConP0(iRegPNoSp dst, immP0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# NULL ptr" %}

  ins_encode(aarch64_enc_mov_p0(dst, con));

  ins_pipe(ialu_imm);
%}
8336 
// Load Pointer Constant One
// (immP_1, used e.g. as a marker value — distinct from loadConP0 above)

instruct loadConP1(iRegPNoSp dst, immP_1 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  // Annotation fixed: this materializes the pointer constant 1, not a
  // NULL pointer ("# NULL ptr" was copy-pasted from loadConP0).
  format %{ "mov  $dst, $con\t# ptr 1" %}

  ins_encode(aarch64_enc_mov_p1(dst, con));

  ins_pipe(ialu_imm);
%}
8350 
// Load Poll Page Constant
// Materialized with ADR (PC-relative), per the format string.

instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Poll Page Ptr" %}

  ins_encode(aarch64_enc_mov_poll_page(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Byte Map Base Constant (card table base)

instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr  $dst, $con\t# Byte Map Base" %}

  ins_encode(aarch64_enc_mov_byte_map_base(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Pointer Constant

instruct loadConN(iRegNNoSp dst, immN con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{ "mov  $dst, $con\t# compressed ptr" %}

  ins_encode(aarch64_enc_mov_n(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Null Pointer Constant

instruct loadConN0(iRegNNoSp dst, immN0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed NULL ptr" %}

  ins_encode(aarch64_enc_mov_n0(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Klass Constant

instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_mov_nk(dst, con));

  ins_pipe(ialu_imm);
%}
8420 
// Load Packed Float Constant
// "Packed" constants are the small set encodable directly in FMOV's
// 8-bit immediate field (immFPacked), avoiding a constant-table load.

instruct loadConF_packed(vRegF dst, immFPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST * 4);
  format %{ "fmovs  $dst, $con"%}
  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
  %}

  ins_pipe(fp_imm_s);
%}

// Load Float Constant (general case: from the constant table)

instruct loadConF(vRegF dst, immF con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 4);

  format %{
    "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
  %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_s);
%}

// Load Packed Double Constant (FMOV-encodable immediate)

instruct loadConD_packed(vRegD dst, immDPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST);
  format %{ "fmovd  $dst, $con"%}
  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
  %}

  ins_pipe(fp_imm_d);
%}
8464 
// Load Double Constant (general case: from the constant table)

instruct loadConD(vRegD dst, immD con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 5);
  // Annotation fixed: this is a double constant; "float=$con" was
  // copy-pasted from loadConF.
  format %{
    "ldrd $dst, [$constantaddress]\t# load from constant table: double=$con\n\t"
  %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_d);
%}
8481 
// Store Instructions

// Store CMS card-mark Immediate
// Selected (via unnecessary_storestore) when no ordering barrier is
// required before the card-mark byte store.
instruct storeimmCM0(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));
  predicate(unnecessary_storestore(n));

  ins_cost(INSN_COST);
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}

// Store CMS card-mark Immediate with intervening StoreStore
// needed when using CMS with no conditional card marking
instruct storeimmCM0_ordered(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));

  ins_cost(INSN_COST * 2);
  format %{ "dmb ishst"
      "\n\tstrb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0_ordered(mem));

  ins_pipe(istore_mem);
%}

// Store Byte
// As with loads, plain store forms are guarded by
// !needs_releasing_store so that releasing stores match elsewhere.
instruct storeB(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreB mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb(src, mem));

  ins_pipe(istore_reg_mem);
%}
8526 
8527 
// Store Byte zero immediate
instruct storeimmB0(immI0 zero, memory mem)
%{
  match(Set mem (StoreB mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Disassembly text fixed: the aarch64_enc_strb0 encoding stores the
  // zero register (cf. storeimmCM0 above); the old "strb rscractch2"
  // was both a typo and inaccurate.
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}
8540 
// Store Char/Short
// 16-bit halfword store; non-volatile only (see storeC_volatile).
instruct storeC(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreC mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_strh(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Char/Short zero: strh of zr avoids needing a source register.
instruct storeimmC0(immI0 zero, memory mem)
%{
  match(Set mem (StoreC mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  zr, $mem\t# short" %}

  ins_encode(aarch64_enc_strh0(mem));

  ins_pipe(istore_mem);
%}
8567 
// Store Integer

// 32-bit word store; non-volatile only (see storeI_volatile).
// NOTE(review): "mem(StoreI" lacks the usual space before the paren;
// ADLC tokenizes it identically to "mem (StoreI".
instruct storeI(iRegIorL2I src, memory mem)
%{
  match(Set mem(StoreI mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Integer zero: strw of zr avoids consuming a source register.
instruct storeimmI0(immI0 zero, memory mem)
%{
  match(Set mem(StoreI mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  zr, $mem\t# int" %}

  ins_encode(aarch64_enc_strw0(mem));

  ins_pipe(istore_mem);
%}
8595 
// Store Long (64 bit signed)
// Fixed format comment: this is a 64-bit long store, so the disassembly
// annotation is "# long" rather than the previous misleading "# int".
instruct storeL(iRegL src, memory mem)
%{
  match(Set mem (StoreL mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Long zero (64 bit signed): str of zr needs no source register.
instruct storeimmL0(immL0 zero, memory mem)
%{
  match(Set mem (StoreL mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  zr, $mem\t# long" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}
8623 
// Store Pointer
// 64-bit pointer store; non-volatile only (see storeP_volatile).
instruct storeP(iRegP src, memory mem)
%{
  match(Set mem (StoreP mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Pointer
// Null-pointer store: str of zr avoids needing a source register.
instruct storeimmP0(immP0 zero, memory mem)
%{
  match(Set mem (StoreP mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str zr, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}
8651 
// Store Compressed Pointer
// Narrow (32-bit) oop store via strw.
instruct storeN(iRegN src, memory mem)
%{
  match(Set mem (StoreN mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store compressed null: when both the oop and klass encoding bases are
// NULL, rheapbase holds zero, so storing it writes a compressed null
// without materializing a zero in another register.
instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
%{
  match(Set mem (StoreN mem zero));
  predicate(Universe::narrow_oop_base() == NULL &&
            Universe::narrow_klass_base() == NULL &&
            (!needs_releasing_store(n)));

  ins_cost(INSN_COST);
  format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}

  ins_encode(aarch64_enc_strw(heapbase, mem));

  ins_pipe(istore_reg_mem);
%}
8680 
// Store Float
// Single-precision FP store from a vector/FP register.
instruct storeF(vRegF src, memory mem)
%{
  match(Set mem (StoreF mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_strs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
// Double-precision FP store from a vector/FP register.
instruct storeD(vRegD src, memory mem)
%{
  match(Set mem (StoreD mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_strd(src, mem) );

  ins_pipe(pipe_class_memory);
%}
8711 
// Store Compressed Klass Pointer
// Narrow (32-bit) klass-pointer store.  Note the predicate precedes match
// here, unlike the sibling rules above; ADLC accepts either order.
instruct storeNKlass(iRegN src, memory mem)
%{
  predicate(!needs_releasing_store(n));
  match(Set mem (StoreNKlass mem src));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}
8725 
8726 // TODO
8727 // implement storeImmD0 and storeDImmPacked
8728 
// prefetch instructions
// Must be safe to execute with invalid address (cannot fault).

// Prefetch for allocation: per the format, emits prfm PSTL1KEEP (prefetch
// for store, L1, temporal) on the anticipated allocation address.
instruct prefetchalloc( memory mem ) %{
  match(PrefetchAllocation mem);

  ins_cost(INSN_COST);
  format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}

  ins_encode( aarch64_enc_prefetchw(mem) );

  ins_pipe(iload_prefetch);
%}
8742 
8743 //  ---------------- volatile loads and stores ----------------
8744 
// Load Byte (8 bit signed)
// Volatile loads use acquire-form ldar* instructions, which only take a
// base register, hence the indirect (not memory) operand.
instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit signed) into long
// Same ldarsb; sign-extension to 64 bits folds the ConvI2L away.
instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned)
instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned) into long
// Same ldarb; zero-extension makes the ConvI2L free.
instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}
8796 
// Load Short (16 bit signed)
// Acquire-form sign-extending halfword load.
instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarshw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Char (16 bit unsigned) acquire-form, zero-extending.
instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarhw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarhw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short/Char (16 bit unsigned) into long
// Zero-extension makes the ConvI2L free.
instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarh(dst, mem));

  ins_pipe(pipe_serial);
%}
8834 
// Load Short/Char (16 bit signed) into long
// Fixed format string: it previously printed "ldarh", but the encoding is
// aarch64_enc_ldarsh, the sign-extending acquire load (compare
// loadUS2L_volatile above, which really does use ldarh).
instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarsh(dst, mem));

  ins_pipe(pipe_serial);
%}
8847 
// Load Integer (32 bit signed)
// Acquire-form 32-bit load.
instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadI mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Integer (32 bit unsigned) into long
// ldarw zero-extends, so the AndL with the 32-bit mask is free.
instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}
8873 
// Load Long (64 bit signed)
// Acquire-form 64-bit load.  Fixed format comment: the disassembly
// annotation is "# long", not the previous misleading "# int".
instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}
8886 
// Load Pointer
// Acquire-form 64-bit pointer load.
instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadP mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Compressed Pointer
// Acquire-form 32-bit narrow-oop load.
instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadN mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Float
// No FP ldar exists; per the enc name this presumably loads via an
// integer acquire-load and moves to the FP register — see the
// aarch64_enc_fldars definition earlier in the file to confirm.
instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadF mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldars  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_fldars(dst, mem) );

  ins_pipe(pipe_serial);
%}

// Load Double
// Double-precision analogue of loadF_volatile.
instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadD mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldard  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_fldard(dst, mem) );

  ins_pipe(pipe_serial);
%}
8938 
// Store Byte
// Volatile stores use release-form stlr* instructions (base register
// only, hence the indirect operand).
instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreB mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_stlrb(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Char/Short
instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreC mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_stlrh(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Integer

instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem(StoreI mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}
8978 
// Store Long (64 bit signed)
// Release-form 64-bit store.  Fixed format comment: the disassembly
// annotation is "# long", not the previous misleading "# int".
instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreL mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}
8991 
// Store Pointer
// Release-form 64-bit pointer store.
instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreP mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Pointer
// Release-form 32-bit narrow-oop store.
instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreN mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Float
// No FP stlr exists; see the aarch64_enc_fstlrs definition earlier in the
// file for how the release store is actually composed.
instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreF mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_fstlrs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
// Double-precision analogue of storeF_volatile.
instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreD mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_fstlrd(src, mem) );

  ins_pipe(pipe_class_memory);
%}
9046 
9047 //  ---------------- end of volatile loads and stores ----------------
9048 
9049 // ============================================================================
9050 // BSWAP Instructions
9051 
9052 instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
9053   match(Set dst (ReverseBytesI src));
9054 
9055   ins_cost(INSN_COST);
9056   format %{ "revw  $dst, $src" %}
9057 
9058   ins_encode %{
9059     __ revw(as_Register($dst$$reg), as_Register($src$$reg));
9060   %}
9061 
9062   ins_pipe(ialu_reg);
9063 %}
9064 
9065 instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
9066   match(Set dst (ReverseBytesL src));
9067 
9068   ins_cost(INSN_COST);
9069   format %{ "rev  $dst, $src" %}
9070 
9071   ins_encode %{
9072     __ rev(as_Register($dst$$reg), as_Register($src$$reg));
9073   %}
9074 
9075   ins_pipe(ialu_reg);
9076 %}
9077 
9078 instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
9079   match(Set dst (ReverseBytesUS src));
9080 
9081   ins_cost(INSN_COST);
9082   format %{ "rev16w  $dst, $src" %}
9083 
9084   ins_encode %{
9085     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
9086   %}
9087 
9088   ins_pipe(ialu_reg);
9089 %}
9090 
9091 instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
9092   match(Set dst (ReverseBytesS src));
9093 
9094   ins_cost(INSN_COST);
9095   format %{ "rev16w  $dst, $src\n\t"
9096             "sbfmw $dst, $dst, #0, #15" %}
9097 
9098   ins_encode %{
9099     __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
9100     __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
9101   %}
9102 
9103   ins_pipe(ialu_reg);
9104 %}
9105 
9106 // ============================================================================
9107 // Zero Count Instructions
9108 
9109 instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
9110   match(Set dst (CountLeadingZerosI src));
9111 
9112   ins_cost(INSN_COST);
9113   format %{ "clzw  $dst, $src" %}
9114   ins_encode %{
9115     __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
9116   %}
9117 
9118   ins_pipe(ialu_reg);
9119 %}
9120 
9121 instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
9122   match(Set dst (CountLeadingZerosL src));
9123 
9124   ins_cost(INSN_COST);
9125   format %{ "clz   $dst, $src" %}
9126   ins_encode %{
9127     __ clz(as_Register($dst$$reg), as_Register($src$$reg));
9128   %}
9129 
9130   ins_pipe(ialu_reg);
9131 %}
9132 
9133 instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
9134   match(Set dst (CountTrailingZerosI src));
9135 
9136   ins_cost(INSN_COST * 2);
9137   format %{ "rbitw  $dst, $src\n\t"
9138             "clzw   $dst, $dst" %}
9139   ins_encode %{
9140     __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
9141     __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
9142   %}
9143 
9144   ins_pipe(ialu_reg);
9145 %}
9146 
9147 instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
9148   match(Set dst (CountTrailingZerosL src));
9149 
9150   ins_cost(INSN_COST * 2);
9151   format %{ "rbit   $dst, $src\n\t"
9152             "clz    $dst, $dst" %}
9153   ins_encode %{
9154     __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
9155     __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
9156   %}
9157 
9158   ins_pipe(ialu_reg);
9159 %}
9160 
//---------- Population Count Instructions -------------------------------------
//
// All four rules use the SIMD cnt (per-byte popcount) + addv (horizontal
// add) sequence, as AArch64 has no scalar popcount instruction.

instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "movw   $src, $src\n\t"
            "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    // NOTE(review): writes back to $src (movw src,src to clear the upper
    // 32 bits) — benign for an I-typed register but worth knowing.
    __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Memory form: loads the 32-bit operand straight into the FP temp (ldrs),
// skipping the integer register entirely.
instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrs   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, is_load, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Memory form of popCountL: 64-bit ldrd straight into the FP temp.
instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrd   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, is_load, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
9250 
9251 // ============================================================================
9252 // MemBar Instruction
9253 
9254 instruct load_fence() %{
9255   match(LoadFence);
9256   ins_cost(VOLATILE_REF_COST);
9257 
9258   format %{ "load_fence" %}
9259 
9260   ins_encode %{
9261     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
9262   %}
9263   ins_pipe(pipe_serial);
9264 %}
9265 
9266 instruct unnecessary_membar_acquire() %{
9267   predicate(unnecessary_acquire(n));
9268   match(MemBarAcquire);
9269   ins_cost(0);
9270 
9271   format %{ "membar_acquire (elided)" %}
9272 
9273   ins_encode %{
9274     __ block_comment("membar_acquire (elided)");
9275   %}
9276 
9277   ins_pipe(pipe_class_empty);
9278 %}
9279 
9280 instruct membar_acquire() %{
9281   match(MemBarAcquire);
9282   ins_cost(VOLATILE_REF_COST);
9283 
9284   format %{ "membar_acquire" %}
9285 
9286   ins_encode %{
9287     __ block_comment("membar_acquire");
9288     __ membar(Assembler::LoadLoad|Assembler::LoadStore);
9289   %}
9290 
9291   ins_pipe(pipe_serial);
9292 %}
9293 
9294 
9295 instruct membar_acquire_lock() %{
9296   match(MemBarAcquireLock);
9297   ins_cost(VOLATILE_REF_COST);
9298 
9299   format %{ "membar_acquire_lock (elided)" %}
9300 
9301   ins_encode %{
9302     __ block_comment("membar_acquire_lock (elided)");
9303   %}
9304 
9305   ins_pipe(pipe_serial);
9306 %}
9307 
9308 instruct store_fence() %{
9309   match(StoreFence);
9310   ins_cost(VOLATILE_REF_COST);
9311 
9312   format %{ "store_fence" %}
9313 
9314   ins_encode %{
9315     __ membar(Assembler::LoadStore|Assembler::StoreStore);
9316   %}
9317   ins_pipe(pipe_serial);
9318 %}
9319 
// Elided release barrier: matches when unnecessary_release(n) proves a
// following stlr supplies the ordering; emits only a comment.
instruct unnecessary_membar_release() %{
  predicate(unnecessary_release(n));
  match(MemBarRelease);
  ins_cost(0);

  format %{ "membar_release (elided)" %}

  ins_encode %{
    __ block_comment("membar_release (elided)");
  %}
  ins_pipe(pipe_serial);
%}

// Full release barrier: LoadStore|StoreStore dmb.
instruct membar_release() %{
  match(MemBarRelease);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_release" %}

  ins_encode %{
    __ block_comment("membar_release");
    __ membar(Assembler::LoadStore|Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}

// StoreStore-only barrier.
instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(VOLATILE_REF_COST);

  format %{ "MEMBAR-store-store" %}

  ins_encode %{
    __ membar(Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}

// Lock-release barrier is always elided; only a block comment is emitted.
instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_release_lock (elided)" %}

  ins_encode %{
    __ block_comment("membar_release_lock (elided)");
  %}

  ins_pipe(pipe_serial);
%}

// Elided full barrier when unnecessary_volatile(n) proves the surrounding
// ldar/stlr pair already gives the required ordering.
instruct unnecessary_membar_volatile() %{
  predicate(unnecessary_volatile(n));
  match(MemBarVolatile);
  ins_cost(0);

  format %{ "membar_volatile (elided)" %}

  ins_encode %{
    __ block_comment("membar_volatile (elided)");
  %}

  ins_pipe(pipe_serial);
%}

// Full volatile barrier: StoreLoad dmb, the most expensive ordering,
// hence the inflated cost to steer the matcher to cheaper rules.
instruct membar_volatile() %{
  match(MemBarVolatile);
  ins_cost(VOLATILE_REF_COST*100);

  format %{ "membar_volatile" %}

  ins_encode %{
    __ block_comment("membar_volatile");
    __ membar(Assembler::StoreLoad);
  %}

  ins_pipe(pipe_serial);
%}
9398 
9399 // ============================================================================
9400 // Cast/Convert Instructions
9401 
9402 instruct castX2P(iRegPNoSp dst, iRegL src) %{
9403   match(Set dst (CastX2P src));
9404 
9405   ins_cost(INSN_COST);
9406   format %{ "mov $dst, $src\t# long -> ptr" %}
9407 
9408   ins_encode %{
9409     if ($dst$$reg != $src$$reg) {
9410       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
9411     }
9412   %}
9413 
9414   ins_pipe(ialu_reg);
9415 %}
9416 
9417 instruct castP2X(iRegLNoSp dst, iRegP src) %{
9418   match(Set dst (CastP2X src));
9419 
9420   ins_cost(INSN_COST);
9421   format %{ "mov $dst, $src\t# ptr -> long" %}
9422 
9423   ins_encode %{
9424     if ($dst$$reg != $src$$reg) {
9425       __ mov(as_Register($dst$$reg), as_Register($src$$reg));
9426     }
9427   %}
9428 
9429   ins_pipe(ialu_reg);
9430 %}
9431 
9432 // Convert oop into int for vectors alignment masking
9433 instruct convP2I(iRegINoSp dst, iRegP src) %{
9434   match(Set dst (ConvL2I (CastP2X src)));
9435 
9436   ins_cost(INSN_COST);
9437   format %{ "movw $dst, $src\t# ptr -> int" %}
9438   ins_encode %{
9439     __ movw($dst$$Register, $src$$Register);
9440   %}
9441 
9442   ins_pipe(ialu_reg);
9443 %}
9444 
// Convert compressed oop into int for vectors alignment masking
// in case of 32bit oops (heap < 4Gb).
// With a zero narrow-oop shift the compressed form is already the low
// 32 bits of the address, so a movw suffices.
// Fixed format string: it previously read "mov dst, $src" — the dst
// operand was missing its '$' substitution and the mnemonic did not match
// the movw emitted by the encoding.
instruct convN2I(iRegINoSp dst, iRegN src)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  ins_cost(INSN_COST);
  format %{ "movw $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
9460 
// Shenandoah read barrier: loads the forwarding (Brooks) pointer stored
// just before the object, yielding the to-space copy if one exists.
// NOTE(review): cr is declared but no effect() clause kills it — flags
// appear untouched by the single ldr; confirm against the matcher rules.
instruct shenandoahRB(iRegPNoSp dst, iRegP src, rFlagsReg cr) %{
  match(Set dst (ShenandoahReadBarrier src));
  format %{ "shenandoah_rb $dst,$src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ ldr(d, Address(s, BrooksPointer::byte_offset()));
  %}
  ins_pipe(pipe_class_memory);
%}

// Shenandoah write barrier: result is pinned to r0 (iRegP_R0) for the
// runtime call inside shenandoah_write_barrier; clobbers flags.
instruct shenandoahWB(iRegP_R0 dst, iRegP src, rFlagsReg cr) %{
  match(Set dst (ShenandoahWriteBarrier src));
  effect(KILL cr);

  format %{ "shenandoah_wb $dst,$src" %}
  ins_encode %{
    Label done;
    Register s = $src$$Register;
    Register d = $dst$$Register;
    assert(d == r0, "result in r0");
    __ block_comment("Shenandoah write barrier {");
    // We need that first read barrier in order to trigger a SEGV/NPE on incoming NULL.
    // Also, it brings s into d in preparation for the call to shenandoah_write_barrier().
    __ ldr(d, Address(s, BrooksPointer::byte_offset()));
    __ shenandoah_write_barrier(d);
    __ block_comment("} Shenandoah write barrier");
  %}
  ins_pipe(pipe_slow);
%}
9491 
9492 
// Convert oop pointer into compressed form
// General case: the oop may be NULL, so encode_heap_oop must test for it;
// that path clobbers flags, hence KILL cr.
instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ encode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

// Known-non-null oop: no NULL check needed, and no KILL cr.
instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  match(Set dst (EncodeP src));
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// General decode: value may be NULL (but not a constant, which folds).
instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

// Known-non-null (or constant) decode: skips the NULL check.
instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop_not_null(d, s);
  %}
  ins_pipe(ialu_reg);
%}
9546 
// n.b. AArch64 implementations of encode_klass_not_null and
// decode_klass_not_null do not modify the flags register so, unlike
// Intel, we don't kill CR as a side effect here

instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
  match(Set dst (EncodePKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "encode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    __ encode_klass_not_null(dst_reg, src_reg);
  %}

   ins_pipe(ialu_reg);
%}

// Decode uses the one-register MacroAssembler overload when dst == src.
instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
  match(Set dst (DecodeNKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "decode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    if (dst_reg != src_reg) {
      __ decode_klass_not_null(dst_reg, src_reg);
    } else {
      __ decode_klass_not_null(dst_reg);
    }
  %}

   ins_pipe(ialu_reg);
%}
9584 
// Type-refinement nodes: src and dst are the same register, so all three
// rules emit zero bytes of code (size(0), empty encoding).
instruct checkCastPP(iRegPNoSp dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

instruct castPP(iRegPNoSp dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

instruct castII(iRegI dst)
%{
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(pipe_class_empty);
%}
9615 
9616 // ============================================================================
9617 // Atomic operation instructions
9618 //
9619 // Intel and SPARC both implement Ideal Node LoadPLocked and
9620 // Store{PIL}Conditional instructions using a normal load for the
9621 // LoadPLocked and a CAS for the Store{PIL}Conditional.
9622 //
9623 // The ideal code appears only to use LoadPLocked/StorePLocked as a
9624 // pair to lock object allocations from Eden space when not using
9625 // TLABs.
9626 //
9627 // There does not appear to be a Load{IL}Locked Ideal Node and the
9628 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
9629 // and to use StoreIConditional only for 32-bit and StoreLConditional
9630 // only for 64-bit.
9631 //
9632 // We implement LoadPLocked and StorePLocked instructions using,
9633 // respectively the AArch64 hw load-exclusive and store-conditional
9634 // instructions. Whereas we must implement each of
9635 // Store{IL}Conditional using a CAS which employs a pair of
9636 // instructions comprising a load-exclusive followed by a
9637 // store-conditional.
9638 
9639 
// Locked-load (linked load) of the current heap-top
// used when updating the eden heap top
// implemented using ldaxr on AArch64

// Establishes the exclusive monitor that the paired storePConditional's
// stlxr will test.
instruct loadPLocked(iRegPNoSp dst, indirect mem)
%{
  match(Set dst (LoadPLocked mem));

  ins_cost(VOLATILE_REF_COST);

  format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}

  ins_encode(aarch64_enc_ldaxr(dst, mem));

  ins_pipe(pipe_serial);
%}
9656 
9657 // Conditional-store of the updated heap-top.
9658 // Used during allocation of the shared heap.
9659 // Sets flag (EQ) on success.
9660 // implemented using stlxr on AArch64.
9661 
// Store-conditional (stlxr) paired with the ldaxr of loadPLocked above.
// NOTE(review): the encoding only consumes $newval and $heap_top_ptr;
// $oldval is not passed because the compare is implicit in the exclusive
// monitor set up by the preceding loadPLocked — confirm against
// aarch64_enc_stlxr. Success is signalled by EQ in $cr (rscratch1 == 0).
instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
%{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

 // TODO
 // do we need to do a store-conditional release or can we just use a
 // plain store-conditional?

  format %{
    "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));

  ins_pipe(pipe_serial);
%}
9681 
9682 
9683 // storeLConditional is used by PhaseMacroExpand::expand_lock_node
9684 // when attempting to rebias a lock towards the current thread.  We
9685 // must use the acquire form of cmpxchg in order to guarantee acquire
9686 // semantics in this case.
// 64-bit conditional store implemented as an acquiring CAS (see the
// comment above on why acquire semantics are required for rebiasing).
// Result is delivered in the flags: EQ on successful write.
instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
9702 
9703 // storeIConditional also has acquire semantics, for no better reason
9704 // than matching storeLConditional.  At the time of writing this
9705 // comment storeIConditional was not used anywhere by AArch64.
// 32-bit analogue of storeLConditional; acquiring CAS, EQ on success.
instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
9721 
9722 // standard CompareAndSwapX when we are using barriers
9723 // these have higher priority than the rules selected by a predicate
9724 
9725 // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
9726 // can't match them
9727 
// CompareAndSwapB: byte CAS; $res <- 1 on success, 0 on failure (cset EQ).
instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgb $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgb(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9745 
// CompareAndSwapS: halfword CAS; $res <- 1 on success, 0 on failure.
instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgs $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgs(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9763 
// CompareAndSwapI: 32-bit CAS; $res <- 1 on success, 0 on failure.
instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9781 
// CompareAndSwapL: 64-bit CAS; $res <- 1 on success, 0 on failure.
instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9799 
// CompareAndSwapP: pointer CAS, plain (non-Shenandoah) path. The
// predicate also lets this rule match under Shenandoah when the
// expected value is null (no false-negative barrier needed then).
instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  predicate(!UseShenandoahGC || !ShenandoahCASBarrier || n->in(3)->in(1)->bottom_type() == TypePtr::NULL_PTR);
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
// Shenandoah pointer CAS with the false-negative-tolerant barrier;
// needs a TEMP register for the retry loop in the shenandoah encoding.
instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{

  predicate(UseShenandoahGC && ShenandoahCASBarrier);
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(3 * VOLATILE_REF_COST);

  effect(TEMP tmp, KILL cr);

  format %{
    "cmpxchg_oop_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchg_oop_shenandoah(mem, oldval, newval, tmp),
             aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9836 
// CompareAndSwapN: narrow-oop CAS, plain (non-Shenandoah) path.
instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  predicate(!UseShenandoahGC || !ShenandoahCASBarrier);
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9855 
// Shenandoah narrow-oop CAS. Inline encoding: $oldval is copied into
// $tmp first because cmpxchg_oop_shenandoah clobbers its expected-value
// register; release-only ordering (acquire=false) for the non-Acq rule.
instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{

  predicate(UseShenandoahGC && ShenandoahCASBarrier);
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(3 * VOLATILE_REF_COST);

  effect(TEMP tmp, KILL cr);

  format %{
    "cmpxchg_narrow_oop_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode %{
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    __ cmpxchg_oop_shenandoah($mem$$Register, tmp, $newval$$Register, Assembler::word, /*acquire*/ false, /*release*/ true, /*weak*/ false);
    __ cset($res$$Register, Assembler::EQ);
  %}

  ins_pipe(pipe_slow);
%}
9878 
9879 // alternative CompareAndSwapX when we are eliding barriers
9880 
// Acquiring variant of compareAndSwapI, selected when the preceding
// load-exclusive must have acquire semantics (volatile access).
instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9899 
// Acquiring variant of compareAndSwapL (see compareAndSwapIAcq).
instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9918 
// Acquiring variant of compareAndSwapP; same Shenandoah null-expected
// exception in the predicate as the non-Acq rule.
instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n) && (!UseShenandoahGC || !ShenandoahCASBarrier || n->in(3)->in(1)->bottom_type() == TypePtr::NULL_PTR));
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9937 
// Acquiring variant of the Shenandoah pointer CAS.
instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n) && UseShenandoahGC && ShenandoahCASBarrier);
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(TEMP tmp, KILL cr);

  format %{
    "cmpxchg_acq_oop_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchg_acq_oop_shenandoah(mem, oldval, newval, tmp),
             aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9956 
// Acquiring variant of compareAndSwapN (non-Shenandoah path).
instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n) && (!UseShenandoahGC || !ShenandoahCASBarrier));
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9975 
// Acquiring variant of the Shenandoah narrow-oop CAS; identical to
// compareAndSwapN_shenandoah except acquire=true in the cmpxchg call.
instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n) && UseShenandoahGC && ShenandoahCASBarrier);
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(3 * VOLATILE_REF_COST);

  effect(TEMP tmp, KILL cr);

 format %{
    "cmpxchg_narrow_oop_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

  ins_encode %{
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    __ cmpxchg_oop_shenandoah($mem$$Register, tmp, $newval$$Register, Assembler::word, /*acquire*/ true, /*release*/ true, /*weak*/ false);
    __ cset($res$$Register, Assembler::EQ);
  %}

  ins_pipe(pipe_slow);
%}
9998 
9999 // ---------------------------------------------------------------------
10000 
10001 
10002 // BEGIN This section of the file is automatically generated. Do not edit --------------
10003 
10004 // Sundry CAS operations.  Note that release is always true,
10005 // regardless of the memory ordering of the CAS.  This is because we
10006 // need the volatile case to be sequentially consistent but there is
10007 // no trailing StoreLoad barrier emitted by C2.  Unfortunately we
10008 // can't check the type of memory ordering here, so we always emit a
10009 // STLXR.
10010 
10011 // This section is generated from aarch64_ad_cas.m4
10012 
10013 
10014 
// CompareAndExchangeB: strong byte CAS returning the previous value,
// zero-extended for the compare and sign-extended into $res.
// NOTE(review): the format text says "weak" but /*weak*/ is false here —
// cosmetic only; a fix belongs in aarch64_ad_cas.m4, not this file.
instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ uxtbw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxtbw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10031 
// CompareAndExchangeS: strong halfword CAS returning the previous value
// (uxthw for the compare, sxthw into $res). NOTE(review): "weak" in the
// format text is misleading — /*weak*/ is false; fix in the m4 source.
instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ uxthw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxthw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10048 
// CompareAndExchangeI: strong 32-bit CAS returning the previous value.
// NOTE(review): format says "weak" but /*weak*/ is false (m4-generated).
instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10063 
// CompareAndExchangeL: strong 64-bit CAS returning the previous value.
// NOTE(review): format says "weak" but /*weak*/ is false (m4-generated).
instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10078 
// CompareAndExchangeN: strong narrow-oop CAS, non-Shenandoah path.
// NOTE(review): format says "weak" but /*weak*/ is false (m4-generated).
instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  predicate(!UseShenandoahGC || !ShenandoahCASBarrier);
  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10094 
// Shenandoah strong narrow-oop CompareAndExchange; $oldval is copied to
// $tmp because the shenandoah cmpxchg clobbers the expected value.
instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
  predicate(UseShenandoahGC && ShenandoahCASBarrier);
  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
  ins_cost(3 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, TEMP tmp, KILL cr);
  format %{
    "cmpxchg_oop_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    __ cmpxchg_oop_shenandoah($mem$$Register, tmp, $newval$$Register,
                              Assembler::word, /*acquire*/ false, /*release*/ true, /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10111 
// CompareAndExchangeP: strong pointer CAS, non-Shenandoah path (or
// Shenandoah with a null expected value — see the predicate).
// NOTE(review): format says "weak" but /*weak*/ is false (m4-generated).
instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  predicate(!UseShenandoahGC || !ShenandoahCASBarrier || n->in(3)->in(1)->bottom_type() == TypePtr::NULL_PTR);
  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10127 
// Shenandoah strong pointer CompareAndExchange; previous value lands in
// $res via the final $res$$Register argument.
instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
  predicate(UseShenandoahGC && ShenandoahCASBarrier);
  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
  ins_cost(3 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, TEMP tmp, KILL cr);
  format %{
    "cmpxchg_oop_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp"
  %}
  ins_encode %{
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    __ cmpxchg_oop_shenandoah($mem$$Register, tmp, $newval$$Register,
                              Assembler::xword, /*acquire*/ false, /*release*/ true, /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
10144 
// WeakCompareAndSwapB: weak byte CAS (may fail spuriously); $res is the
// boolean success flag via csetw, not the previous value.
instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ uxtbw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
10162 
// WeakCompareAndSwapS: weak halfword CAS; $res is the success flag.
instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ uxthw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
10180 
// WeakCompareAndSwapI: weak 32-bit CAS; $res is the success flag.
instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
10197 
// WeakCompareAndSwapL: weak 64-bit CAS; $res (int) is the success flag.
instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
10214 
// WeakCompareAndSwapN: weak narrow-oop CAS, non-Shenandoah path.
instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  predicate(!UseShenandoahGC || !ShenandoahCASBarrier);
  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
10232 
// Shenandoah weak narrow-oop CAS; success flag derived from EQ.
instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{
  predicate(UseShenandoahGC && ShenandoahCASBarrier);
  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
  ins_cost(3 * VOLATILE_REF_COST);
  effect(TEMP tmp, KILL cr);
  format %{
    "cmpxchg_oop_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    __ cmpxchg_oop_shenandoah($mem$$Register, tmp, $newval$$Register,
                              Assembler::word, /*acquire*/ false, /*release*/ true, /*weak*/ true);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
10251 
// WeakCompareAndSwapP: weak pointer CAS, non-Shenandoah path (or a null
// expected value under Shenandoah — see the predicate).
instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  predicate(!UseShenandoahGC || !ShenandoahCASBarrier || n->in(3)->in(1)->bottom_type() == TypePtr::NULL_PTR);
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
10269 
// Shenandoah weak pointer CAS; success flag derived from EQ.
instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{
  predicate(UseShenandoahGC && ShenandoahCASBarrier);
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
  ins_cost(3 * VOLATILE_REF_COST);
  effect(TEMP tmp, KILL cr);
  format %{
    "cmpxchg_oop_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    Register tmp = $tmp$$Register;
    __ mov(tmp, $oldval$$Register); // Must not clobber oldval.
    __ cmpxchg_oop_shenandoah($mem$$Register, tmp, $newval$$Register,
                              Assembler::xword, /*acquire*/ false, /*release*/ true, /*weak*/ true);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
10288 // END This section of the file is automatically generated. Do not edit --------------
10289 // ---------------------------------------------------------------------
10290 
// GetAndSetI: 32-bit atomic exchange; $prev receives the old value.
instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetI mem newv));
  format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10299 
// GetAndSetL: 64-bit atomic exchange; $prev receives the old value.
instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) %{
  match(Set prev (GetAndSetL mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10308 
// GetAndSetN: narrow-oop atomic exchange (32-bit xchg).
instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetN mem newv));
  format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10317 
// GetAndSetP: pointer atomic exchange (64-bit xchg).
instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) %{
  match(Set prev (GetAndSetP mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10326 
10327 
// GetAndAddL: 64-bit atomic fetch-and-add with a register increment;
// $newval receives the value fetched from memory.
instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10337 
// GetAndAddL with an unused result: passes noreg so no fetched value is
// kept; slightly cheaper (cost * 9) so it wins when the predicate holds.
instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10348 
// GetAndAddL with an immediate (add/sub-encodable) increment.
instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10358 
// Immediate-increment GetAndAddL with an unused result (noreg).
instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10369 
// GetAndAddI: 32-bit atomic fetch-and-add with a register increment.
instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10379 
// 32-bit GetAndAddI with an unused result (noreg).
instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10390 
// GetAndAddI with an immediate (add/sub-encodable) increment.
instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10400 
// Immediate-increment GetAndAddI with an unused result (noreg).
instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
10411 
10412 // Manifest a CmpL result in an integer register.
10413 // (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
// CmpL3 register-register: three-instruction sequence —
//   cmp sets flags; csetw dst = (src1 != src2) ? 1 : 0;
//   cnegw negates dst when LT, yielding -1 / 0 / 1.
instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  // format %{ "CmpL3 $dst, $src1, $src2" %}
  ins_encode %{
    __ cmp($src1$$Register, $src2$$Register);
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
10434 
// CmpL3 register-immediate: same cset/cneg trick as cmpL3_reg_reg.
// A negative constant is compared via adds with the negated value, so
// the emitted immediate is always non-negative (immLAddSub range —
// so negating $src2$$constant here cannot overflow).
instruct cmpL3_reg_imm(iRegINoSp dst, iRegL src1, immLAddSub src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    int32_t con = (int32_t)$src2$$constant;
     if (con < 0) {
      __ adds(zr, $src1$$Register, -con);
    } else {
      __ subs(zr, $src1$$Register, con);
    }
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
10459 
10460 // ============================================================================
10461 // Conditional Move Instructions
10462 
10463 // n.b. we have identical rules for both a signed compare op (cmpOp)
10464 // and an unsigned compare op (cmpOpU). it would be nice if we could
10465 // define an op class which merged both inputs and use it to type the
// argument to a single rule. unfortunately this fails because the
10467 // opclass does not live up to the COND_INTER interface of its
10468 // component operands. When the generic code tries to negate the
// operand it ends up running the generic MachOper::negate method
10470 // which throws a ShouldNotHappen. So, we have to provide two flavours
10471 // of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
10472 
// Conditional move, int: dst = cmp ? src2 : src1 (cselw selects the
// second source when the condition holds). Signed condition codes.
instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// Unsigned-condition flavour of the rule above (separate rules are
// needed for cmpOp/cmpOpU; see the comment at the top of this section).
instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10504 
10505 // special cases where one arg is zero
10506 
10507 // n.b. this is selected in preference to the rule above because it
10508 // avoids loading constant 0 into a source register
10509 
10510 // TODO
10511 // we ought only to be able to cull one of these variants as the ideal
10512 // transforms ought always to order the zero consistently (to left/right?)
10513 
// dst = cmp ? src : 0 — uses the zero register instead of materializing
// the constant 0. Signed conditions.
instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// dst = cmp ? src : 0, unsigned conditions.
instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// dst = cmp ? 0 : src, signed conditions.
instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// dst = cmp ? 0 : src, unsigned conditions.
instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10577 
10578 // special case for creating a boolean 0 or 1
10579 
10580 // n.b. this is selected in preference to the rule above because it
10581 // avoids loading constants 0 and 1 into a source register
10582 
// Materialize a boolean: dst = cmp ? 1 : 0 via csincw zr, zr (zr+1 on
// the false path), needing no source registers at all. Signed conditions.
instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}

// Boolean materialization as above, unsigned conditions.
instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
10620 
// Conditional move, long: dst = cmp ? src2 : src1. Signed conditions.
instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// Conditional move, long, unsigned conditions.
instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10652 
10653 // special cases where one arg is zero
10654 
// dst = cmp ? 0 : src (long) — zero register avoids loading constant 0.
instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// dst = cmp ? 0 : src (long), unsigned conditions.
instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// dst = cmp ? src : 0 (long), signed conditions.
instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// dst = cmp ? src : 0 (long), unsigned conditions.
instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10718 
// Conditional move, pointer: dst = cmp ? src2 : src1. Signed conditions.
instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

// Conditional move, pointer, unsigned conditions.
instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10750 
10751 // special cases where one arg is zero
10752 
// dst = cmp ? NULL : src (ptr) — zero register supplies the null.
instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// dst = cmp ? NULL : src (ptr), unsigned conditions.
instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// dst = cmp ? src : NULL (ptr), signed conditions.
instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// dst = cmp ? src : NULL (ptr), unsigned conditions.
instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10816 
// Conditional move, compressed pointer (narrow oop, 32-bit cselw):
// dst = cmp ? src2 : src1. Signed conditions.
instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10832 
// Conditional move, compressed pointer, unsigned conditions:
// dst = cmp ? src2 : src1.
// (Format fixed: it previously said "signed" although this is the
// cmpOpU/unsigned flavour — every other U-variant says "unsigned".)
instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10848 
10849 // special cases where one arg is zero
10850 
// dst = cmp ? 0 : src (compressed ptr) — zero register supplies the null.
instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// dst = cmp ? 0 : src (compressed ptr), unsigned conditions.
instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// dst = cmp ? src : 0 (compressed ptr), signed conditions.
instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// dst = cmp ? src : 0 (compressed ptr), unsigned conditions.
instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10914 
// Conditional move, float: dst = cmp ? src2 : src1 via fcsels
// (note the swapped operand order in the fcsels call). Signed conditions.
instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}

// Conditional move, float, unsigned conditions.
instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
10950 
// Conditional move, double: dst = cmp ? src2 : src1 via fcseld.
// Signed conditions.
// (Format comments fixed: they previously said "cmove float" although
// these are the CMoveD/double rules.)
instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}

// Conditional move, double, unsigned conditions.
instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
10986 
10987 // ============================================================================
10988 // Arithmetic Instructions
10989 //
10990 
10991 // Integer Addition
10992 
10993 // TODO
10994 // these currently employ operations which do not set CR and hence are
10995 // not flagged as killing CR but we would like to isolate the cases
10996 // where we want to set flags from those where we don't. need to work
10997 // out how to do that.
10998 
// 32-bit register-register add.
instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// 32-bit add with an add/sub-encodable immediate.
instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}

// As above where the 32-bit source is the low half of a long
// (ConvL2I folded into the addw, which only reads the low 32 bits).
instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
  match(Set dst (AddI (ConvL2I src1) src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
11041 
11042 // Pointer Addition
instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Pointer add with an int offset: the ConvI2L is folded into the add's
// sxtw extension.
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
  match(Set dst (AddP src1 (ConvI2L src2)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Pointer add with a scaled (left-shifted) long index, folded into a
// single lea with an lsl addressing mode.
instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL src2 scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::lsl($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// Pointer add with a sign-extended, scaled int index: both the ConvI2L
// and the shift are folded into the lea's sxtw addressing mode.
instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::sxtw($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11102 
// Sign-extend an int and left-shift it in one sbfiz: the insert
// position is the shift amount ($scale & 63) and the inserted field
// width is min(32, 64 - shift) — capped at 32 since the source is an
// int. NOTE(review): the MIN arm relies on (-scale) & 63 == 64 - scale
// for scale in 1..63; scale == 0 would give width 32 — presumably the
// matcher never produces a zero shift here; confirm against callers.
instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
  match(Set dst (LShiftL (ConvI2L src) scale));

  ins_cost(INSN_COST);
  format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}

  ins_encode %{
    __ sbfiz(as_Register($dst$$reg),
          as_Register($src$$reg),
          $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
  %}

  ins_pipe(ialu_reg_shift);
%}
11117 
11118 // Pointer Immediate Addition
11119 // n.b. this needs to be more expensive than using an indirect memory
11120 // operand
instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
11134 
11135 // Long Addition
// 64-bit register-register add.
instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11151 
// Long Immediate Addition. No constant pool entries required.
// 64-bit add with an add/sub-encodable immediate.
instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
11166 
11167 // Integer Subtraction
// 32-bit register-register subtract.
instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Immediate Subtraction
instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
11197 
11198 // Long Subtraction
// 64-bit register-register subtract.
instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
11214 
// Long Immediate Subtraction. No constant pool entries required.
// 64-bit subtract with an add/sub-encodable immediate.
// (Format fixed: it previously read "sub$dst" with no separator.)
instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
11229 
11230 // Integer Negation (special case for sub)
11231 
// 32-bit negate: dst = 0 - src.
instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
  match(Set dst (SubI zero src));

  ins_cost(INSN_COST);
  format %{ "negw $dst, $src\t# int" %}

  ins_encode %{
    __ negw(as_Register($dst$$reg),
            as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Long Negation

// 64-bit negate: dst = 0 - src.
instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero, rFlagsReg cr) %{
  match(Set dst (SubL zero src));

  ins_cost(INSN_COST);
  format %{ "neg $dst, $src\t# long" %}

  ins_encode %{
    __ neg(as_Register($dst$$reg),
           as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
11261 
11262 // Integer Multiply
11263 
// 32-bit multiply.
instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulI src1 src2));

  ins_cost(INSN_COST * 3);
  format %{ "mulw  $dst, $src1, $src2" %}

  ins_encode %{
    __ mulw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}

// Widening 32x32 -> 64-bit signed multiply: both ConvI2L nodes are
// folded into a single smull.
instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));

  ins_cost(INSN_COST * 3);
  format %{ "smull  $dst, $src1, $src2" %}

  ins_encode %{
    __ smull(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}
11293 
11294 // Long Multiply
11295 
// 64-bit multiply (low 64 bits of the product).
instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (MulL src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "mul  $dst, $src1, $src2" %}

  ins_encode %{
    __ mul(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}

// High 64 bits of the signed 64x64 -> 128-bit product.
instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
%{
  match(Set dst (MulHiL src1 src2));

  ins_cost(INSN_COST * 7);
  format %{ "smulh   $dst, $src1, $src2, \t# mulhi" %}

  ins_encode %{
    __ smulh(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
11326 
11327 // Combined Integer Multiply & Add/Sub
11328 
// Fused 32-bit multiply-add: dst = src3 + src1 * src2.
// (Formats fixed to show the 32-bit mnemonics maddw/msubw, matching
// the instructions actually emitted by the encodings.)
instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (AddI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  format %{ "maddw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ maddw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}

// Fused 32-bit multiply-subtract: dst = src3 - src1 * src2.
instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (SubI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  format %{ "msubw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msubw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
11360 
11361 // Combined Long Multiply & Add/Sub
11362 
// Fused 64-bit multiply-add: dst = src3 + src1 * src2.
instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (AddL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "madd  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ madd(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}

// Fused 64-bit multiply-subtract: dst = src3 - src1 * src2.
instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (SubL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "msub  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msub(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
11394 
11395 // Integer Divide
11396 
// 32-bit signed divide.
instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (DivI src1 src2));

  ins_cost(INSN_COST * 19);
  format %{ "sdivw  $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_divw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}

// (src >> 31) >>> 31 extracts the sign bit (0 or 1) — a single
// logical shift right by 31.
instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
  match(Set dst (URShiftI (RShiftI src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, $div1" %}
  ins_encode %{
    __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// src + sign-bit(src): the rounding adjustment the compiler emits
// before an arithmetic shift when dividing by 2 — folded into one
// addw with a shifted operand.
instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
  match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "addw $dst, $src, LSR $div1" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 31);
  %}
  ins_pipe(ialu_reg);
%}
11430 
11431 // Long Divide
11432 
// 64-bit signed division via SDIV; the high cost reflects the multi-cycle divide.
instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (DivL src1 src2));

  ins_cost(INSN_COST * 35);
  format %{ "sdiv   $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_div(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
11442 
// Long counterpart of signExtract: (src1 >> 63) >>> 63 extracts the sign bit
// of a long, done with a single LSR #63. immI_63 pins both shift counts.
instruct signExtractL(iRegLNoSp dst, iRegL src1, immI_63 div1, immI_63 div2) %{
  match(Set dst (URShiftL (RShiftL src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, $div1" %}
  ins_encode %{
    __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63);
  %}
  ins_pipe(ialu_reg_shift);
%}
11452 
// Long counterpart of div2Round: matches src + ((src >> 63) >>> 63), i.e.
// src plus its sign bit, folded into one ADD with an LSR #63 shifted operand.
// Fix: the format string previously read "add $dst, $src, $div1", which
// prints like a plain add-immediate; use the shifted-operand notation for
// consistency with div2Round's "addw $dst, $src, LSR $div1".
instruct div2RoundL(iRegLNoSp dst, iRegL src, immI_63 div1, immI_63 div2) %{
  match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "add $dst, $src, LSR $div1" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 63);
  %}
  ins_pipe(ialu_reg);
%}
11466 
11467 // Integer Remainder
11468 
// 32-bit signed remainder: dst = src1 % src2, computed as
// src1 - (src1 / src2) * src2 using SDIVW followed by MSUBW (see
// aarch64_enc_modw). Fix: the second format line previously read
// "msubw($dst, rscratch1, $src2, $src1" — a garbled disassembly comment with
// a stray '(' ; restore normal "mnemonic  operands" form.
instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (ModI src1 src2));

  ins_cost(INSN_COST * 22);
  format %{ "sdivw  rscratch1, $src1, $src2\n\t"
            "msubw  $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_modw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
11479 
11480 // Long Remainder
11481 
// 64-bit signed remainder: dst = src1 % src2, computed as
// src1 - (src1 / src2) * src2 using SDIV followed by MSUB (see
// aarch64_enc_mod). Fix: the second format line previously read
// "msub($dst, rscratch1, $src2, $src1" — a garbled disassembly comment with
// a stray '(' — and the first line used "\n" where modI uses "\n\t";
// normalize both for consistent multi-line output.
instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (ModL src1 src2));

  ins_cost(INSN_COST * 38);
  format %{ "sdiv   rscratch1, $src1, $src2\n\t"
            "msub   $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_mod(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
11492 
11493 // Integer Shifts
11494 
11495 // Shift Left Register
// Variable shift: LSLVW shifts by a register amount.
instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Left Immediate
// Constant shift amount is masked to 5 bits, matching Java's (int << n) semantics.
instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lslw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Logical Register
instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Logical Immediate
// Constant shift amount masked to 5 bits (int >>> n).
instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lsrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Arithmetic Register
instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Arithmetic Immediate
// Constant shift amount masked to 5 bits (int >> n).
instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11590 
11591 // Combined Int Mask and Right Shift (using UBFM)
11592 // TODO
11593 
11594 // Long Shifts
11595 
11596 // Shift Left Register
// Variable shift: LSLV shifts by a register amount.
instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Left Immediate
// Constant shift amount masked to 6 bits, matching Java's (long << n) semantics.
instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsl(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Logical Register
instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Logical Immediate
// Constant shift amount masked to 6 bits (long >>> n).
instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// A special-case pattern for card table stores.
// Same as urShiftL_reg_imm but the value being shifted is a pointer
// reinterpreted as a long (CastP2X).
instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
  match(Set dst (URShiftL (CastP2X src1) src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Shift Right Arithmetic Register
instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}

// Shift Right Arithmetic Immediate
// Constant shift amount masked to 6 bits (long >> n).
instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ asr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11707 
11708 // BEGIN This section of the file is automatically generated. Do not edit --------------
11709 
// Bitwise NOT: XorL with constant -1 (m1) is emitted as EON with zr,
// i.e. dst = ~src1.
// NOTE(review): comments in this auto-generated section are review
// annotations only and will be dropped on regeneration. The cr operand is
// not used by the encoding — presumably a generator artifact; confirm.
instruct regL_not_reg(iRegLNoSp dst,
                         iRegL src1, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL src1 m1));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, zr" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
// 32-bit variant: dst = ~src1 via EONW with zr.
instruct regI_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI src1 m1));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, zr" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              zr,
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg);
%}
11742 
// Logic ops with a complemented operand (src2 ^ -1 == ~src2), folded into
// the AArch64 "not" forms BIC/ORN/EON. (Review annotations only — this
// section is auto-generated.)

// dst = src1 & ~src2 via BICW.
instruct AndI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bicw  $dst, $src1, $src2" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// dst = src1 & ~src2 via BIC (64-bit).
instruct AndL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "bic  $dst, $src1, $src2" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// dst = src1 | ~src2 via ORNW.
instruct OrI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI src2 m1)));
  ins_cost(INSN_COST);
  format %{ "ornw  $dst, $src1, $src2" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// dst = src1 | ~src2 via ORN (64-bit).
instruct OrL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL src2 m1)));
  ins_cost(INSN_COST);
  format %{ "orn  $dst, $src1, $src2" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// dst = -1 ^ (src2 ^ src1) == ~(src1 ^ src2) via EONW.
instruct XorI_reg_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorI m1 (XorI src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eonw  $dst, $src1, $src2" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}

// dst = -1 ^ (src2 ^ src1) == ~(src1 ^ src2) via EON (64-bit).
instruct XorL_reg_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2, immL_M1 m1,
                         rFlagsReg cr) %{
  match(Set dst (XorL m1 (XorL src2 src1)));
  ins_cost(INSN_COST);
  format %{ "eon  $dst, $src1, $src2" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL, 0);
  %}

  ins_pipe(ialu_reg_reg);
%}
11844 
// And with a complemented, shifted operand: src1 & ~(src2 shift src3),
// folded into one BIC(W) with a shifted-register operand. Shift constants
// are masked to 0x1f (32-bit) / 0x3f (64-bit). (Review annotations only —
// this section is auto-generated.)

// dst = src1 & ~(src2 >>> src3) via BICW, LSR.
instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & ~(src2 >>> src3) via BIC, LSR (64-bit).
instruct AndL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & ~(src2 >> src3) via BICW, ASR.
instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & ~(src2 >> src3) via BIC, ASR (64-bit).
instruct AndL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & ~(src2 << src3) via BICW, LSL.
instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bicw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & ~(src2 << src3) via BIC, LSL (64-bit).
instruct AndL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "bic  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ bic(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11952 
// Xor with a complemented, shifted operand: -1 ^ ((src2 shift src3) ^ src1)
// == src1 ^ ~(src2 shift src3), folded into one EON(W) with a shifted
// register. (Review annotations only — this section is auto-generated.)

// dst = src1 ^ ~(src2 >>> src3) via EONW, LSR.
instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ ~(src2 >>> src3) via EON, LSR (64-bit).
instruct XorL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ ~(src2 >> src3) via EONW, ASR.
instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ ~(src2 >> src3) via EON, ASR (64-bit).
instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ ~(src2 << src3) via EONW, LSL.
instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eonw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ ~(src2 << src3) via EON, LSL (64-bit).
instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
  ins_cost(1.9 * INSN_COST);
  format %{ "eon  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eon(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12060 
// Or with a complemented, shifted operand: src1 | ~(src2 shift src3),
// folded into one ORN(W) with a shifted register. (Review annotations only —
// this section is auto-generated.)

// dst = src1 | ~(src2 >>> src3) via ORNW, LSR.
instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 >>> src3) via ORN, LSR (64-bit).
instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 >> src3) via ORNW, ASR.
instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 >> src3) via ORN, ASR (64-bit).
instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 << src3) via ORNW, LSL.
instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, immI_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ ornw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | ~(src2 << src3) via ORN, LSL (64-bit).
instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, immL_M1 src4, rFlagsReg cr) %{
  match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
  ins_cost(1.9 * INSN_COST);
  format %{ "orn  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orn(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12168 
// And with a shifted operand: src1 & (src2 shift src3), folded into one
// AND(W) with a shifted-register operand. (Review annotations only — this
// section is auto-generated.)

// dst = src1 & (src2 >>> src3) via ANDW, LSR.
instruct AndI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 >>> src3) via AND, LSR (64-bit).
instruct AndL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 >> src3) via ANDW, ASR.
instruct AndI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 >> src3) via AND, ASR (64-bit).
instruct AndL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 << src3) via ANDW, LSL.
instruct AndI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 << src3) via AND, LSL (64-bit).
instruct AndL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12282 
// Xor with a shifted operand: src1 ^ (src2 shift src3), folded into one
// EOR(W) with a shifted-register operand. (Review annotations only — this
// section is auto-generated.)

// dst = src1 ^ (src2 >>> src3) via EORW, LSR.
instruct XorI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 >>> src3) via EOR, LSR (64-bit).
instruct XorL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 >> src3) via EORW, ASR.
instruct XorI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 >> src3) via EOR, ASR (64-bit).
instruct XorL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 << src3) via EORW, LSL.
instruct XorI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 << src3) via EOR, LSL (64-bit).
instruct XorL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12396 
// Orr with the second operand shifted by a constant.  Same pattern as
// the eor rules above: (Or src1 (shift src2 #imm)) is folded into a
// single orrw/orr with a shifted register operand.  Shift amounts are
// masked to the operand width (0x1f / 0x3f).

instruct OrI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct OrL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct OrI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct OrL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct OrI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct OrL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12510 
// Add with the second operand shifted by a constant.  AArch64 add
// (shifted register) lets us fold (Add src1 (shift src2 #imm)) into a
// single addw/add.  Shift amounts are masked to the operand width
// (0x1f / 0x3f).

instruct AddI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct AddL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12624 
// Subtract with the subtrahend shifted by a constant.  AArch64 sub
// (shifted register) lets us fold (Sub src1 (shift src2 #imm)) into a
// single subw/sub.  Shift amounts are masked to the operand width
// (0x1f / 0x3f).

instruct SubI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

instruct SubL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
12738 
12739 
12740 
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// (LShiftL then RShiftL) sign-extracts a bitfield; map it to a single
// sbfm (signed bitfield move).
instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    // sbfm immr/imms encoding: the field's top bit is 63 - lshift and
    // the rotate amount is (rshift - lshift) mod 64.
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ sbfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
12763 
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit variant of sbfmL: sign-extract a bitfield with a single sbfmw.
instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    // 32-bit immr/imms: field top bit 31 - lshift, rotate (rshift - lshift) mod 32.
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ sbfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
12786 
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// Unsigned variant: (LShiftL then URShiftL) zero-extracts a bitfield;
// map it to a single ubfm (unsigned bitfield move).
instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    // Same immr/imms computation as sbfmL above.
    int s = 63 - lshift;
    int r = (rshift - lshift) & 63;
    __ ubfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
12809 
// Shift Left followed by Shift Right.
// This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit unsigned variant: zero-extract a bitfield with a single ubfmw.
instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    // Same immr/imms computation as sbfmwI above.
    int s = 31 - lshift;
    int r = (rshift - lshift) & 31;
    __ ubfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
// Bitfield extract with shift & mask

// (src >>> rshift) & mask, where mask+1 is a power of two, is an
// unsigned bitfield extract of width log2(mask+1) starting at rshift.
instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (AndI (URShiftI src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfxw $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    // immI_bitmask guarantees mask+1 is a power of two, so exact_log2
    // gives the field width.
    int width = exact_log2(mask+1);
    __ ubfxw(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// 64-bit bitfield extract: (src >>> rshift) & mask, where mask+1 is a
// power of two, becomes a single ubfx of width log2(mask+1) at rshift.
instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
%{
  match(Set dst (AndL (URShiftL src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    // The mask is a jlong (immL_bitmask), so compute the width with the
    // 64-bit exact_log2_long — consistent with the ubfizL predicate —
    // rather than exact_log2, whose parameter type does not cover masks
    // with bits set at or above bit 31 on all hosts.
    int width = exact_log2_long(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12864 
// We can use ubfx when extending an And with a mask when we know mask
// is positive.  We know that because immI_bitmask guarantees it.
// The ConvI2L is free here: ubfx already zero-extends the extracted
// field into the 64-bit destination.
instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));

  ins_cost(INSN_COST * 2);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    // mask is int-ranged (immI_bitmask), so exact_log2 is sufficient here.
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12882 
// We can use ubfiz when masking by a positive number and then left shifting the result.
// We know that the mask is positive because immI_bitmask guarantees it.
instruct ubfizwI(iRegINoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
%{
  match(Set dst (LShiftI (AndI src mask) lshift));
  // lshift must be a legal 32-bit shift and width + lshift must fit in
  // 32 bits for ubfizw to encode the field.
  // NOTE(review): unlike the ubfizIConvI2L predicate below, get_int()+1
  // here lacks an (unsigned int) cast — confirm immI_bitmask can never
  // be 0x7fffffff, which would make the +1 overflow.
  predicate((unsigned int)n->in(2)->get_int() <= 31 &&
    (exact_log2(n->in(1)->in(2)->get_int()+1) + (unsigned int)n->in(2)->get_int()) <= (31+1));

  ins_cost(INSN_COST);
  format %{ "ubfizw $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    // immI_bitmask guarantees mask+1 is a power of two.
    int width = exact_log2(mask+1);
    __ ubfizw(as_Register($dst$$reg),
          as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// We can use ubfiz when masking by a positive number and then left shifting the result.
// We know that the mask is positive because immL_bitmask guarantees it.
instruct ubfizL(iRegLNoSp dst, iRegL src, immI lshift, immL_bitmask mask)
%{
  match(Set dst (LShiftL (AndL src mask) lshift));
  // lshift must be a legal 64-bit shift and width + lshift must fit in
  // 64 bits for ubfiz to encode the field.
  predicate((unsigned int)n->in(2)->get_int() <= 63 &&
    (exact_log2_long(n->in(1)->in(2)->get_long()+1) + (unsigned int)n->in(2)->get_int()) <= (63+1));

  ins_cost(INSN_COST);
  format %{ "ubfiz $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    // The mask is a jlong; use exact_log2_long so the computed width
    // matches the predicate above (which already uses exact_log2_long)
    // and stays correct for masks with bits set at or above bit 31.
    int width = exact_log2_long(mask+1);
    __ ubfiz(as_Register($dst$$reg),
          as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12921 
// If there is a convert I to L block between and AndI and a LShiftL, we can also match ubfiz
// The zero-extension of ubfiz subsumes the ConvI2L.
instruct ubfizIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
%{
  match(Set dst (LShiftL (ConvI2L(AndI src mask)) lshift));
  // width + lshift must fit in 32 bits so that the field is fully
  // determined by the low 32 bits of src.
  predicate((unsigned int)n->in(2)->get_int() <= 31 &&
    (exact_log2((unsigned int)n->in(1)->in(1)->in(2)->get_int()+1) + (unsigned int)n->in(2)->get_int()) <= 32);

  ins_cost(INSN_COST);
  format %{ "ubfiz $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    // immI_bitmask guarantees mask+1 is a power of two.
    int width = exact_log2(mask+1);
    __ ubfiz(as_Register($dst$$reg),
             as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
12940 
// Rotations

// When the left- and right-shift counts sum to the register width
// (checked mod 64/32 by the predicate), (a << l) | (b >>> r) and the
// Add form are a double-register extract: a single extr/extrw with the
// right-shift count.  With src1 == src2 this is a rotate.

instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// Or and Add are interchangeable here because the shifted operands
// select disjoint bit ranges.

instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
13002 
13003 
// rol expander
// AArch64 has no rotate-left instruction; rol by n is implemented as
// ror by (-n), i.e. negate the count (subw from zr) and use rorv, which
// only reads the low bits of the count register.

instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// rol expander

instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// Match the two ideal-graph shapes of a variable rotate-left:
// (x << s) | (x >>> (W - s)) and (x << s) | (x >>> (0 - s)),
// both of which expand to the rol expander above.

instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}

instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}
13071 
// ror expander
// Rotate right maps directly onto rorv/rorvw, which read only the low
// bits of the count register, so no masking is needed.

instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// ror expander

instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "ror    $dst, $src, $shift" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            as_Register($shift$$reg));
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// Match the two ideal-graph shapes of a variable rotate-right:
// (x >>> s) | (x << (W - s)) and (x >>> s) | (x << (0 - s)),
// both of which expand to the ror expander above.

instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}

instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}

instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
13137 
// Add/subtract (extended)

// AArch64 add/sub (extended register) sign- or zero-extends the second
// operand for free, so a ConvI2L feeding an AddL/SubL is folded into a
// single add/sub with the sxtw extension.

instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};

instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};
13165 
13166 
// The shift-left-then-shift-right idiom over src2 is a narrowing
// sign/zero extension (sxth/sxtb/uxtb etc. depending on the shift
// count); add (extended register) performs the extension for free, so
// each pair below folds into a single add.

instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}

instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
13257 
13258 
// Int add where (src2 & 0xFF) is recognised as a zero-extension of the
// low byte and folded into addw's uxtb extended-register operand.
instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
13271 
// Int add where (src2 & 0xFFFF) is a zero-extension of the low halfword,
// folded into addw's uxth extended-register operand.
instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}
13284 
// Long add where (src2 & 0xFF) is a zero-extension of the low byte,
// folded into add's uxtb extended-register operand.
instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
13297 
// Long add where (src2 & 0xFFFF) is a zero-extension of the low halfword,
// folded into add's uxth extended-register operand.
instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}
13310 
// Long add where (src2 & 0xFFFFFFFF) is a zero-extension of the low word,
// folded into add's uxtw extended-register operand.
instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
13323 
// Int subtract of a zero-extended low byte: src1 - (src2 & 0xFF),
// folded into subw's uxtb extended-register operand.
instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
13336 
// Int subtract of a zero-extended low halfword: src1 - (src2 & 0xFFFF),
// folded into subw's uxth extended-register operand.
instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}
13349 
// Long subtract of a zero-extended low byte: src1 - (src2 & 0xFF),
// folded into sub's uxtb extended-register operand.
instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
13362 
// Long subtract of a zero-extended low halfword: src1 - (src2 & 0xFFFF),
// folded into sub's uxth extended-register operand.
instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}
13375 
// Long subtract of a zero-extended low word: src1 - (src2 & 0xFFFFFFFF),
// folded into sub's uxtw extended-register operand.
instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
13388 
13389 
// Long add of a sign-extended low byte that is additionally shifted left:
// src1 + (sxtb(src2) << lshift2), using the extended-register-with-shift
// form. immIExt presumably restricts lshift2 to the 0..4 range the
// instruction accepts — operand definition not visible here; confirm.
instruct AddExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13402 
// Long add of a sign-extended low halfword, shifted left:
// src1 + (sxth(src2) << lshift2), extended-register-with-shift form.
instruct AddExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13415 
// Long add of a sign-extended low word, shifted left:
// src1 + (sxtw(src2) << lshift2), extended-register-with-shift form.
instruct AddExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13428 
// Long subtract of a sign-extended low byte, shifted left:
// src1 - (sxtb(src2) << lshift2), extended-register-with-shift form.
instruct SubExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13441 
// Long subtract of a sign-extended low halfword, shifted left:
// src1 - (sxth(src2) << lshift2), extended-register-with-shift form.
instruct SubExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13454 
// Long subtract of a sign-extended low word, shifted left:
// src1 - (sxtw(src2) << lshift2), extended-register-with-shift form.
instruct SubExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13467 
// Int add of a sign-extended low byte, shifted left:
// src1 + (sxtb(src2) << lshift2), 32-bit extended-register form (addw).
instruct AddExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13480 
// Int add of a sign-extended low halfword, shifted left:
// src1 + (sxth(src2) << lshift2), 32-bit extended-register form (addw).
instruct AddExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13493 
// Int subtract of a sign-extended low byte, shifted left:
// src1 - (sxtb(src2) << lshift2), 32-bit extended-register form (subw).
instruct SubExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13506 
// Int subtract of a sign-extended low halfword, shifted left:
// src1 - (sxth(src2) << lshift2), 32-bit extended-register form (subw).
instruct SubExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13519 
13520 
// Long add of a sign-extended int shifted left:
// dst = src1 + (sxtw(src2) << lshift), extended-register-with-shift form.
// Fix: dropped the stray ';' after the closing '%}' — every sibling rule
// ends with a bare '%}'. (This section is auto-generated; mirror the fix
// in the generator as well.)
instruct AddExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (ConvI2L src2) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13533 
// Long subtract of a sign-extended int shifted left:
// dst = src1 - (sxtw(src2) << lshift), extended-register-with-shift form.
// Fix: dropped the stray ';' after the closing '%}' for consistency with
// every sibling rule. (Auto-generated section; fix the generator too.)
instruct SubExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (ConvI2L src2) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13546 
13547 
// Long add of a zero-extended low byte shifted left:
// src1 + ((src2 & 0xFF) << lshift), via add with uxtb #lshift.
instruct AddExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13560 
// Long add of a zero-extended low halfword shifted left:
// src1 + ((src2 & 0xFFFF) << lshift), via add with uxth #lshift.
instruct AddExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13573 
// Long add of a zero-extended low word shifted left:
// src1 + ((src2 & 0xFFFFFFFF) << lshift), via add with uxtw #lshift.
instruct AddExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13586 
// Long subtract of a zero-extended low byte shifted left:
// src1 - ((src2 & 0xFF) << lshift), via sub with uxtb #lshift.
instruct SubExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13599 
// Long subtract of a zero-extended low halfword shifted left:
// src1 - ((src2 & 0xFFFF) << lshift), via sub with uxth #lshift.
instruct SubExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13612 
// Long subtract of a zero-extended low word shifted left:
// src1 - ((src2 & 0xFFFFFFFF) << lshift), via sub with uxtw #lshift.
instruct SubExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13625 
// Int add of a zero-extended low byte shifted left:
// src1 + ((src2 & 0xFF) << lshift), via addw with uxtb #lshift.
instruct AddExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13638 
// Int add of a zero-extended low halfword shifted left:
// src1 + ((src2 & 0xFFFF) << lshift), via addw with uxth #lshift.
instruct AddExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13651 
// Int subtract of a zero-extended low byte shifted left:
// src1 - ((src2 & 0xFF) << lshift), via subw with uxtb #lshift.
instruct SubExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13664 
// Int subtract of a zero-extended low halfword shifted left:
// src1 - ((src2 & 0xFFFF) << lshift), via subw with uxth #lshift.
instruct SubExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
13677 // END This section of the file is automatically generated. Do not edit --------------
13678 
13679 // ============================================================================
13680 // Floating Point Arithmetic Instructions
13681 
// Single-precision float add: dst = src1 + src2 (fadds).
instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (AddF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fadds   $dst, $src1, $src2" %}

  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}
13696 
// Double-precision float add: dst = src1 + src2 (faddd).
instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (AddD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "faddd   $dst, $src1, $src2" %}

  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
13711 
// Single-precision float subtract: dst = src1 - src2 (fsubs).
instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (SubF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}
13726 
// Double-precision float subtract: dst = src1 - src2 (fsubd).
instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (SubD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
13741 
// Single-precision float multiply: dst = src1 * src2 (fmuls).
instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (MulF src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuls   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}
13756 
// Double-precision float multiply: dst = src1 * src2 (fmuld).
instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (MulD src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuld   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
13771 
// src1 * src2 + src3
// Fused multiply-add, single precision (fmadds); only selected when
// the UseFMA flag permits fusing (no intermediate rounding).
instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary src1 src2)));

  format %{ "fmadds   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg),
             as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
13788 
// src1 * src2 + src3
// Fused multiply-add, double precision (fmaddd); guarded by UseFMA.
instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary src1 src2)));

  format %{ "fmaddd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmaddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg),
             as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
13805 
// -src1 * src2 + src3
// Fused multiply-subtract, single precision (fmsubs); either the first
// or the second multiplicand may carry the negation, so both rules match.
// Guarded by UseFMA.
instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary (NegF src1) src2)));
  match(Set dst (FmaF src3 (Binary src1 (NegF src2))));

  format %{ "fmsubs   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmsubs(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
13823 
// -src1 * src2 + src3
// Fused multiply-subtract, double precision (fmsubd); negation may sit on
// either multiplicand. Guarded by UseFMA.
instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary (NegD src1) src2)));
  match(Set dst (FmaD src3 (Binary src1 (NegD src2))));

  format %{ "fmsubd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmsubd(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
13841 
// -src1 * src2 - src3
// Fused negated multiply-add, single precision (fnmadds); negation may
// sit on either multiplicand. Guarded by UseFMA.
instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2)));
  match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2))));

  format %{ "fnmadds  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmadds(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
13859 
// -src1 * src2 - src3
// Fused negated multiply-add, double precision (fnmaddd); negation may
// sit on either multiplicand. Guarded by UseFMA.
instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2)));
  match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2))));

  format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmaddd(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
13877 
// src1 * src2 - src3
// Fused multiply with negated addend, single precision (fnmsubs).
// Guarded by UseFMA.
// NOTE(review): the `zero` operand is not referenced by the match rule
// or the encoding — looks vestigial; confirm it can be dropped.
instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary src1 src2)));

  format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmsubs(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
13894 
// src1 * src2 - src3
// Fused multiply with negated addend, double precision. Guarded by UseFMA.
// NOTE(review): the `zero` operand is not referenced by the match rule
// or the encoding — looks vestigial; confirm it can be dropped.
instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary src1 src2)));

  format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
  // n.b. insn name should be fnmsubd
    __ fnmsub(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
13912 
13913 
// Single-precision float divide: dst = src1 / src2 (fdivs).
instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (DivF src1  src2));

  ins_cost(INSN_COST * 18);
  format %{ "fdivs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_s);
%}
13928 
// Double-precision float divide: dst = src1 / src2 (fdivd).
instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (DivD src1  src2));

  ins_cost(INSN_COST * 32);
  format %{ "fdivd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_d);
%}
13943 
// Single-precision float negate (fnegs).
// Fix: the debug format printed "fneg" while the encoding emits fnegs;
// print the exact mnemonic, matching the sibling negD rule.
instruct negF_reg_reg(vRegF dst, vRegF src) %{
  match(Set dst (NegF src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegs   $dst, $src" %}

  ins_encode %{
    __ fnegs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}
13957 
// Double-precision float negate (fnegd).
instruct negD_reg_reg(vRegD dst, vRegD src) %{
  match(Set dst (NegD src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegd   $dst, $src" %}

  ins_encode %{
    __ fnegd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
13971 
// Single-precision float absolute value (fabss).
instruct absF_reg(vRegF dst, vRegF src) %{
  match(Set dst (AbsF src));

  ins_cost(INSN_COST * 3);
  format %{ "fabss   $dst, $src" %}
  ins_encode %{
    __ fabss(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}
13984 
// Double-precision float absolute value (fabsd).
instruct absD_reg(vRegD dst, vRegD src) %{
  match(Set dst (AbsD src));

  ins_cost(INSN_COST * 3);
  format %{ "fabsd   $dst, $src" %}
  ins_encode %{
    __ fabsd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
13997 
// Double-precision square root (fsqrtd).
// Fix: pipeline class was fp_div_s while the single-precision sqrtF rule
// used fp_div_d — the two were swapped; a double-precision op belongs on
// the double divide/sqrt pipe. (Scheduling hint only; encoding unchanged.)
instruct sqrtD_reg(vRegD dst, vRegD src) %{
  match(Set dst (SqrtD src));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrtd  $dst, $src" %}
  ins_encode %{
    __ fsqrtd(as_FloatRegister($dst$$reg),
              as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_div_d);
%}
14010 
// Single-precision square root. The ideal graph has no SqrtF at this
// vintage, so the pattern matched is D2F(sqrtD(F2D(src))), which a single
// fsqrts computes exactly (the widening round-trip adds no extra rounding).
// Fix: pipeline class was fp_div_d, swapped with sqrtD's fp_div_s; a
// single-precision op belongs on the single divide/sqrt pipe.
instruct sqrtF_reg(vRegF dst, vRegF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrts  $dst, $src" %}
  ins_encode %{
    __ fsqrts(as_FloatRegister($dst$$reg),
              as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_div_s);
%}
14023 
14024 // ============================================================================
14025 // Logical Instructions
14026 
14027 // Integer Logical Instructions
14028 
14029 // And Instructions
14030 
14031 
// Int bitwise AND, register-register (andw).
instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
14046 
// Int bitwise AND with a logical immediate (immILog restricts src2 to
// encodable bitmask immediates).
// Fix: the debug format printed "andsw" (the flag-setting form) while the
// encoding emits plain andw; print the mnemonic actually emitted.
instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
14061 
14062 // Or Instructions
14063 
// Int bitwise OR, register-register (orrw).
instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
14078 
// Int bitwise OR with a logical immediate (orrw).
instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
14093 
14094 // Xor Instructions
14095 
// Int bitwise XOR, register-register (eorw).
instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
14110 
// Int bitwise XOR with a logical immediate (eorw).
instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
14125 
14126 // Long Logical Instructions
14127 // TODO
14128 
// Long bitwise AND, register-register (andr).
// Fix: debug format was tagged "# int" on a long (AndL) rule; use "# long"
// to match the convention of the int rules above.
instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
14143 
// Long bitwise AND with a logical immediate (immLLog restricts src2 to
// encodable bitmask immediates).
// Fix: debug format tag corrected from "# int" to "# long".
instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
14158 
14159 // Or Instructions
14160 
// Long bitwise OR, register-register (orr).
// Fix: debug format tag corrected from "# int" to "# long".
instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
14175 
// Long bitwise OR with a logical immediate (orr).
// Fix: debug format tag corrected from "# int" to "# long".
instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
14190 
14191 // Xor Instructions
14192 
14193 instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
14194   match(Set dst (XorL src1 src2));
14195 
14196   format %{ "eor  $dst, $src1, $src2\t# int" %}
14197 
14198   ins_cost(INSN_COST);
14199   ins_encode %{
14200     __ eor(as_Register($dst$$reg),
14201            as_Register($src1$$reg),
14202            as_Register($src2$$reg));
14203   %}
14204 
14205   ins_pipe(ialu_reg_reg);
14206 %}
14207 
14208 instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
14209   match(Set dst (XorL src1 src2));
14210 
14211   ins_cost(INSN_COST);
14212   format %{ "eor  $dst, $src1, $src2\t# int" %}
14213 
14214   ins_encode %{
14215     __ eor(as_Register($dst$$reg),
14216            as_Register($src1$$reg),
14217            (unsigned long)($src2$$constant));
14218   %}
14219 
14220   ins_pipe(ialu_reg_imm);
14221 %}
14222 
// Sign-extending int -> long conversion.
instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(INSN_COST);
  format %{ "sxtw  $dst, $src\t# i2l" %}
  ins_encode %{
    // sbfm with immr=0, imms=31 is the canonical encoding of sxtw:
    // sign-extend the low 32 bits of src into the 64-bit dst.
    __ sbfm($dst$$Register, $src$$Register, 0, 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// this pattern occurs in bigmath arithmetic
// Matches (long)(int) & 0xFFFFFFFF, i.e. a zero-extending int -> long.
instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  ins_cost(INSN_COST);
  format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
  ins_encode %{
    // ubfm #0, #31 zero-extends the low 32 bits of src into dst (uxtw).
    __ ubfm($dst$$Register, $src$$Register, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Long -> int truncation: a 32-bit register move keeps only the low word.
instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
  match(Set dst (ConvL2I src));

  ins_cost(INSN_COST);
  format %{ "movw  $dst, $src \t// l2i" %}

  ins_encode %{
    __ movw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Int -> boolean normalization: dst = (src != 0) ? 1 : 0.
instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);  // the compare clobbers the flags

  format %{
    "cmpw $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmpw(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}

// Pointer -> boolean normalization: dst = (src != NULL) ? 1 : 0.
// Same shape as convI2B but with a full 64-bit compare.
instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);  // the compare clobbers the flags

  format %{
    "cmp  $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmp(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}
14297 
// Floating-point and FP<->integer value conversions. Each pattern is a
// single conversion instruction; costs reflect multi-cycle FP latency.

// Double -> float narrowing conversion.
instruct convD2F_reg(vRegF dst, vRegD src) %{
  match(Set dst (ConvD2F src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtd  $dst, $src \t// d2f" %}

  ins_encode %{
    __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2f);
%}

// Float -> double widening conversion.
instruct convF2D_reg(vRegD dst, vRegF src) %{
  match(Set dst (ConvF2D src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvts  $dst, $src \t// f2d" %}

  ins_encode %{
    __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2d);
%}

// Float -> int conversion (fcvtz* rounds toward zero, matching Java's
// truncating narrowing semantics).
instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
  match(Set dst (ConvF2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzsw  $dst, $src \t// f2i" %}

  ins_encode %{
    __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);
%}

// Float -> long conversion.
instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
  match(Set dst (ConvF2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzs  $dst, $src \t// f2l" %}

  ins_encode %{
    __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2l);
%}

// Int -> float conversion (signed convert, word source).
instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
  match(Set dst (ConvI2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfws  $dst, $src \t// i2f" %}

  ins_encode %{
    __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2f);
%}

// Long -> float conversion.
instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
  match(Set dst (ConvL2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfs  $dst, $src \t// l2f" %}

  ins_encode %{
    __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2f);
%}

// Double -> int conversion.
instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
  match(Set dst (ConvD2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzdw  $dst, $src \t// d2i" %}

  ins_encode %{
    __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2i);
%}

// Double -> long conversion.
instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
  match(Set dst (ConvD2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzd  $dst, $src \t// d2l" %}

  ins_encode %{
    __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);
%}

// Int -> double conversion.
instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
  match(Set dst (ConvI2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfwd  $dst, $src \t// i2d" %}

  ins_encode %{
    __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2d);
%}

// Long -> double conversion.
instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
  match(Set dst (ConvL2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfd  $dst, $src \t// l2d" %}

  ins_encode %{
    __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2d);
%}
14427 
// stack <-> reg and reg <-> reg shuffles with no conversion
// These reinterpret the raw bits of a value between the integer and FP
// register files (or a stack slot) without changing them.

// Reinterpret a float stack slot as an int: 32-bit GP load from sp+disp.
instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}

  ins_encode %{
    __ ldrw($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Reinterpret an int stack slot as a float: 32-bit FP load from sp+disp.
instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Reinterpret a double stack slot as a long: 64-bit GP load from sp+disp.
instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}

  ins_encode %{
    __ ldr($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Reinterpret a long stack slot as a double: 64-bit FP load from sp+disp.
instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Store a float register to an int stack slot (bits unchanged).
instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}

  ins_encode %{
    __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Store an int register to a float stack slot (bits unchanged).
instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}

  ins_encode %{
    __ strw($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
14537 
// Store a double register to a long stack slot (bits unchanged).
instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  // Format operand order fixed to source-first, matching the emitted
  // "strd Dt, [sp, #disp]" and the sibling patterns (MoveF2I_reg_stack,
  // MoveL2D_reg_stack) which all print "st.. $src, $dst".
  format %{ "strd $src, $dst\t# MoveD2L_reg_stack" %}

  ins_encode %{
    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
14555 
// Store a long register to a double stack slot (bits unchanged).
instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}

  ins_encode %{
    __ str($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}

// Register-to-register bit moves via fmov: the value's bits are copied
// between the FP and integer register files with no conversion.

// Float register -> int register.
instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}

  ins_encode %{
    __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);

%}

// Int register -> float register.
instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}

  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_i2f);

%}

// Double register -> long register.
instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}

  ins_encode %{
    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);

%}

// Long register -> double register.
instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}

  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_l2d);

%}
14645 
14646 // ============================================================================
14647 // clearing of an array
14648 
14649 instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
14650 %{
14651   match(Set dummy (ClearArray cnt base));
14652   effect(USE_KILL cnt, USE_KILL base);
14653 
14654   ins_cost(4 * INSN_COST);
14655   format %{ "ClearArray $cnt, $base" %}
14656 
14657   ins_encode %{
14658     __ zero_words($base$$Register, $cnt$$Register);
14659   %}
14660 
14661   ins_pipe(pipe_class_memory);
14662 %}
14663 
14664 instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
14665 %{
14666   predicate((u_int64_t)n->in(2)->get_long()
14667             < (u_int64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
14668   match(Set dummy (ClearArray cnt base));
14669   effect(USE_KILL base);
14670 
14671   ins_cost(4 * INSN_COST);
14672   format %{ "ClearArray $cnt, $base" %}
14673 
14674   ins_encode %{
14675     __ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
14676   %}
14677 
14678   ins_pipe(pipe_class_memory);
14679 %}
14680 
14681 // ============================================================================
14682 // Overflow Math Instructions
14683 
14684 instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
14685 %{
14686   match(Set cr (OverflowAddI op1 op2));
14687 
14688   format %{ "cmnw  $op1, $op2\t# overflow check int" %}
14689   ins_cost(INSN_COST);
14690   ins_encode %{
14691     __ cmnw($op1$$Register, $op2$$Register);
14692   %}
14693 
14694   ins_pipe(icmp_reg_reg);
14695 %}
14696 
14697 instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
14698 %{
14699   match(Set cr (OverflowAddI op1 op2));
14700 
14701   format %{ "cmnw  $op1, $op2\t# overflow check int" %}
14702   ins_cost(INSN_COST);
14703   ins_encode %{
14704     __ cmnw($op1$$Register, $op2$$constant);
14705   %}
14706 
14707   ins_pipe(icmp_reg_imm);
14708 %}
14709 
14710 instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
14711 %{
14712   match(Set cr (OverflowAddL op1 op2));
14713 
14714   format %{ "cmn   $op1, $op2\t# overflow check long" %}
14715   ins_cost(INSN_COST);
14716   ins_encode %{
14717     __ cmn($op1$$Register, $op2$$Register);
14718   %}
14719 
14720   ins_pipe(icmp_reg_reg);
14721 %}
14722 
14723 instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
14724 %{
14725   match(Set cr (OverflowAddL op1 op2));
14726 
14727   format %{ "cmn   $op1, $op2\t# overflow check long" %}
14728   ins_cost(INSN_COST);
14729   ins_encode %{
14730     __ cmn($op1$$Register, $op2$$constant);
14731   %}
14732 
14733   ins_pipe(icmp_reg_imm);
14734 %}
14735 
14736 instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
14737 %{
14738   match(Set cr (OverflowSubI op1 op2));
14739 
14740   format %{ "cmpw  $op1, $op2\t# overflow check int" %}
14741   ins_cost(INSN_COST);
14742   ins_encode %{
14743     __ cmpw($op1$$Register, $op2$$Register);
14744   %}
14745 
14746   ins_pipe(icmp_reg_reg);
14747 %}
14748 
14749 instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
14750 %{
14751   match(Set cr (OverflowSubI op1 op2));
14752 
14753   format %{ "cmpw  $op1, $op2\t# overflow check int" %}
14754   ins_cost(INSN_COST);
14755   ins_encode %{
14756     __ cmpw($op1$$Register, $op2$$constant);
14757   %}
14758 
14759   ins_pipe(icmp_reg_imm);
14760 %}
14761 
14762 instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
14763 %{
14764   match(Set cr (OverflowSubL op1 op2));
14765 
14766   format %{ "cmp   $op1, $op2\t# overflow check long" %}
14767   ins_cost(INSN_COST);
14768   ins_encode %{
14769     __ cmp($op1$$Register, $op2$$Register);
14770   %}
14771 
14772   ins_pipe(icmp_reg_reg);
14773 %}
14774 
14775 instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
14776 %{
14777   match(Set cr (OverflowSubL op1 op2));
14778 
14779   format %{ "cmp   $op1, $op2\t# overflow check long" %}
14780   ins_cost(INSN_COST);
14781   ins_encode %{
14782     __ cmp($op1$$Register, $op2$$constant);
14783   %}
14784 
14785   ins_pipe(icmp_reg_imm);
14786 %}
14787 
14788 instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
14789 %{
14790   match(Set cr (OverflowSubI zero op1));
14791 
14792   format %{ "cmpw  zr, $op1\t# overflow check int" %}
14793   ins_cost(INSN_COST);
14794   ins_encode %{
14795     __ cmpw(zr, $op1$$Register);
14796   %}
14797 
14798   ins_pipe(icmp_reg_imm);
14799 %}
14800 
14801 instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
14802 %{
14803   match(Set cr (OverflowSubL zero op1));
14804 
14805   format %{ "cmp   zr, $op1\t# overflow check long" %}
14806   ins_cost(INSN_COST);
14807   ins_encode %{
14808     __ cmp(zr, $op1$$Register);
14809   %}
14810 
14811   ins_pipe(icmp_reg_imm);
14812 %}
14813 
// 32-bit multiply overflow check producing a flags result. The product is
// formed in 64 bits with smull; it overflowed iff the high word is not the
// sign extension of the low word. The final movw/cselw/cmpw sequence then
// translates that NE result into the V flag so callers testing VS/VC work.
instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowMulI op1 op2));

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(5 * INSN_COST);
  ins_encode %{
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Fused form: when the overflow check feeds an If on overflow/no_overflow,
// skip materializing the V flag and branch directly on the NE/EQ result of
// the sign-extension compare.
instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "b$cmp   $labl" %}
  ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}

// 64-bit multiply overflow check: mul gives the low 64 bits, smulh the high
// 64 bits; the product overflowed iff the high half is not the sign
// extension (ASR #63) of the low half. Same V-flag materialization trick
// as overflowMulI_reg.
instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(6 * INSN_COST);
  ins_encode %{
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}

// Fused branch form of the long multiply overflow check (see
// overflowMulI_reg_branch for the rationale).
instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "b$cmp $labl" %}
  ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
14903 
14904 // ============================================================================
14905 // Compare Instructions
14906 
14907 instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
14908 %{
14909   match(Set cr (CmpI op1 op2));
14910 
14911   effect(DEF cr, USE op1, USE op2);
14912 
14913   ins_cost(INSN_COST);
14914   format %{ "cmpw  $op1, $op2" %}
14915 
14916   ins_encode(aarch64_enc_cmpw(op1, op2));
14917 
14918   ins_pipe(icmp_reg_reg);
14919 %}
14920 
14921 instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
14922 %{
14923   match(Set cr (CmpI op1 zero));
14924 
14925   effect(DEF cr, USE op1);
14926 
14927   ins_cost(INSN_COST);
14928   format %{ "cmpw $op1, 0" %}
14929 
14930   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));
14931 
14932   ins_pipe(icmp_reg_imm);
14933 %}
14934 
14935 instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
14936 %{
14937   match(Set cr (CmpI op1 op2));
14938 
14939   effect(DEF cr, USE op1);
14940 
14941   ins_cost(INSN_COST);
14942   format %{ "cmpw  $op1, $op2" %}
14943 
14944   ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));
14945 
14946   ins_pipe(icmp_reg_imm);
14947 %}
14948 
14949 instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
14950 %{
14951   match(Set cr (CmpI op1 op2));
14952 
14953   effect(DEF cr, USE op1);
14954 
14955   ins_cost(INSN_COST * 2);
14956   format %{ "cmpw  $op1, $op2" %}
14957 
14958   ins_encode(aarch64_enc_cmpw_imm(op1, op2));
14959 
14960   ins_pipe(icmp_reg_imm);
14961 %}
14962 
// Unsigned compare Instructions; really, same as signed compare
// except it should only be used to feed an If or a CMovI which takes a
// cmpOpU.
// The emitted instruction is identical to the signed forms; only the
// flags-register class (rFlagsRegU) differs so the matcher pairs the
// result with unsigned condition codes.

// Unsigned 32-bit compare, register-register.
instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Unsigned 32-bit compare against zero.
instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpU op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, #0\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned 32-bit compare against an add/sub-encodable immediate.
instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned 32-bit compare against an arbitrary immediate (2x cost — the
// constant may need to be materialized first).
instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
15022 
// Signed 64-bit compare, register-register.
instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Signed 64-bit compare against zero.
// NOTE(review): the format says "tst" but the encoding is a compare
// (aarch64_enc_cmp_imm_addsub with #0) — the printed text may be stale.
instruct compL_reg_immL0(rFlagsReg cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Signed 64-bit compare against an add/sub-encodable immediate.
instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Signed 64-bit compare against an arbitrary immediate (2x cost — the
// constant may need to be materialized first).
instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
15078 
// Unsigned 64-bit compares: same emitted instructions as the signed CmpL
// forms, but the rFlagsRegU result pairs with unsigned condition codes.

// Unsigned 64-bit compare, register-register.
instruct compUL_reg_reg(rFlagsRegU cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Unsigned 64-bit compare against zero.
instruct compUL_reg_immL0(rFlagsRegU cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpUL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned 64-bit compare against an add/sub-encodable immediate.
instruct compUL_reg_immLAddSub(rFlagsRegU cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned 64-bit compare against an arbitrary immediate (2x cost — the
// constant may need to be materialized first).
instruct compUL_reg_immL(rFlagsRegU cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
15134 
// Pointer compare (unsigned flags — pointers are unsigned addresses).
instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // ptr" %}

  ins_encode(aarch64_enc_cmpp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Compressed (narrow oop) pointer compare.
instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // compressed ptr" %}

  ins_encode(aarch64_enc_cmpn(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Pointer null check: compare against the immP0 (NULL) constant.
instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
%{
  match(Set cr (CmpP op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // ptr" %}

  ins_encode(aarch64_enc_testp(op1));

  ins_pipe(icmp_reg_imm);
%}

// Compressed pointer null check.
instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
%{
  match(Set cr (CmpN op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // compressed ptr" %}

  ins_encode(aarch64_enc_testn(op1));

  ins_pipe(icmp_reg_imm);
%}
15190 
// FP comparisons
//
// n.b. CmpF/CmpD set a normal flags reg which then gets compared
// using normal cmpOp. See declaration of rFlagsReg for details.

// Float compare, register-register.
instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, $src2" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}

// Float compare against the 0.0 immediate form of fcmp.
// NOTE(review): the 0.0D literal suffix is a non-standard C++ extension —
// confirm it is accepted by all supported toolchains.
instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, 0.0" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), 0.0D);
  %}

  ins_pipe(pipe_class_compare);
%}
// FROM HERE

// Double compare, register-register.
instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, $src2" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}

// Double compare against the 0.0 immediate form of fcmp.
instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, 0.0" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), 0.0D);
  %}

  ins_pipe(pipe_class_compare);
%}
15252 
15253 instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
15254 %{
15255   match(Set dst (CmpF3 src1 src2));
15256   effect(KILL cr);
15257 
15258   ins_cost(5 * INSN_COST);
15259   format %{ "fcmps $src1, $src2\n\t"
15260             "csinvw($dst, zr, zr, eq\n\t"
15261             "csnegw($dst, $dst, $dst, lt)"
15262   %}
15263 
15264   ins_encode %{
15265     Label done;
15266     FloatRegister s1 = as_FloatRegister($src1$$reg);
15267     FloatRegister s2 = as_FloatRegister($src2$$reg);
15268     Register d = as_Register($dst$$reg);
15269     __ fcmps(s1, s2);
15270     // installs 0 if EQ else -1
15271     __ csinvw(d, zr, zr, Assembler::EQ);
15272     // keeps -1 if less or unordered else installs 1
15273     __ csnegw(d, d, d, Assembler::LT);
15274     __ bind(done);
15275   %}
15276 
15277   ins_pipe(pipe_class_default);
15278 
15279 %}
15280 
15281 instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
15282 %{
15283   match(Set dst (CmpD3 src1 src2));
15284   effect(KILL cr);
15285 
15286   ins_cost(5 * INSN_COST);
15287   format %{ "fcmpd $src1, $src2\n\t"
15288             "csinvw($dst, zr, zr, eq\n\t"
15289             "csnegw($dst, $dst, $dst, lt)"
15290   %}
15291 
15292   ins_encode %{
15293     Label done;
15294     FloatRegister s1 = as_FloatRegister($src1$$reg);
15295     FloatRegister s2 = as_FloatRegister($src2$$reg);
15296     Register d = as_Register($dst$$reg);
15297     __ fcmpd(s1, s2);
15298     // installs 0 if EQ else -1
15299     __ csinvw(d, zr, zr, Assembler::EQ);
15300     // keeps -1 if less or unordered else installs 1
15301     __ csnegw(d, d, d, Assembler::LT);
15302     __ bind(done);
15303   %}
15304   ins_pipe(pipe_class_default);
15305 
15306 %}
15307 
// Three-way float compare against constant 0.0 (CmpF3): $dst = -1, 0
// or 1 for less/unordered, equal, greater respectively.
instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    // compare against the immediate +0.0 form of fcmps
    __ fcmps(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);

%}
15334 
// Three-way double compare against constant 0.0 (CmpD3): $dst = -1, 0
// or 1 for less/unordered, equal, greater respectively.
instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    // compare against the immediate +0.0 form of fcmpd
    __ fcmpd(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}
  ins_pipe(pipe_class_default);

%}
15360 
// CmpLTMask: $dst = -1 if $p < $q (signed), else 0.
// csetw materializes 0/1 for the lt condition, then subtracting from
// zr turns 1 into the all-ones mask.
instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(3 * INSN_COST);

  format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
            "csetw $dst, lt\n\t"
            "subw $dst, zr, $dst"
  %}

  ins_encode %{
    __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
    __ csetw(as_Register($dst$$reg), Assembler::LT);
    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
15381 
// CmpLTMask against zero: an arithmetic shift right by 31 replicates
// the sign bit, giving -1 for negative $src and 0 otherwise — one
// instruction, no flags touched.
instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask src zero));
  effect(KILL cr);

  ins_cost(INSN_COST);

  format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
15397 
15398 // ============================================================================
15399 // Max and Min
15400 
// Signed int minimum: $dst = ($src1 < $src2) ? $src1 : $src2,
// as a compare followed by a conditional select on lt.
instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MinI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1, $src2\t# signed int\n\t"
    "cselw $dst, $src1, $src2, lt"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::LT);
  %}

  ins_pipe(ialu_reg_reg);
%}
15425 // FROM HERE
15426 
// Signed int maximum: $dst = ($src1 > $src2) ? $src1 : $src2,
// as a compare followed by a conditional select on gt.
instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MaxI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1, $src2\t# signed int\n\t"
    "cselw $dst, $src1, $src2, gt"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::GT);
  %}

  ins_pipe(ialu_reg_reg);
%}
15451 
15452 // ============================================================================
15453 // Branch Instructions
15454 
15455 // Direct Branch.
// Unconditional direct branch (Goto) to $lbl.
instruct branch(label lbl)
%{
  match(Goto);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  format %{ "b  $lbl" %}

  ins_encode(aarch64_enc_b(lbl));

  ins_pipe(pipe_branch);
%}
15469 
15470 // Conditional Near Branch
// Conditional near branch on a signed condition; consumes flags
// produced by an earlier compare.
instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}
15490 
15491 // Conditional Near Branch Unsigned
// Conditional near branch on an unsigned condition (cmpOpU /
// rFlagsRegU variant of branchCon above).
instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl\t# unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}
15511 
15512 // Make use of CBZ and CBNZ.  These instructions, as well as being
15513 // shorter than (cmp; branch), have the additional benefit of not
15514 // killing the flags.
15515 
// Fused compare-and-branch of an int against zero: a single
// cbzw/cbnzw, which does not consume or modify the flags.
instruct cmpI_imm0_branch(cmpOpEqNe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15532 
// Fused compare-and-branch of a long against zero: 64-bit cbz/cbnz,
// which does not consume or modify the flags.
instruct cmpL_imm0_branch(cmpOpEqNe cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15549 
// Fused pointer-vs-NULL test and branch: 64-bit cbz/cbnz on the
// pointer register, leaving the flags untouched.
instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15566 
// Fused narrow-oop-vs-zero test and branch: narrow oops are 32 bits,
// so the 32-bit cbzw/cbnzw forms are used.
instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpN op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15583 
// NULL check of a decoded narrow oop: the matcher sees
// CmpP(DecodeN(oop), 0), but the decoded oop is NULL exactly when the
// compressed value is zero, so test the narrow register directly and
// skip the decode.
instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP (DecodeN oop) zero));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $oop, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($oop$$Register, *L);
    else
      __ cbnzw($oop$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15600 
// Fused unsigned-int compare against zero and branch.  With a zero
// operand, both EQ and LS (unsigned <=) reduce to "value is zero",
// so they map to cbzw and everything else to cbnzw.
instruct cmpUI_imm0_branch(cmpOpUEqNeLtGe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpU op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15617 
// Fused unsigned-long compare against zero and branch: 64-bit
// cbz/cbnz counterpart of cmpUI_imm0_branch (EQ and LS both mean
// "value is zero" when the other operand is 0).
instruct cmpUL_imm0_branch(cmpOpUEqNeLtGe cmp, iRegL op1, immL0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpUL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15634 
15635 // Test bit and Branch
15636 
15637 // Patterns for short (< 32KiB) variants
// Sign test of a long: lt/ge against zero is just a test of sign bit
// 63, so emit tbnz (cond NE, bit set => negative) or tbz (cond EQ,
// bit clear => non-negative).  Short (< 32KiB reach) variant.
instruct cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
15653 
// Sign test of an int: same idea as cmpL_branch_sign but on sign bit
// 31.  Short (< 32KiB reach) variant.
instruct cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
15669 
// Single-bit test of a long: (op1 & power-of-2-mask) ==/!= 0 maps to
// tbz/tbnz on the bit index.  The predicate guarantees the mask has
// exactly one bit set.  Short (< 32KiB reach) variant.
instruct cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
15686 
// Single-bit test of an int: 32-bit counterpart of cmpL_branch_bit.
// Short (< 32KiB reach) variant.
instruct cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
15703 
15704 // And far variants
// Far variant of cmpL_branch_sign: the far=true argument lets the
// assembler extend the reach beyond the tbz/tbnz 32KiB limit.
instruct far_cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15719 
// Far variant of cmpI_branch_sign (sign bit 31, far=true reach).
instruct far_cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15734 
// Far variant of cmpL_branch_bit (single-bit test with far=true
// reach).
instruct far_cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15750 
// Far variant of cmpI_branch_bit (single-bit test with far=true
// reach).
instruct far_cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
15766 
15767 // Test bits
15768 
// Set flags from (op1 & op2) for a long, where op2 is encodable as a
// 64-bit logical immediate (see predicate): a single tst instruction.
instruct cmpL_and(cmpOp cmp, iRegL op1, immL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/false, n->in(1)->in(2)->get_long()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
15781 
// Set flags from (op1 & op2) for an int, where op2 is encodable as a
// 32-bit logical immediate (see predicate): a single tstw
// instruction.
instruct cmpI_and(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/true, n->in(1)->in(2)->get_int()));

  ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
15794 
// Register-register form: set flags from (op1 & op2) for a long.
instruct cmpL_and_reg(cmpOp cmp, iRegL op1, iRegL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
15805 
// Register-register form: set flags from (op1 & op2) for an int.
instruct cmpI_and_reg(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
15816 
15817 
15818 // Conditional Far Branch
15819 // Conditional Far Branch Unsigned
15820 // TODO: fixme
15821 
15822 // counted loop end branch near
// Conditional branch closing a counted loop (signed condition); same
// encoding as branchCon.
instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch);
%}
15838 
15839 // counted loop end branch near Unsigned
// Conditional branch closing a counted loop (unsigned condition);
// same encoding as branchConU.
instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch);
%}
15855 
15856 // counted loop end branch far
15857 // counted loop end branch far unsigned
15858 // TODO: fixme
15859 
15860 // ============================================================================
15861 // inlined locking and unlocking
15862 
// Inlined fast-path monitor enter: sets flags for the caller to test;
// $tmp and $tmp2 are scratch (clobbered).  The real work is in the
// aarch64_enc_fast_lock encoding class.
instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP tmp2);

  // TODO
  // identify correct cost
  ins_cost(5 * INSN_COST);
  format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}

  ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}
15877 
// Inlined fast-path monitor exit: counterpart of cmpFastLock, encoded
// by aarch64_enc_fast_unlock; $tmp and $tmp2 are scratch.
instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, TEMP tmp2);

  ins_cost(5 * INSN_COST);
  format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}

  ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}
15890 
15891 
15892 // ============================================================================
15893 // Safepoint Instructions
15894 
15895 // TODO
15896 // provide a near and far version of this code
15897 
// Safepoint poll: read the polling page (result discarded into zr, as
// the format shows); the poll_type relocation marks this load for the
// runtime's safepoint machinery.
instruct safePoint(iRegP poll)
%{
  match(SafePoint poll);

  format %{
    "ldrw zr, [$poll]\t# Safepoint: poll for GC"
  %}
  ins_encode %{
    __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
  %}
  ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
%}
15910 
15911 
15912 // ============================================================================
15913 // Procedure Call/Return Instructions
15914 
15915 // Call Java Static Instruction
15916 
// Direct call to a statically-bound Java method, followed by the
// standard call epilog.
instruct CallStaticJavaDirect(method meth)
%{
  match(CallStaticJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "call,static $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_static_call(meth),
              aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
15932 
15933 // TO HERE
15934 
15935 // Call Java Dynamic Instruction
// Call to a dynamically-dispatched Java method, followed by the
// standard call epilog.
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL,dynamic $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_dynamic_call(meth),
               aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}
15951 
15952 // Call Runtime Instruction
15953 
// Call from compiled Java code into the VM runtime.
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
15968 
15969 // Call Runtime Instruction
15970 
// Call to a runtime leaf routine (same encoding as CallRuntimeDirect;
// the CallLeaf ideal node differs in its scheduling constraints).
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
15985 
15986 // Call Runtime Instruction
15987 
// Call to a runtime leaf routine that does not use floating point
// (CallLeafNoFP ideal node); encoding is shared with the other
// runtime calls.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
16002 
16003 // Tail Call; Jump from runtime stub to Java code.
16004 // Also known as an 'interprocedural jump'.
16005 // Target of jump will eventually return to caller.
16006 // TailJump below removes the return address.
// Indirect tail call: jump through $jump_target with the method oop
// held in the inline-cache register for the callee.
instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
%{
  match(TailCall jump_target method_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $method_oop holds method oop" %}

  ins_encode(aarch64_enc_tail_call(jump_target));

  ins_pipe(pipe_class_call);
%}
16019 
// Indirect tail jump (e.g. to an exception handler): jump through
// $jump_target with the exception oop pinned in r0.
instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $ex_oop holds exception oop" %}

  ins_encode(aarch64_enc_tail_jmp(jump_target));

  ins_pipe(pipe_class_call);
%}
16032 
16033 // Create exception oop: created by stack-crawling runtime code.
16034 // Created exception is now available to this handler, and is setup
16035 // just prior to jumping to this handler. No code emitted.
16036 // TODO check
16037 // should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
// CreateEx: zero-size placeholder — the exception oop is already in
// r0 when control reaches the handler, so nothing is emitted.
instruct CreateException(iRegP_R0 ex_oop)
%{
  match(Set ex_oop (CreateEx));

  format %{ " -- \t// exception oop; no code emitted" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}
16050 
16051 // Rethrow exception: The exception oop will come in the first
16052 // argument position. Then JUMP (not call) to the rethrow stub code.
// Rethrow: jump (not call) to the rethrow stub; see the comment
// above for the argument convention.
instruct RethrowException() %{
  match(Rethrow);
  ins_cost(CALL_COST);

  format %{ "b rethrow_stub" %}

  ins_encode( aarch64_enc_rethrow() );

  ins_pipe(pipe_class_call);
%}
16063 
16064 
16065 // Return Instruction
16066 // epilog node loads ret address into lr as part of frame pop
// Method return: a plain ret; lr was restored by the epilog.
instruct Ret()
%{
  match(Return);

  format %{ "ret\t// return register" %}

  ins_encode( aarch64_enc_ret() );

  ins_pipe(pipe_branch);
%}
16077 
16078 // Die now.
// Halt: emit a trapping instruction so execution can never fall
// through; the immediate is chosen to be distinguishable (see below).
instruct ShouldNotReachHere() %{
  match(Halt);

  ins_cost(CALL_COST);
  format %{ "ShouldNotReachHere" %}

  ins_encode %{
    // +1 so NativeInstruction::is_sigill_zombie_not_entrant() doesn't
    // return true
    __ dpcs1(0xdead + 1);
  %}

  ins_pipe(pipe_class_default);
%}
16093 
16094 // ============================================================================
16095 // Partial Subtype Check
16096 //
16097 // superklass array for an instance of the superklass.  Set a hidden
16098 // internal cache on a hit (cache is checked with exposed code in
16099 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
16100 // encoding ALSO sets flags.
16101 
// Partial subtype check with fixed registers (sub=r4, super=r0,
// temp=r2, result=r5); result is zero on a hit, non-zero on a miss,
// and the encoding also sets the flags (see header comment above).
instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL cr, KILL temp);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x1); // Force zero of result reg on hit

  ins_pipe(pipe_class_memory);
%}
16116 
// Variant used when only the flags are consumed (the check's result
// is compared against zero), so the result register need not be
// forced to zero on a hit.
instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
%{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL temp, KILL result);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x0); // Don't zero result reg on hit

  ins_pipe(pipe_class_memory);
%}
16131 
// String compare, both strings UTF-16 (UU encoding).  Registers are
// fixed by the intrinsic's calling convention (str1=r1, cnt1=r2,
// str2=r3, cnt2=r4, result=r0, tmp=r10); no vector temps needed.
instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register,
                      fnoreg, fnoreg, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
16149 
// String compare, both strings Latin-1 (LL encoding); same fixed
// register convention as string_compareU.
instruct string_compareL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register,
                      fnoreg, fnoreg, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
16166 
// String compare, str1 UTF-16 / str2 Latin-1 (UL encoding); the
// mixed-width comparison additionally needs two vector temporaries.
instruct string_compareUL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, vRegD vtmp1, vRegD vtmp2, iRegP_R10 tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP vtmp1, TEMP vtmp2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
16183 
// String compare, str1 Latin-1 / str2 UTF-16 (LU encoding); mirror of
// string_compareUL, also using two vector temporaries.
instruct string_compareLU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, vRegD vtmp1, vRegD vtmp2, iRegP_R10 tmp1, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP vtmp1, TEMP vtmp2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister, StrIntrinsicNode::LU);
  %}
  ins_pipe(pipe_class_memory);
%}
16200 
// String indexOf, both strings UTF-16 (UU).  The -1 passed for the
// constant-count argument appears to mean "substring length is not a
// compile-time constant" — NOTE(review): confirm against the
// string_indexof stub implementation.
instruct string_indexofUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
16219 
// String indexOf, both strings Latin-1 (LL); same shape as
// string_indexofUU with the LL intrinsic encoding.
instruct string_indexofLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
16238 
// String indexOf, source UTF-16 / needle Latin-1 (UL); same shape as
// string_indexofUU with the UL intrinsic encoding.
instruct string_indexofUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
16257 
16258 instruct string_indexofLU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
16259        iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
16260 %{
16261   predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LU);
16262   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
16263   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
16264          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
16265   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LU)" %}
16266 
16267   ins_encode %{
16268     __ string_indexof($str1$$Register, $str2$$Register,
16269                       $cnt1$$Register, $cnt2$$Register,
16270                       $tmp1$$Register, $tmp2$$Register,
16271                       $tmp3$$Register, $tmp4$$Register,
16272                       -1, $result$$Register, StrIntrinsicNode::LU);
16273   %}
16274   ins_pipe(pipe_class_memory);
16275 %}
16276 
// String.indexOf intrinsics specialized for a constant pattern length.
// The length is an immediate (int_cnt2), so cnt2 is not an input; zr is
// passed in its place and the constant goes in as icnt2.  Same-width forms
// (UU/LL) accept lengths up to 4 (immI_le_4); the mixed-width forms below
// (UL/LU) accept only length 1 (immI_1).
instruct string_indexof_conUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}

// Constant-length variant, both strings Latin-1 (LL), length <= 4.
instruct string_indexof_conLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}

// Constant-length variant, mixed widths (UL); only length 1 is supported.
instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_1 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}

// Constant-length variant, mixed widths (LU); only length 1 is supported.
instruct string_indexof_conLU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_1 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LU)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      icnt2, $result$$Register, StrIntrinsicNode::LU);
  %}
  ins_pipe(pipe_class_memory);
%}
16360 
// String.indexOf(char) intrinsic: search a UTF-16 string for a single
// char held in a register; delegates to MacroAssembler::string_indexof_char.
instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
                              iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                              iRegINoSp tmp3, rFlagsReg cr)
%{
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result" %}

  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register,
                           $result$$Register, $tmp1$$Register, $tmp2$$Register,
                           $tmp3$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
16378 
// String.equals intrinsic for two Latin-1 strings (LL): compare cnt bytes,
// element size 1, via the shared arrays_equals stub.
instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ arrays_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register,
                     1, /*is_string*/true);
  %}
  ins_pipe(pipe_class_memory);
%}

// String.equals intrinsic for two UTF-16 strings (UU).  The incoming count
// is in bytes, so it is halved (asrw by 1) to get a 16-bit element count
// before calling arrays_equals with element size 2.  Mutating cnt here is
// safe because cnt is USE_KILL.
instruct string_equalsU(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ asrw($cnt$$Register, $cnt$$Register, 1);
    __ arrays_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register,
                     2, /*is_string*/true);
  %}
  ins_pipe(pipe_class_memory);
%}
16413 
// Arrays.equals intrinsic for byte[] (element size 1).  Unlike the string
// forms above, is_string is false, so the stub also compares array lengths.
instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                      iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);

  format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $result$$Register, $tmp$$Register,
                     1, /*is_string*/false);
    %}
  ins_pipe(pipe_class_memory);
%}

// Arrays.equals intrinsic for char[] (element size 2).
instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                      iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);

  format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $result$$Register, $tmp$$Register,
                     2, /*is_string*/false);
  %}
  ins_pipe(pipe_class_memory);
%}
16445 
// StringCoding.hasNegatives intrinsic: scan len bytes of ary1 for any byte
// with the sign bit set; result delivered by MacroAssembler::has_negatives.
instruct has_negatives(iRegP_R1 ary1, iRegI_R2 len, iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (HasNegatives ary1 len));
  effect(USE_KILL ary1, USE_KILL len, KILL cr);
  format %{ "has negatives byte[] $ary1,$len -> $result" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}
16456 
// fast char[] to byte[] compression
// Compact Strings support: narrow len 16-bit chars at src to bytes at dst
// using SIMD temporaries V0-V3; result reports success/progress as computed
// by MacroAssembler::char_array_compress.
instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                         vRegD_V0 tmp1, vRegD_V1 tmp2,
                         vRegD_V2 tmp3, vRegD_V3 tmp4,
                         iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL R1, R2, R3, R4" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$FloatRegister, $tmp2$$FloatRegister,
                           $tmp3$$FloatRegister, $tmp4$$FloatRegister,
                           $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast byte[] to char[] inflation
// Inverse of string_compress: widen len bytes at src to 16-bit chars at dst.
// Produces no value (Universe dummy); only the copy side effect matters.
instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len,
                        vRegD tmp1, vRegD tmp2, vRegD tmp3, iRegP_R3 tmp4, rFlagsReg cr)
%{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister, $tmp4$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
16490 
// encode char[] to byte[] in ISO_8859_1
// result is the number of chars successfully encoded, as computed by
// MacroAssembler::encode_iso_array; SIMD temps V0-V3 are clobbered (KILL).
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                          vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
                          vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
                          iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
         $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
         $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
  %}
  ins_pipe( pipe_class_memory );
%}
16509 
16510 // ============================================================================
16511 // This name is KNOWN by the ADLC and cannot be changed.
16512 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
16513 // for this guy.
16514 instruct tlsLoadP(thread_RegP dst)
16515 %{
16516   match(Set dst (ThreadLocal));
16517 
16518   ins_cost(0);
16519 
16520   format %{ " -- \t// $dst=Thread::current(), empty" %}
16521 
16522   size(0);
16523 
16524   ins_encode( /*empty*/ );
16525 
16526   ins_pipe(pipe_class_empty);
16527 %}
16528 
16529 // ====================VECTOR INSTRUCTIONS=====================================
16530 
// Load vector (32 bits)
// Vector load/store rules select on the vector payload size in bytes
// (memory_size) and dispatch to the S/D/Q-sized ldr/str encodings.
instruct loadV4(vecD dst, vmem4 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_ldrvS(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}

// Load vector (64 bits)
instruct loadV8(vecD dst, vmem8 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_ldrvD(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}

// Load Vector (128 bits)
instruct loadV16(vecX dst, vmem16 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_ldrvQ(dst, mem) );
  ins_pipe(vload_reg_mem128);
%}

// Store Vector (32 bits)
instruct storeV4(vecD src, vmem4 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strs   $mem,$src\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_strvS(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}

// Store Vector (64 bits)
instruct storeV8(vecD src, vmem8 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strd   $mem,$src\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_strvD(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}

// Store Vector (128 bits)
instruct storeV16(vecX src, vmem16 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strq   $mem,$src\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_strvQ(src, mem) );
  ins_pipe(vstore_reg_mem128);
%}
16596 
// ReplicateB/S (vector splat) rules.  Register-source forms emit a NEON
// dup from a GPR; immediate forms emit movi/mov with the constant masked
// to the lane width.  The shorter-vector lengths (4B, 2S) reuse the 64-bit
// rules via the predicate's length alternatives.
instruct replicate8B(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

instruct replicate16B(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

instruct replicate8B_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8B)" %}
  ins_encode %{
    // Mask to 8 bits: each byte lane gets the low byte of the constant.
    __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

instruct replicate16B_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(16B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}

instruct replicate4S(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

instruct replicate8S(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

instruct replicate4S_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4H)" %}
  ins_encode %{
    // Mask to 16 bits for the halfword lanes.
    __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

instruct replicate8S_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
16696 
// ReplicateI/L splat rules: dup from GPR, or mov of the immediate.
// Int lanes are 32 bits wide so no masking of the constant is needed.
instruct replicate2I(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

instruct replicate4I(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

instruct replicate2I_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(2I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm64);
%}

instruct replicate4I_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm128);
%}

instruct replicate2L(vecX dst, iRegL src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2L)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
16756 
// Zero a 128-bit vector.
// NOTE(review): despite the 2L name this rule matches ReplicateI with an
// immI0 operand — presumably how a zeroed long vector reaches the matcher;
// confirm against the matcher's Replicate node canonicalization.  Also note
// the format text says "movi" but the encoding actually emits
// eor dst, dst, dst (self-XOR), a standard way to zero a register.
instruct replicate2L_zero(vecX dst, immI0 zero)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $zero\t# vector(4I)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($dst$$reg),
           as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vmovi_reg_imm128);
%}
16770 
// ReplicateF/D splat rules: dup lane 0 of an FP register across the vector.
instruct replicate2F(vecD dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg64);
%}

instruct replicate4F(vecX dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg128);
%}

instruct replicate2D(vecX dst, vRegD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_dreg128);
%}
16809 
16810 // ====================REDUCTION ARITHMETIC====================================
16811 
// Add-reduction of a 2-lane int vector: extract both lanes with umov and
// add them (plus the scalar src1) with two addw instructions.
instruct reduce_add2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegINoSp tmp, iRegINoSp tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "umov  $tmp2, $src2, S, 1\n\t"
            "addw  $dst, $src1, $tmp\n\t"
            "addw  $dst, $dst, $tmp2\t add reduction2i"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ addw($dst$$Register, $src1$$Register, $tmp$$Register);
    __ addw($dst$$Register, $dst$$Register, $tmp2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Add-reduction of a 4-lane int vector: a single NEON addv (across-lanes
// add) into tmp, then extract lane 0 and add the scalar src1.
instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegINoSp tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "addv  $tmp, T4S, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "addw  $dst, $tmp2, $src1\t add reduction4i"
  %}
  ins_encode %{
    __ addv(as_FloatRegister($tmp$$reg), __ T4S,
            as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ addw($dst$$Register, $tmp2$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16848 
// Multiply-reduction of a 2-lane int vector: extract each lane and chain
// scalar multiplies through dst (dst is TEMP because it is written early).
instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegINoSp tmp)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "mul   $dst, $tmp, $src1\n\t"
            "umov  $tmp, $src2, S, 1\n\t"
            "mul   $dst, $tmp, $dst\t mul reduction2i\n\t"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp$$Register, $src1$$Register);
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Multiply-reduction of a 4-lane int vector: fold the high D-half onto the
// low half (ins + 2S vector multiply), then finish with two scalar
// multiplies of the remaining two lanes and src1.
instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegINoSp tmp2)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  format %{ "ins   $tmp, $src2, 0, 1\n\t"
            "mul   $tmp, $tmp, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "mul   $dst, $tmp2, $src1\n\t"
            "umov  $tmp2, $tmp, S, 1\n\t"
            "mul   $dst, $tmp2, $dst\t mul reduction4i\n\t"
  %}
  ins_encode %{
    // Copy the upper 64 bits (lanes 2,3) into tmp's lower 64 bits.
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    // Lane-wise multiply: tmp[0]=s2*s0... pairs lanes {0,2} and {1,3}.
    __ mulv(as_FloatRegister($tmp$$reg), __ T2S,
           as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp2$$Register, $src1$$Register);
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp2$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16892 
// Float add-reductions are done as a strictly ordered chain of scalar
// fadds (lane 0, then each higher lane moved down with ins) because Java
// requires a fixed left-to-right FP summation order.
instruct reduce_add2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\t add reduction2f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}

// 4-lane version of the ordered float add-reduction above.
instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fadds $dst, $dst, $tmp\t add reduction4f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16944 
// Multiply-reduction of a 2-lane float vector, as an ordered chain of
// scalar fmuls (lane 1 is moved down to tmp's lane 0 with ins first).
instruct reduce_mul2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Fixed: trailing comment previously said "add reduction4f" although this
  // rule is a 2-lane multiply reduction.
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction2f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16964 
// Multiply-reduction of a 4-lane float vector: ordered chain of scalar
// fmuls, moving each successive lane down to tmp's lane 0 with ins.
instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Fixed: trailing comment previously said "add reduction4f" although this
  // rule is a multiply reduction.
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction4f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16996 
// Add-reduction of a 2-lane double vector: scalar faddd of src1 and lane 0,
// move lane 1 down with ins, then a second faddd (ordered, as Java requires).
instruct reduce_add2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "faddd $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "faddd $dst, $dst, $tmp\t add reduction2d"
  %}
  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
17016 
// Multiply-reduction of a 2-lane double vector: same shape as the add
// reduction above but with fmuld.
instruct reduce_mul2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Fixed: trailing comment previously said "add reduction2d" although this
  // rule is a multiply reduction.
  format %{ "fmuld $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "fmuld $dst, $dst, $tmp\t mul reduction2d"
  %}
  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
17036 
17037 // ====================VECTOR ARITHMETIC=======================================
17038 
17039 // --------------------------------- ADD --------------------------------------
17040 
// Element-wise vector add. One instruct per lane arrangement; the
// predicate on vector length selects between the 64-bit (vecD) and
// 128-bit (vecX) register forms.

// 8 byte lanes in a 64-bit register (also covers 4-byte vectors).
instruct vadd8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// 16 byte lanes in a 128-bit register.
instruct vadd16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// 4 halfword (short) lanes in a 64-bit register (also covers 2-short vectors).
instruct vadd4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// 8 halfword (short) lanes in a 128-bit register.
instruct vadd8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// 2 int lanes in a 64-bit register.
instruct vadd2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// 4 int lanes in a 128-bit register.
instruct vadd4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// 2 long lanes in a 128-bit register.
instruct vadd2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// 2 float lanes in a 64-bit register.
instruct vadd2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}

// 4 float lanes in a 128-bit register.
instruct vadd4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
17168 
// 2 double lanes in a 128-bit register.
// Added the length == 2 predicate for consistency with the sibling 2D
// rules (vsub2D, vmul2D, vdiv2D); previously this rule matched any AddVD
// unconditionally.
instruct vadd2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
17181 
17182 // --------------------------------- SUB --------------------------------------
17183 
// Element-wise vector subtract: dst = src1 - src2, one instruct per
// lane arrangement (mirrors the ADD section above).

// 8 byte lanes, 64-bit register (also covers 4-byte vectors).
instruct vsub8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// 16 byte lanes, 128-bit register.
instruct vsub16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// 4 halfword lanes, 64-bit register (also covers 2-short vectors).
instruct vsub4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// 8 halfword lanes, 128-bit register.
instruct vsub8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// 2 int lanes, 64-bit register.
instruct vsub2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}

// 4 int lanes, 128-bit register.
instruct vsub4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// 2 long lanes, 128-bit register.
instruct vsub2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}

// 2 float lanes, 64-bit register.
instruct vsub2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}

// 4 float lanes, 128-bit register.
instruct vsub4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}

// 2 double lanes, 128-bit register.
instruct vsub2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
17325 
17326 // --------------------------------- MUL --------------------------------------
17327 
// Element-wise vector multiply. Note there is no byte (B) or long (L)
// integer multiply here — only H/S integer lanes and S/D float lanes
// are provided in this section.

// 4 halfword lanes, 64-bit register (also covers 2-short vectors).
instruct vmul4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}

// 8 halfword lanes, 128-bit register.
instruct vmul8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}

// 2 int lanes, 64-bit register.
instruct vmul2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}

// 4 int lanes, 128-bit register.
instruct vmul4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}

// 2 float lanes, 64-bit register.
instruct vmul2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// 4 float lanes, 128-bit register.
instruct vmul4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// 2 double lanes, 128-bit register.
instruct vmul2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
17426 
17427 // --------------------------------- MLA --------------------------------------
17428 
// Multiply-accumulate: dst = dst + src1 * src2. Integer forms match the
// explicit Add(dst, Mul(src1, src2)) shape; float/double forms match the
// FmaV* ideal nodes (only generated when UseFMA is on) and emit fused fmla.

// 4 halfword lanes, 64-bit register (also covers 2-short vectors).
instruct vmla4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// 8 halfword lanes, 128-bit register.
instruct vmla8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// 2 int lanes, 64-bit register.
instruct vmla2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// 4 int lanes, 128-bit register.
instruct vmla4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// dst + src1 * src2 (fused, 2 float lanes)
instruct vmla2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVF  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (2S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// dst + src1 * src2 (fused, 4 float lanes)
instruct vmla4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set dst (FmaVF  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (4S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// dst + src1 * src2 (fused, 2 double lanes)
instruct vmla2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVD  dst (Binary src1 src2)));
  format %{ "fmla  $dst,$src1,$src2\t# vector (2D)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmla(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
17527 
17528 // --------------------------------- MLS --------------------------------------
17529 
// Multiply-subtract: dst = dst - src1 * src2. Integer forms match the
// explicit Sub(dst, Mul(src1, src2)) shape; float/double forms match
// FmaV* with a NegV* wrapped around either multiplicand (the two match
// rules cover both fma(-a, b, c) and fma(a, -b, c)) and emit fused fmls.

// 4 halfword lanes, 64-bit register (also covers 2-short vectors).
instruct vmls4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// 8 halfword lanes, 128-bit register.
instruct vmls8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// 2 int lanes, 64-bit register.
instruct vmls2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}

// 4 int lanes, 128-bit register.
instruct vmls4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}

// dst - src1 * src2 (fused, 2 float lanes)
instruct vmls2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
  match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (2S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// dst - src1 * src2 (fused, 4 float lanes)
instruct vmls4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
  match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (4S)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// dst - src1 * src2 (fused, 2 double lanes)
instruct vmls2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVD  dst (Binary (NegVD src1) src2)));
  match(Set dst (FmaVD  dst (Binary src1 (NegVD src2))));
  format %{ "fmls  $dst,$src1,$src2\t# vector (2D)" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ fmls(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
17631 
17632 // --------------------------------- DIV --------------------------------------
17633 
// Element-wise vector FP divide (float/double lanes only — there is no
// integer vector divide on this target).

// 2 float lanes, 64-bit register.
instruct vdiv2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}

// 4 float lanes, 128-bit register.
instruct vdiv4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}

// 2 double lanes, 128-bit register.
instruct vdiv2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
17675 
17676 // --------------------------------- SQRT -------------------------------------
17677 
// Element-wise square root of a 2-lane double vector.
// NOTE(review): only the 2D form is provided in this section; no
// ins_cost is specified here unlike neighboring rules — presumably
// intentional (default cost), but worth confirming.
instruct vsqrt2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "fsqrt  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ fsqrt(as_FloatRegister($dst$$reg), __ T2D,
             as_FloatRegister($src$$reg));
  %}
  ins_pipe(vsqrt_fp128);
%}
17689 
17690 // --------------------------------- ABS --------------------------------------
17691 
// Element-wise FP absolute value (float/double lanes).

// 2 float lanes, 64-bit register.
instruct vabs2F(vecD dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AbsVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (2S)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp64);
%}

// 4 float lanes, 128-bit register.
instruct vabs4F(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AbsVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (4S)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}

// 2 double lanes, 128-bit register.
instruct vabs2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AbsVD src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (2D)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
17730 
17731 // --------------------------------- NEG --------------------------------------
17732 
// Element-wise FP negation (float/double lanes).

// 2 float lanes, 64-bit register.
instruct vneg2F(vecD dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (NegVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (2S)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp64);
%}

// 4 float lanes, 128-bit register.
instruct vneg4F(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (NegVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (4S)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}

// 2 double lanes, 128-bit register.
instruct vneg2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (NegVD src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (2D)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
17771 
17772 // --------------------------------- AND --------------------------------------
17773 
// Bitwise AND. Logical ops are lane-agnostic, so the predicate keys on
// total byte length rather than element count.

// 4- or 8-byte payload, 64-bit register.
instruct vand8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}

// 16-byte payload, 128-bit register.
instruct vand16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
17802 
17803 // --------------------------------- OR ---------------------------------------
17804 
// Bitwise OR, 4- or 8-byte payload in a 64-bit register. The predicate
// keys on byte length because logical ops are lane-agnostic.
// Fixed the format string: it previously printed "and" even though the
// rule emits an orr instruction (copy-paste from the AND section).
instruct vor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
17819 
// Bitwise OR, 16-byte payload in a 128-bit register.
instruct vor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
17833 
17834 // --------------------------------- XOR --------------------------------------
17835 
// Bitwise XOR (emitted as the eor instruction). Predicate keys on byte
// length, as for the other lane-agnostic logical ops.

// 4- or 8-byte payload, 64-bit register.
instruct vxor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}

// 16-byte payload, 128-bit register.
instruct vxor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
17864 
17865 // ------------------------------ Shift ---------------------------------------
17866 
// Materialize a variable shift count: broadcast the GPR count into every
// byte lane of a vector register for use by sshl/ushl below.
instruct vshiftcntL(vecX dst, iRegIorL2I cnt) %{
  match(Set dst (LShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count (vecX)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}

// Right shifts on aarch64 SIMD are implemented as left shift by -ve amount
// (sshl/ushl with a negative count shift right), so the broadcast count
// is negated after the dup.
instruct vshiftcntR(vecX dst, iRegIorL2I cnt) %{
  match(Set dst (RShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count (vecX)\n\tneg  $dst, $dst\t T16B" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
    __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
17886 
// Variable-count byte shifts. sshl serves both LShiftVB and RShiftVB
// because vshiftcntR (above) negates the count, and sshl with a negative
// count performs an arithmetic right shift. ushl handles the unsigned
// (logical) right shift the same way.

// Signed shift, 8 byte lanes, 64-bit register (also covers 4-byte vectors).
instruct vsll8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// Signed shift, 16 byte lanes, 128-bit register.
instruct vsll16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

// Unsigned (logical) right shift, 8 byte lanes, 64-bit register.
instruct vsrl8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// Unsigned (logical) right shift, 16 byte lanes, 128-bit register.
instruct vsrl16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17942 
instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
  // Left shift of 4 or 8 byte lanes by an immediate.  The count is
  // masked to 0..31 (int shift semantics, lanes are promoted bytes).
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    // Shifting a byte lane left by 8 or more clears it, and SHL cannot
    // encode such a count, so zero dst via eor(dst, src, src) instead.
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}
17962 
instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
  // Left shift of 16 byte lanes by an immediate (count masked to 0..31).
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    // Counts >= 8 clear every byte lane; SHL cannot encode that, so
    // zero dst with eor(dst, src, src).
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17981 
instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
  // Arithmetic right shift of 4 or 8 byte lanes by an immediate.
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    // Arithmetic right shift saturates: counts >= 8 act like 7 (every
    // lane becomes its sign).  The count is then passed in the
    // negate-and-mask form the assembler's shift-immediate encoding
    // expects -- NOTE(review): confirm against Assembler::sshr.
    if (sh >= 8) sh = 7;
    sh = -sh & 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T8B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}
17997 
instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
  // Arithmetic right shift of 16 byte lanes by an immediate.
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    // Saturate counts >= 8 to 7, then negate-and-mask into the form
    // the assembler expects -- NOTE(review): confirm encoding.
    if (sh >= 8) sh = 7;
    sh = -sh & 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}
18012 
instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
  // Logical right shift of 4 or 8 byte lanes by an immediate.
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    // Counts >= 8 clear every byte lane; USHR cannot encode that, so
    // zero dst with eor(dst, src, src).  Otherwise pass the count in
    // the negate-and-mask form the assembler expects -- NOTE(review):
    // confirm against Assembler::ushr.
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), -sh & 7);
    }
  %}
  ins_pipe(vshift64_imm);
%}
18032 
instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
  // Logical right shift of 16 byte lanes by an immediate.
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    // Counts >= 8 clear every byte lane; zero dst via eor(dst, src,
    // src).  Otherwise negate-and-mask the count for the assembler.
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), -sh & 7);
    }
  %}
  ins_pipe(vshift128_imm);
%}
18051 
instruct vsll4S(vecD dst, vecD src, vecX shift) %{
  // Vector shift of 2 or 4 short (16-bit) lanes by per-lane register
  // counts.  Also matches RShiftVS: SSHL shifts right for negative lane
  // counts -- NOTE(review): assumes the shift vector is negated
  // upstream for RShiftVS; confirm.
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
18066 
instruct vsll8S(vecX dst, vecX src, vecX shift) %{
  // Vector shift of 8 short lanes by per-lane register counts; also
  // matches RShiftVS (negative counts shift right) -- NOTE(review):
  // assumes upstream negation for RShiftVS; confirm.
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
18080 
instruct vsrl4S(vecD dst, vecD src, vecX shift) %{
  // Logical right shift of 2 or 4 short lanes by per-lane register
  // counts -- NOTE(review): relies on pre-negated shift vector; confirm.
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
18094 
instruct vsrl8S(vecX dst, vecX src, vecX shift) %{
  // Logical right shift of 8 short lanes by per-lane register counts
  // -- NOTE(review): relies on pre-negated shift vector; confirm.
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
18107 
instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
  // Left shift of 2 or 4 short lanes by an immediate.
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    // Counts >= 16 clear every 16-bit lane; SHL cannot encode that,
    // so zero dst via eor(dst, src, src) (byte arrangement is fine
    // for a pure zeroing XOR).
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}
18127 
instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
  // Left shift of 8 short lanes by an immediate.
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    // Counts >= 16 clear every 16-bit lane; zero dst via
    // eor(dst, src, src).
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
18146 
instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
  // Arithmetic right shift of 2 or 4 short lanes by an immediate.
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    // Saturate counts >= 16 to 15 (all lanes become the sign), then
    // negate-and-mask for the assembler -- NOTE(review): confirm
    // against Assembler::sshr.
    if (sh >= 16) sh = 15;
    sh = -sh & 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T4H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}
18162 
instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
  // Arithmetic right shift of 8 short lanes by an immediate.
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    // Saturate counts >= 16 to 15, then negate-and-mask for the
    // assembler -- NOTE(review): confirm encoding.
    if (sh >= 16) sh = 15;
    sh = -sh & 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T8H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}
18177 
instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  // Logical right shift of 2 or 4 short lanes by an immediate.
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    // Counts >= 16 clear every 16-bit lane; zero dst via
    // eor(dst, src, src).  Otherwise negate-and-mask the count for
    // the assembler -- NOTE(review): confirm against Assembler::ushr.
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), -sh & 15);
    }
  %}
  ins_pipe(vshift64_imm);
%}
18197 
instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  // Logical right shift of 8 short lanes by an immediate.
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    // Counts >= 16 clear every 16-bit lane; zero dst via
    // eor(dst, src, src).  Otherwise negate-and-mask the count.
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), -sh & 15);
    }
  %}
  ins_pipe(vshift128_imm);
%}
18216 
instruct vsll2I(vecD dst, vecD src, vecX shift) %{
  // Vector shift of 2 int lanes by per-lane register counts; also
  // matches RShiftVI (negative counts shift right) -- NOTE(review):
  // assumes upstream negation for RShiftVI; confirm.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
18230 
instruct vsll4I(vecX dst, vecX src, vecX shift) %{
  // Vector shift of 4 int lanes by per-lane register counts; also
  // matches RShiftVI (negative counts shift right) -- NOTE(review):
  // assumes upstream negation for RShiftVI; confirm.
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
18244 
instruct vsrl2I(vecD dst, vecD src, vecX shift) %{
  // Logical right shift of 2 int lanes by per-lane register counts
  // -- NOTE(review): relies on pre-negated shift vector; confirm.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
18257 
instruct vsrl4I(vecX dst, vecX src, vecX shift) %{
  // Logical right shift of 4 int lanes by per-lane register counts
  // -- NOTE(review): relies on pre-negated shift vector; confirm.
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
18270 
instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
  // Left shift of 2 int lanes by an immediate.  Masking with 31
  // matches the 32-bit lane width exactly, so no clamp/zeroing branch
  // is needed (unlike the byte/short variants).
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}
18283 
instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
  // Left shift of 4 int lanes by an immediate; the & 31 mask matches
  // the 32-bit lane width, so no clamp branch is needed.
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}
18296 
instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  // Arithmetic right shift of 2 int lanes by an immediate.  The count
  // is negated-and-masked for the assembler's shift-immediate encoding
  // -- NOTE(review): confirm against Assembler::sshr.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}
18309 
instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  // Arithmetic right shift of 4 int lanes by an immediate; count is
  // negated-and-masked for the assembler -- NOTE(review): confirm.
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}
18322 
instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  // Logical right shift of 2 int lanes by an immediate; count is
  // negated-and-masked for the assembler -- NOTE(review): confirm.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}
18335 
instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  // Logical right shift of 4 int lanes by an immediate; count is
  // negated-and-masked for the assembler -- NOTE(review): confirm.
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}
18348 
instruct vsll2L(vecX dst, vecX src, vecX shift) %{
  // Vector shift of 2 long lanes by per-lane register counts; also
  // matches RShiftVL (negative counts shift right) -- NOTE(review):
  // assumes upstream negation for RShiftVL; confirm.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
18362 
instruct vsrl2L(vecX dst, vecX src, vecX shift) %{
  // Logical right shift of 2 long lanes by per-lane register counts
  // -- NOTE(review): relies on pre-negated shift vector; confirm.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
18375 
instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
  // Left shift of 2 long lanes by an immediate; the & 63 mask matches
  // the 64-bit lane width, so no clamp branch is needed.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}
18388 
instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
  // Arithmetic right shift of 2 long lanes by an immediate; count is
  // negated-and-masked for the assembler -- NOTE(review): confirm
  // against Assembler::sshr.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}
18401 
instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
  // Logical right shift of 2 long lanes by an immediate; count is
  // negated-and-masked for the assembler -- NOTE(review): confirm
  // against Assembler::ushr.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}
18414 
18415 //----------PEEPHOLE RULES-----------------------------------------------------
18416 // These must follow all instruction definitions as they use the names
18417 // defined in the instructions definitions.
18418 //
18419 // peepmatch ( root_instr_name [preceding_instruction]* );
18420 //
18421 // peepconstraint %{
18422 // (instruction_number.operand_name relational_op instruction_number.operand_name
18423 //  [, ...] );
18424 // // instruction numbers are zero-based using left to right order in peepmatch
18425 //
18426 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
18427 // // provide an instruction_number.operand_name for each operand that appears
18428 // // in the replacement instruction's match rule
18429 //
18430 // ---------VM FLAGS---------------------------------------------------------
18431 //
18432 // All peephole optimizations can be turned off using -XX:-OptoPeephole
18433 //
18434 // Each peephole rule is given an identifying number starting with zero and
18435 // increasing by one in the order seen by the parser.  An individual peephole
18436 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
18437 // on the command-line.
18438 //
18439 // ---------CURRENT LIMITATIONS----------------------------------------------
18440 //
18441 // Only match adjacent instructions in same basic block
18442 // Only equality constraints
18443 // Only constraints between operands, not (0.dest_reg == RAX_enc)
18444 // Only one replacement instruction
18445 //
18446 // ---------EXAMPLE----------------------------------------------------------
18447 //
18448 // // pertinent parts of existing instructions in architecture description
18449 // instruct movI(iRegINoSp dst, iRegI src)
18450 // %{
18451 //   match(Set dst (CopyI src));
18452 // %}
18453 //
18454 // instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
18455 // %{
18456 //   match(Set dst (AddI dst src));
18457 //   effect(KILL cr);
18458 // %}
18459 //
18460 // // Change (inc mov) to lea
18461 // peephole %{
//   // increment preceded by register-register move
18463 //   peepmatch ( incI_iReg movI );
18464 //   // require that the destination register of the increment
18465 //   // match the destination register of the move
18466 //   peepconstraint ( 0.dst == 1.dst );
18467 //   // construct a replacement instruction that sets
18468 //   // the destination to ( move's source register + one )
18469 //   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
18470 // %}
18471 //
18472 
18473 // Implementation no longer uses movX instructions since
18474 // machine-independent system no longer uses CopyX nodes.
18475 //
18476 // peephole
18477 // %{
18478 //   peepmatch (incI_iReg movI);
18479 //   peepconstraint (0.dst == 1.dst);
18480 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
18481 // %}
18482 
18483 // peephole
18484 // %{
18485 //   peepmatch (decI_iReg movI);
18486 //   peepconstraint (0.dst == 1.dst);
18487 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
18488 // %}
18489 
18490 // peephole
18491 // %{
18492 //   peepmatch (addI_iReg_imm movI);
18493 //   peepconstraint (0.dst == 1.dst);
18494 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
18495 // %}
18496 
18497 // peephole
18498 // %{
18499 //   peepmatch (incL_iReg movL);
18500 //   peepconstraint (0.dst == 1.dst);
18501 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
18502 // %}
18503 
18504 // peephole
18505 // %{
18506 //   peepmatch (decL_iReg movL);
18507 //   peepconstraint (0.dst == 1.dst);
18508 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
18509 // %}
18510 
18511 // peephole
18512 // %{
18513 //   peepmatch (addL_iReg_imm movL);
18514 //   peepconstraint (0.dst == 1.dst);
18515 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
18516 // %}
18517 
18518 // peephole
18519 // %{
18520 //   peepmatch (addP_iReg_imm movP);
18521 //   peepconstraint (0.dst == 1.dst);
18522 //   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
18523 // %}
18524 
18525 // // Change load of spilled value to only a spill
18526 // instruct storeI(memory mem, iRegI src)
18527 // %{
18528 //   match(Set mem (StoreI mem src));
18529 // %}
18530 //
18531 // instruct loadI(iRegINoSp dst, memory mem)
18532 // %{
18533 //   match(Set dst (LoadI mem));
18534 // %}
18535 //
18536 
18537 //----------SMARTSPILL RULES---------------------------------------------------
18538 // These must follow all instruction definitions as they use the names
18539 // defined in the instructions definitions.
18540 
18541 // Local Variables:
18542 // mode: c++
18543 // End: