1 //
2 // Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
3 // Copyright (c) 2014, Red Hat Inc. All rights reserved.
4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 //
6 // This code is free software; you can redistribute it and/or modify it
7 // under the terms of the GNU General Public License version 2 only, as
8 // published by the Free Software Foundation.
9 //
10 // This code is distributed in the hope that it will be useful, but WITHOUT
11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 // version 2 for more details (a copy is included in the LICENSE file that
14 // accompanied this code).
15 //
16 // You should have received a copy of the GNU General Public License version
17 // 2 along with this work; if not, write to the Free Software Foundation,
18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 //
20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 // or visit www.oracle.com if you need additional information or have any
22 // questions.
23 //
24 //
25
26 // AArch64 Architecture Description File
27
28 //----------REGISTER DEFINITION BLOCK------------------------------------------
29 // This information is used by the matcher and the register allocator to
30 // describe individual registers and classes of registers within the target
31 // architecture.
32
33 register %{
34 //----------Architecture Description Register Definitions----------------------
35 // General Registers
36 // "reg_def" name ( register save type, C convention save type,
37 // ideal register type, encoding );
38 // Register Save Types:
39 //
40 // NS = No-Save: The register allocator assumes that these registers
41 // can be used without saving upon entry to the method, &
42 // that they do not need to be saved at call sites.
43 //
44 // SOC = Save-On-Call: The register allocator assumes that these registers
45 // can be used without saving upon entry to the method,
46 // but that they must be saved at call sites.
47 //
48 // SOE = Save-On-Entry: The register allocator assumes that these registers
49 // must be saved before using them upon entry to the
50 // method, but they do not need to be saved at call
51 // sites.
52 //
53 // AS = Always-Save: The register allocator assumes that these registers
54 // must be saved before using them upon entry to the
55 // method, & that they must be saved at call sites.
56 //
57 // Ideal Register Type is used to determine how to save & restore a
58 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
59 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
60 //
61 // The encoding number is the actual bit-pattern placed into the opcodes.
62
63 // We must define the 64 bit int registers in two 32 bit halves, the
64 // real lower register and a virtual upper half register. upper halves
65 // are used by the register allocator but are not actually supplied as
66 // operands to memory ops.
67 //
68 // follow the C1 compiler in making registers
69 //
70 // r0-r7,r10-r26 volatile (caller save)
71 // r27-r31 system (no save, no allocate)
72 // r8-r9 invisible to the allocator (so we can use them as scratch regs)
73 //
74 // as regards Java usage. we don't use any callee save registers
75 // because this makes it difficult to de-optimise a frame (see comment
76 // in x86 implementation of Deoptimization::unwind_callee_save_values)
77 //
78
79 // General Registers
80
81 reg_def R0 ( SOC, SOC, Op_RegI, 0, r0->as_VMReg() );
82 reg_def R0_H ( SOC, SOC, Op_RegI, 0, r0->as_VMReg()->next() );
83 reg_def R1 ( SOC, SOC, Op_RegI, 1, r1->as_VMReg() );
84 reg_def R1_H ( SOC, SOC, Op_RegI, 1, r1->as_VMReg()->next() );
85 reg_def R2 ( SOC, SOC, Op_RegI, 2, r2->as_VMReg() );
86 reg_def R2_H ( SOC, SOC, Op_RegI, 2, r2->as_VMReg()->next() );
87 reg_def R3 ( SOC, SOC, Op_RegI, 3, r3->as_VMReg() );
88 reg_def R3_H ( SOC, SOC, Op_RegI, 3, r3->as_VMReg()->next() );
89 reg_def R4 ( SOC, SOC, Op_RegI, 4, r4->as_VMReg() );
90 reg_def R4_H ( SOC, SOC, Op_RegI, 4, r4->as_VMReg()->next() );
91 reg_def R5 ( SOC, SOC, Op_RegI, 5, r5->as_VMReg() );
92 reg_def R5_H ( SOC, SOC, Op_RegI, 5, r5->as_VMReg()->next() );
93 reg_def R6 ( SOC, SOC, Op_RegI, 6, r6->as_VMReg() );
94 reg_def R6_H ( SOC, SOC, Op_RegI, 6, r6->as_VMReg()->next() );
95 reg_def R7 ( SOC, SOC, Op_RegI, 7, r7->as_VMReg() );
96 reg_def R7_H ( SOC, SOC, Op_RegI, 7, r7->as_VMReg()->next() );
97 reg_def R10 ( SOC, SOC, Op_RegI, 10, r10->as_VMReg() );
98 reg_def R10_H ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
99 reg_def R11 ( SOC, SOC, Op_RegI, 11, r11->as_VMReg() );
100 reg_def R11_H ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
101 reg_def R12 ( SOC, SOC, Op_RegI, 12, r12->as_VMReg() );
102 reg_def R12_H ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
103 reg_def R13 ( SOC, SOC, Op_RegI, 13, r13->as_VMReg() );
104 reg_def R13_H ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
105 reg_def R14 ( SOC, SOC, Op_RegI, 14, r14->as_VMReg() );
106 reg_def R14_H ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
107 reg_def R15 ( SOC, SOC, Op_RegI, 15, r15->as_VMReg() );
108 reg_def R15_H ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
109 reg_def R16 ( SOC, SOC, Op_RegI, 16, r16->as_VMReg() );
110 reg_def R16_H ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
111 reg_def R17 ( SOC, SOC, Op_RegI, 17, r17->as_VMReg() );
112 reg_def R17_H ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
113 reg_def R18 ( SOC, SOC, Op_RegI, 18, r18->as_VMReg() );
114 reg_def R18_H ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
115 reg_def R19 ( SOC, SOE, Op_RegI, 19, r19->as_VMReg() );
116 reg_def R19_H ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
117 reg_def R20 ( SOC, SOE, Op_RegI, 20, r20->as_VMReg() ); // caller esp
118 reg_def R20_H ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
119 reg_def R21 ( SOC, SOE, Op_RegI, 21, r21->as_VMReg() );
120 reg_def R21_H ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
121 reg_def R22 ( SOC, SOE, Op_RegI, 22, r22->as_VMReg() );
122 reg_def R22_H ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
123 reg_def R23 ( SOC, SOE, Op_RegI, 23, r23->as_VMReg() );
124 reg_def R23_H ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
125 reg_def R24 ( SOC, SOE, Op_RegI, 24, r24->as_VMReg() );
126 reg_def R24_H ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
127 reg_def R25 ( SOC, SOE, Op_RegI, 25, r25->as_VMReg() );
128 reg_def R25_H ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
129 reg_def R26 ( SOC, SOE, Op_RegI, 26, r26->as_VMReg() );
130 reg_def R26_H ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
131 reg_def R27 ( NS, SOE, Op_RegI, 27, r27->as_VMReg() ); // heapbase
132 reg_def R27_H ( NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());
133 reg_def R28 ( NS, SOE, Op_RegI, 28, r28->as_VMReg() ); // thread
134 reg_def R28_H ( NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
135 reg_def R29 ( NS, NS, Op_RegI, 29, r29->as_VMReg() ); // fp
136 reg_def R29_H ( NS, NS, Op_RegI, 29, r29->as_VMReg()->next());
137 reg_def R30 ( NS, NS, Op_RegI, 30, r30->as_VMReg() ); // lr
138 reg_def R30_H ( NS, NS, Op_RegI, 30, r30->as_VMReg()->next());
139 reg_def R31 ( NS, NS, Op_RegI, 31, r31_sp->as_VMReg() ); // sp
140 reg_def R31_H ( NS, NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
141
142 // ----------------------------
143 // Float/Double Registers
144 // ----------------------------
145
146 // Double Registers
147
148 // The rules of ADL require that double registers be defined in pairs.
149 // Each pair must be two 32-bit values, but not necessarily a pair of
150 // single float registers. In each pair, ADLC-assigned register numbers
151 // must be adjacent, with the lower number even. Finally, when the
152 // CPU stores such a register pair to memory, the word associated with
153 // the lower ADLC-assigned number must be stored to the lower address.
154
155 // AArch64 has 32 floating-point registers. Each can store a vector of
156 // single or double precision floating-point values up to 8 * 32
157 // floats, 4 * 64 bit floats or 2 * 128 bit floats. We currently only
158 // use the first float or double element of the vector.
159
160 // For Java use, float registers v0-v15 are always save-on-call
161 // (whereas the platform ABI treats v8-v15 as callee save). Float
162 // registers v16-v31 are SOC as per the platform spec.
163
164 reg_def V0 ( SOC, SOC, Op_RegF, 0, v0->as_VMReg() );
165 reg_def V0_H ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next() );
166 reg_def V0_J ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(2) );
167 reg_def V0_K ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(3) );
168
169 reg_def V1 ( SOC, SOC, Op_RegF, 1, v1->as_VMReg() );
170 reg_def V1_H ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next() );
171 reg_def V1_J ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(2) );
172 reg_def V1_K ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(3) );
173
174 reg_def V2 ( SOC, SOC, Op_RegF, 2, v2->as_VMReg() );
175 reg_def V2_H ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next() );
176 reg_def V2_J ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(2) );
177 reg_def V2_K ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(3) );
178
179 reg_def V3 ( SOC, SOC, Op_RegF, 3, v3->as_VMReg() );
180 reg_def V3_H ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next() );
181 reg_def V3_J ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(2) );
182 reg_def V3_K ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(3) );
183
184 reg_def V4 ( SOC, SOC, Op_RegF, 4, v4->as_VMReg() );
185 reg_def V4_H ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next() );
186 reg_def V4_J ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(2) );
187 reg_def V4_K ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(3) );
188
189 reg_def V5 ( SOC, SOC, Op_RegF, 5, v5->as_VMReg() );
190 reg_def V5_H ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next() );
191 reg_def V5_J ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(2) );
192 reg_def V5_K ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(3) );
193
194 reg_def V6 ( SOC, SOC, Op_RegF, 6, v6->as_VMReg() );
195 reg_def V6_H ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next() );
196 reg_def V6_J ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(2) );
197 reg_def V6_K ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(3) );
198
199 reg_def V7 ( SOC, SOC, Op_RegF, 7, v7->as_VMReg() );
200 reg_def V7_H ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next() );
201 reg_def V7_J ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(2) );
202 reg_def V7_K ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(3) );
203
204 reg_def V8 ( SOC, SOC, Op_RegF, 8, v8->as_VMReg() );
205 reg_def V8_H ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next() );
206 reg_def V8_J ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(2) );
207 reg_def V8_K ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(3) );
208
209 reg_def V9 ( SOC, SOC, Op_RegF, 9, v9->as_VMReg() );
210 reg_def V9_H ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next() );
211 reg_def V9_J ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(2) );
212 reg_def V9_K ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(3) );
213
214 reg_def V10 ( SOC, SOC, Op_RegF, 10, v10->as_VMReg() );
215 reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
216 reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
217 reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));
218
219 reg_def V11 ( SOC, SOC, Op_RegF, 11, v11->as_VMReg() );
220 reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
221 reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
222 reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));
223
224 reg_def V12 ( SOC, SOC, Op_RegF, 12, v12->as_VMReg() );
225 reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
226 reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
227 reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));
228
229 reg_def V13 ( SOC, SOC, Op_RegF, 13, v13->as_VMReg() );
230 reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
231 reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
232 reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));
233
234 reg_def V14 ( SOC, SOC, Op_RegF, 14, v14->as_VMReg() );
235 reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
236 reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
237 reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));
238
239 reg_def V15 ( SOC, SOC, Op_RegF, 15, v15->as_VMReg() );
240 reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
241 reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
242 reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));
243
244 reg_def V16 ( SOC, SOC, Op_RegF, 16, v16->as_VMReg() );
245 reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
246 reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
247 reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));
248
249 reg_def V17 ( SOC, SOC, Op_RegF, 17, v17->as_VMReg() );
250 reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
251 reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
252 reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));
253
254 reg_def V18 ( SOC, SOC, Op_RegF, 18, v18->as_VMReg() );
255 reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
256 reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
257 reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));
258
259 reg_def V19 ( SOC, SOC, Op_RegF, 19, v19->as_VMReg() );
260 reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
261 reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
262 reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));
263
264 reg_def V20 ( SOC, SOC, Op_RegF, 20, v20->as_VMReg() );
265 reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
266 reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
267 reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));
268
269 reg_def V21 ( SOC, SOC, Op_RegF, 21, v21->as_VMReg() );
270 reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
271 reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
272 reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));
273
274 reg_def V22 ( SOC, SOC, Op_RegF, 22, v22->as_VMReg() );
275 reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
276 reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
277 reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));
278
279 reg_def V23 ( SOC, SOC, Op_RegF, 23, v23->as_VMReg() );
280 reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
281 reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
282 reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));
283
284 reg_def V24 ( SOC, SOC, Op_RegF, 24, v24->as_VMReg() );
285 reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
286 reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
287 reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));
288
289 reg_def V25 ( SOC, SOC, Op_RegF, 25, v25->as_VMReg() );
290 reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
291 reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
292 reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));
293
294 reg_def V26 ( SOC, SOC, Op_RegF, 26, v26->as_VMReg() );
295 reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
296 reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
297 reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));
298
299 reg_def V27 ( SOC, SOC, Op_RegF, 27, v27->as_VMReg() );
300 reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
301 reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
302 reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));
303
304 reg_def V28 ( SOC, SOC, Op_RegF, 28, v28->as_VMReg() );
305 reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
306 reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
307 reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));
308
309 reg_def V29 ( SOC, SOC, Op_RegF, 29, v29->as_VMReg() );
310 reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
311 reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
312 reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));
313
314 reg_def V30 ( SOC, SOC, Op_RegF, 30, v30->as_VMReg() );
315 reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
316 reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
317 reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));
318
319 reg_def V31 ( SOC, SOC, Op_RegF, 31, v31->as_VMReg() );
320 reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
321 reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
322 reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
323
324 // ----------------------------
325 // Special Registers
326 // ----------------------------
327
328 // The AArch64 CPSR status flag register is not directly accessible as
329 // an instruction operand. The FPSR status flag register is a system
330 // register which can be written/read using MSR/MRS but again does not
331 // appear as an operand (a code identifying the FPSR occurs as an
332 // immediate value in the instruction).
333
334 reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
335
336
337 // Specify priority of register selection within phases of register
338 // allocation. Highest priority is first. A useful heuristic is to
339 // give registers a low priority when they are required by machine
340 // instructions, like EAX and EDX on I486, and choose no-save registers
341 // before save-on-call, & save-on-call before save-on-entry. Registers
342 // which participate in fixed calling sequences should come last.
343 // Registers which are used as pairs must fall on an even boundary.
344
345 alloc_class chunk0(
346 // volatiles
347 R10, R10_H,
348 R11, R11_H,
349 R12, R12_H,
350 R13, R13_H,
351 R14, R14_H,
352 R15, R15_H,
353 R16, R16_H,
354 R17, R17_H,
355 R18, R18_H,
356
357 // arg registers
358 R0, R0_H,
359 R1, R1_H,
360 R2, R2_H,
361 R3, R3_H,
362 R4, R4_H,
363 R5, R5_H,
364 R6, R6_H,
365 R7, R7_H,
366
367 // non-volatiles
368 R19, R19_H,
369 R20, R20_H,
370 R21, R21_H,
371 R22, R22_H,
372 R23, R23_H,
373 R24, R24_H,
374 R25, R25_H,
375 R26, R26_H,
376
377 // non-allocatable registers
378
379 R27, R27_H, // heapbase
380 R28, R28_H, // thread
381 R29, R29_H, // fp
382 R30, R30_H, // lr
383 R31, R31_H, // sp
384 );
385
386 alloc_class chunk1(
387
388 // no save
389 V16, V16_H, V16_J, V16_K,
390 V17, V17_H, V17_J, V17_K,
391 V18, V18_H, V18_J, V18_K,
392 V19, V19_H, V19_J, V19_K,
393 V20, V20_H, V20_J, V20_K,
394 V21, V21_H, V21_J, V21_K,
395 V22, V22_H, V22_J, V22_K,
396 V23, V23_H, V23_J, V23_K,
397 V24, V24_H, V24_J, V24_K,
398 V25, V25_H, V25_J, V25_K,
399 V26, V26_H, V26_J, V26_K,
400 V27, V27_H, V27_J, V27_K,
401 V28, V28_H, V28_J, V28_K,
402 V29, V29_H, V29_J, V29_K,
403 V30, V30_H, V30_J, V30_K,
404 V31, V31_H, V31_J, V31_K,
405
406 // arg registers
407 V0, V0_H, V0_J, V0_K,
408 V1, V1_H, V1_J, V1_K,
409 V2, V2_H, V2_J, V2_K,
410 V3, V3_H, V3_J, V3_K,
411 V4, V4_H, V4_J, V4_K,
412 V5, V5_H, V5_J, V5_K,
413 V6, V6_H, V6_J, V6_K,
414 V7, V7_H, V7_J, V7_K,
415
416 // non-volatiles
417 V8, V8_H, V8_J, V8_K,
418 V9, V9_H, V9_J, V9_K,
419 V10, V10_H, V10_J, V10_K,
420 V11, V11_H, V11_J, V11_K,
421 V12, V12_H, V12_J, V12_K,
422 V13, V13_H, V13_J, V13_K,
423 V14, V14_H, V14_J, V14_K,
424 V15, V15_H, V15_J, V15_K,
425 );
426
427 alloc_class chunk2(RFLAGS);
428
429 //----------Architecture Description Register Classes--------------------------
430 // Several register classes are automatically defined based upon information in
431 // this architecture description.
432 // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
433 // 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ )
434 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
435 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
436 //
437
438 // Class for all 32 bit integer registers -- excludes SP which will
439 // never be used as an integer register
440 reg_class any_reg32(
441 R0,
442 R1,
443 R2,
444 R3,
445 R4,
446 R5,
447 R6,
448 R7,
449 R10,
450 R11,
451 R12,
452 R13,
453 R14,
454 R15,
455 R16,
456 R17,
457 R18,
458 R19,
459 R20,
460 R21,
461 R22,
462 R23,
463 R24,
464 R25,
465 R26,
466 R27,
467 R28,
468 R29,
469 R30
470 );
471
472 // Singleton class for R0 int register
473 reg_class int_r0_reg(R0);
474
475 // Singleton class for R2 int register
476 reg_class int_r2_reg(R2);
477
478 // Singleton class for R3 int register
479 reg_class int_r3_reg(R3);
480
481 // Singleton class for R4 int register
482 reg_class int_r4_reg(R4);
483
484 // Class for all long integer registers (including RSP)
485 reg_class any_reg(
486 R0, R0_H,
487 R1, R1_H,
488 R2, R2_H,
489 R3, R3_H,
490 R4, R4_H,
491 R5, R5_H,
492 R6, R6_H,
493 R7, R7_H,
494 R10, R10_H,
495 R11, R11_H,
496 R12, R12_H,
497 R13, R13_H,
498 R14, R14_H,
499 R15, R15_H,
500 R16, R16_H,
501 R17, R17_H,
502 R18, R18_H,
503 R19, R19_H,
504 R20, R20_H,
505 R21, R21_H,
506 R22, R22_H,
507 R23, R23_H,
508 R24, R24_H,
509 R25, R25_H,
510 R26, R26_H,
511 R27, R27_H,
512 R28, R28_H,
513 R29, R29_H,
514 R30, R30_H,
515 R31, R31_H
516 );
517
518 // Class for all non-special integer registers
519 reg_class no_special_reg32_no_fp(
520 R0,
521 R1,
522 R2,
523 R3,
524 R4,
525 R5,
526 R6,
527 R7,
528 R10,
529 R11,
530 R12, // rmethod
531 R13,
532 R14,
533 R15,
534 R16,
535 R17,
536 R18,
537 R19,
538 R20,
539 R21,
540 R22,
541 R23,
542 R24,
543 R25,
544 R26
545 /* R27, */ // heapbase
546 /* R28, */ // thread
547 /* R29, */ // fp
548 /* R30, */ // lr
549 /* R31 */ // sp
550 );
551
552 reg_class no_special_reg32_with_fp(
553 R0,
554 R1,
555 R2,
556 R3,
557 R4,
558 R5,
559 R6,
560 R7,
561 R10,
562 R11,
563 R12, // rmethod
564 R13,
565 R14,
566 R15,
567 R16,
568 R17,
569 R18,
570 R19,
571 R20,
572 R21,
573 R22,
574 R23,
575 R24,
576 R25,
577 R26
578 /* R27, */ // heapbase
579 /* R28, */ // thread
580 /* R29, */ // fp
581 /* R30, */ // lr
582 /* R31 */ // sp
583 );
584
585 reg_class_dynamic no_special_reg32(no_special_reg32_no_fp, no_special_reg32_with_fp, %{ PreserveFramePointer %});
586
587 // Class for all non-special long integer registers
588 reg_class no_special_reg_no_fp(
589 R0, R0_H,
590 R1, R1_H,
591 R2, R2_H,
592 R3, R3_H,
593 R4, R4_H,
594 R5, R5_H,
595 R6, R6_H,
596 R7, R7_H,
597 R10, R10_H,
598 R11, R11_H,
599 R12, R12_H, // rmethod
600 R13, R13_H,
601 R14, R14_H,
602 R15, R15_H,
603 R16, R16_H,
604 R17, R17_H,
605 R18, R18_H,
606 R19, R19_H,
607 R20, R20_H,
608 R21, R21_H,
609 R22, R22_H,
610 R23, R23_H,
611 R24, R24_H,
612 R25, R25_H,
613 R26, R26_H,
614 /* R27, R27_H, */ // heapbase
615 /* R28, R28_H, */ // thread
616 /* R29, R29_H, */ // fp
617 /* R30, R30_H, */ // lr
618 /* R31, R31_H */ // sp
619 );
620
621 reg_class no_special_reg_with_fp(
622 R0, R0_H,
623 R1, R1_H,
624 R2, R2_H,
625 R3, R3_H,
626 R4, R4_H,
627 R5, R5_H,
628 R6, R6_H,
629 R7, R7_H,
630 R10, R10_H,
631 R11, R11_H,
632 R12, R12_H, // rmethod
633 R13, R13_H,
634 R14, R14_H,
635 R15, R15_H,
636 R16, R16_H,
637 R17, R17_H,
638 R18, R18_H,
639 R19, R19_H,
640 R20, R20_H,
641 R21, R21_H,
642 R22, R22_H,
643 R23, R23_H,
644 R24, R24_H,
645 R25, R25_H,
646 R26, R26_H,
647 /* R27, R27_H, */ // heapbase
648 /* R28, R28_H, */ // thread
649 /* R29, R29_H, */ // fp
650 /* R30, R30_H, */ // lr
651 /* R31, R31_H */ // sp
652 );
653
654 reg_class_dynamic no_special_reg(no_special_reg_no_fp, no_special_reg_with_fp, %{ PreserveFramePointer %});
655
656 // Class for 64 bit register r0
657 reg_class r0_reg(
658 R0, R0_H
659 );
660
661 // Class for 64 bit register r1
662 reg_class r1_reg(
663 R1, R1_H
664 );
665
666 // Class for 64 bit register r2
667 reg_class r2_reg(
668 R2, R2_H
669 );
670
671 // Class for 64 bit register r3
672 reg_class r3_reg(
673 R3, R3_H
674 );
675
676 // Class for 64 bit register r4
677 reg_class r4_reg(
678 R4, R4_H
679 );
680
681 // Class for 64 bit register r5
682 reg_class r5_reg(
683 R5, R5_H
684 );
685
686 // Class for 64 bit register r10
687 reg_class r10_reg(
688 R10, R10_H
689 );
690
691 // Class for 64 bit register r11
692 reg_class r11_reg(
693 R11, R11_H
694 );
695
696 // Class for method register
697 reg_class method_reg(
698 R12, R12_H
699 );
700
701 // Class for heapbase register
702 reg_class heapbase_reg(
703 R27, R27_H
704 );
705
706 // Class for thread register
707 reg_class thread_reg(
708 R28, R28_H
709 );
710
711 // Class for frame pointer register
712 reg_class fp_reg(
713 R29, R29_H
714 );
715
716 // Class for link register
717 reg_class lr_reg(
718 R30, R30_H
719 );
720
721 // Class for long sp register
722 reg_class sp_reg(
723 R31, R31_H
724 );
725
726 // Class for all pointer registers
727 reg_class ptr_reg(
728 R0, R0_H,
729 R1, R1_H,
730 R2, R2_H,
731 R3, R3_H,
732 R4, R4_H,
733 R5, R5_H,
734 R6, R6_H,
735 R7, R7_H,
736 R10, R10_H,
737 R11, R11_H,
738 R12, R12_H,
739 R13, R13_H,
740 R14, R14_H,
741 R15, R15_H,
742 R16, R16_H,
743 R17, R17_H,
744 R18, R18_H,
745 R19, R19_H,
746 R20, R20_H,
747 R21, R21_H,
748 R22, R22_H,
749 R23, R23_H,
750 R24, R24_H,
751 R25, R25_H,
752 R26, R26_H,
753 R27, R27_H,
754 R28, R28_H,
755 R29, R29_H,
756 R30, R30_H,
757 R31, R31_H
758 );
759
760 // Class for all non_special pointer registers
761 reg_class no_special_ptr_reg(
762 R0, R0_H,
763 R1, R1_H,
764 R2, R2_H,
765 R3, R3_H,
766 R4, R4_H,
767 R5, R5_H,
768 R6, R6_H,
769 R7, R7_H,
770 R10, R10_H,
771 R11, R11_H,
772 R12, R12_H,
773 R13, R13_H,
774 R14, R14_H,
775 R15, R15_H,
776 R16, R16_H,
777 R17, R17_H,
778 R18, R18_H,
779 R19, R19_H,
780 R20, R20_H,
781 R21, R21_H,
782 R22, R22_H,
783 R23, R23_H,
784 R24, R24_H,
785 R25, R25_H,
786 R26, R26_H,
787 /* R27, R27_H, */ // heapbase
788 /* R28, R28_H, */ // thread
789 /* R29, R29_H, */ // fp
790 /* R30, R30_H, */ // lr
791 /* R31, R31_H */ // sp
792 );
793
794 // Class for all float registers
795 reg_class float_reg(
796 V0,
797 V1,
798 V2,
799 V3,
800 V4,
801 V5,
802 V6,
803 V7,
804 V8,
805 V9,
806 V10,
807 V11,
808 V12,
809 V13,
810 V14,
811 V15,
812 V16,
813 V17,
814 V18,
815 V19,
816 V20,
817 V21,
818 V22,
819 V23,
820 V24,
821 V25,
822 V26,
823 V27,
824 V28,
825 V29,
826 V30,
827 V31
828 );
829
830 // Double precision float registers have virtual `high halves' that
831 // are needed by the allocator.
832 // Class for all double registers
833 reg_class double_reg(
834 V0, V0_H,
835 V1, V1_H,
836 V2, V2_H,
837 V3, V3_H,
838 V4, V4_H,
839 V5, V5_H,
840 V6, V6_H,
841 V7, V7_H,
842 V8, V8_H,
843 V9, V9_H,
844 V10, V10_H,
845 V11, V11_H,
846 V12, V12_H,
847 V13, V13_H,
848 V14, V14_H,
849 V15, V15_H,
850 V16, V16_H,
851 V17, V17_H,
852 V18, V18_H,
853 V19, V19_H,
854 V20, V20_H,
855 V21, V21_H,
856 V22, V22_H,
857 V23, V23_H,
858 V24, V24_H,
859 V25, V25_H,
860 V26, V26_H,
861 V27, V27_H,
862 V28, V28_H,
863 V29, V29_H,
864 V30, V30_H,
865 V31, V31_H
866 );
867
868 // Class for all 64bit vector registers
869 reg_class vectord_reg(
870 V0, V0_H,
871 V1, V1_H,
872 V2, V2_H,
873 V3, V3_H,
874 V4, V4_H,
875 V5, V5_H,
876 V6, V6_H,
877 V7, V7_H,
878 V8, V8_H,
879 V9, V9_H,
880 V10, V10_H,
881 V11, V11_H,
882 V12, V12_H,
883 V13, V13_H,
884 V14, V14_H,
885 V15, V15_H,
886 V16, V16_H,
887 V17, V17_H,
888 V18, V18_H,
889 V19, V19_H,
890 V20, V20_H,
891 V21, V21_H,
892 V22, V22_H,
893 V23, V23_H,
894 V24, V24_H,
895 V25, V25_H,
896 V26, V26_H,
897 V27, V27_H,
898 V28, V28_H,
899 V29, V29_H,
900 V30, V30_H,
901 V31, V31_H
902 );
903
904 // Class for all 128bit vector registers
905 reg_class vectorx_reg(
906 V0, V0_H, V0_J, V0_K,
907 V1, V1_H, V1_J, V1_K,
908 V2, V2_H, V2_J, V2_K,
909 V3, V3_H, V3_J, V3_K,
910 V4, V4_H, V4_J, V4_K,
911 V5, V5_H, V5_J, V5_K,
912 V6, V6_H, V6_J, V6_K,
913 V7, V7_H, V7_J, V7_K,
914 V8, V8_H, V8_J, V8_K,
915 V9, V9_H, V9_J, V9_K,
916 V10, V10_H, V10_J, V10_K,
917 V11, V11_H, V11_J, V11_K,
918 V12, V12_H, V12_J, V12_K,
919 V13, V13_H, V13_J, V13_K,
920 V14, V14_H, V14_J, V14_K,
921 V15, V15_H, V15_J, V15_K,
922 V16, V16_H, V16_J, V16_K,
923 V17, V17_H, V17_J, V17_K,
924 V18, V18_H, V18_J, V18_K,
925 V19, V19_H, V19_J, V19_K,
926 V20, V20_H, V20_J, V20_K,
927 V21, V21_H, V21_J, V21_K,
928 V22, V22_H, V22_J, V22_K,
929 V23, V23_H, V23_J, V23_K,
930 V24, V24_H, V24_J, V24_K,
931 V25, V25_H, V25_J, V25_K,
932 V26, V26_H, V26_J, V26_K,
933 V27, V27_H, V27_J, V27_K,
934 V28, V28_H, V28_J, V28_K,
935 V29, V29_H, V29_J, V29_K,
936 V30, V30_H, V30_J, V30_K,
937 V31, V31_H, V31_J, V31_K
938 );
939
940 // Class for 128 bit register v0
941 reg_class v0_reg(
942 V0, V0_H
943 );
944
945 // Class for 128 bit register v1
946 reg_class v1_reg(
947 V1, V1_H
948 );
949
950 // Class for 128 bit register v2
951 reg_class v2_reg(
952 V2, V2_H
953 );
954
955 // Class for 128 bit register v3
956 reg_class v3_reg(
957 V3, V3_H
958 );
959
960 // Singleton class for condition codes
961 reg_class int_flags(RFLAGS);
962
963 %}
964
965 //----------DEFINITION BLOCK---------------------------------------------------
966 // Define name --> value mappings to inform the ADLC of an integer valued name
967 // Current support includes integer values in the range [0, 0x7FFFFFFF]
968 // Format:
969 // int_def <name> ( <int_value>, <expression>);
970 // Generated Code in ad_<arch>.hpp
971 // #define <name> (<expression>)
972 // // value == <int_value>
973 // Generated code in ad_<arch>.cpp adlc_verification()
974 // assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
975 //
976
977 // we follow the ppc-aix port in using a simple cost model which ranks
978 // register operations as cheap, memory ops as more expensive and
979 // branches as most expensive. the first two have a low as well as a
980 // normal cost. huge cost appears to be a way of saying don't do
981 // something
982
// Instruction cost table used by the matcher; follows the simple
// ranking described above: register ops cheapest, branches/calls 2x,
// volatile memory references by far the most expensive (10x).
983 definitions %{
984 // The default cost (of a register move instruction).
985 int_def INSN_COST ( 100, 100);
// Branches and calls cost twice a plain register op.
986 int_def BRANCH_COST ( 200, 2 * INSN_COST);
987 int_def CALL_COST ( 200, 2 * INSN_COST);
// Volatile references cost ten register ops, per the note that huge
// cost is "a way of saying don't do something" lightly.
988 int_def VOLATILE_REF_COST ( 1000, 10 * INSN_COST);
989 %}
990
991
992 //----------SOURCE BLOCK-------------------------------------------------------
993 // This is a block of C++ code which provides values, functions, and
994 // definitions necessary in the rest of the architecture description
995
996 source_hpp %{
997
998 #include "gc/shared/cardTableModRefBS.hpp"
999 #include "opto/addnode.hpp"
1000
// Trampoline-stub accounting consulted by Compile::shorten_branches.
// This port emits no call trampoline stubs, so both queries report
// zero (see the return values below).
1001 class CallStubImpl {
1002 
1003 //--------------------------------------------------------------
1004 //---< Used for optimization in Compile::shorten_branches >---
1005 //--------------------------------------------------------------
1006 
1007 public:
1008 // Size of call trampoline stub.
1009 static uint size_call_trampoline() {
1010 return 0; // no call trampolines on this platform
1011 }
1012 
1013 // number of relocations needed by a call trampoline stub
1014 static uint reloc_call_trampoline() {
1015 return 0; // no call trampolines on this platform
1016 }
1017 };
1018
// Sizing and emission hooks for the per-method exception and deopt
// handler stubs. The emit_* functions are only declared here; their
// definitions live in the source block elsewhere in this file.
// NOTE(review): the size_* results presumably bound the code the
// corresponding emitters produce — confirm against the emitters.
1019 class HandlerImpl {
1020 
1021 public:
1022 
1023 static int emit_exception_handler(CodeBuffer &cbuf);
1024 static int emit_deopt_handler(CodeBuffer& cbuf);
1025 
// Exception handler is sized as a single far branch, delegating the
// exact byte count to the macro assembler.
1026 static uint size_exception_handler() {
1027 return MacroAssembler::far_branch_size();
1028 }
1029 
1030 static uint size_deopt_handler() {
1031 // count one adr and one far branch instruction
// NOTE(review): 4 words = 1 adr + up to 3 instructions for the far
// branch; verify this matches MacroAssembler::far_branch_size().
1032 return 4 * NativeInstruction::instruction_size;
1033 }
1034 };
1035
1036 // graph traversal helpers
1037
1038 MemBarNode *parent_membar(const Node *n);
1039 MemBarNode *child_membar(const MemBarNode *n);
1040 bool leading_membar(const MemBarNode *barrier);
1041
1042 bool is_card_mark_membar(const MemBarNode *barrier);
1043 bool is_CAS(int opcode);
1044
1045 MemBarNode *leading_to_trailing(MemBarNode *leading);
1046 MemBarNode *card_mark_to_leading(const MemBarNode *barrier);
1047 MemBarNode *trailing_to_leading(const MemBarNode *trailing);
1048
1049 // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
1050
1051 bool unnecessary_acquire(const Node *barrier);
1052 bool needs_acquiring_load(const Node *load);
1053
1054 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
1055
1056 bool unnecessary_release(const Node *barrier);
1057 bool unnecessary_volatile(const Node *barrier);
1058 bool needs_releasing_store(const Node *store);
1059
1060 // predicate controlling translation of CompareAndSwapX
1061 bool needs_acquiring_load_exclusive(const Node *load);
1062
1063 // predicate controlling translation of StoreCM
1064 bool unnecessary_storestore(const Node *storecm);
1065
1066 // predicate controlling addressing modes
1067 bool size_fits_all_mem_uses(AddPNode* addp, int shift);
1068 %}
1069
1070 source %{
1071
// Optimization of volatile gets and puts
1073 // -------------------------------------
1074 //
1075 // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1076 // use to implement volatile reads and writes. For a volatile read
1077 // we simply need
1078 //
1079 // ldar<x>
1080 //
1081 // and for a volatile write we need
1082 //
1083 // stlr<x>
1084 //
1085 // Alternatively, we can implement them by pairing a normal
1086 // load/store with a memory barrier. For a volatile read we need
1087 //
1088 // ldr<x>
1089 // dmb ishld
1090 //
1091 // for a volatile write
1092 //
1093 // dmb ish
1094 // str<x>
1095 // dmb ish
1096 //
1097 // We can also use ldaxr and stlxr to implement compare and swap CAS
1098 // sequences. These are normally translated to an instruction
1099 // sequence like the following
1100 //
1101 // dmb ish
1102 // retry:
1103 // ldxr<x> rval raddr
1104 // cmp rval rold
1105 // b.ne done
1106 // stlxr<x> rval, rnew, rold
1107 // cbnz rval retry
1108 // done:
1109 // cset r0, eq
1110 // dmb ishld
1111 //
1112 // Note that the exclusive store is already using an stlxr
1113 // instruction. That is required to ensure visibility to other
1114 // threads of the exclusive write (assuming it succeeds) before that
1115 // of any subsequent writes.
1116 //
1117 // The following instruction sequence is an improvement on the above
1118 //
1119 // retry:
1120 // ldaxr<x> rval raddr
1121 // cmp rval rold
1122 // b.ne done
1123 // stlxr<x> rval, rnew, rold
1124 // cbnz rval retry
1125 // done:
1126 // cset r0, eq
1127 //
1128 // We don't need the leading dmb ish since the stlxr guarantees
1129 // visibility of prior writes in the case that the swap is
1130 // successful. Crucially we don't have to worry about the case where
1131 // the swap is not successful since no valid program should be
1132 // relying on visibility of prior changes by the attempting thread
1133 // in the case where the CAS fails.
1134 //
1135 // Similarly, we don't need the trailing dmb ishld if we substitute
1136 // an ldaxr instruction since that will provide all the guarantees we
1137 // require regarding observation of changes made by other threads
1138 // before any change to the CAS address observed by the load.
1139 //
1140 // In order to generate the desired instruction sequence we need to
1141 // be able to identify specific 'signature' ideal graph node
1142 // sequences which i) occur as a translation of a volatile reads or
1143 // writes or CAS operations and ii) do not occur through any other
1144 // translation or graph transformation. We can then provide
// alternative adlc matching rules which translate these node
1146 // sequences to the desired machine code sequences. Selection of the
1147 // alternative rules can be implemented by predicates which identify
1148 // the relevant node sequences.
1149 //
1150 // The ideal graph generator translates a volatile read to the node
1151 // sequence
1152 //
1153 // LoadX[mo_acquire]
1154 // MemBarAcquire
1155 //
1156 // As a special case when using the compressed oops optimization we
1157 // may also see this variant
1158 //
1159 // LoadN[mo_acquire]
1160 // DecodeN
1161 // MemBarAcquire
1162 //
1163 // A volatile write is translated to the node sequence
1164 //
1165 // MemBarRelease
1166 // StoreX[mo_release] {CardMark}-optional
1167 // MemBarVolatile
1168 //
1169 // n.b. the above node patterns are generated with a strict
1170 // 'signature' configuration of input and output dependencies (see
1171 // the predicates below for exact details). The card mark may be as
1172 // simple as a few extra nodes or, in a few GC configurations, may
1173 // include more complex control flow between the leading and
1174 // trailing memory barriers. However, whatever the card mark
1175 // configuration these signatures are unique to translated volatile
1176 // reads/stores -- they will not appear as a result of any other
1177 // bytecode translation or inlining nor as a consequence of
1178 // optimizing transforms.
1179 //
1180 // We also want to catch inlined unsafe volatile gets and puts and
1181 // be able to implement them using either ldar<x>/stlr<x> or some
1182 // combination of ldr<x>/stlr<x> and dmb instructions.
1183 //
1184 // Inlined unsafe volatiles puts manifest as a minor variant of the
1185 // normal volatile put node sequence containing an extra cpuorder
1186 // membar
1187 //
1188 // MemBarRelease
1189 // MemBarCPUOrder
1190 // StoreX[mo_release] {CardMark}-optional
1191 // MemBarVolatile
1192 //
1193 // n.b. as an aside, the cpuorder membar is not itself subject to
1194 // matching and translation by adlc rules. However, the rule
1195 // predicates need to detect its presence in order to correctly
1196 // select the desired adlc rules.
1197 //
1198 // Inlined unsafe volatile gets manifest as a somewhat different
1199 // node sequence to a normal volatile get
1200 //
1201 // MemBarCPUOrder
1202 // || \\
1203 // MemBarAcquire LoadX[mo_acquire]
1204 // ||
1205 // MemBarCPUOrder
1206 //
1207 // In this case the acquire membar does not directly depend on the
1208 // load. However, we can be sure that the load is generated from an
1209 // inlined unsafe volatile get if we see it dependent on this unique
1210 // sequence of membar nodes. Similarly, given an acquire membar we
1211 // can know that it was added because of an inlined unsafe volatile
1212 // get if it is fed and feeds a cpuorder membar and if its feed
1213 // membar also feeds an acquiring load.
1214 //
1215 // Finally an inlined (Unsafe) CAS operation is translated to the
1216 // following ideal graph
1217 //
1218 // MemBarRelease
1219 // MemBarCPUOrder
1220 // CompareAndSwapX {CardMark}-optional
1221 // MemBarCPUOrder
1222 // MemBarAcquire
1223 //
1224 // So, where we can identify these volatile read and write
1225 // signatures we can choose to plant either of the above two code
1226 // sequences. For a volatile read we can simply plant a normal
1227 // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1228 // also choose to inhibit translation of the MemBarAcquire and
1229 // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1230 //
1231 // When we recognise a volatile store signature we can choose to
// plant a dmb ish as a translation for the MemBarRelease, a
1233 // normal str<x> and then a dmb ish for the MemBarVolatile.
1234 // Alternatively, we can inhibit translation of the MemBarRelease
1235 // and MemBarVolatile and instead plant a simple stlr<x>
1236 // instruction.
1237 //
1238 // when we recognise a CAS signature we can choose to plant a dmb
1239 // ish as a translation for the MemBarRelease, the conventional
1240 // macro-instruction sequence for the CompareAndSwap node (which
1241 // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
1242 // Alternatively, we can elide generation of the dmb instructions
1243 // and plant the alternative CompareAndSwap macro-instruction
1244 // sequence (which uses ldaxr<x>).
1245 //
1246 // Of course, the above only applies when we see these signature
1247 // configurations. We still want to plant dmb instructions in any
1248 // other cases where we may see a MemBarAcquire, MemBarRelease or
1249 // MemBarVolatile. For example, at the end of a constructor which
1250 // writes final/volatile fields we will see a MemBarRelease
1251 // instruction and this needs a 'dmb ish' lest we risk the
1252 // constructed object being visible without making the
1253 // final/volatile field writes visible.
1254 //
1255 // n.b. the translation rules below which rely on detection of the
1256 // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1257 // If we see anything other than the signature configurations we
1258 // always just translate the loads and stores to ldr<x> and str<x>
1259 // and translate acquire, release and volatile membars to the
1260 // relevant dmb instructions.
1261 //
1262
1263 // graph traversal helpers used for volatile put/get and CAS
1264 // optimization
1265
1266 // 1) general purpose helpers
1267
1268 // if node n is linked to a parent MemBarNode by an intervening
1269 // Control and Memory ProjNode return the MemBarNode otherwise return
1270 // NULL.
1271 //
1272 // n may only be a Load or a MemBar.
1273
1274 MemBarNode *parent_membar(const Node *n)
1275 {
1276 Node *ctl = NULL;
1277 Node *mem = NULL;
1278 Node *membar = NULL;
1279
1280 if (n->is_Load()) {
1281 ctl = n->lookup(LoadNode::Control);
1282 mem = n->lookup(LoadNode::Memory);
1283 } else if (n->is_MemBar()) {
1284 ctl = n->lookup(TypeFunc::Control);
1285 mem = n->lookup(TypeFunc::Memory);
1286 } else {
1287 return NULL;
1288 }
1289
1290 if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj()) {
1291 return NULL;
1292 }
1293
1294 membar = ctl->lookup(0);
1295
1296 if (!membar || !membar->is_MemBar()) {
1297 return NULL;
1298 }
1299
1300 if (mem->lookup(0) != membar) {
1301 return NULL;
1302 }
1303
1304 return membar->as_MemBar();
1305 }
1306
1307 // if n is linked to a child MemBarNode by intervening Control and
1308 // Memory ProjNodes return the MemBarNode otherwise return NULL.
1309
1310 MemBarNode *child_membar(const MemBarNode *n)
1311 {
1312 ProjNode *ctl = n->proj_out(TypeFunc::Control);
1313 ProjNode *mem = n->proj_out(TypeFunc::Memory);
1314
1315 // MemBar needs to have both a Ctl and Mem projection
1316 if (! ctl || ! mem)
1317 return NULL;
1318
1319 MemBarNode *child = NULL;
1320 Node *x;
1321
1322 for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
1323 x = ctl->fast_out(i);
1324 // if we see a membar we keep hold of it. we may also see a new
1325 // arena copy of the original but it will appear later
1326 if (x->is_MemBar()) {
1327 child = x->as_MemBar();
1328 break;
1329 }
1330 }
1331
1332 if (child == NULL) {
1333 return NULL;
1334 }
1335
1336 for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
1337 x = mem->fast_out(i);
1338 // if we see a membar we keep hold of it. we may also see a new
1339 // arena copy of the original but it will appear later
1340 if (x == child) {
1341 return child;
1342 }
1343 }
1344 return NULL;
1345 }
1346
1347 // helper predicate use to filter candidates for a leading memory
1348 // barrier
1349 //
1350 // returns true if barrier is a MemBarRelease or a MemBarCPUOrder
1351 // whose Ctl and Mem feeds come from a MemBarRelease otherwise false
1352
1353 bool leading_membar(const MemBarNode *barrier)
1354 {
1355 int opcode = barrier->Opcode();
1356 // if this is a release membar we are ok
1357 if (opcode == Op_MemBarRelease) {
1358 return true;
1359 }
1360 // if its a cpuorder membar . . .
1361 if (opcode != Op_MemBarCPUOrder) {
1362 return false;
1363 }
1364 // then the parent has to be a release membar
1365 MemBarNode *parent = parent_membar(barrier);
1366 if (!parent) {
1367 return false;
1368 }
1369 opcode = parent->Opcode();
1370 return opcode == Op_MemBarRelease;
1371 }
1372
1373 // 2) card mark detection helper
1374
1375 // helper predicate which can be used to detect a volatile membar
1376 // introduced as part of a conditional card mark sequence either by
1377 // G1 or by CMS when UseCondCardMark is true.
1378 //
1379 // membar can be definitively determined to be part of a card mark
1380 // sequence if and only if all the following hold
1381 //
1382 // i) it is a MemBarVolatile
1383 //
1384 // ii) either UseG1GC or (UseConcMarkSweepGC && UseCondCardMark) is
1385 // true
1386 //
1387 // iii) the node's Mem projection feeds a StoreCM node.
1388
1389 bool is_card_mark_membar(const MemBarNode *barrier)
1390 {
1391 if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) {
1392 return false;
1393 }
1394
1395 if (barrier->Opcode() != Op_MemBarVolatile) {
1396 return false;
1397 }
1398
1399 ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
1400
1401 for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax ; i++) {
1402 Node *y = mem->fast_out(i);
1403 if (y->Opcode() == Op_StoreCM) {
1404 return true;
1405 }
1406 }
1407
1408 return false;
1409 }
1410
1411
1412 // 3) helper predicates to traverse volatile put or CAS graphs which
1413 // may contain GC barrier subgraphs
1414
1415 // Preamble
1416 // --------
1417 //
1418 // for volatile writes we can omit generating barriers and employ a
// releasing store when we see a node sequence with a
1420 // leading MemBarRelease and a trailing MemBarVolatile as follows
1421 //
1422 // MemBarRelease
1423 // { || } -- optional
1424 // {MemBarCPUOrder}
1425 // || \\
1426 // || StoreX[mo_release]
1427 // | \ Bot / ???
1428 // | MergeMem
1429 // | /
1430 // MemBarVolatile
1431 //
1432 // where
1433 // || and \\ represent Ctl and Mem feeds via Proj nodes
1434 // | \ and / indicate further routing of the Ctl and Mem feeds
1435 //
1436 // Note that the memory feed from the CPUOrder membar to the
1437 // MergeMem node is an AliasIdxBot slice while the feed from the
1438 // StoreX is for a slice determined by the type of value being
1439 // written.
1440 //
1441 // the diagram above shows the graph we see for non-object stores.
1442 // for a volatile Object store (StoreN/P) we may see other nodes
1443 // below the leading membar because of the need for a GC pre- or
1444 // post-write barrier.
1445 //
// with most GC configurations we will see this simple variant which
1447 // includes a post-write barrier card mark.
1448 //
1449 // MemBarRelease______________________________
1450 // || \\ Ctl \ \\
1451 // || StoreN/P[mo_release] CastP2X StoreB/CM
1452 // | \ Bot / oop . . . /
1453 // | MergeMem
1454 // | /
1455 // || /
1456 // MemBarVolatile
1457 //
1458 // i.e. the leading membar feeds Ctl to a CastP2X (which converts
1459 // the object address to an int used to compute the card offset) and
1460 // Ctl+Mem to a StoreB node (which does the actual card mark).
1461 //
1462 // n.b. a StoreCM node is only ever used when CMS (with or without
1463 // CondCardMark) or G1 is configured. This abstract instruction
1464 // differs from a normal card mark write (StoreB) because it implies
1465 // a requirement to order visibility of the card mark (StoreCM)
1466 // after that of the object put (StoreP/N) using a StoreStore memory
1467 // barrier. Note that this is /not/ a requirement to order the
1468 // instructions in the generated code (that is already guaranteed by
1469 // the order of memory dependencies). Rather it is a requirement to
1470 // ensure visibility order which only applies on architectures like
1471 // AArch64 which do not implement TSO. This ordering is required for
1472 // both non-volatile and volatile puts.
1473 //
1474 // That implies that we need to translate a StoreCM using the
1475 // sequence
1476 //
1477 // dmb ishst
1478 // stlrb
1479 //
1480 // This dmb cannot be omitted even when the associated StoreX or
1481 // CompareAndSwapX is implemented using stlr. However, as described
1482 // below there are circumstances where a specific GC configuration
1483 // requires a stronger barrier in which case it can be omitted.
1484 //
1485 // With the Serial or Parallel GC using +CondCardMark the card mark
1486 // is performed conditionally on it currently being unmarked in
1487 // which case the volatile put graph looks slightly different
1488 //
1489 // MemBarRelease____________________________________________
1490 // || \\ Ctl \ Ctl \ \\ Mem \
1491 // || StoreN/P[mo_release] CastP2X If LoadB |
1492 // | \ Bot / oop \ |
1493 // | MergeMem . . . StoreB
1494 // | / /
1495 // || /
1496 // MemBarVolatile
1497 //
1498 // It is worth noting at this stage that all the above
1499 // configurations can be uniquely identified by checking that the
1500 // memory flow includes the following subgraph:
1501 //
1502 // MemBarRelease
1503 // {MemBarCPUOrder}
1504 // | \ . . .
1505 // | StoreX[mo_release] . . .
1506 // Bot | / oop
1507 // MergeMem
1508 // |
1509 // MemBarVolatile
1510 //
1511 // This is referred to as a *normal* volatile store subgraph. It can
1512 // easily be detected starting from any candidate MemBarRelease,
1513 // StoreX[mo_release] or MemBarVolatile node.
1514 //
1515 // A small variation on this normal case occurs for an unsafe CAS
1516 // operation. The basic memory flow subgraph for a non-object CAS is
1517 // as follows
1518 //
1519 // MemBarRelease
1520 // ||
1521 // MemBarCPUOrder
1522 // | \\ . . .
1523 // | CompareAndSwapX
1524 // | |
1525 // Bot | SCMemProj
1526 // \ / Bot
1527 // MergeMem
1528 // /
1529 // MemBarCPUOrder
1530 // ||
1531 // MemBarAcquire
1532 //
1533 // The same basic variations on this arrangement (mutatis mutandis)
1534 // occur when a card mark is introduced. i.e. the CPUOrder MemBar
1535 // feeds the extra CastP2X, LoadB etc nodes but the above memory
1536 // flow subgraph is still present.
1537 //
1538 // This is referred to as a *normal* CAS subgraph. It can easily be
1539 // detected starting from any candidate MemBarRelease,
1540 // StoreX[mo_release] or MemBarAcquire node.
1541 //
1542 // The code below uses two helper predicates, leading_to_trailing
1543 // and trailing_to_leading to identify these normal graphs, one
1544 // validating the layout starting from the top membar and searching
1545 // down and the other validating the layout starting from the lower
1546 // membar and searching up.
1547 //
1548 // There are two special case GC configurations when the simple
1549 // normal graphs above may not be generated: when using G1 (which
1550 // always employs a conditional card mark); and when using CMS with
1551 // conditional card marking (+CondCardMark) configured. These GCs
1552 // are both concurrent rather than stop-the world GCs. So they
1553 // introduce extra Ctl+Mem flow into the graph between the leading
1554 // and trailing membar nodes, in particular enforcing stronger
// memory serialisation between the object put and the corresponding
1556 // conditional card mark. CMS employs a post-write GC barrier while
1557 // G1 employs both a pre- and post-write GC barrier.
1558 //
1559 // The post-write barrier subgraph for these configurations includes
1560 // a MemBarVolatile node -- referred to as a card mark membar --
1561 // which is needed to order the card write (StoreCM) operation in
1562 // the barrier, the preceding StoreX (or CompareAndSwapX) and Store
1563 // operations performed by GC threads i.e. a card mark membar
1564 // constitutes a StoreLoad barrier hence must be translated to a dmb
1565 // ish (whether or not it sits inside a volatile store sequence).
1566 //
1567 // Of course, the use of the dmb ish for the card mark membar also
// implies that the StoreCM which follows can omit the dmb ishst
1569 // instruction. The necessary visibility ordering will already be
// guaranteed by the dmb ish. In sum, the dmb ishst instruction only
// needs to be generated as part of the StoreCM sequence with GC
1572 // configuration +CMS -CondCardMark.
1573 //
1574 // Of course all these extra barrier nodes may well be absent --
1575 // they are only inserted for object puts. Their potential presence
1576 // significantly complicates the task of identifying whether a
1577 // MemBarRelease, StoreX[mo_release], MemBarVolatile or
1578 // MemBarAcquire forms part of a volatile put or CAS when using
1579 // these GC configurations (see below) and also complicates the
1580 // decision as to how to translate a MemBarVolatile and StoreCM.
1581 //
// So, this means that a card mark MemBarVolatile occurring in the
// post-barrier graph needs to be distinguished from a normal
1584 // trailing MemBarVolatile. Resolving this is straightforward: a
1585 // card mark MemBarVolatile always projects a Mem feed to a StoreCM
1586 // node and that is a unique marker
1587 //
1588 // MemBarVolatile (card mark)
1589 // C | \ . . .
1590 // | StoreCM . . .
1591 // . . .
1592 //
1593 // Returning to the task of translating the object put and the
1594 // leading/trailing membar nodes: what do the node graphs look like
1595 // for these 2 special cases? and how can we determine the status of
1596 // a MemBarRelease, StoreX[mo_release] or MemBarVolatile in both
1597 // normal and non-normal cases?
1598 //
1599 // A CMS GC post-barrier wraps its card write (StoreCM) inside an If
// which selects conditional execution based on the value loaded
1601 // (LoadB) from the card. Ctl and Mem are fed to the If via an
1602 // intervening StoreLoad barrier (MemBarVolatile).
1603 //
1604 // So, with CMS we may see a node graph for a volatile object store
1605 // which looks like this
1606 //
1607 // MemBarRelease
1608 // MemBarCPUOrder_(leading)____________________
1609 // C | | M \ \\ M | C \
1610 // | | \ StoreN/P[mo_release] | CastP2X
1611 // | | Bot \ / oop \ |
1612 // | | MergeMem \ /
1613 // | | / | /
1614 // MemBarVolatile (card mark) | /
1615 // C | || M | | /
1616 // | LoadB | Bot oop | / Bot
1617 // | | | / /
1618 // | Cmp |\ / /
1619 // | / | \ / /
1620 // If | \ / /
1621 // | \ | \ / /
1622 // IfFalse IfTrue | \ / /
1623 // \ / \ | | / /
1624 // \ / StoreCM | / /
1625 // \ / \ / / /
1626 // Region Phi / /
1627 // | \ Raw | / /
1628 // | . . . | / /
1629 // | MergeMem
1630 // | |
1631 // MemBarVolatile (trailing)
1632 //
1633 // Notice that there are two MergeMem nodes below the leading
1634 // membar. The first MergeMem merges the AliasIdxBot Mem slice from
1635 // the leading membar and the oopptr Mem slice from the Store into
1636 // the card mark membar. The trailing MergeMem merges the
1637 // AliasIdxBot Mem slice from the leading membar, the AliasIdxRaw
1638 // slice from the StoreCM and an oop slice from the StoreN/P node
1639 // into the trailing membar (n.b. the raw slice proceeds via a Phi
1640 // associated with the If region).
1641 //
1642 // So, in the case of CMS + CondCardMark the volatile object store
1643 // graph still includes a normal volatile store subgraph from the
1644 // leading membar to the trailing membar. However, it also contains
1645 // the same shape memory flow to the card mark membar. The two flows
1646 // can be distinguished by testing whether or not the downstream
1647 // membar is a card mark membar.
1648 //
1649 // The graph for a CAS also varies with CMS + CondCardMark, in
1650 // particular employing a control feed from the CompareAndSwapX node
1651 // through a CmpI and If to the card mark membar and StoreCM which
1652 // updates the associated card. This avoids executing the card mark
1653 // if the CAS fails. However, it can be seen from the diagram below
1654 // that the presence of the barrier does not alter the normal CAS
1655 // memory subgraph where the leading membar feeds a CompareAndSwapX,
1656 // an SCMemProj, a MergeMem then a final trailing MemBarCPUOrder and
1657 // MemBarAcquire pair.
1658 //
1659 // MemBarRelease
1660 // MemBarCPUOrder__(leading)_______________________
1661 // C / M | \\ C \
1662 // . . . | Bot CompareAndSwapN/P CastP2X
1663 // | C / M |
1664 // | CmpI |
1665 // | / |
1666 // | . . . |
1667 // | IfTrue |
1668 // | / |
1669 // MemBarVolatile (card mark) |
1670 // C | || M | |
1671 // | LoadB | Bot ______/|
1672 // | | | / |
1673 // | Cmp | / SCMemProj
1674 // | / | / |
1675 // If | / /
1676 // | \ | / / Bot
1677 // IfFalse IfTrue | / /
1678 // | / \ / / prec /
1679 // . . . | / StoreCM /
1680 // \ | / | raw /
1681 // Region . . . /
1682 // | \ /
1683 // | . . . \ / Bot
1684 // | MergeMem
1685 // | /
1686 // MemBarCPUOrder
1687 // MemBarAcquire (trailing)
1688 //
1689 // This has a slightly different memory subgraph to the one seen
1690 // previously but the core of it has a similar memory flow to the
1691 // CAS normal subgraph:
1692 //
1693 // MemBarRelease
1694 // MemBarCPUOrder____
1695 // | \ . . .
1696 // | CompareAndSwapX . . .
1697 // | C / M |
1698 // | CmpI |
1699 // | / |
1700 // | . . /
1701 // Bot | IfTrue /
1702 // | / /
1703 // MemBarVolatile /
1704 // | ... /
1705 // StoreCM ... /
1706 // | /
1707 // . . . SCMemProj
1708 // Raw \ / Bot
1709 // MergeMem
1710 // |
1711 // MemBarCPUOrder
1712 // MemBarAcquire
1713 //
1714 // The G1 graph for a volatile object put is a lot more complicated.
1715 // Nodes inserted on behalf of G1 may comprise: a pre-write graph
1716 // which adds the old value to the SATB queue; the releasing store
1717 // itself; and, finally, a post-write graph which performs a card
1718 // mark.
1719 //
1720 // The pre-write graph may be omitted, but only when the put is
1721 // writing to a newly allocated (young gen) object and then only if
1722 // there is a direct memory chain to the Initialize node for the
1723 // object allocation. This will not happen for a volatile put since
1724 // any memory chain passes through the leading membar.
1725 //
1726 // The pre-write graph includes a series of 3 If tests. The outermost
1727 // If tests whether SATB is enabled (no else case). The next If tests
1728 // whether the old value is non-NULL (no else case). The third tests
1729 // whether the SATB queue index is > 0, if so updating the queue. The
1730 // else case for this third If calls out to the runtime to allocate a
1731 // new queue buffer.
1732 //
1733 // So with G1 the pre-write and releasing store subgraph looks like
1734 // this (the nested Ifs are omitted).
1735 //
1736 // MemBarRelease (leading)____________
1737 // C | || M \ M \ M \ M \ . . .
1738 // | LoadB \ LoadL LoadN \
1739 // | / \ \
1740 // If |\ \
1741 // | \ | \ \
1742 // IfFalse IfTrue | \ \
1743 // | | | \ |
1744 // | If | /\ |
1745 // | | \ |
1746 // | \ |
1747 // | . . . \ |
1748 // | / | / | |
1749 // Region Phi[M] | |
1750 // | \ | | |
1751 // | \_____ | ___ | |
1752 // C | C \ | C \ M | |
1753 // | CastP2X | StoreN/P[mo_release] |
1754 // | | | |
1755 // C | M | M | M |
1756 // \ | Raw | oop / Bot
1757 // . . .
1758 // (post write subtree elided)
1759 // . . .
1760 // C \ M /
1761 // MemBarVolatile (trailing)
1762 //
1763 // Note that the three memory feeds into the post-write tree are an
1764 // AliasRawIdx slice associated with the writes in the pre-write
1765 // tree, an oop type slice from the StoreX specific to the type of
1766 // the volatile field and the AliasBotIdx slice emanating from the
1767 // leading membar.
1768 //
1769 // n.b. the LoadB in this subgraph is not the card read -- it's a
1770 // read of the SATB queue active flag.
1771 //
1772 // The CAS graph is once again a variant of the above with a
1773 // CompareAndSwapX node and SCMemProj in place of the StoreX. The
1774 // value from the CompareAndSwapX node is fed into the post-write
// graph along with the AliasIdxRaw feed from the pre-barrier and
// the AliasIdxBot feeds from the leading membar and the SCMemProj.
1777 //
1778 // MemBarRelease (leading)____________
1779 // C | || M \ M \ M \ M \ . . .
1780 // | LoadB \ LoadL LoadN \
1781 // | / \ \
1782 // If |\ \
1783 // | \ | \ \
1784 // IfFalse IfTrue | \ \
1785 // | | | \ \
1786 // | If | \ |
1787 // | | \ |
1788 // | \ |
1789 // | . . . \ |
1790 // | / | / \ |
1791 // Region Phi[M] \ |
1792 // | \ | \ |
1793 // | \_____ | | |
1794 // C | C \ | | |
1795 // | CastP2X | CompareAndSwapX |
1796 // | | res | | |
1797 // C | M | | SCMemProj M |
1798 // \ | Raw | | Bot / Bot
1799 // . . .
1800 // (post write subtree elided)
1801 // . . .
1802 // C \ M /
1803 // MemBarVolatile (trailing)
1804 //
1805 // The G1 post-write subtree is also optional, this time when the
1806 // new value being written is either null or can be identified as a
1807 // newly allocated (young gen) object with no intervening control
1808 // flow. The latter cannot happen but the former may, in which case
1809 // the card mark membar is omitted and the memory feeds from the
// leading membar and the StoreN/P are merged directly into the
1811 // trailing membar as per the normal subgraph. So, the only special
1812 // case which arises is when the post-write subgraph is generated.
1813 //
1814 // The kernel of the post-write G1 subgraph is the card mark itself
1815 // which includes a card mark memory barrier (MemBarVolatile), a
1816 // card test (LoadB), and a conditional update (If feeding a
1817 // StoreCM). These nodes are surrounded by a series of nested Ifs
1818 // which try to avoid doing the card mark. The top level If skips if
1819 // the object reference does not cross regions (i.e. it tests if
1820 // (adr ^ val) >> log2(regsize) != 0) -- intra-region references
1821 // need not be recorded. The next If, which skips on a NULL value,
1822 // may be absent (it is not generated if the type of value is >=
1823 // OopPtr::NotNull). The 3rd If skips writes to young regions (by
1824 // checking if card_val != young). n.b. although this test requires
1825 // a pre-read of the card it can safely be done before the StoreLoad
1826 // barrier. However that does not bypass the need to reread the card
1827 // after the barrier.
1828 //
1829 // (pre-write subtree elided)
1830 // . . . . . . . . . . . .
1831 // C | M | M | M |
1832 // Region Phi[M] StoreN |
1833 // | Raw | oop | Bot |
1834 // / \_______ |\ |\ |\
1835 // C / C \ . . . | \ | \ | \
1836 // If CastP2X . . . | \ | \ | \
1837 // / \ | \ | \ | \
1838 // / \ | \ | \ | \
1839 // IfFalse IfTrue | | | \
1840 // | | \ | / |
1841 // | If \ | \ / \ |
1842 // | / \ \ | / \ |
1843 // | / \ \ | / \ | |
1844 // | IfFalse IfTrue MergeMem \ | |
1845 // | . . . / \ | \ | |
1846 // | / \ | | | |
1847 // | IfFalse IfTrue | | | |
1848 // | . . . | | | | |
1849 // | If / | | |
1850 // | / \ / | | |
1851 // | / \ / | | |
1852 // | IfFalse IfTrue / | | |
1853 // | . . . | / | | |
1854 // | \ / | | |
1855 // | \ / | | |
1856 // | MemBarVolatile__(card mark ) | | |
1857 // | || C | \ | | |
1858 // | LoadB If | / | |
1859 // | / \ Raw | / / /
1860 // | . . . | / / /
1861 // | \ | / / /
1862 // | StoreCM / / /
1863 // | | / / /
1864 // | . . . / /
1865 // | / /
1866 // | . . . / /
1867 // | | | / / /
1868 // | | Phi[M] / / /
1869 // | | | / / /
1870 // | | | / / /
1871 // | Region . . . Phi[M] / /
1872 // | | | / /
1873 // \ | | / /
1874 // \ | . . . | / /
1875 // \ | | / /
1876 // Region Phi[M] / /
1877 // | \ / /
1878 // \ MergeMem
1879 // \ /
1880 // MemBarVolatile
1881 //
1882 // As with CMS + CondCardMark the first MergeMem merges the
1883 // AliasIdxBot Mem slice from the leading membar and the oopptr Mem
1884 // slice from the Store into the card mark membar. However, in this
1885 // case it may also merge an AliasRawIdx mem slice from the pre
1886 // barrier write.
1887 //
1888 // The trailing MergeMem merges an AliasIdxBot Mem slice from the
1889 // leading membar with an oop slice from the StoreN and an
1890 // AliasRawIdx slice from the post barrier writes. In this case the
1891 // AliasIdxRaw Mem slice is merged through a series of Phi nodes
1892 // which combine feeds from the If regions in the post barrier
1893 // subgraph.
1894 //
1895 // So, for G1 the same characteristic subgraph arises as for CMS +
1896 // CondCardMark. There is a normal subgraph feeding the card mark
1897 // membar and a normal subgraph feeding the trailing membar.
1898 //
1899 // The CAS graph when using G1GC also includes an optional
1900 // post-write subgraph. It is very similar to the above graph except
1901 // for a few details.
1902 //
// - The control flow is gated by an additional If which tests the
1904 // result from the CompareAndSwapX node
1905 //
1906 // - The MergeMem which feeds the card mark membar only merges the
1907 // AliasIdxBot slice from the leading membar and the AliasIdxRaw
1908 // slice from the pre-barrier. It does not merge the SCMemProj
1909 // AliasIdxBot slice. So, this subgraph does not look like the
1910 // normal CAS subgraph.
1911 //
1912 // - The MergeMem which feeds the trailing membar merges the
1913 // AliasIdxBot slice from the leading membar, the AliasIdxRaw slice
1914 // from the post-barrier and the SCMemProj AliasIdxBot slice i.e. it
1915 // has two AliasIdxBot input slices. However, this subgraph does
1916 // still look like the normal CAS subgraph.
1917 //
1918 // So, the upshot is:
1919 //
// In all cases a volatile put graph will include a *normal*
// volatile store subgraph between the leading membar and the
// trailing membar. It may also include a normal volatile store
// subgraph between the leading membar and the card mark membar.
1924 //
1925 // In all cases a CAS graph will contain a unique normal CAS graph
1926 // feeding the trailing membar.
1927 //
1928 // In all cases where there is a card mark membar (either as part of
1929 // a volatile object put or CAS) it will be fed by a MergeMem whose
1930 // AliasIdxBot slice feed will be a leading membar.
1931 //
1932 // The predicates controlling generation of instructions for store
1933 // and barrier nodes employ a few simple helper functions (described
1934 // below) which identify the presence or absence of all these
1935 // subgraph configurations and provide a means of traversing from
1936 // one node in the subgraph to another.
1937
1938 // is_CAS(int opcode)
1939 //
1940 // return true if opcode is one of the possible CompareAndSwapX
1941 // values otherwise false.
1942
1943 bool is_CAS(int opcode)
1944 {
1945 switch(opcode) {
1946 // We handle these
1947 case Op_CompareAndSwapI:
1948 case Op_CompareAndSwapL:
1949 case Op_CompareAndSwapP:
1950 case Op_CompareAndSwapN:
1951 // case Op_CompareAndSwapB:
1952 // case Op_CompareAndSwapS:
1953 return true;
1954 // These are TBD
1955 case Op_WeakCompareAndSwapB:
1956 case Op_WeakCompareAndSwapS:
1957 case Op_WeakCompareAndSwapI:
1958 case Op_WeakCompareAndSwapL:
1959 case Op_WeakCompareAndSwapP:
1960 case Op_WeakCompareAndSwapN:
1961 case Op_CompareAndExchangeB:
1962 case Op_CompareAndExchangeS:
1963 case Op_CompareAndExchangeI:
1964 case Op_CompareAndExchangeL:
1965 case Op_CompareAndExchangeP:
1966 case Op_CompareAndExchangeN:
1967 return false;
1968 default:
1969 return false;
1970 }
1971 }
1972
1973
1974 // leading_to_trailing
1975 //
// graph traversal helper which detects the normal case Mem feed from
1977 // a release membar (or, optionally, its cpuorder child) to a
1978 // dependent volatile membar i.e. it ensures that one or other of
1979 // the following Mem flow subgraph is present.
1980 //
1981 // MemBarRelease {leading}
1982 // {MemBarCPUOrder} {optional}
1983 // Bot | \ . . .
1984 // | StoreN/P[mo_release] . . .
1985 // | /
1986 // MergeMem
1987 // |
1988 // MemBarVolatile {not card mark}
1989 //
1990 // MemBarRelease {leading}
1991 // {MemBarCPUOrder} {optional}
1992 // | \ . . .
1993 // | CompareAndSwapX . . .
1994 // |
1995 // . . . SCMemProj
1996 // \ |
1997 // | MergeMem
1998 // | /
1999 // MemBarCPUOrder
2000 // MemBarAcquire {trailing}
2001 //
2002 // the predicate needs to be capable of distinguishing the following
// volatile put graph which may arise when a GC post barrier
2004 // inserts a card mark membar
2005 //
2006 // MemBarRelease {leading}
2007 // {MemBarCPUOrder}__
2008 // Bot | \ \
2009 // | StoreN/P \
2010 // | / \ |
2011 // MergeMem \ |
2012 // | \ |
2013 // MemBarVolatile \ |
2014 // {card mark} \ |
2015 // MergeMem
2016 // |
2017 // {not card mark} MemBarVolatile
2018 //
2019 // if the correct configuration is present returns the trailing
2020 // membar otherwise NULL.
2021 //
2022 // the input membar is expected to be either a cpuorder membar or a
2023 // release membar. in the latter case it should not have a cpu membar
2024 // child.
2025 //
2026 // the returned value may be a card mark or trailing membar
2027 //
2028
MemBarNode *leading_to_trailing(MemBarNode *leading)
{
  assert((leading->Opcode() == Op_MemBarRelease ||
          leading->Opcode() == Op_MemBarCPUOrder),
         "expecting a volatile or cpuroder membar!");

  // check the mem flow
  ProjNode *mem = leading->proj_out(TypeFunc::Memory);

  if (!mem) {
    return NULL;
  }

  Node *x = NULL;
  StoreNode * st = NULL;
  LoadStoreNode *cas = NULL;
  MergeMemNode *mm = NULL;
  MergeMemNode *mm2 = NULL;

  // scan the users of the leading membar's Mem projection: we expect
  // at most two MergeMems plus exactly one releasing store or one CAS
  for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
    x = mem->fast_out(i);
    if (x->is_MergeMem()) {
      if (mm != NULL) {
        if (mm2 != NULL) {
          // should not see more than 2 merge mems
          return NULL;
        } else {
          mm2 = x->as_MergeMem();
        }
      } else {
        mm = x->as_MergeMem();
      }
    } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
      // two releasing stores/CAS nodes is one too many
      if (st != NULL || cas != NULL) {
        return NULL;
      }
      st = x->as_Store();
    } else if (is_CAS(x->Opcode())) {
      if (st != NULL || cas != NULL) {
        return NULL;
      }
      cas = x->as_LoadStore();
    }
  }

  // must have a store or a cas
  if (!st && !cas) {
    return NULL;
  }

  // must have at least one merge if we also have st
  if (st && !mm) {
    return NULL;
  }

  if (cas) {
    // CAS case: follow CAS -> SCMemProj -> MergeMem -> MemBarCPUOrder
    // -> MemBarAcquire {trailing}
    Node *y = NULL;
    // look for an SCMemProj
    for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
      x = cas->fast_out(i);
      if (x->is_Proj()) {
        y = x;
        break;
      }
    }
    if (y == NULL) {
      return NULL;
    }
    // the proj must feed a MergeMem
    for (DUIterator_Fast imax, i = y->fast_outs(imax); i < imax; i++) {
      x = y->fast_out(i);
      if (x->is_MergeMem()) {
        mm = x->as_MergeMem();
        break;
      }
    }
    if (mm == NULL) {
      return NULL;
    }
    MemBarNode *mbar = NULL;
    // ensure the merge feeds a trailing membar cpuorder + acquire pair
    for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
      x = mm->fast_out(i);
      if (x->is_MemBar()) {
        int opcode = x->Opcode();
        if (opcode == Op_MemBarCPUOrder) {
          MemBarNode *z = x->as_MemBar();
          z = child_membar(z);
          if (z != NULL && z->Opcode() == Op_MemBarAcquire) {
            mbar = z;
          }
        }
        break;
      }
    }
    return mbar;
  } else {
    // store case: the store must feed the first MergeMem (and the
    // second when present) and each MergeMem must feed a
    // MemBarVolatile; with two merges one membar is the card mark and
    // the other is the trailing membar
    Node *y = NULL;
    // ensure the store feeds the first mergemem;
    for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
      if (st->fast_out(i) == mm) {
        y = st;
        break;
      }
    }
    if (y == NULL) {
      return NULL;
    }
    if (mm2 != NULL) {
      // ensure the store feeds the second mergemem;
      y = NULL;
      for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
        if (st->fast_out(i) == mm2) {
          y = st;
        }
      }
      if (y == NULL) {
        return NULL;
      }
    }

    MemBarNode *mbar = NULL;
    // ensure the first mergemem feeds a volatile membar
    for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
      x = mm->fast_out(i);
      if (x->is_MemBar()) {
        int opcode = x->Opcode();
        if (opcode == Op_MemBarVolatile) {
          mbar = x->as_MemBar();
        }
        break;
      }
    }
    if (mm2 == NULL) {
      // this is our only option for a trailing membar
      return mbar;
    }
    // ensure the second mergemem feeds a volatile membar
    MemBarNode *mbar2 = NULL;
    for (DUIterator_Fast imax, i = mm2->fast_outs(imax); i < imax; i++) {
      x = mm2->fast_out(i);
      if (x->is_MemBar()) {
        int opcode = x->Opcode();
        if (opcode == Op_MemBarVolatile) {
          mbar2 = x->as_MemBar();
        }
        break;
      }
    }
    // if we have two merge mems we must have two volatile membars
    if (mbar == NULL || mbar2 == NULL) {
      return NULL;
    }
    // return the trailing membar i.e. whichever one is not the card
    // mark membar
    if (is_card_mark_membar(mbar2)) {
      return mbar;
    } else {
      if (is_card_mark_membar(mbar)) {
        return mbar2;
      } else {
        return NULL;
      }
    }
  }
}
2195
2196 // trailing_to_leading
2197 //
2198 // graph traversal helper which detects the normal case Mem feed
2199 // from a trailing membar to a preceding release membar (optionally
2200 // its cpuorder child) i.e. it ensures that one or other of the
2201 // following Mem flow subgraphs is present.
2202 //
2203 // MemBarRelease {leading}
2204 // MemBarCPUOrder {optional}
2205 // | Bot | \ . . .
2206 // | | StoreN/P[mo_release] . . .
2207 // | | /
2208 // | MergeMem
2209 // | |
2210 // MemBarVolatile {not card mark}
2211 //
2212 // MemBarRelease {leading}
2213 // MemBarCPUOrder {optional}
2214 // | \ . . .
2215 // | CompareAndSwapX . . .
2216 // |
2217 // . . . SCMemProj
2218 // \ |
2219 // | MergeMem
2220 // | |
2221 // MemBarCPUOrder
2222 // MemBarAcquire {trailing}
2223 //
2224 // this predicate checks for the same flow as the previous predicate
2225 // but starting from the bottom rather than the top.
2226 //
// if the configuration is present returns the cpuorder membar for
2228 // preference or when absent the release membar otherwise NULL.
2229 //
2230 // n.b. the input membar is expected to be a MemBarVolatile or
2231 // MemBarAcquire. if it is a MemBarVolatile it must *not* be a card
2232 // mark membar.
2233
2234 MemBarNode *trailing_to_leading(const MemBarNode *barrier)
2235 {
2236 // input must be a volatile membar
2237 assert((barrier->Opcode() == Op_MemBarVolatile ||
2238 barrier->Opcode() == Op_MemBarAcquire),
2239 "expecting a volatile or an acquire membar");
2240
2241 assert((barrier->Opcode() != Op_MemBarVolatile) ||
2242 !is_card_mark_membar(barrier),
2243 "not expecting a card mark membar");
2244 Node *x;
2245 bool is_cas = barrier->Opcode() == Op_MemBarAcquire;
2246
2247 // if we have an acquire membar then it must be fed via a CPUOrder
2248 // membar
2249
2250 if (is_cas) {
2251 // skip to parent barrier which must be a cpuorder
2252 x = parent_membar(barrier);
2253 if (x->Opcode() != Op_MemBarCPUOrder)
2254 return NULL;
2255 } else {
2256 // start from the supplied barrier
2257 x = (Node *)barrier;
2258 }
2259
2260 // the Mem feed to the membar should be a merge
2261 x = x ->in(TypeFunc::Memory);
2262 if (!x->is_MergeMem())
2263 return NULL;
2264
2265 MergeMemNode *mm = x->as_MergeMem();
2266
2267 if (is_cas) {
2268 // the merge should be fed from the CAS via an SCMemProj node
2269 x = NULL;
2270 for (uint idx = 1; idx < mm->req(); idx++) {
2271 if (mm->in(idx)->Opcode() == Op_SCMemProj) {
2272 x = mm->in(idx);
2273 break;
2274 }
2275 }
2276 if (x == NULL) {
2277 return NULL;
2278 }
2279 // check for a CAS feeding this proj
2280 x = x->in(0);
2281 int opcode = x->Opcode();
2282 if (!is_CAS(opcode)) {
2283 return NULL;
2284 }
2285 // the CAS should get its mem feed from the leading membar
2286 x = x->in(MemNode::Memory);
2287 } else {
2288 // the merge should get its Bottom mem feed from the leading membar
2289 x = mm->in(Compile::AliasIdxBot);
2290 }
2291
2292 // ensure this is a non control projection
2293 if (!x->is_Proj() || x->is_CFG()) {
2294 return NULL;
2295 }
2296 // if it is fed by a membar that's the one we want
2297 x = x->in(0);
2298
2299 if (!x->is_MemBar()) {
2300 return NULL;
2301 }
2302
2303 MemBarNode *leading = x->as_MemBar();
2304 // reject invalid candidates
2305 if (!leading_membar(leading)) {
2306 return NULL;
2307 }
2308
2309 // ok, we have a leading membar, now for the sanity clauses
2310
2311 // the leading membar must feed Mem to a releasing store or CAS
2312 ProjNode *mem = leading->proj_out(TypeFunc::Memory);
2313 StoreNode *st = NULL;
2314 LoadStoreNode *cas = NULL;
2315 for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
2316 x = mem->fast_out(i);
2317 if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
2318 // two stores or CASes is one too many
2319 if (st != NULL || cas != NULL) {
2320 return NULL;
2321 }
2322 st = x->as_Store();
2323 } else if (is_CAS(x->Opcode())) {
2324 if (st != NULL || cas != NULL) {
2325 return NULL;
2326 }
2327 cas = x->as_LoadStore();
2328 }
2329 }
2330
2331 // we should not have both a store and a cas
2332 if (st == NULL & cas == NULL) {
2333 return NULL;
2334 }
2335
2336 if (st == NULL) {
2337 // nothing more to check
2338 return leading;
2339 } else {
2340 // we should not have a store if we started from an acquire
2341 if (is_cas) {
2342 return NULL;
2343 }
2344
2345 // the store should feed the merge we used to get here
2346 for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
2347 if (st->fast_out(i) == mm) {
2348 return leading;
2349 }
2350 }
2351 }
2352
2353 return NULL;
2354 }
2355
2356 // card_mark_to_leading
2357 //
2358 // graph traversal helper which traverses from a card mark volatile
2359 // membar to a leading membar i.e. it ensures that the following Mem
2360 // flow subgraph is present.
2361 //
2362 // MemBarRelease {leading}
2363 // {MemBarCPUOrder} {optional}
2364 // | . . .
2365 // Bot | /
2366 // MergeMem
2367 // |
2368 // MemBarVolatile (card mark)
2369 // | \
2370 // . . . StoreCM
2371 //
// if the configuration is present returns the cpuorder membar for
2373 // preference or when absent the release membar otherwise NULL.
2374 //
// n.b. the input membar is expected to be a MemBarVolatile and must
2376 // be a card mark membar.
2377
2378 MemBarNode *card_mark_to_leading(const MemBarNode *barrier)
2379 {
2380 // input must be a card mark volatile membar
2381 assert(is_card_mark_membar(barrier), "expecting a card mark membar");
2382
2383 // the Mem feed to the membar should be a merge
2384 Node *x = barrier->in(TypeFunc::Memory);
2385 if (!x->is_MergeMem()) {
2386 return NULL;
2387 }
2388
2389 MergeMemNode *mm = x->as_MergeMem();
2390
2391 x = mm->in(Compile::AliasIdxBot);
2392
2393 if (!x->is_MemBar()) {
2394 return NULL;
2395 }
2396
2397 MemBarNode *leading = x->as_MemBar();
2398
2399 if (leading_membar(leading)) {
2400 return leading;
2401 }
2402
2403 return NULL;
2404 }
2405
bool unnecessary_acquire(const Node *barrier)
{
  // predicate: returns true when this acquire membar is redundant
  // because the associated load will be translated as an acquiring
  // load (or it is the trailing membar of a CAS)
  assert(barrier->is_MemBar(), "expecting a membar");

  if (UseBarriersForVolatile) {
    // we need to plant a dmb
    return false;
  }

  // a volatile read derived from bytecode (or also from an inlined
  // SHA field read via LibraryCallKit::load_field_from_object)
  // manifests as a LoadX[mo_acquire] followed by an acquire membar
  // with a bogus read dependency on it's preceding load. so in those
  // cases we will find the load node at the PARMS offset of the
  // acquire membar. n.b. there may be an intervening DecodeN node.
  //
  // a volatile load derived from an inlined unsafe field access
  // manifests as a cpuorder membar with Ctl and Mem projections
  // feeding both an acquire membar and a LoadX[mo_acquire]. The
  // acquire then feeds another cpuorder membar via Ctl and Mem
  // projections. The load has no output dependency on these trailing
  // membars because subsequent nodes inserted into the graph take
  // their control feed from the final membar cpuorder meaning they
  // are all ordered after the load.

  Node *x = barrier->lookup(TypeFunc::Parms);
  if (x) {
    // we are starting from an acquire and it has a fake dependency
    //
    // need to check for
    //
    //   LoadX[mo_acquire]
    //   {  |1   }
    //   {DecodeN}
    //      |Parms
    //   MemBarAcquire*
    //
    // where * tags node we were passed
    // and |k means input k
    if (x->is_DecodeNarrowPtr()) {
      x = x->in(1);
    }

    return (x->is_Load() && x->as_Load()->is_acquire());
  }

  // now check for an unsafe volatile get

  // need to check for
  //
  //   MemBarCPUOrder
  //        ||       \\
  //   MemBarAcquire* LoadX[mo_acquire]
  //        ||
  //   MemBarCPUOrder
  //
  // where * tags node we were passed
  // and || or \\ are Ctl+Mem feeds via intermediate Proj Nodes

  // check for a parent MemBarCPUOrder
  ProjNode *ctl;
  ProjNode *mem;
  MemBarNode *parent = parent_membar(barrier);
  if (!parent || parent->Opcode() != Op_MemBarCPUOrder)
    return false;
  ctl = parent->proj_out(TypeFunc::Control);
  mem = parent->proj_out(TypeFunc::Memory);
  if (!ctl || !mem) {
    return false;
  }
  // ensure the proj nodes both feed a LoadX[mo_acquire]
  LoadNode *ld = NULL;
  for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
    x = ctl->fast_out(i);
    // if we see a load we keep hold of it and stop searching
    if (x->is_Load()) {
      ld = x->as_Load();
      break;
    }
  }
  // it must be an acquiring load
  if (ld && ld->is_acquire()) {

    for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
      x = mem->fast_out(i);
      // if we see the same load we drop it and stop searching
      if (x == ld) {
        ld = NULL;
        break;
      }
    }
    // we must have dropped the load
    if (ld == NULL) {
      // check for a child cpuorder membar
      MemBarNode *child = child_membar(barrier->as_MemBar());
      if (child && child->Opcode() == Op_MemBarCPUOrder)
        return true;
    }
  }

  // final option for unnecessary membar is that it is a trailing node
  // belonging to a CAS

  MemBarNode *leading = trailing_to_leading(barrier->as_MemBar());

  return leading != NULL;
}
2513
2514 bool needs_acquiring_load(const Node *n)
2515 {
2516 assert(n->is_Load(), "expecting a load");
2517 if (UseBarriersForVolatile) {
2518 // we use a normal load and a dmb
2519 return false;
2520 }
2521
2522 LoadNode *ld = n->as_Load();
2523
2524 if (!ld->is_acquire()) {
2525 return false;
2526 }
2527
2528 // check if this load is feeding an acquire membar
2529 //
2530 // LoadX[mo_acquire]
2531 // { |1 }
2532 // {DecodeN}
2533 // |Parms
2534 // MemBarAcquire*
2535 //
2536 // where * tags node we were passed
2537 // and |k means input k
2538
2539 Node *start = ld;
2540 Node *mbacq = NULL;
2541
2542 // if we hit a DecodeNarrowPtr we reset the start node and restart
2543 // the search through the outputs
2544 restart:
2545
2546 for (DUIterator_Fast imax, i = start->fast_outs(imax); i < imax; i++) {
2547 Node *x = start->fast_out(i);
2548 if (x->is_MemBar() && x->Opcode() == Op_MemBarAcquire) {
2549 mbacq = x;
2550 } else if (!mbacq &&
2551 (x->is_DecodeNarrowPtr() ||
2552 (x->is_Mach() && x->Opcode() == Op_DecodeN))) {
2553 start = x;
2554 goto restart;
2555 }
2556 }
2557
2558 if (mbacq) {
2559 return true;
2560 }
2561
2562 // now check for an unsafe volatile get
2563
2564 // check if Ctl and Proj feed comes from a MemBarCPUOrder
2565 //
2566 // MemBarCPUOrder
2567 // || \\
2568 // MemBarAcquire* LoadX[mo_acquire]
2569 // ||
2570 // MemBarCPUOrder
2571
2572 MemBarNode *membar;
2573
2574 membar = parent_membar(ld);
2575
2576 if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) {
2577 return false;
2578 }
2579
2580 // ensure that there is a CPUOrder->Acquire->CPUOrder membar chain
2581
2582 membar = child_membar(membar);
2583
2584 if (!membar || !membar->Opcode() == Op_MemBarAcquire) {
2585 return false;
2586 }
2587
2588 membar = child_membar(membar);
2589
2590 if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) {
2591 return false;
2592 }
2593
2594 return true;
2595 }
2596
2597 bool unnecessary_release(const Node *n)
2598 {
2599 assert((n->is_MemBar() &&
2600 n->Opcode() == Op_MemBarRelease),
2601 "expecting a release membar");
2602
2603 if (UseBarriersForVolatile) {
2604 // we need to plant a dmb
2605 return false;
2606 }
2607
2608 // if there is a dependent CPUOrder barrier then use that as the
2609 // leading
2610
2611 MemBarNode *barrier = n->as_MemBar();
2612 // check for an intervening cpuorder membar
2613 MemBarNode *b = child_membar(barrier);
2614 if (b && b->Opcode() == Op_MemBarCPUOrder) {
2615 // ok, so start the check from the dependent cpuorder barrier
2616 barrier = b;
2617 }
2618
2619 // must start with a normal feed
2620 MemBarNode *trailing = leading_to_trailing(barrier);
2621
2622 return (trailing != NULL);
2623 }
2624
2625 bool unnecessary_volatile(const Node *n)
2626 {
2627 // assert n->is_MemBar();
2628 if (UseBarriersForVolatile) {
2629 // we need to plant a dmb
2630 return false;
2631 }
2632
2633 MemBarNode *mbvol = n->as_MemBar();
2634
2635 // first we check if this is part of a card mark. if so then we have
2636 // to generate a StoreLoad barrier
2637
2638 if (is_card_mark_membar(mbvol)) {
2639 return false;
2640 }
2641
2642 // ok, if it's not a card mark then we still need to check if it is
2643 // a trailing membar of a volatile put graph.
2644
2645 return (trailing_to_leading(mbvol) != NULL);
2646 }
2647
2648 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
2649
2650 bool needs_releasing_store(const Node *n)
2651 {
2652 // assert n->is_Store();
2653 if (UseBarriersForVolatile) {
2654 // we use a normal store and dmb combination
2655 return false;
2656 }
2657
2658 StoreNode *st = n->as_Store();
2659
2660 // the store must be marked as releasing
2661 if (!st->is_release()) {
2662 return false;
2663 }
2664
2665 // the store must be fed by a membar
2666
2667 Node *x = st->lookup(StoreNode::Memory);
2668
2669 if (! x || !x->is_Proj()) {
2670 return false;
2671 }
2672
2673 ProjNode *proj = x->as_Proj();
2674
2675 x = proj->lookup(0);
2676
2677 if (!x || !x->is_MemBar()) {
2678 return false;
2679 }
2680
2681 MemBarNode *barrier = x->as_MemBar();
2682
2683 // if the barrier is a release membar or a cpuorder mmebar fed by a
2684 // release membar then we need to check whether that forms part of a
2685 // volatile put graph.
2686
2687 // reject invalid candidates
2688 if (!leading_membar(barrier)) {
2689 return false;
2690 }
2691
2692 // does this lead a normal subgraph?
2693 MemBarNode *trailing = leading_to_trailing(barrier);
2694
2695 return (trailing != NULL);
2696 }
2697
2698 // predicate controlling translation of CAS
2699 //
2700 // returns true if CAS needs to use an acquiring load otherwise false
2701
bool needs_acquiring_load_exclusive(const Node *n)
{
  // predicate: returns true whenever the CAS must use an acquiring
  // load; the debug build additionally checks the CAS sits in the
  // expected leading membar + trailing acquire configuration
  assert(is_CAS(n->Opcode()), "expecting a compare and swap");
  if (UseBarriersForVolatile) {
    // explicit dmbs are planted instead
    return false;
  }

  // CAS nodes only ought to turn up in inlined unsafe CAS operations
#ifdef ASSERT
  LoadStoreNode *st = n->as_LoadStore();

  // the store must be fed by a membar

  Node *x = st->lookup(StoreNode::Memory);

  assert (x && x->is_Proj(), "CAS not fed by memory proj!");

  ProjNode *proj = x->as_Proj();

  x = proj->lookup(0);

  assert (x && x->is_MemBar(), "CAS not fed by membar!");

  MemBarNode *barrier = x->as_MemBar();

  // the barrier must be a cpuorder membar fed by a release membar

  assert(barrier->Opcode() == Op_MemBarCPUOrder,
         "CAS not fed by cpuorder membar!");

  MemBarNode *b = parent_membar(barrier);
  assert ((b != NULL && b->Opcode() == Op_MemBarRelease),
          "CAS not fed by cpuorder+release membar pair!");

  // does this lead a normal subgraph?
  MemBarNode *mbar = leading_to_trailing(barrier);

  assert(mbar != NULL, "CAS not embedded in normal graph!");

  assert(mbar->Opcode() == Op_MemBarAcquire, "trailing membar should be an acquire");
#endif // ASSERT
  // so we can just return true here
  return true;
}
2746
2747 // predicate controlling translation of StoreCM
2748 //
// returns true if the card write does not need a preceding StoreStore
// (dmb ishst) barrier, otherwise false
2751
bool unnecessary_storestore(const Node *storecm)
{
  assert(storecm->Opcode() == Op_StoreCM, "expecting a StoreCM");

  // we only ever need to generate a dmb ishst between an object put
  // and the associated card mark when we are using CMS without
  // conditional card marking. Any other occurrence will happen when
  // performing a card mark using CMS with conditional card marking or
  // G1. In those cases the preceding MemBarVolatile will be
  // translated to a dmb ish which guarantees visibility of the
  // preceding StoreN/P before this StoreCM

  if (!UseConcMarkSweepGC || UseCondCardMark) {
    return true;
  }

  // if we are implementing volatile puts using barriers then we must
  // insert the dmb ishst

  if (UseBarriersForVolatile) {
    return false;
  }

  // we must be using CMS with conditional card marking so we have to
  // generate the StoreStore

  return false;
}
2780
2781
// shorthand so instruction emitters can write `__ insn(...)` for
// `_masm.insn(...)` in the usual HotSpot style
#define __ _masm.
2783
2784 // advance declarations for helper functions to convert register
2785 // indices to register objects
2786
2787 // the ad file has to provide implementations of certain methods
2788 // expected by the generic code
2789 //
2790 // REQUIRED FUNCTIONALITY
2791
2792 //=============================================================================
2793
2794 // !!!!! Special hack to get all types of calls to specify the byte offset
2795 // from the start of the call to the point where the return address
2796 // will point.
2797
2798 int MachCallStaticJavaNode::ret_addr_offset()
2799 {
2800 // call should be a simple bl
2801 int off = 4;
2802 return off;
2803 }
2804
2805 int MachCallDynamicJavaNode::ret_addr_offset()
2806 {
2807 return 16; // movz, movk, movk, bl
2808 }
2809
2810 int MachCallRuntimeNode::ret_addr_offset() {
2811 // for generated stubs the call will be
2812 // far_call(addr)
2813 // for real runtime callouts it will be six instructions
2814 // see aarch64_enc_java_to_runtime
2815 // adr(rscratch2, retaddr)
2816 // lea(rscratch1, RuntimeAddress(addr)
2817 // stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
2818 // blrt rscratch1
2819 CodeBlob *cb = CodeCache::find_blob(_entry_point);
2820 if (cb) {
2821 return MacroAssembler::far_branch_size();
2822 } else {
2823 return 6 * NativeInstruction::instruction_size;
2824 }
2825 }
2826
2827 // Indicate if the safepoint node needs the polling page as an input
2828
2829 // the shared code plants the oop data at the start of the generated
// code for the safepoint node and that needs to be at the load
2831 // instruction itself. so we cannot plant a mov of the safepoint poll
2832 // address followed by a load. setting this to true means the mov is
2833 // scheduled as a prior instruction. that's better for scheduling
2834 // anyway.
2835
bool SafePointNode::needs_polling_address_input()
{
  // the polling page address is passed as an explicit input so the
  // mov that materializes it can be scheduled ahead of the poll load
  return true;
}
2840
2841 //=============================================================================
2842
#ifndef PRODUCT
// debug listing entry for a breakpoint node
void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  st->print("BREAKPOINT");
}
#endif
2848
// emit a breakpoint as a brk instruction with immediate 0
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);
  __ brk(0);
}
2853
// size is computed generically from the emitted code
uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
2857
2858 //=============================================================================
2859
#ifndef PRODUCT
// debug listing entry for a nop padding node
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("nop \t# %d bytes pad for loops and calls", _count);
}
#endif
2865
2866 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
2867 MacroAssembler _masm(&cbuf);
2868 for (int i = 0; i < _count; i++) {
2869 __ nop();
2870 }
2871 }
2872
// each nop occupies one fixed-width instruction slot
uint MachNopNode::size(PhaseRegAlloc*) const {
  return _count * NativeInstruction::instruction_size;
}
2876
2877 //=============================================================================
// the constant base node produces no value so its out RegMask is empty
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0; // absolute addressing, no offset
}
2883
// no postalloc expansion is used for the constant base node, so
// postalloc_expand must never be reached
bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}
2888
void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

// emits nothing so occupies no space
uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}
2896
#ifndef PRODUCT
// debug listing entry for the (empty) constant base node
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("-- \t// MachConstantBaseNode (empty encoding)");
}
#endif
2902
#ifndef PRODUCT
// print an assembly-style rendition of the method prolog
void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_slots() << LogBytesPerInt;

  if (C->need_stack_bang(framesize))
    st->print("# stack bang size=%d\n\t", framesize);

  if (framesize < ((1 << 9) + 2 * wordSize)) {
    // small frame: drop sp in one step then save rfp/lr at the top
    st->print("sub sp, sp, #%d\n\t", framesize);
    st->print("stp rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
    if (PreserveFramePointer) st->print("\n\tadd rfp, sp, #%d", framesize - 2 * wordSize);
  } else {
    // large frame: save lr/rfp first then drop sp by the remainder
    // via rscratch1
    st->print("stp lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
    if (PreserveFramePointer) st->print("mov rfp, sp\n\t");
    st->print("mov rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("sub sp, sp, rscratch1");
  }
}
#endif
2924
// emit the method prolog: patchable nop, optional stack bang, frame
// build, simulator notification and constant table base setup
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  // n.b. frame size includes space for return pc and rfp
  const long framesize = C->frame_size_in_bytes();
  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  int bangsize = C->bang_size_in_bytes();
  if (C->need_stack_bang(bangsize) && UseStackBanging)
    __ generate_stack_overflow_check(bangsize);

  __ build_frame(framesize);

  if (NotifySimulator) {
    __ notify(Assembler::method_entry);
  }

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
2960
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

// number of relocatable values in the prolog
int MachPrologNode::reloc() const
{
  return 0;
}
2971
2972 //=============================================================================
2973
#ifndef PRODUCT
// print an assembly-style rendition of the method epilog
void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;
  int framesize = C->frame_slots() << LogBytesPerInt;

  st->print("# pop frame %d\n\t",framesize);

  if (framesize == 0) {
    // nothing to release beyond the saved lr/rfp pair
    st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("ldp lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
    st->print("add sp, sp, #%d\n\t", framesize);
  } else {
    // large frame: restore sp via rscratch1 before popping lr/rfp
    st->print("mov rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("add sp, sp, rscratch1\n\t");
    st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  }

  if (do_polling() && C->is_method_compilation()) {
    st->print("# touch polling page\n\t");
    st->print("mov rscratch1, #0x%lx\n\t", p2i(os::get_polling_page()));
    st->print("ldr zr, [rscratch1]");
  }
}
#endif
2999
// Emit the method epilogue: tear down the frame and, for method
// compilations with polling enabled, read the polling page so the VM
// can safepoint on return.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);
  int framesize = C->frame_slots() << LogBytesPerInt;

  __ remove_frame(framesize);

  // Simulator-build hook mirroring the prologue's method_entry notify.
  if (NotifySimulator) {
    __ notify(Assembler::method_reentry);
  }

  if (do_polling() && C->is_method_compilation()) {
    __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
  }
}
3015
// Size of the epilogue in bytes; depends on frame size, so measured.
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  // Variable size. Determine dynamically.
  return MachNode::size(ra_);
}

int MachEpilogNode::reloc() const {
  // Return number of relocatable values contained in this instruction.
  return 1; // 1 for polling page.
}

// No special pipeline modelling for the epilogue.
const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

// This method seems to be obsolete. It is declared in machnode.hpp
// and defined in all *.ad files, but it is never called. Should we
// get rid of it?
int MachEpilogNode::safepoint_offset() const {
  assert(do_polling(), "no return for this epilog node");
  // 4 presumably means one instruction into the epilog — TODO confirm,
  // given the method appears unused.
  return 4;
}
3037
3038 //=============================================================================
3039
// Figure out which register class each belongs in: rc_int, rc_float or
// rc_stack.
enum RC { rc_bad, rc_int, rc_float, rc_stack };

// Map an OptoReg number to its register class.  Relies on the OptoReg
// numbering laid out by the register block: integer register slots
// first, then float register slots, then flags, then stack slots.
static enum RC rc_class(OptoReg::Name reg) {

  if (reg == OptoReg::Bad) {
    return rc_bad;
  }

  // we have 30 int registers * 2 halves
  // (rscratch1 and rscratch2 are omitted)

  if (reg < 60) {
    return rc_int;
  }

  // we have 32 float register * 2 halves
  // NOTE(review): 60 + 128 implies 4 slots per V register here —
  // confirm against the register definition block.
  if (reg < 60 + 128) {
    return rc_float;
  }

  // Between float regs & stack is the flags regs.
  assert(OptoReg::is_stack(reg), "blow up if spilling flags");

  return rc_stack;
}
3067
3068 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
3069 Compile* C = ra_->C;
3070
3071 // Get registers to move.
3072 OptoReg::Name src_hi = ra_->get_reg_second(in(1));
3073 OptoReg::Name src_lo = ra_->get_reg_first(in(1));
3074 OptoReg::Name dst_hi = ra_->get_reg_second(this);
3075 OptoReg::Name dst_lo = ra_->get_reg_first(this);
3076
3077 enum RC src_hi_rc = rc_class(src_hi);
3078 enum RC src_lo_rc = rc_class(src_lo);
3079 enum RC dst_hi_rc = rc_class(dst_hi);
3080 enum RC dst_lo_rc = rc_class(dst_lo);
3081
3082 assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
3083
3084 if (src_hi != OptoReg::Bad) {
3085 assert((src_lo&1)==0 && src_lo+1==src_hi &&
3086 (dst_lo&1)==0 && dst_lo+1==dst_hi,
3087 "expected aligned-adjacent pairs");
3088 }
3089
3090 if (src_lo == dst_lo && src_hi == dst_hi) {
3091 return 0; // Self copy, no move.
3092 }
3093
3094 bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
3095 (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
3096 int src_offset = ra_->reg2offset(src_lo);
3097 int dst_offset = ra_->reg2offset(dst_lo);
3098
3099 if (bottom_type()->isa_vect() != NULL) {
3100 uint ireg = ideal_reg();
3101 assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
3102 if (cbuf) {
3103 MacroAssembler _masm(cbuf);
3104 assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
3105 if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
3106 // stack->stack
3107 assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
3108 if (ireg == Op_VecD) {
3109 __ unspill(rscratch1, true, src_offset);
3110 __ spill(rscratch1, true, dst_offset);
3111 } else {
3112 __ spill_copy128(src_offset, dst_offset);
3113 }
3114 } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
3115 __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3116 ireg == Op_VecD ? __ T8B : __ T16B,
3117 as_FloatRegister(Matcher::_regEncode[src_lo]));
3118 } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
3119 __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3120 ireg == Op_VecD ? __ D : __ Q,
3121 ra_->reg2offset(dst_lo));
3122 } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
3123 __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3124 ireg == Op_VecD ? __ D : __ Q,
3125 ra_->reg2offset(src_lo));
3126 } else {
3127 ShouldNotReachHere();
3128 }
3129 }
3130 } else if (cbuf) {
3131 MacroAssembler _masm(cbuf);
3132 switch (src_lo_rc) {
3133 case rc_int:
3134 if (dst_lo_rc == rc_int) { // gpr --> gpr copy
3135 if (is64) {
3136 __ mov(as_Register(Matcher::_regEncode[dst_lo]),
3137 as_Register(Matcher::_regEncode[src_lo]));
3138 } else {
3139 MacroAssembler _masm(cbuf);
3140 __ movw(as_Register(Matcher::_regEncode[dst_lo]),
3141 as_Register(Matcher::_regEncode[src_lo]));
3142 }
3143 } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
3144 if (is64) {
3145 __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3146 as_Register(Matcher::_regEncode[src_lo]));
3147 } else {
3148 __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3149 as_Register(Matcher::_regEncode[src_lo]));
3150 }
3151 } else { // gpr --> stack spill
3152 assert(dst_lo_rc == rc_stack, "spill to bad register class");
3153 __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
3154 }
3155 break;
3156 case rc_float:
3157 if (dst_lo_rc == rc_int) { // fpr --> gpr copy
3158 if (is64) {
3159 __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
3160 as_FloatRegister(Matcher::_regEncode[src_lo]));
3161 } else {
3162 __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
3163 as_FloatRegister(Matcher::_regEncode[src_lo]));
3164 }
3165 } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
3166 if (cbuf) {
3167 __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3168 as_FloatRegister(Matcher::_regEncode[src_lo]));
3169 } else {
3170 __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3171 as_FloatRegister(Matcher::_regEncode[src_lo]));
3172 }
3173 } else { // fpr --> stack spill
3174 assert(dst_lo_rc == rc_stack, "spill to bad register class");
3175 __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
3176 is64 ? __ D : __ S, dst_offset);
3177 }
3178 break;
3179 case rc_stack:
3180 if (dst_lo_rc == rc_int) { // stack --> gpr load
3181 __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
3182 } else if (dst_lo_rc == rc_float) { // stack --> fpr load
3183 __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
3184 is64 ? __ D : __ S, src_offset);
3185 } else { // stack --> stack copy
3186 assert(dst_lo_rc == rc_stack, "spill to bad register class");
3187 __ unspill(rscratch1, is64, src_offset);
3188 __ spill(rscratch1, is64, dst_offset);
3189 }
3190 break;
3191 default:
3192 assert(false, "bad rc_class for spill");
3193 ShouldNotReachHere();
3194 }
3195 }
3196
3197 if (st) {
3198 st->print("spill ");
3199 if (src_lo_rc == rc_stack) {
3200 st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
3201 } else {
3202 st->print("%s -> ", Matcher::regName[src_lo]);
3203 }
3204 if (dst_lo_rc == rc_stack) {
3205 st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
3206 } else {
3207 st->print("%s", Matcher::regName[dst_lo]);
3208 }
3209 if (bottom_type()->isa_vect() != NULL) {
3210 st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
3211 } else {
3212 st->print("\t# spill size = %d", is64 ? 64:32);
3213 }
3214 }
3215
3216 return 0;
3217
3218 }
3219
3220 #ifndef PRODUCT
3221 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3222 if (!ra_)
3223 st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
3224 else
3225 implementation(NULL, ra_, false, st);
3226 }
3227 #endif
3228
// Emit the spill copy; all the work happens in implementation().
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation(&cbuf, ra_, false, NULL);
}

// Size varies with register classes and offsets; measure by emitting.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
3236
3237 //=============================================================================
3238
3239 #ifndef PRODUCT
3240 void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
3241 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
3242 int reg = ra_->get_reg_first(this);
3243 st->print("add %s, rsp, #%d]\t# box lock",
3244 Matcher::regName[reg], offset);
3245 }
3246 #endif
3247
// Materialize the address of the on-stack lock box: dst = sp + offset.
// Only offsets encodable as an add/sub immediate are supported; a
// larger offset trips ShouldNotReachHere (frame would be unexpectedly big).
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);

  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);

  if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
    __ add(as_Register(reg), sp, offset);
  } else {
    ShouldNotReachHere();
  }
}
3260
// Always a single 4-byte add instruction (see emit above).
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
  return 4;
}
3265
3266 //=============================================================================
3267
3268 #ifndef PRODUCT
// Debug formatting of the unverified entry point: load the receiver's
// klass, compare with the expected klass in r0, branch to the inline
// cache miss stub on mismatch.
void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("# MachUEPNode");
  if (UseCompressedClassPointers) {
    st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
    if (Universe::narrow_klass_shift() != 0) {
      st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
    }
  } else {
    st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
  }
  st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
  st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
}
3283 #endif
3284
// Emit the unverified entry point: inline cache check against the
// receiver's klass, jumping to the IC miss stub on mismatch.
void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  // This is the unverified entry point.
  MacroAssembler _masm(&cbuf);

  // Compares rscratch2 (expected klass? see cmp_klass) with the klass
  // of the receiver in j_rarg0, using rscratch1 as a temp.
  __ cmp_klass(j_rarg0, rscratch2, rscratch1);
  Label skip;
  // TODO
  // can we avoid this skip and still use a reloc?
  __ br(Assembler::EQ, skip);
  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  __ bind(skip);
}
3298
// Size depends on compressed-klass configuration; measure by emitting.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_);
}
3303
3304 // REQUIRED EMIT CODE
3305
3306 //=============================================================================
3307
3308 // Emit exception handler code.
// Emit exception handler code: a stub that far-jumps to the shared
// exception blob.  Returns the offset of the handler within the code
// buffer, or 0 on code-cache exhaustion.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
{
  // mov rscratch1 #exception_blob_entry_point
  // br rscratch1
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
3327
3328 // Emit deopt handler code.
// Emit deopt handler code: set lr to the handler's own address (so the
// deopt blob sees a return address inside the handler) and far-jump to
// the unpack entry.  Returns the handler's offset, or 0 on failure.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
{
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

  // lr := current pc, consumed by the deopt blob as the "return" address.
  __ adr(lr, __ pc());
  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));

  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
3348
3349 // REQUIRED MATCHER CODE
3350
3351 //=============================================================================
3352
3353 const bool Matcher::match_rule_supported(int opcode) {
3354
3355 switch (opcode) {
3356 default:
3357 break;
3358 }
3359
3360 if (!has_match_rule(opcode)) {
3361 return false;
3362 }
3363
3364 return true; // Per default match rules are supported.
3365 }
3366
3367 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
3368
3369 // TODO
3370 // identify extra cases that we might want to provide match rules for
3371 // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
3372 bool ret_value = match_rule_supported(opcode);
3373 // Add rules here.
3374
3375 return ret_value; // Per default match rules are supported.
3376 }
3377
// No predicated (masked) vector support on this port.
const bool Matcher::has_predicated_vectors(void) {
  return false;
}

// No adjustment to the float register pressure threshold.
const int Matcher::float_pressure(int default_pressure_threshold) {
  return default_pressure_threshold;
}

// Not used on this port.
int Matcher::regnum_to_fpu_offset(int regnum)
{
  Unimplemented();
  return 0;
}
3391
3392 // Is this branch offset short enough that a short branch can be used?
3393 //
3394 // NOTE: If the platform does not provide any short branch variants, then
3395 // this method should return false for offset 0.
3396 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
3397 // The passed offset is relative to address of the branch.
3398
3399 return (-32768 <= offset && offset < 32768);
3400 }
3401
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  // Probably always true, even if a temp register is required.
  return true;
}

// true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}
3412
3413 // Vector width in bytes.
3414 const int Matcher::vector_width_in_bytes(BasicType bt) {
3415 int size = MIN2(16,(int)MaxVectorSize);
3416 // Minimum 2 values in vector
3417 if (size < 2*type2aelembytes(bt)) size = 0;
3418 // But never < 4
3419 if (size < 4) size = 0;
3420 return size;
3421 }
3422
// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
3427 const int Matcher::min_vector_size(const BasicType bt) {
3428 // For the moment limit the vector size to 8 bytes
3429 int size = 8 / type2aelembytes(bt);
3430 if (size < 2) size = 2;
3431 return size;
3432 }
3433
3434 // Vector ideal reg.
3435 const int Matcher::vector_ideal_reg(int len) {
3436 switch(len) {
3437 case 8: return Op_VecD;
3438 case 16: return Op_VecX;
3439 }
3440 ShouldNotReachHere();
3441 return 0;
3442 }
3443
// Vector shift counts are always held in a full 128-bit register.
const int Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecX;
}

// AES support not yet implemented
const bool Matcher::pass_original_key_for_aes() {
  return false;
}
3452
// Misaligned vector store/load support, governed by the AlignVector flag.
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}

// false => size gets scaled to BytesPerLong, ok.
const bool Matcher::init_array_count_is_in_bytes = false;

// Use conditional move (CMOVL)
const int Matcher::long_cmove_cost() {
  // long cmoves are no more expensive than int cmoves
  return 0;
}

const int Matcher::float_cmove_cost() {
  // float cmoves are no more expensive than int cmoves
  return 0;
}

// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

// This affects two different things:
//  - how Decode nodes are matched
//  - how ImplicitNullCheck opportunities are recognized
// If true, the matcher will try to remove all Decodes and match them
// (as operands) into nodes. NullChecks are not prepared to deal with
// Decodes by final_graph_reshaping().
// If false, final_graph_reshaping() forces the decode behind the Cmp
// for a NullCheck. The matcher matches the Decode node into a register.
// Implicit_null_check optimization moves the Decode along with the
// memory operation back up before the NullCheck.
bool Matcher::narrow_oop_use_complex_address() {
  // Complex addressing only works when there is no oop-base shift.
  return Universe::narrow_oop_shift() == 0;
}

bool Matcher::narrow_klass_use_complex_address() {
// TODO
// decide whether we need to set this to true
  return false;
}

bool Matcher::const_oop_prefer_decode() {
  // Prefer ConN+DecodeN over ConP in simple compressed oops mode.
  return Universe::narrow_oop_base() == NULL;
}

bool Matcher::const_klass_prefer_decode() {
  // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode.
  return Universe::narrow_klass_base() == NULL;
}

// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
const bool Matcher::rematerialize_float_constants = false;

// If CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;

// Not needed on AArch64 (the comment "No-op on amd64" was stale: this
// implementation is not a no-op, it is simply never expected to run).
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  Unimplemented();
}

// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Are floats converted to double when stored to stack during
// deoptimization?
bool Matcher::float_in_double() { return true; }

// Do ints take an entire long register or just half?
// The relevant question is how the int is callee-saved:
// the whole long is written but de-opt'ing will have to extract
// the relevant 32 bits.
const bool Matcher::int_in_long = true;
3540
// Return whether or not this register is ever used as an argument.
// This function is used on startup to build the trampoline stubs in
// generateOptoStub.  Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers not be
// available to the callee.
bool Matcher::can_be_java_arg(int reg)
{
  // The Java calling convention uses r0-r7 and v0-v7 (both halves of
  // each) for arguments.
  return
    reg ==  R0_num || reg == R0_H_num ||
    reg ==  R1_num || reg == R1_H_num ||
    reg ==  R2_num || reg == R2_H_num ||
    reg ==  R3_num || reg == R3_H_num ||
    reg ==  R4_num || reg == R4_H_num ||
    reg ==  R5_num || reg == R5_H_num ||
    reg ==  R6_num || reg == R6_H_num ||
    reg ==  R7_num || reg == R7_H_num ||
    reg ==  V0_num || reg == V0_H_num ||
    reg ==  V1_num || reg == V1_H_num ||
    reg ==  V2_num || reg == V2_H_num ||
    reg ==  V3_num || reg == V3_H_num ||
    reg ==  V4_num || reg == V4_H_num ||
    reg ==  V5_num || reg == V5_H_num ||
    reg ==  V6_num || reg == V6_H_num ||
    reg ==  V7_num || reg == V7_H_num;
}

// Any Java argument register may be used to hold a spilled value.
bool Matcher::is_spillable_arg(int reg)
{
  return can_be_java_arg(reg);
}

// No hand-written assembly path for long division by constant.
bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
  return false;
}

// Register for DIVI projection of divmodI.  divmodI is not used on
// this port, so none of these projections should be requested.
RegMask Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODI projection of divmodI.
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for DIVL projection of divmodL.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Method handle invokes save/restore SP via the frame pointer register.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return FP_REG_mask();
}
3602
3603 bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
3604 for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
3605 Node* u = addp->fast_out(i);
3606 if (u->is_Mem()) {
3607 int opsize = u->as_Mem()->memory_size();
3608 assert(opsize > 0, "unexpected memory operand size");
3609 if (u->as_Mem()->memory_size() != (1<<shift)) {
3610 return false;
3611 }
3612 }
3613 }
3614 return true;
3615 }
3616
// See Matcher::convi2l_type_required in matcher.hpp for the semantics.
const bool Matcher::convi2l_type_required = false;
3618
3619 // Should the Matcher clone shifts on addressing modes, expecting them
3620 // to be subsumed into complex addressing expressions or compute them
3621 // into registers?
// Decide whether the components of address expression m (an AddP)
// should be cloned into each use so they fold into AArch64 addressing
// modes, instead of being computed into a register.  Returns true when
// the expression was handled (components pushed for cloning).
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  if (clone_base_plus_offset_address(m, mstack, address_visited)) {
    return true;
  }

  Node *off = m->in(AddPNode::Offset);
  // Case 1: offset is (LShiftL (ConvI2L x) con) — a scaled, possibly
  // sign-extended index; only worthwhile if the scale matches every
  // memory user's operand size (see size_fits_all_mem_uses).
  if (off->Opcode() == Op_LShiftL && off->in(2)->is_Con() &&
      size_fits_all_mem_uses(m, off->in(2)->get_int()) &&
      // Are there other uses besides address expressions?
      !is_visited(off)) {
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(2), Visit);
    Node *conv = off->in(1);
    if (conv->Opcode() == Op_ConvI2L &&
        // Are there other uses besides address expressions?
        !is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Pre_Visit);
    } else {
      mstack.push(conv, Pre_Visit);
    }
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  // Case 2: offset is a bare (ConvI2L x) — sign-extended unscaled index.
  } else if (off->Opcode() == Op_ConvI2L &&
             // Are there other uses besides address expressions?
             !is_visited(off)) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(1), Pre_Visit);
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}
3659
3660 // Transform:
3661 // (AddP base (AddP base address (LShiftL index con)) offset)
3662 // into:
3663 // (AddP base (AddP base offset) (LShiftL index con))
3664 // to take full advantage of ARM's addressing modes
// See the transform description in the comment above: swap the offset
// of the outer AddP with the shifted-index offset of the inner AddP so
// the shift can fold into the memory operand.
void Compile::reshape_address(AddPNode* addp) {
  Node *addr = addp->in(AddPNode::Address);
  // Only applies when the inner node is an AddP over the same base.
  if (addr->is_AddP() && addr->in(AddPNode::Base) == addp->in(AddPNode::Base)) {
    const AddPNode *addp2 = addr->as_AddP();
    if ((addp2->in(AddPNode::Offset)->Opcode() == Op_LShiftL &&
         addp2->in(AddPNode::Offset)->in(2)->is_Con() &&
         size_fits_all_mem_uses(addp, addp2->in(AddPNode::Offset)->in(2)->get_int())) ||
        addp2->in(AddPNode::Offset)->Opcode() == Op_ConvI2L) {

      // Any use that can't embed the address computation?
      for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
        Node* u = addp->fast_out(i);
        if (!u->is_Mem() || u->is_LoadVector() || u->is_StoreVector() || u->Opcode() == Op_StoreCM) {
          return;
        }
      }

      Node* off = addp->in(AddPNode::Offset);
      Node* addr2 = addp2->in(AddPNode::Address);
      Node* base = addp->in(AddPNode::Base);

      Node* new_addr = NULL;
      // Check whether the graph already has the new AddP we need
      // before we create one (no GVN available here).
      for (DUIterator_Fast imax, i = addr2->fast_outs(imax); i < imax; i++) {
        Node* u = addr2->fast_out(i);
        if (u->is_AddP() &&
            u->in(AddPNode::Base) == base &&
            u->in(AddPNode::Address) == addr2 &&
            u->in(AddPNode::Offset) == off) {
          new_addr = u;
          break;
        }
      }

      if (new_addr == NULL) {
        new_addr = new AddPNode(base, addr2, off);
      }
      Node* new_off = addp2->in(AddPNode::Offset);
      addp->set_req(AddPNode::Address, new_addr);
      // Disconnect nodes left without uses after the rewiring.
      if (addr->outcnt() == 0) {
        addr->disconnect_inputs(NULL, this);
      }
      addp->set_req(AddPNode::Offset, new_off);
      if (off->outcnt() == 0) {
        off->disconnect_inputs(NULL, this);
      }
    }
  }
}
3715
3716 // helper for encoding java_to_runtime calls on sim
3717 //
3718 // this is needed to compute the extra arguments required when
3719 // planting a call to the simulator blrt instruction. the TypeFunc
3720 // can be queried to identify the counts for integral, and floating
3721 // arguments and the return type
3722
3723 static void getCallInfo(const TypeFunc *tf, int &gpcnt, int &fpcnt, int &rtype)
3724 {
3725 int gps = 0;
3726 int fps = 0;
3727 const TypeTuple *domain = tf->domain();
3728 int max = domain->cnt();
3729 for (int i = TypeFunc::Parms; i < max; i++) {
3730 const Type *t = domain->field_at(i);
3731 switch(t->basic_type()) {
3732 case T_FLOAT:
3733 case T_DOUBLE:
3734 fps++;
3735 default:
3736 gps++;
3737 }
3738 }
3739 gpcnt = gps;
3740 fpcnt = fps;
3741 BasicType rt = tf->return_type();
3742 switch (rt) {
3743 case T_VOID:
3744 rtype = MacroAssembler::ret_type_void;
3745 break;
3746 default:
3747 rtype = MacroAssembler::ret_type_integral;
3748 break;
3749 case T_FLOAT:
3750 rtype = MacroAssembler::ret_type_float;
3751 break;
3752 case T_DOUBLE:
3753 rtype = MacroAssembler::ret_type_double;
3754 break;
3755 }
3756 }
3757
// Emit a volatile access of REG through BASE using INSN.  Volatile
// accesses only permit the plain [base] addressing mode, hence the
// guarantees on index/disp/scale.
#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
  MacroAssembler _masm(&cbuf);                                          \
  {                                                                     \
    guarantee(INDEX == -1, "mode not permitted for volatile");          \
    guarantee(DISP == 0, "mode not permitted for volatile");            \
    guarantee(SCALE == 0, "mode not permitted for volatile");           \
    __ INSN(REG, as_Register(BASE));                                    \
  }

// Pointer-to-member types for the loadStore() helpers below: plain
// register, float register, and SIMD-variant float register forms.
typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
                                  MacroAssembler::SIMD_RegVariant T, const Address &adr);
3771
3772 // Used for all non-volatile memory accesses. The use of
3773 // $mem->opcode() to discover whether this pattern uses sign-extended
3774 // offsets is something of a kludge.
// Used for all non-volatile memory accesses.  The use of
// $mem->opcode() to discover whether this pattern uses sign-extended
// offsets is something of a kludge.
static void loadStore(MacroAssembler masm, mem_insn insn,
                      Register reg, int opcode,
                      Register base, int index, int size, int disp)
{
  Address::extend scale;

  // Hooboy, this is fugly.  We need a way to communicate to the
  // encoder that the index needs to be sign extended, so we have to
  // enumerate all the cases.
  switch (opcode) {
  case INDINDEXSCALEDI2L:
  case INDINDEXSCALEDI2LN:
  case INDINDEXI2L:
  case INDINDEXI2LN:
    // Index came from an int: sign-extend (sxtw) while scaling.
    scale = Address::sxtw(size);
    break;
  default:
    scale = Address::lsl(size);
  }

  if (index == -1) {
    // No index register: base + displacement.
    (masm.*insn)(reg, Address(base, disp));
  } else {
    assert(disp == 0, "unsupported address mode: disp = %d", disp);
    (masm.*insn)(reg, Address(base, as_Register(index), scale));
  }
}
3802
// Float-register variant of loadStore() above; same addressing-mode
// selection, fewer sign-extension cases.
static void loadStore(MacroAssembler masm, mem_float_insn insn,
                      FloatRegister reg, int opcode,
                      Register base, int index, int size, int disp)
{
  Address::extend scale;

  switch (opcode) {
  case INDINDEXSCALEDI2L:
  case INDINDEXSCALEDI2LN:
    // Index came from an int: sign-extend (sxtw) while scaling.
    scale = Address::sxtw(size);
    break;
  default:
    scale = Address::lsl(size);
  }

  if (index == -1) {
    // No index register: base + displacement.
    (masm.*insn)(reg, Address(base, disp));
  } else {
    assert(disp == 0, "unsupported address mode: disp = %d", disp);
    (masm.*insn)(reg, Address(base, as_Register(index), scale));
  }
}
3825
// Vector variant of loadStore(): takes an explicit SIMD register
// variant and only supports lsl scaling of the index.
static void loadStore(MacroAssembler masm, mem_vector_insn insn,
                      FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
                      int opcode, Register base, int index, int size, int disp)
{
  if (index == -1) {
    (masm.*insn)(reg, T, Address(base, disp));
  } else {
    assert(disp == 0, "unsupported address mode");
    (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
  }
}
3837
3838 %}
3839
3840
3841
3842 //----------ENCODING BLOCK-----------------------------------------------------
3843 // This block specifies the encoding classes used by the compiler to
3844 // output byte streams. Encoding classes are parameterized macros
3845 // used by Machine Instruction Nodes in order to generate the bit
3846 // encoding of the instruction. Operands specify their base encoding
3847 // interface with the interface keyword. There are currently
3848 // supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, &
3849 // COND_INTER. REG_INTER causes an operand to generate a function
3850 // which returns its register number when queried. CONST_INTER causes
3851 // an operand to generate a function which returns the value of the
3852 // constant when queried. MEMORY_INTER causes an operand to generate
3853 // four functions which return the Base Register, the Index Register,
3854 // the Scale Value, and the Offset Value of the operand when queried.
3855 // COND_INTER causes an operand to generate six functions which return
3856 // the encoding code (ie - encoding bits for the instruction)
3857 // associated with each basic boolean condition for a conditional
3858 // instruction.
3859 //
3860 // Instructions specify two basic values for encoding. Again, a
3861 // function is available to check if the constant displacement is an
3862 // oop. They use the ins_encode keyword to specify their encoding
3863 // classes (which must be a sequence of enc_class names, and their
3864 // parameters, specified in the encoding block), and they use the
3865 // opcode keyword to specify, in order, their primary, secondary, and
3866 // tertiary opcode. Only the opcode sections which a particular
3867 // instruction needs for encoding need to be specified.
3868 encode %{
3869 // Build emit functions for each basic byte or larger field in the
3870 // intel encoding scheme (opcode, rm, sib, immediate), and call them
3871 // from C++ code in the enc_class source block. Emit functions will
3872 // live in the main source block for now. In future, we can
3873 // generalize this by adding a syntax that specifies the sizes of
3874 // fields in an order, so that the adlc can build the emit functions
3875 // automagically
3876
3877 // catch all for unimplemented encodings
3878 enc_class enc_unimplemented %{
3879 MacroAssembler _masm(&cbuf);
3880 __ unimplemented("C2 catch all");
3881 %}
3882
3883 // BEGIN Non-volatile memory access
3884
  // Non-volatile integer loads.  Each encoding extracts the destination
  // register and the memory operand's components (ADLC opcode, base
  // register, index, scale, displacement) and hands them to the loadStore
  // helper, which selects the addressing form and emits the named
  // MacroAssembler load instruction.

  // load signed byte into a 32-bit register
  enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load signed byte into a 64-bit register
  enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load unsigned (zero-extended) byte
  enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // same encoding, long-register operand variant
  enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load signed halfword into a 32-bit register
  enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load signed halfword into a 64-bit register
  enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load unsigned (zero-extended) halfword
  enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // same encoding, long-register operand variant
  enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load 32-bit word
  enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // same encoding, long-register operand variant
  enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load signed 32-bit word into a 64-bit register
  enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load 64-bit doubleword
  enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
3956
  // Floating-point and vector loads.  Same loadStore dispatch as the
  // integer loads above, but the destination is a FloatRegister; the
  // vector variants additionally pass the SIMD arrangement size
  // (S = 32-bit, D = 64-bit, Q = 128-bit) to select the register width.

  // load single-precision float
  enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load double-precision float
  enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load 32-bit vector
  enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load 64-bit vector
  enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // load 128-bit vector
  enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
3986
  // Non-volatile stores.  Mirror images of the loads above.  The *0
  // variants store the architectural zero register (zr) and so need no
  // source operand — the common "store constant zero" case.

  // store byte
  enc_class aarch64_enc_strb(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store zero byte
  enc_class aarch64_enc_strb0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store zero byte, preceded by a StoreStore barrier so earlier stores
  // become visible before this one
  enc_class aarch64_enc_strb0_ordered(memory mem) %{
    MacroAssembler _masm(&cbuf);
    __ membar(Assembler::StoreStore);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store halfword
  enc_class aarch64_enc_strh(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store zero halfword
  enc_class aarch64_enc_strh0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store 32-bit word
  enc_class aarch64_enc_strw(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store zero word
  enc_class aarch64_enc_strw0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store 64-bit doubleword
  enc_class aarch64_enc_str(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    // (sp is not encodable as the source of a str), so copy it
    // through rscratch2 first
    if (src_reg == r31_sp) {
      MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store zero doubleword
  enc_class aarch64_enc_str0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store single-precision float
  enc_class aarch64_enc_strs(vRegF src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store double-precision float
  enc_class aarch64_enc_strd(vRegD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store 32-bit vector
  enc_class aarch64_enc_strvS(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store 64-bit vector
  enc_class aarch64_enc_strvD(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // store 128-bit vector
  enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
4079
4080 // END Non-volatile memory access
4081
4082 // volatile loads and stores
4083
  // Release stores (store-release byte/halfword/word).  MOV_VOLATILE is a
  // helper macro defined earlier in this file; it forms the effective
  // address from base/index/scale/disp (using rscratch1 as scratch where
  // needed) and emits the named release-store instruction.

  enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrb);
  %}

  enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrh);
  %}

  enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}
4098
4099
  // Acquire loads.  ldar/ldarb/ldarh only exist in zero-extending forms,
  // so the signed variants load with acquire semantics and then
  // sign-extend the result in a second instruction.

  // acquire-load byte, sign-extend to 32 bits
  enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtbw(dst_reg, dst_reg);
  %}

  // acquire-load byte, sign-extend to 64 bits
  enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtb(dst_reg, dst_reg);
  %}

  // acquire-load unsigned byte
  enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  // same encoding, long-register operand variant
  enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  // acquire-load halfword, sign-extend to 32 bits
  enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxthw(dst_reg, dst_reg);
  %}

  // acquire-load halfword, sign-extend to 64 bits
  enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxth(dst_reg, dst_reg);
  %}

  // acquire-load unsigned halfword
  enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  // same encoding, long-register operand variant
  enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  // acquire-load 32-bit word
  enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  // same encoding, long-register operand variant
  enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  // acquire-load 64-bit doubleword
  enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
  %}

  // acquire-load float: no FP acquire load exists, so load the bits into
  // rscratch1 with ldarw and move them into the FP register with fmov
  enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
    __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
  %}

  // acquire-load double: same trick via ldar + fmovd
  enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
    __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
  %}
4174
  // release-store 64-bit doubleword
  enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    // (sp is not encodable here), so copy it through rscratch2 first
    if (src_reg == r31_sp) {
      MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}

  // release-store float: move the FP bits to rscratch2 (inner scope so the
  // temporary _masm does not clash with the one MOV_VOLATILE creates),
  // then release-store the integer register
  enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovs(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}

  // release-store double: same trick via fmovd + stlr
  enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovd(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}
4208
4209 // synchronized read/update encodings
4210
  // Load-acquire-exclusive.  ldaxr only takes a plain base register, so
  // any indexed/displaced address is first materialized into rscratch1
  // with one or two lea instructions.
  enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {          // no index register
      if (disp != 0) {
        __ lea(rscratch1, Address(base, disp));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ ldaxr(dst_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // base + disp first, then add the scaled index
        __ lea(rscratch1, Address(base, disp));
        __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      }
    }
  %}
4239
  // Store-release-exclusive.  Same address-materialization scheme as
  // ldaxr above, but uses rscratch2 for the address because rscratch1
  // receives the store-exclusive status result.
  enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register src_reg = as_Register($src$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {          // no index register
      if (disp != 0) {
        __ lea(rscratch2, Address(base, disp));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ stlxr(rscratch1, src_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // base + disp first, then add the scaled index
        __ lea(rscratch2, Address(base, disp));
        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      }
    }
    // stlxr wrote its status (0 = success) into rscratch1; compare with
    // zero so following code can branch on EQ/NE
    __ cmpw(rscratch1, zr);
  %}
4269
  // Compare-and-exchange encodings.  All four require a bare base-register
  // address (no index, no displacement) and delegate to
  // MacroAssembler::cmpxchg with the appropriate operand size and
  // acquire/release flags.

  // 64-bit CAS, release-only ordering
  enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 32-bit CAS, release-only ordering
  enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}


  // The only difference between aarch64_enc_cmpxchg and
  // aarch64_enc_cmpxchg_acq is that we use load-acquire in the
  // CompareAndSwap sequence to serve as a barrier on acquiring a
  // lock.

  // 64-bit CAS, acquire + release ordering
  enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 32-bit CAS, acquire + release ordering
  enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}
4306
4307
4308 // auxiliary used for CompareAndSwapX to set result register
  // auxiliary used for CompareAndSwapX to set result register:
  // result := 1 if the preceding comparison set EQ, else 0
  enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
    MacroAssembler _masm(&cbuf);
    Register res_reg = as_Register($res$$reg);
    __ cset(res_reg, Assembler::EQ);
  %}
4314
4315 // prefetch encodings
4316
  // Prefetch for write (PSTL1KEEP hint).  prfm accepts base+disp or
  // base+scaled-index directly; the combined base+disp+index case needs
  // the displacement folded into rscratch1 first.
  enc_class aarch64_enc_prefetchw(memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      __ prfm(Address(base, disp), PSTL1KEEP);
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
      } else {
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
      }
    }
  %}
4335
  /// mov encodings
4337
  // move 32-bit immediate into a register; a zero constant is emitted as a
  // move from the zero register
  enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
    MacroAssembler _masm(&cbuf);
    u_int32_t con = (u_int32_t)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    if (con == 0) {
      __ movw(dst_reg, zr);
    } else {
      __ movw(dst_reg, con);
    }
  %}

  // move 64-bit immediate into a register; zero handled as above
  enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    u_int64_t con = (u_int64_t)$src$$constant;
    if (con == 0) {
      __ mov(dst_reg, zr);
    } else {
      __ mov(dst_reg, con);
    }
  %}
4359
  // move a pointer constant into a register, dispatching on its relocation
  // type: oops and metadata are emitted with relocatable move sequences;
  // unrelocated constants below the page size are plain moves, larger ones
  // use adrp + add.  NULL and 1 have dedicated encodings elsewhere
  // (mov_p0 / mov_p1 below) and must never reach here.
  enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL || con == (address)1) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      if (rtype == relocInfo::oop_type) {
        __ movoop(dst_reg, (jobject)con, /*immediate*/true);
      } else if (rtype == relocInfo::metadata_type) {
        __ mov_metadata(dst_reg, (Metadata*)con);
      } else {
        assert(rtype == relocInfo::none, "unexpected reloc type");
        if (con < (address)(uintptr_t)os::vm_page_size()) {
          __ mov(dst_reg, con);
        } else {
          unsigned long offset;
          __ adrp(dst_reg, con, offset);
          __ add(dst_reg, dst_reg, offset);
        }
      }
    }
  %}
4384
  // move pointer constant NULL (0)
  enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // move pointer constant 1
  enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, (u_int64_t)1);
  %}

  // load the address of the polling page with a poll_type relocation;
  // adrp must hit the page exactly (assert checks the low bits are zero)
  enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
    MacroAssembler _masm(&cbuf);
    address page = (address)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    unsigned long off;
    __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
    assert(off == 0, "assumed offset == 0");
  %}
4405
  // load the card-table byte map base address
  enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
    MacroAssembler _masm(&cbuf);
    __ load_byte_map_base($dst$$Register);
  %}

  // move a narrow (compressed) oop constant; NULL must not reach here —
  // the mov_n0 encoding below handles it
  enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::oop_type, "unexpected reloc type");
      __ set_narrow_oop(dst_reg, (jobject)con);
    }
  %}

  // move narrow oop constant NULL (0)
  enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // move a narrow (compressed) klass constant; NULL must not reach here
  enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
      __ set_narrow_klass(dst_reg, (Klass *)con);
    }
  %}
4442
4443 // arithmetic encodings
4444
  // Shared add/subtract-immediate encodings: the instruct rule's $primary
  // opcode selects the operation, and a negative (possibly negated)
  // constant is flipped so the emitted immediate is always non-negative.

  // 32-bit add/subtract immediate
  enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ subw(dst_reg, src_reg, -con);
    } else {
      __ addw(dst_reg, src_reg, con);
    }
  %}

  // 64-bit add/subtract immediate
  enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ sub(dst_reg, src_reg, -con);
    } else {
      __ add(dst_reg, src_reg, con);
    }
  %}
4472
  // Integer divide/modulo.  corrected_idivl/idivq implement Java division
  // semantics; the boolean argument selects remainder (true) vs quotient
  // (false), and rscratch1 is a temp.

  // 32-bit divide
  enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
  %}

  // 64-bit divide
  enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
  %}

  // 32-bit modulo
  enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
  %}

  // 64-bit modulo
  enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src1_reg = as_Register($src1$$reg);
    Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
  %}
4504
4505 // compare instruction encodings
4506
  // Compare and test encodings.  All set the condition flags only; the
  // immediate variants either fold the constant into a flag-setting
  // subs/adds (addsub-range immediates) or materialize it in rscratch1
  // first (general immediates).

  // 32-bit register-register compare
  enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // 32-bit compare with an addsub-encodable immediate: subs against a
  // non-negative constant, adds against the negation of a negative one
  enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int32_t val = $src2$$constant;
    if (val >= 0) {
      __ subsw(zr, reg, val);
    } else {
      __ addsw(zr, reg, -val);
    }
  %}

  // 32-bit compare with a general immediate, materialized in rscratch1
  enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int32_t val = (u_int32_t)$src2$$constant;
    __ movw(rscratch1, val);
    __ cmpw(reg1, rscratch1);
  %}

  // 64-bit register-register compare
  enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // 64-bit compare with a 12-bit addsub immediate
  enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int64_t val = $src2$$constant;
    if (val >= 0) {
      __ subs(zr, reg, val);
    } else if (val != -val) {
      __ adds(zr, reg, -val);
    } else {
      // aargh, Long.MIN_VALUE is a special case: it equals its own
      // negation, so materialize it and compare against a register
      __ orr(rscratch1, zr, (u_int64_t)val);
      __ subs(zr, reg, rscratch1);
    }
  %}

  // 64-bit compare with a general immediate, materialized in rscratch1
  enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int64_t val = (u_int64_t)$src2$$constant;
    __ mov(rscratch1, val);
    __ cmp(reg1, rscratch1);
  %}

  // pointer compare
  enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // narrow (compressed) oop compare — 32-bit
  enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // test pointer against NULL
  enc_class aarch64_enc_testp(iRegP src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmp(reg, zr);
  %}

  // test narrow oop against NULL
  enc_class aarch64_enc_testn(iRegN src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmpw(reg, zr);
  %}
4588
  // unconditional branch to label
  enc_class aarch64_enc_b(label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ b(*L);
  %}

  // conditional branch; the cmpOp operand supplies the condition code
  enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}

  // conditional branch, unsigned-comparison variant (same emission; the
  // cmpOpU operand already encodes the unsigned condition)
  enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}
4606
  // Slow-path subtype check via check_klass_subtype_slow_path; falls
  // through on success, branches to `miss` on failure.  When the instruct
  // rule's $primary opcode is set, the result register is zeroed on the
  // success path before the miss label is bound.
  enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
  %{
    Register sub_reg = as_Register($sub$$reg);
    Register super_reg = as_Register($super$$reg);
    Register temp_reg = as_Register($temp$$reg);
    Register result_reg = as_Register($result$$reg);

    Label miss;
    MacroAssembler _masm(&cbuf);
    __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
    if ($primary) {
      __ mov(result_reg, zr);
    }
    __ bind(miss);
  %}
4624
  // Static Java call.  Runtime-wrapper targets (no _method) get a plain
  // trampoline call; real Java targets get a trampoline call with an
  // opt-virtual or static-call relocation plus a to-interpreter stub.
  // Either the stub emission or the call itself can fail when the code
  // cache is full, in which case compilation is bailed out.
  enc_class aarch64_enc_java_static_call(method meth) %{
    MacroAssembler _masm(&cbuf);

    address addr = (address)$meth$$method;
    address call;
    if (!_method) {
      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
      call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
    } else {
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      call = __ trampoline_call(Address(addr, rspec), &cbuf);

      // Emit stub for static call
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}
4651
  // Dynamic (inline-cache) Java call; bails out the compile if the code
  // cache is full
  enc_class aarch64_enc_java_dynamic_call(method meth) %{
    MacroAssembler _masm(&cbuf);
    int method_index = resolved_method_index(cbuf);
    address call = __ ic_call((address)$meth$$method, method_index);
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  // post-call check hook; the stack-depth verification is not implemented
  // on AArch64, so it traps when VerifyStackAtCalls is on
  enc_class aarch64_enc_call_epilog() %{
    MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      __ call_Unimplemented();
    }
  %}
4669
  enc_class aarch64_enc_java_to_runtime(method meth) %{
    MacroAssembler _masm(&cbuf);

    // some calls to generated routines (arraycopy code) are scheduled
    // by C2 as runtime calls. if so we can call them using a br (they
    // will be in a reachable segment) otherwise we have to use a blrt
    // which loads the absolute address into a register.
    address entry = (address)$meth$$method;
    CodeBlob *cb = CodeCache::find_blob(entry);
    if (cb) {
      // target lives in the code cache: reachable trampoline call
      address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
      if (call == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    } else {
      // target is outside the code cache: call through a register with
      // blrt, passing the argument counts/types from the call's TypeFunc
      int gpcnt;
      int fpcnt;
      int rtype;
      getCallInfo(tf(), gpcnt, fpcnt, rtype);
      Label retaddr;
      __ adr(rscratch2, retaddr);
      __ lea(rscratch1, RuntimeAddress(entry));
      // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc():
      // push the return address (paired with zr) so the stack walker can
      // find it, and pop the pair after the call returns
      __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
      __ blrt(rscratch1, gpcnt, fpcnt, rtype);
      __ bind(retaddr);
      __ add(sp, sp, 2 * wordSize);
    }
  %}
4700
  // jump to the rethrow stub (far_jump: target may be out of branch range)
  enc_class aarch64_enc_rethrow() %{
    MacroAssembler _masm(&cbuf);
    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
  %}

  // method return
  enc_class aarch64_enc_ret() %{
    MacroAssembler _masm(&cbuf);
    __ ret(lr);
  %}

  // indirect jump for TailCall
  enc_class aarch64_enc_tail_call(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    __ br(target_reg);
  %}

  // indirect jump for TailJump (exception forwarding)
  enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    // exception oop should be in r0
    // ret addr has been popped into lr
    // callee expects it in r3
    __ mov(r3, lr);
    __ br(target_reg);
  %}
4726
  enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    // Fast-path monitor enter (C2 FastLock node).
    // Register roles: oop = object being locked, box = on-stack BasicLock,
    // disp_hdr/tmp = scratch.  On exit the condition flags encode the
    // result: EQ = lock acquired, NE = caller must take the runtime slow path.
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Load markOop from object into displaced_header.
    __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      // oop is never null here, so this comparison sets NE, forcing the
      // slow path unconditionally.
      __ cmp(oop, zr);
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      // Either falls through (flags set appropriately) or branches to
      // cont when the bias is already held.
      __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
    }

    // Handle existing monitor
    if ((EmitSync & 0x02) == 0) {
      // we can use AArch64's bit test and branch here but
      // markoopDesc does not define a bit index just the bit value
      // so assert in case the bit pos changes
#     define __monitor_value_log2 1
      assert(markOopDesc::monitor_value == (1 << __monitor_value_log2), "incorrect bit position");
      __ tbnz(disp_hdr, __monitor_value_log2, object_has_monitor);
#     undef __monitor_value_log2
    }

    // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
    __ orr(disp_hdr, disp_hdr, markOopDesc::unlocked_value);

    // Load Compare Value application register.

    // Initialize the box. (Must happen before we update the object mark!)
    __ str(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Compare object markOop with mark and if equal exchange scratch1
    // with object markOop.
    if (UseLSE) {
      // Single CASAL: expected value = unlocked mark (disp_hdr), new
      // value = box (stack lock).
      __ mov(tmp, disp_hdr);
      __ casal(Assembler::xword, tmp, box, oop);
      // EQ iff the CAS installed the box, i.e. we now own the lock.
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::EQ, cont);
    } else {
      // LL/SC retry loop equivalent to the CAS above.
      Label retry_load;
      if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
        __ prfm(Address(oop), PSTL1STRM);
      __ bind(retry_load);
      __ ldaxr(tmp, oop);
      __ cmp(tmp, disp_hdr);
      __ br(Assembler::NE, cas_failed);
      // use stlxr to ensure update is immediately visible
      __ stlxr(tmp, box, oop);
      // stlxr writes 0 on success; flags are still EQ from the cmp above,
      // so cont sees the success condition.
      __ cbzw(tmp, cont);
      __ b(retry_load);
    }

    // Formerly:
    // __ cmpxchgptr(/*oldv=*/disp_hdr,
    //               /*newv=*/box,
    //               /*addr=*/oop,
    //               /*tmp=*/tmp,
    //               cont,
    //               /*fail*/NULL);

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // If the compare-and-exchange succeeded, then we found an unlocked
    // object, will have now locked it will continue at label cont

    __ bind(cas_failed);
    // We did not see an unlocked object so try the fast recursive case.

    // Check if the owner is self by comparing the value in the
    // markOop of object (disp_hdr) with the stack pointer.
    __ mov(rscratch1, sp);
    __ sub(disp_hdr, disp_hdr, rscratch1);
    // Mask covering everything outside the current page plus the low
    // lock bits: (mark - sp) masked to zero means the mark points into
    // our own stack page, i.e. a recursive stack lock.
    __ mov(tmp, (address) (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
    // If condition is true we are cont and hence we can store 0 as the
    // displaced header in the box, which indicates that it is a recursive lock.
    __ ands(tmp/*==0?*/, disp_hdr, tmp);
    __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      // Flags from the ands above: EQ = recursive stack lock (success).
      __ b(cont);

      __ bind(object_has_monitor);
      // The object's monitor m is unlocked iff m->owner == NULL,
      // otherwise m->owner may contain a thread or a stack address.
      //
      // Try to CAS m->owner from NULL to current thread.
      __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
      __ mov(disp_hdr, zr);

      if (UseLSE) {
        __ mov(rscratch1, disp_hdr);
        __ casal(Assembler::xword, rscratch1, rthread, tmp);
        // EQ iff owner was NULL and is now rthread (monitor acquired).
        __ cmp(rscratch1, disp_hdr);
      } else {
        // LL/SC loop: flags are NE on a non-NULL owner (failure), EQ
        // after a successful store of rthread.
        Label retry_load, fail;
        if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
          __ prfm(Address(tmp), PSTL1STRM);
        __ bind(retry_load);
        __ ldaxr(rscratch1, tmp);
        __ cmp(disp_hdr, rscratch1);
        __ br(Assembler::NE, fail);
        // use stlxr to ensure update is immediately visible
        __ stlxr(rscratch1, rthread, tmp);
        __ cbnzw(rscratch1, retry_load);
        __ bind(fail);
      }

      // Label next;
      // __ cmpxchgptr(/*oldv=*/disp_hdr,
      //               /*newv=*/rthread,
      //               /*addr=*/tmp,
      //               /*tmp=*/rscratch1,
      //               /*succeed*/next,
      //               /*fail*/NULL);
      // __ bind(next);

      // store a non-null value into the box.
      __ str(box, Address(box, BasicLock::displaced_header_offset_in_bytes()));

      // PPC port checks the following invariants
      // #ifdef ASSERT
      // bne(flag, cont);
      // We have acquired the monitor, check some invariants.
      // addw(/*monitor=*/tmp, tmp, -ObjectMonitor::owner_offset_in_bytes());
      // Invariant 1: _recursions should be 0.
      // assert(ObjectMonitor::recursions_size_in_bytes() == 8, "unexpected size");
      // assert_mem8_is_zero(ObjectMonitor::recursions_offset_in_bytes(), tmp,
      //                        "monitor->_recursions should be 0", -1);
      // Invariant 2: OwnerIsThread shouldn't be 0.
      // assert(ObjectMonitor::OwnerIsThread_size_in_bytes() == 4, "unexpected size");
      //assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), tmp,
      //                           "monitor->OwnerIsThread shouldn't be 0", -1);
      // #endif
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure

  %}
4881
4882 // TODO
4883 // reimplement this with custom cmpxchgptr code
4884 // which avoids some of the unnecessary branching
  enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    // Fast-path monitor exit (C2 FastUnlock node).  Mirrors
    // aarch64_enc_fast_lock; on exit the condition flags encode the
    // result: EQ = unlocked, NE = caller must take the runtime slow path.
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;
    Label cas_failed;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      __ cmp(oop, zr); // Oop can't be 0 here => always false.
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      // Branches to cont (with flags EQ) if the lock was biased to us.
      __ biased_locking_exit(oop, tmp, cont);
    }

    // Find the lock address and load the displaced header from the stack.
    __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // If the displaced header is 0, we have a recursive unlock.
    __ cmp(disp_hdr, zr);
    __ br(Assembler::EQ, cont);


    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
      // NOTE(review): this tests the saved displaced header (disp_hdr),
      // not the mark word just loaded into tmp; later upstream versions
      // test tmp here.  With disp_hdr an inflated lock will usually fall
      // through to the failing CAS below and reach the runtime slow path
      // (correct but slower) -- verify against upstream.
      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
    }

    // Check if it is still a light weight lock, this is is true if we
    // see the stack address of the basicLock in the markOop of the
    // object.

    if (UseLSE) {
      // CAS: expected = box (our stack lock), new = original displaced
      // header; EQ iff the mark word was restored (unlock succeeded).
      __ mov(tmp, box);
      __ casl(Assembler::xword, tmp, disp_hdr, oop);
      __ cmp(tmp, box);
    } else {
      // LL/SC retry loop equivalent to the CAS above.
      Label retry_load;
      if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH))
        __ prfm(Address(oop), PSTL1STRM);
      __ bind(retry_load);
      __ ldxr(tmp, oop);
      __ cmp(box, tmp);
      __ br(Assembler::NE, cas_failed);
      // use stlxr to ensure update is immediately visible
      __ stlxr(tmp, disp_hdr, oop);
      // stlxr writes 0 on success; flags remain EQ from the cmp above.
      __ cbzw(tmp, cont);
      __ b(retry_load);
    }

    // __ cmpxchgptr(/*compare_value=*/box,
    //               /*exchange_value=*/disp_hdr,
    //               /*where=*/oop,
    //               /*result=*/tmp,
    //               cont,
    //               /*cas_failed*/NULL);
    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    __ bind(cas_failed);

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      // Flags here are NE (CAS failure) -> slow path.
      __ b(cont);

      __ bind(object_has_monitor);
      __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
      __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
      // NE here means either we don't own the monitor or it is locked
      // recursively -> slow path.
      __ cmp(rscratch1, zr);
      __ br(Assembler::NE, cont);

      __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
      // The cmp leaves EQ when both queues are empty; the cbnz takes the
      // slow path (with NE set) when there are waiting threads.
      __ cmp(rscratch1, zr);
      __ cbnz(rscratch1, cont);
      // need a release store here
      __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ stlr(rscratch1, tmp); // rscratch1 is zero
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
4980
4981 %}
4982
4983 //----------FRAME--------------------------------------------------------------
4984 // Definition of frame structure and management information.
4985 //
4986 // S T A C K L A Y O U T Allocators stack-slot number
4987 // | (to get allocators register number
4988 // G Owned by | | v add OptoReg::stack0())
4989 // r CALLER | |
4990 // o | +--------+ pad to even-align allocators stack-slot
4991 // w V | pad0 | numbers; owned by CALLER
4992 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned
4993 // h ^ | in | 5
4994 // | | args | 4 Holes in incoming args owned by SELF
4995 // | | | | 3
4996 // | | +--------+
4997 // V | | old out| Empty on Intel, window on Sparc
4998 // | old |preserve| Must be even aligned.
4999 // | SP-+--------+----> Matcher::_old_SP, even aligned
5000 // | | in | 3 area for Intel ret address
5001 // Owned by |preserve| Empty on Sparc.
5002 // SELF +--------+
5003 // | | pad2 | 2 pad to align old SP
5004 // | +--------+ 1
5005 // | | locks | 0
5006 // | +--------+----> OptoReg::stack0(), even aligned
5007 // | | pad1 | 11 pad to align new SP
5008 // | +--------+
5009 // | | | 10
5010 // | | spills | 9 spills
5011 // V | | 8 (pad0 slot for callee)
5012 // -----------+--------+----> Matcher::_out_arg_limit, unaligned
5013 // ^ | out | 7
5014 // | | args | 6 Holes in outgoing args owned by CALLEE
5015 // Owned by +--------+
5016 // CALLEE | new out| 6 Empty on Intel, window on Sparc
5017 // | new |preserve| Must be even-aligned.
5018 // | SP-+--------+----> Matcher::_new_SP, even aligned
5019 // | | |
5020 //
5021 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is
5022 // known from SELF's arguments and the Java calling convention.
5023 // Region 6-7 is determined per call site.
5024 // Note 2: If the calling convention leaves holes in the incoming argument
5025 // area, those holes are owned by SELF. Holes in the outgoing area
// are owned by the CALLEE.  Holes should not be necessary in the
5027 // incoming area, as the Java calling convention is completely under
5028 // the control of the AD file. Doubles can be sorted and packed to
// avoid holes.  Holes in the outgoing arguments may be necessary for
5030 // varargs C calling conventions.
5031 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is
5032 // even aligned with pad0 as needed.
5033 // Region 6 is even aligned. Region 6-7 is NOT even aligned;
5034 // (the latter is true on Intel but is it false on AArch64?)
5035 // region 6-11 is even aligned; it may be padded out more so that
5036 // the region from SP to FP meets the minimum stack alignment.
5037 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
5038 // alignment. Region 11, pad1, may be dynamically extended so that
5039 // SP meets the minimum alignment.
5040
frame %{
  // Frame layout and calling-convention description consumed by the
  // ADLC/register allocator; see the stack-layout diagram above.

  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.

  // Inline Cache Register or methodOop for I2C.
  inline_cache_reg(R12);

  // Method Oop Register when calling interpreter.
  interpreter_method_oop_reg(R12);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(R31);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(R29);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots. aarch64 needs two slots for
  // return address and fp.
  // TODO think this is correct but check
  in_preserve_stack_slots(4);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // TODO this may well be correct but need to check why that - 2 is there
  // ppc port uses 0 but we definitely need to allow for fixed_slots
  // which folds in the space used for monitors
  return_addr(STACK - 2 +
              round_to((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    // TODO do we allow ideal_reg == Op_RegN???
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // Return-value register for each ideal register class, indexed by
    // ideal_reg: integers/pointers/longs in r0, floats/doubles in v0.
    static const int lo[Op_RegL + 1] = { // enum name
      0,                 // Op_Node
      0,                 // Op_Set
      R0_num,            // Op_RegN
      R0_num,            // Op_RegI
      R0_num,            // Op_RegP
      V0_num,            // Op_RegF
      V0_num,            // Op_RegD
      R0_num             // Op_RegL
    };

    // High half of the pair; Bad for 32-bit values.
    static const int hi[Op_RegL + 1] = { // enum name
      0,                 // Op_Node
      0,                 // Op_Set
      OptoReg::Bad,      // Op_RegN
      OptoReg::Bad,      // Op_RegI
      R0_H_num,          // Op_RegP
      OptoReg::Bad,      // Op_RegF
      V0_H_num,          // Op_RegD
      R0_H_num           // Op_RegL
    };

    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
5144
//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(1);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(INSN_COST); // Required cost attribute
ins_attrib ins_size(32);        // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(4);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction
5162
5163 //----------OPERANDS-----------------------------------------------------------
5164 // Operand definitions must precede instruction definitions for correct parsing
5165 // in the ADLC because operands constitute user defined types which are used in
5166 // instruction definitions.
5167
5168 //----------Simple Operands----------------------------------------------------
5169
// Integer operands 32 bit
// 32 bit immediate
operand immI()
%{
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit zero
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit increment
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit decrement
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant <= 4
// NOTE(review): no lower bound, so negative constants also match --
// confirm users of this operand tolerate that.
operand immI_le_4()
%{
  predicate(n->get_int() <= 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 31
operand immI_31()
%{
  predicate(n->get_int() == 31);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 8
operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 16
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 24
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 32
operand immI_32()
%{
  predicate(n->get_int() == 32);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 48
operand immI_48()
%{
  predicate(n->get_int() == 48);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 56
operand immI_56()
%{
  predicate(n->get_int() == 56);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 64
operand immI_64()
%{
  predicate(n->get_int() == 64);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 255 (0xff)
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 65535 (0xffff)
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant 63 (0x3f)
// NOTE(review): named immL_* but matches a 32-bit ConI via get_int() --
// presumably intended for long-shift counts (which are ints); verify.
operand immL_63()
%{
  predicate(n->get_int() == 63);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant 255 (0xff) -- same immL_*/ConI naming quirk as immL_63.
operand immL_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 65535 (0xffff)
operand immL_65535()
%{
  predicate(n->get_long() == 65535L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 4294967295 (0xffffffff)
operand immL_4294967295()
%{
  predicate(n->get_long() == 4294967295L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5363
// 64 bit mask of contiguous low-order ones (2^k - 1) with the top two
// bits clear -- suitable for ubfx-style bitfield extraction.
operand immL_bitmask()
%{
  predicate(((n->get_long() & 0xc000000000000000l) == 0)
            && is_power_of_2(n->get_long() + 1));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit mask of contiguous low-order ones (2^k - 1) with the top two
// bits clear.
operand immI_bitmask()
%{
  predicate(((n->get_int() & 0xc0000000) == 0)
            && is_power_of_2(n->get_int() + 1));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Scale values for scaled offset addressing modes (up to long but not quad)
operand immIScale()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 26 bit signed offset -- for pc-relative branches
operand immI26()
%{
  predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 19 bit signed offset -- for pc-relative loads
operand immI19()
%{
  predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset -- for base plus immediate loads
operand immIU12()
%{
  predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset, 64-bit constant
operand immLU12()
%{
  predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5439
// Offset for scaled or unscaled immediate loads and stores
operand immIOffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for a 4-byte (size log2 = 2) scaled access
operand immIOffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 2));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for an 8-byte (size log2 = 3) scaled access
operand immIOffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for a 16-byte (size log2 = 4) scaled access
operand immIOffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64-bit variants of the load/store offset operands above
operand immLoffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_long()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immLoffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 2));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immLoffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 3));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immLoffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 4));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit integer valid for add sub immediate
operand immIAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unsigned integer valid for logical immediate
// TODO -- check this is right when e.g the mask is 0x80000000
operand immILog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5542
// Integer operands 64 bit
// 64 bit immediate
operand immL()
%{
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit zero
operand immL0()
%{
  predicate(n->get_long() == 0);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit increment
operand immL_1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit decrement
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit offset of pc in thread anchor

// Matches exactly the byte offset of last_Java_pc within the thread's
// JavaFrameAnchor, used when writing the current PC to the anchor.
operand immL_pc_off()
%{
  predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
                             in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for add sub immediate
operand immLAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for logical immediate
operand immLLog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5629
// Pointer operands
// Pointer Immediate
operand immP()
%{
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate One
// this is used in object initialization (initial object header)
operand immP_1()
%{
  predicate(n->get_ptr() == 1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Polling Page Pointer Immediate
operand immPollPage()
%{
  predicate((address)n->get_ptr() == os::get_polling_page());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Card Table Byte Map Base
operand immByteMapBase()
%{
  // Get base of card map
  predicate((jbyte*)n->get_ptr() ==
        ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus One
// this is used when we want to write the current PC to the thread anchor
operand immP_M1()
%{
  predicate(n->get_ptr() == -1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus Two
// this is used when we want to write the current PC to the thread anchor
operand immP_M2()
%{
  predicate(n->get_ptr() == -2);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5711
// Float and Double operands
// Double Immediate
operand immD()
%{
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: +0.0d
operand immD0()
%{
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// constant 'double +0.0'.
// Double constant encodable as an fmov immediate (packed 8-bit form).
operand immDPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF()
%{
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: +0.0f.
operand immF0()
%{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float constant encodable as an fmov immediate (packed 8-bit form).
operand immFPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow pointer operands
// Narrow Pointer Immediate
operand immN()
%{
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow NULL Pointer Immediate
operand immN0()
%{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow Klass Pointer Immediate
operand immNKlass()
%{
  match(ConNKlass);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
5803
// Integer 32 bit Register Operands
// Integer 32 bitRegister (excludes SP)
operand iRegI()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register not Special
operand iRegINoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register Operands
// Integer 64 bit Register (includes SP)
operand iRegL()
%{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 64 bit Register not Special
operand iRegLNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg));
  match(RegL);
  match(iRegL_R0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer Register Operands
// Pointer Register
operand iRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(iRegPNoSp);
  match(iRegP_R0);
  //match(iRegP_R2);
  //match(iRegP_R4);
  //match(iRegP_R5);
  match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register not Special
operand iRegPNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_ptr_reg));
  match(RegP);
  // match(iRegP);
  // match(iRegP_R0);
  // match(iRegP_R2);
  // match(iRegP_R4);
  // match(iRegP_R5);
  // match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
5880
5881 // Pointer 64 bit Register R0 only
5882 operand iRegP_R0()
5883 %{
5884 constraint(ALLOC_IN_RC(r0_reg));
5885 match(RegP);
5886 // match(iRegP);
5887 match(iRegPNoSp);
5888 op_cost(0);
5889 format %{ %}
5890 interface(REG_INTER);
5891 %}
5892
5893 // Pointer 64 bit Register R1 only
5894 operand iRegP_R1()
5895 %{
5896 constraint(ALLOC_IN_RC(r1_reg));
5897 match(RegP);
5898 // match(iRegP);
5899 match(iRegPNoSp);
5900 op_cost(0);
5901 format %{ %}
5902 interface(REG_INTER);
5903 %}
5904
5905 // Pointer 64 bit Register R2 only
5906 operand iRegP_R2()
5907 %{
5908 constraint(ALLOC_IN_RC(r2_reg));
5909 match(RegP);
5910 // match(iRegP);
5911 match(iRegPNoSp);
5912 op_cost(0);
5913 format %{ %}
5914 interface(REG_INTER);
5915 %}
5916
5917 // Pointer 64 bit Register R3 only
5918 operand iRegP_R3()
5919 %{
5920 constraint(ALLOC_IN_RC(r3_reg));
5921 match(RegP);
5922 // match(iRegP);
5923 match(iRegPNoSp);
5924 op_cost(0);
5925 format %{ %}
5926 interface(REG_INTER);
5927 %}
5928
5929 // Pointer 64 bit Register R4 only
5930 operand iRegP_R4()
5931 %{
5932 constraint(ALLOC_IN_RC(r4_reg));
5933 match(RegP);
5934 // match(iRegP);
5935 match(iRegPNoSp);
5936 op_cost(0);
5937 format %{ %}
5938 interface(REG_INTER);
5939 %}
5940
5941 // Pointer 64 bit Register R5 only
5942 operand iRegP_R5()
5943 %{
5944 constraint(ALLOC_IN_RC(r5_reg));
5945 match(RegP);
5946 // match(iRegP);
5947 match(iRegPNoSp);
5948 op_cost(0);
5949 format %{ %}
5950 interface(REG_INTER);
5951 %}
5952
5953 // Pointer 64 bit Register R10 only
5954 operand iRegP_R10()
5955 %{
5956 constraint(ALLOC_IN_RC(r10_reg));
5957 match(RegP);
5958 // match(iRegP);
5959 match(iRegPNoSp);
5960 op_cost(0);
5961 format %{ %}
5962 interface(REG_INTER);
5963 %}
5964
// Long operands pinned to one specific general register (see the
// iRegP_Rx operands above for the rationale).

// Long 64 bit Register R0 only
operand iRegL_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R2 only
operand iRegL_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R3 only
operand iRegL_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R11 only
operand iRegL_R11()
%{
  constraint(ALLOC_IN_RC(r11_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6008
// Pointer 64 bit Register FP only
// n.b. unlike the other fixed pointer register operands this does not
// also match iRegPNoSp -- FP is one of the special registers.
operand iRegP_FP()
%{
  constraint(ALLOC_IN_RC(fp_reg));
  match(RegP);
  // match(iRegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6019
// Integer operands pinned to one specific general register, using the
// int (32 bit) register classes.

// Register R0 only
operand iRegI_R0()
%{
  constraint(ALLOC_IN_RC(int_r0_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R2 only
operand iRegI_R2()
%{
  constraint(ALLOC_IN_RC(int_r2_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Register R3 only
operand iRegI_R3()
%{
  constraint(ALLOC_IN_RC(int_r3_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}


// Register R4 only
operand iRegI_R4()
%{
  constraint(ALLOC_IN_RC(int_r4_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6064
6065
// Pointer Register Operands
// Narrow Pointer Register (compressed oop, RegN)
operand iRegN()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegN);
  match(iRegNNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow pointer Register R0 only
operand iRegN_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow pointer Register R2 only
operand iRegN_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow pointer Register R3 only
operand iRegN_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6104
// Narrow pointer 32 bit Register not Special
// (matches RegN in the 32 bit no-special class; the previous comment
// described it as an "Integer 64 bit" register, which was inaccurate)
operand iRegNNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6114
// heap base register -- used for encoding immN0

operand iRegIHeapbase()
%{
  constraint(ALLOC_IN_RC(heapbase_reg));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6125
// Float Register
// Float register operands
operand vRegF()
%{
  constraint(ALLOC_IN_RC(float_reg));
  match(RegF);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double Register
// Double register operands
operand vRegD()
%{
  constraint(ALLOC_IN_RC(double_reg));
  match(RegD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 64 bit vector register operand
operand vecD()
%{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 128 bit vector register operand
operand vecX()
%{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6169
// Double register operands pinned to a specific FP/SIMD register
// (V0-V3), for rules that need fixed FP argument/result registers.

// Double Register V0 only
operand vRegD_V0()
%{
  constraint(ALLOC_IN_RC(v0_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double Register V1 only
operand vRegD_V1()
%{
  constraint(ALLOC_IN_RC(v1_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double Register V2 only
operand vRegD_V2()
%{
  constraint(ALLOC_IN_RC(v2_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double Register V3 only
operand vRegD_V3()
%{
  constraint(ALLOC_IN_RC(v3_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6205
// Flags register, used as output of signed compare instructions

// note that on AArch64 we also use this register as the output
// for floating point compare instructions (CmpF CmpD). this ensures
// that ordered inequality tests use GT, GE, LT or LE none of which
// pass through cases where the result is unordered i.e. one or both
// inputs to the compare is a NaN. this means that the ideal code can
// replace e.g. a GT with an LE and not end up capturing the NaN case
// (where the comparison should always fail). EQ and NE tests are
// always generated in ideal code so that unordered folds into the NE
// case, matching the behaviour of AArch64 NE.
//
// This differs from x86 where the outputs of FP compares use a
// special FP flags registers and where compares based on this
// register are distinguished into ordered inequalities (cmpOpUCF) and
// EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
// to explicitly handle the unordered case in branches. x86 also has
// to include extra CMoveX rules to accept a cmpOpUCF input.

operand rFlagsReg()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of unsigned compare instructions
operand rFlagsRegU()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGSU" %}
  interface(REG_INTER);
%}
6245
// Special Registers

// Method Register
operand inline_cache_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand interpreter_method_oop_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Thread Register
operand thread_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(thread_reg)); // thread_reg (the old "link_reg"
                                       // note here was a copy-paste from
                                       // lr_RegP below)
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Link Register
operand lr_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(lr_reg)); // link_reg
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
6287
//----------Memory Operands----------------------------------------------------
// In the MEMORY_INTER blocks below, index(0xffffffff) is the ADLC
// convention for "no index register".

// [reg] -- simple register-indirect addressing
operand indirect(iRegP reg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);
  op_cost(0);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp(0x0);
  %}
%}

// [reg + (int index sign-extended to long) << scale]
// the predicate checks the scaled access size suits every memory use
operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// [reg + (long index) << scale]
operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// [reg + (int index sign-extended to long)], no scaling
operand indIndexI2L(iRegP reg, iRegI ireg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// [reg + long index], no scaling
operand indIndex(iRegP reg, iRegL lreg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg lreg);
  op_cost(0);
  format %{ "$reg, $lreg" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}
6361
// Base-plus-immediate-offset memory operands.  The numbered variants
// (4/8/16) use offset immediates restricted for accesses of that byte
// size (see the corresponding immIOffsetN/immLoffsetN operands).

// [reg + int offset]
operand indOffI(iRegP reg, immIOffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// [reg + int offset] for 4 byte accesses
operand indOffI4(iRegP reg, immIOffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// [reg + int offset] for 8 byte accesses
operand indOffI8(iRegP reg, immIOffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// [reg + int offset] for 16 byte accesses
operand indOffI16(iRegP reg, immIOffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// [reg + long offset]
operand indOffL(iRegP reg, immLoffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// [reg + long offset] for 4 byte accesses
operand indOffL4(iRegP reg, immLoffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// [reg + long offset] for 8 byte accesses
operand indOffL8(iRegP reg, immLoffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// [reg + long offset] for 16 byte accesses
operand indOffL16(iRegP reg, immLoffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6473
// Narrow-oop (DecodeN) versions of the memory operands above.  These are
// only legal when the narrow oop shift is zero, so the decoded base can
// be used directly as an address.

// [narrow reg]
operand indirectN(iRegN reg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);
  op_cost(0);
  format %{ "[$reg]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp(0x0);
  %}
%}

// [narrow reg + (int index sign-extended) << scale]
operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// [narrow reg + long index << scale]
operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// [narrow reg + (int index sign-extended)], no scaling
operand indIndexI2LN(iRegN reg, iRegI ireg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// [narrow reg + long index], no scaling
operand indIndexN(iRegN reg, iRegL lreg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) lreg);
  op_cost(0);
  format %{ "$reg, $lreg\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

// [narrow reg + int offset]
operand indOffIN(iRegN reg, immIOffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// [narrow reg + long offset]
operand indOffLN(iRegN reg, immLoffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6578
6579
6580
// AArch64 opto stubs need to write to the pc slot in the thread anchor
// [thread reg + pc-slot offset]
operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
6595
//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.  The base 0x1e encodes the stack
//                      pointer and the sReg operand supplies the slot offset.
operand stackSlotP(sRegP reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  // No match rule because this operand is only generated in matching
  // match(RegP);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegI);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegF);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegD);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegL);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
6670
// Operands for expressing Control Flow
// NOTE: Label is a predefined operand which should not be redefined in
// the AD file. It is generically handled within the ADLC.

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// used for signed integral comparisons and fp comparisons
// the encodings are the AArch64 condition codes (eq, ne, lt, ...)

operand cmpOp()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for unsigned integral comparisons
// the ordering tests use the unsigned condition codes lo/hs/ls/hi

operand cmpOpU()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0x3, "lo");
    greater_equal(0x2, "hs");
    less_equal(0x9, "ls");
    greater(0x8, "hi");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6726
// used for certain integral comparisons which can be
// converted to cbxx or tbxx instructions
// (restricted by the predicate to eq/ne tests only)

operand cmpOpEqNe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);
  predicate(n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::eq);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6750
// used for certain integral comparisons which can be
// converted to cbxx or tbxx instructions
// (restricted by the predicate to lt/ge tests only)

operand cmpOpLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  predicate(n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6775
// used for certain unsigned integral comparisons which can be
// converted to cbxx or tbxx instructions
// (restricted by the predicate to eq/ne/lt/ge tests)

operand cmpOpUEqNeLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  predicate(n->as_Bool()->_test._test == BoolTest::eq
            || n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
6802
// Special operand allowing long args to int ops to be truncated for free
// by matching the ConvL2I node into the consuming rule, so no explicit
// truncation instruction is emitted (see the iRegIorL2I opclass below).

operand iRegL2I(iRegL reg) %{

  op_cost(0);

  match(ConvL2I reg);

  format %{ "l2i($reg)" %}

  interface(REG_INTER)
%}
6815
// memory opclasses legal for vector loads/stores of 4, 8 and 16 bytes
opclass vmem4(indirect, indIndex, indOffI4, indOffL4);
opclass vmem8(indirect, indIndex, indOffI8, indOffL8);
opclass vmem16(indirect, indIndex, indOffI16, indOffL16);
6819
//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used as to simplify
// instruction definitions by not requiring the AD writer to specify
// separate instructions for every form of operand when the
// instruction accepts multiple operand types with the same basic
// encoding and format. The classic case of this is memory operands.

// memory is used to define read/write location for load/store
// instruction defs. we can turn a memory op into an Address

opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI, indOffL,
               indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);

// iRegIorL2I is used for src inputs in rules for 32 bit int (I)
// operations. it allows the src to be either an iRegI or a (ConvL2I
// iRegL). in the latter case the l2i normally planted for a ConvL2I
// can be elided because the 32-bit instruction will just employ the
// lower 32 bits anyway.
//
// n.b. this does not elide all L2I conversions. if the truncated
// value is consumed by more than one operation then the ConvL2I
// cannot be bundled into the consuming nodes so an l2i gets planted
// (actually a movw $dst $src) and the downstream instructions consume
// the result of the l2i as an iRegI input. That's a shame since the
// movw is actually redundant but its not too costly.

opclass iRegIorL2I(iRegI, iRegL2I);
6847
//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architectures pipeline.

// For specific pipelines, eg A53, define the stages of that pipeline
//pipe_desc(ISS, EX1, EX2, WR);
// These aliases map the named A53 stages onto the generic S0-S5 stages
// declared in pipe_desc below.
#define ISS S0
#define EX1 S1
#define EX2 S2
#define WR S3
6857
6858 // Integer ALU reg operation
6859 pipeline %{
6860
attributes %{
  // ARM instructions are of fixed length
  fixed_size_instructions;           // Fixed size instructions
  max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
  // ARM instructions come in 32-bit word units
  instruction_unit_size = 4;         // An instruction is 4 bytes long
  instruction_fetch_unit_size = 64;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 64 bytes

  // List of nop instructions
  nops( MachNop );
%}
6873
// We don't use an actual pipeline model so don't care about resources
// or description. we do use pipeline classes to introduce fixed
// latencies

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// INS0/INS1 are the two issue slots; INS01 means either slot may be
// used (the instruction can dual-issue), while a pipe_class that names
// INS0 alone must take slot 0 and issues by itself.
resources( INS0, INS1, INS01 = INS0 | INS1,
           ALU0, ALU1, ALU = ALU0 | ALU1,
           MAC,
           DIV,
           BRANCH,
           LDST,
           NEON_FP);

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Define the pipeline as a generic 6 stage pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);
6894
//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// FP two-operand (dyadic) op, single precision: result available at S5
pipe_class fp_dop_reg_reg_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1 : S1(read);
  src2 : S2(read);
  dst : S5(write);
  INS01 : ISS;
  NEON_FP : S5;
%}

// FP two-operand (dyadic) op, double precision
pipe_class fp_dop_reg_reg_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1 : S1(read);
  src2 : S2(read);
  dst : S5(write);
  INS01 : ISS;
  NEON_FP : S5;
%}

// FP one-operand (unary) op, single precision
pipe_class fp_uop_s(vRegF dst, vRegF src)
%{
  single_instruction;
  src : S1(read);
  dst : S5(write);
  INS01 : ISS;
  NEON_FP : S5;
%}

// FP one-operand (unary) op, double precision
pipe_class fp_uop_d(vRegD dst, vRegD src)
%{
  single_instruction;
  src : S1(read);
  dst : S5(write);
  INS01 : ISS;
  NEON_FP : S5;
%}

// FP convert double to float
pipe_class fp_d2f(vRegF dst, vRegD src)
%{
  single_instruction;
  src : S1(read);
  dst : S5(write);
  INS01 : ISS;
  NEON_FP : S5;
%}

// FP convert float to double
pipe_class fp_f2d(vRegD dst, vRegF src)
%{
  single_instruction;
  src : S1(read);
  dst : S5(write);
  INS01 : ISS;
  NEON_FP : S5;
%}
6954
// FP <-> integer conversion pipe classes; all have the same shape:
// read the source at S1, write the result at S5.

// float to int
pipe_class fp_f2i(iRegINoSp dst, vRegF src)
%{
  single_instruction;
  src : S1(read);
  dst : S5(write);
  INS01 : ISS;
  NEON_FP : S5;
%}

// float to long
pipe_class fp_f2l(iRegLNoSp dst, vRegF src)
%{
  single_instruction;
  src : S1(read);
  dst : S5(write);
  INS01 : ISS;
  NEON_FP : S5;
%}

// int to float
pipe_class fp_i2f(vRegF dst, iRegIorL2I src)
%{
  single_instruction;
  src : S1(read);
  dst : S5(write);
  INS01 : ISS;
  NEON_FP : S5;
%}

// long to float
pipe_class fp_l2f(vRegF dst, iRegL src)
%{
  single_instruction;
  src : S1(read);
  dst : S5(write);
  INS01 : ISS;
  NEON_FP : S5;
%}

// double to int
pipe_class fp_d2i(iRegINoSp dst, vRegD src)
%{
  single_instruction;
  src : S1(read);
  dst : S5(write);
  INS01 : ISS;
  NEON_FP : S5;
%}

// double to long
pipe_class fp_d2l(iRegLNoSp dst, vRegD src)
%{
  single_instruction;
  src : S1(read);
  dst : S5(write);
  INS01 : ISS;
  NEON_FP : S5;
%}

// int to double
pipe_class fp_i2d(vRegD dst, iRegIorL2I src)
%{
  single_instruction;
  src : S1(read);
  dst : S5(write);
  INS01 : ISS;
  NEON_FP : S5;
%}

// long to double
pipe_class fp_l2d(vRegD dst, iRegIorL2I src)
%{
  single_instruction;
  src : S1(read);
  dst : S5(write);
  INS01 : ISS;
  NEON_FP : S5;
%}
7026
// FP divide, single precision; uses issue slot 0 only (INS0), so it
// cannot dual-issue
pipe_class fp_div_s(vRegF dst, vRegF src1, vRegF src2)
%{
  single_instruction;
  src1 : S1(read);
  src2 : S2(read);
  dst : S5(write);
  INS0 : ISS;
  NEON_FP : S5;
%}

// FP divide, double precision
pipe_class fp_div_d(vRegD dst, vRegD src1, vRegD src2)
%{
  single_instruction;
  src1 : S1(read);
  src2 : S2(read);
  dst : S5(write);
  INS0 : ISS;
  NEON_FP : S5;
%}

// FP conditional select, single precision; also reads the flags
pipe_class fp_cond_reg_reg_s(vRegF dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  single_instruction;
  cr : S1(read);
  src1 : S1(read);
  src2 : S1(read);
  dst : S3(write);
  INS01 : ISS;
  NEON_FP : S3;
%}

// FP conditional select, double precision
pipe_class fp_cond_reg_reg_d(vRegD dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  single_instruction;
  cr : S1(read);
  src1 : S1(read);
  src2 : S1(read);
  dst : S3(write);
  INS01 : ISS;
  NEON_FP : S3;
%}

// FP move immediate, single precision
pipe_class fp_imm_s(vRegF dst)
%{
  single_instruction;
  dst : S3(write);
  INS01 : ISS;
  NEON_FP : S3;
%}

// FP move immediate, double precision
pipe_class fp_imm_d(vRegD dst)
%{
  single_instruction;
  dst : S3(write);
  INS01 : ISS;
  NEON_FP : S3;
%}

// FP load constant, single precision
pipe_class fp_load_constant_s(vRegF dst)
%{
  single_instruction;
  dst : S4(write);
  INS01 : ISS;
  NEON_FP : S4;
%}

// FP load constant, double precision
pipe_class fp_load_constant_d(vRegD dst)
%{
  single_instruction;
  dst : S4(write);
  INS01 : ISS;
  NEON_FP : S4;
%}
7100
// Vector pipe classes.  The convention throughout: 64 bit (vecD) forms
// use INS01 and may dual-issue, 128 bit (vecX) forms use INS0 only.

// 64 bit vector multiply
pipe_class vmul64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst : S5(write);
  src1 : S1(read);
  src2 : S1(read);
  INS01 : ISS;
  NEON_FP : S5;
%}

// 128 bit vector multiply
pipe_class vmul128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst : S5(write);
  src1 : S1(read);
  src2 : S1(read);
  INS0 : ISS;
  NEON_FP : S5;
%}

// 64 bit vector multiply-accumulate; dst is also read (accumulator)
pipe_class vmla64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst : S5(write);
  src1 : S1(read);
  src2 : S1(read);
  dst : S1(read);
  INS01 : ISS;
  NEON_FP : S5;
%}

// 128 bit vector multiply-accumulate
pipe_class vmla128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst : S5(write);
  src1 : S1(read);
  src2 : S1(read);
  dst : S1(read);
  INS0 : ISS;
  NEON_FP : S5;
%}

// 64 bit vector dyadic (integer) op
pipe_class vdop64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst : S4(write);
  src1 : S2(read);
  src2 : S2(read);
  INS01 : ISS;
  NEON_FP : S4;
%}

// 128 bit vector dyadic (integer) op
pipe_class vdop128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst : S4(write);
  src1 : S2(read);
  src2 : S2(read);
  INS0 : ISS;
  NEON_FP : S4;
%}
7162
// 64 bit vector logical op
pipe_class vlogical64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst : S3(write);
  src1 : S2(read);
  src2 : S2(read);
  INS01 : ISS;
  NEON_FP : S3;
%}

// 128 bit vector logical op
pipe_class vlogical128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst : S3(write);
  src1 : S2(read);
  src2 : S2(read);
  INS0 : ISS;
  NEON_FP : S3;
%}

// 64 bit vector shift by register
pipe_class vshift64(vecD dst, vecD src, vecX shift)
%{
  single_instruction;
  dst : S3(write);
  src : S1(read);
  shift : S1(read);
  INS01 : ISS;
  NEON_FP : S3;
%}

// 128 bit vector shift by register
pipe_class vshift128(vecX dst, vecX src, vecX shift)
%{
  single_instruction;
  dst : S3(write);
  src : S1(read);
  shift : S1(read);
  INS0 : ISS;
  NEON_FP : S3;
%}

// 64 bit vector shift by immediate; the shift amount is encoded in the
// instruction so it uses no pipeline stage
pipe_class vshift64_imm(vecD dst, vecD src, immI shift)
%{
  single_instruction;
  dst : S3(write);
  src : S1(read);
  INS01 : ISS;
  NEON_FP : S3;
%}

// 128 bit vector shift by immediate
pipe_class vshift128_imm(vecX dst, vecX src, immI shift)
%{
  single_instruction;
  dst : S3(write);
  src : S1(read);
  INS0 : ISS;
  NEON_FP : S3;
%}
7220
// 64 bit vector FP dyadic op
pipe_class vdop_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst : S5(write);
  src1 : S1(read);
  src2 : S1(read);
  INS01 : ISS;
  NEON_FP : S5;
%}

// 128 bit vector FP dyadic op
pipe_class vdop_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst : S5(write);
  src1 : S1(read);
  src2 : S1(read);
  INS0 : ISS;
  NEON_FP : S5;
%}

// 64 bit vector FP multiply/divide; n.b. uses INS0 even for the 64 bit
// form, unlike most other 64 bit vector classes
pipe_class vmuldiv_fp64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst : S5(write);
  src1 : S1(read);
  src2 : S1(read);
  INS0 : ISS;
  NEON_FP : S5;
%}

// 128 bit vector FP multiply/divide
pipe_class vmuldiv_fp128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst : S5(write);
  src1 : S1(read);
  src2 : S1(read);
  INS0 : ISS;
  NEON_FP : S5;
%}

// 128 bit vector FP square root
pipe_class vsqrt_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst : S5(write);
  src : S1(read);
  INS0 : ISS;
  NEON_FP : S5;
%}

// 64 bit vector FP unary op
pipe_class vunop_fp64(vecD dst, vecD src)
%{
  single_instruction;
  dst : S5(write);
  src : S1(read);
  INS01 : ISS;
  NEON_FP : S5;
%}

// 128 bit vector FP unary op
pipe_class vunop_fp128(vecX dst, vecX src)
%{
  single_instruction;
  dst : S5(write);
  src : S1(read);
  INS0 : ISS;
  NEON_FP : S5;
%}
7287
// Vector duplicate (broadcast) and immediate-move pipe classes.
// NOTE(review): the 128-bit dup classes below book INS01, unlike most
// other vecX classes in this file which book INS0 — confirm intentional.

// Duplicate a general register into a 64-bit vector
pipe_class vdup_reg_reg64(vecD dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate a general register into a 128-bit vector
pipe_class vdup_reg_reg128(vecX dst, iRegI src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate a float register into a 64-bit vector
pipe_class vdup_reg_freg64(vecD dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate a float register into a 128-bit vector
pipe_class vdup_reg_freg128(vecX dst, vRegF src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Duplicate a double register into a 128-bit vector
pipe_class vdup_reg_dreg128(vecX dst, vRegD src)
%{
  single_instruction;
  dst    : S3(write);
  src    : S1(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Move immediate into a 64-bit vector (no source operands)
pipe_class vmovi_reg_imm64(vecD dst)
%{
  single_instruction;
  dst    : S3(write);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Move immediate into a 128-bit vector (no source operands)
pipe_class vmovi_reg_imm128(vecX dst)
%{
  single_instruction;
  dst    : S3(write);
  INS0   : ISS;
  NEON_FP : S3;
%}
7348
// Vector load/store pipe classes. Address operands are consumed at issue
// (ISS); loaded data is available at S5, stored data is read at S2.

// Vector load, 64 bits
pipe_class vload_reg_mem64(vecD dst, vmem8 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector load, 128 bits
pipe_class vload_reg_mem128(vecX dst, vmem16 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector store, 64 bits
pipe_class vstore_reg_mem64(vecD src, vmem8 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}

// Vector store, 128 bits
// NOTE(review): src is declared vecD although this is the 128-bit class
// (cf. vload_reg_mem128 which uses vecX) — confirm whether this matters
// for scheduling or is just a copy-paste in the operand type.
pipe_class vstore_reg_mem128(vecD src, vmem16 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
7384
//------- Integer ALU operations --------------------------

// Integer ALU reg-reg operation
// Operands needed in EX1, result generated in EX2
// Eg.  ADD     x0, x1, x2
pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : EX1(read);
  INS01  : ISS; // Dual issue as instruction 0 or 1
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with constant shift
// Shifted register must be available in LATE_ISS instead of EX1
// Eg.  ADD     x0, x1, x2, LSL #2
pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg operation with constant shift
// Eg.  LSL     x0, x1, #shift
pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg-reg operation with variable shift
// Both operands must be available in LATE_ISS instead of EX1
// Result is available in EX1 instead of EX2
// Eg.  LSLV    x0, x1, x2
pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX1(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  ALU    : EX1;
%}

// Integer ALU reg-reg operation with extract
// As for _vshift above, but result generated in EX2
// Eg.  EXTR    x0, x1, x2, #N
// NOTE(review): dst is EX2(write) per the header, but the ALU resource
// is booked at EX1 — confirm the stage here.
pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS1   : ISS; // Can only dual issue as Instruction 1
  ALU    : EX1;
%}

// Integer ALU reg operation
// Eg.  NEG     x0, x1
pipe_class ialu_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : EX2(write);
  src    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU reg immediate operation
// Eg.  ADD     x0, x1, #N
pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Integer ALU immediate operation (no source operands)
// Eg.  MOV     x0, #N
pipe_class ialu_imm(iRegI dst)
%{
  single_instruction;
  dst    : EX1(write);
  INS01  : ISS;
  ALU    : EX1;
%}
7482
//------- Compare operation -------------------------------

// Compare reg-reg
// Eg.  CMP     x0, x1
pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  op2    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

// Compare reg-immediate
// Eg.  CMP     x0, #N
pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
%{
  single_instruction;
//  fixed_latency(16);
  cr     : EX2(write);
  op1    : EX1(read);
  INS01  : ISS;
  ALU    : EX2;
%}

//------- Conditional instructions ------------------------

// Conditional no operands
// Eg.  CSINC   x0, zr, zr, <cond>
pipe_class icond_none(iRegI dst, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 2 operand
// EG.  CSEL    X0, X1, X2, <cond>
pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src1   : EX1(read);
  src2   : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}

// Conditional 1 operand
// EG.  CSEL    X0, X1, zr, <cond>
pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  src    : EX1(read);
  dst    : EX2(write);
  INS01  : ISS;
  ALU    : EX2;
%}
7547
//------- Multiply pipeline operations --------------------

// Multiply reg-reg
// Eg.  MUL     w0, w1, w2
pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Multiply accumulate
// Eg.  MADD    w0, w1, w2, w3
pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Long multiply reg-reg (64-bit result, see fixed_latency)
// Eg.  MUL     x0, x1, x2
pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

// Long multiply accumulate
// Eg.  MADD    x0, x1, x2, x3
pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
%{
  single_instruction;
  fixed_latency(3); // Maximum latency for 64 bit mul
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  src3   : ISS(read);
  INS01  : ISS;
  MAC    : WR;
%}

//------- Divide pipeline operations --------------------

// Eg.  SDIV    w0, w1, w2
pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(8); // Maximum latency for 32 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}

// Eg.  SDIV    x0, x1, x2
pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  fixed_latency(16); // Maximum latency for 64 bit divide
  dst    : WR(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS0   : ISS; // Can only dual issue as instruction 0
  DIV    : WR;
%}
7626
//------- Load pipeline operations ------------------------

// Load - prefetch
// Eg.  PRFM    <mem>
pipe_class iload_prefetch(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, mem
// Eg.  LDR     x0, <mem>
pipe_class iload_reg_mem(iRegI dst, memory mem)
%{
  single_instruction;
  dst    : WR(write);
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Load - reg, reg
// Eg.  LDR     x0, [sp, x1]
pipe_class iload_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : WR(write);
  src    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

//------- Store pipeline operations -----------------------

// Store - zr, mem
// Eg.  STR     zr, <mem>
pipe_class istore_mem(memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, mem
// Eg.  STR     x0, <mem>
pipe_class istore_reg_mem(iRegI src, memory mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}

// Store - reg, reg
// Eg.  STR     x0, [sp, x1]
// Note: "dst" here is the address register, read at issue; the stored
// value "src" is read at EX2.
pipe_class istore_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : ISS(read);
  src    : EX2(read);
  INS01  : ISS;
  LDST   : WR;
%}
7694
//------- Branch pipeline operations ----------------------

// Branch
pipe_class pipe_branch()
%{
  single_instruction;
  INS01  : ISS;
  BRANCH : EX1;
%}

// Conditional branch
pipe_class pipe_branch_cond(rFlagsReg cr)
%{
  single_instruction;
  cr     : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}

// Compare & Branch
// EG.  CBZ/CBNZ
pipe_class pipe_cmp_branch(iRegI op1)
%{
  single_instruction;
  op1    : EX1(read);
  INS01  : ISS;
  BRANCH : EX1;
%}
7723
//------- Synchronisation operations ----------------------

// Any operation requiring serialization.
// EG.  DMB/Atomic Ops/Load Acquire/Str Release
pipe_class pipe_serial()
%{
  single_instruction;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}

// Generic big/slow expanded idiom - also serialized
pipe_class pipe_slow()
%{
  instruction_count(10);
  multiple_bundles;
  force_serialization;
  fixed_latency(16);
  INS01  : ISS(2); // Cannot dual issue with any other instruction
  LDST   : WR;
%}

// Empty pipeline class
pipe_class pipe_class_empty()
%{
  single_instruction;
  fixed_latency(0);
%}

// Default pipeline class.
pipe_class pipe_class_default()
%{
  single_instruction;
  fixed_latency(2);
%}

// Pipeline class for compares.
pipe_class pipe_class_compare()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for memory operations.
pipe_class pipe_class_memory()
%{
  single_instruction;
  fixed_latency(16);
%}

// Pipeline class for call.
pipe_class pipe_class_call()
%{
  single_instruction;
  fixed_latency(100);
%}

// Define the class for the Nop node.
define %{
   MachNop = pipe_class_empty;
%}
7787
7788 %}
7789 //----------INSTRUCTIONS-------------------------------------------------------
7790 //
7791 // match -- States which machine-independent subtree may be replaced
7792 // by this instruction.
7793 // ins_cost -- The estimated cost of this instruction is used by instruction
7794 // selection to identify a minimum cost tree of machine
7795 // instructions that matches a tree of machine-independent
7796 // instructions.
7797 // format -- A string providing the disassembly for this instruction.
7798 // The value of an instruction's operand may be inserted
7799 // by referring to it with a '$' prefix.
7800 // opcode -- Three instruction opcodes may be provided. These are referred
7801 // to within an encode class as $primary, $secondary, and $tertiary
//                respectively. The primary opcode is commonly used to
7803 // indicate the type of machine instruction, while secondary
7804 // and tertiary are often used for prefix options or addressing
7805 // modes.
7806 // ins_encode -- A list of encode classes with parameters. The encode class
7807 // name must have been defined in an 'enc_class' specification
7808 // in the encode section of the architecture description.
7809
7810 // ============================================================================
7811 // Memory (Load/Store) Instructions
7812
7813 // Load Instructions
7814
// Load Byte (8 bit signed)
instruct loadB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadB mem));
  // Only match plain (non-acquiring) loads; volatile/acquiring loads are
  // matched by the *_volatile rules further down. The same predicate
  // pattern is used by all the plain loads in this section.
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsbw $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsbw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit signed) into long
instruct loadB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));
  // n is the ConvI2L; n->in(1) is the LoadB being checked.
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsb $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrsb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit unsigned)
instruct loadUB(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUB mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrbw $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Byte (8 bit unsigned) into long
instruct loadUB2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrb $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldrb(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short (16 bit signed)
instruct loadS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrshw $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrshw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short (16 bit signed) into long
instruct loadS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsh $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrsh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Char (16 bit unsigned)
instruct loadUS(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadUS mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Short/Char (16 bit unsigned) into long
instruct loadUS2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrh $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldrh(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit signed)
instruct loadI(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadI mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit signed) into long
instruct loadI2L(iRegLNoSp dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadI mem)));
  predicate(!needs_acquiring_load(n->in(1)));

  ins_cost(4 * INSN_COST);
  format %{ "ldrsw $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrsw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Integer (32 bit unsigned) into long
instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  // n is the AndL; n->in(1)->in(1) reaches the LoadI under the ConvI2L.
  predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
7968
// Load Long (64 bit signed)
instruct loadL(iRegLNoSp dst, memory mem)
%{
  match(Set dst (LoadL mem));
  // Only match plain (non-acquiring) loads.
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  // Fixed: disassembly annotation said "# int" for a 64-bit load.
  format %{ "ldr $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}
7982
// Load Range
// (array length load; no acquiring variant, hence no predicate)
instruct loadRange(iRegINoSp dst, memory mem)
%{
  match(Set dst (LoadRange mem));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw $dst, $mem\t# range" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Pointer
instruct loadP(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadP mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Compressed Pointer
instruct loadN(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadN mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Klass Pointer
instruct loadKlass(iRegPNoSp dst, memory mem)
%{
  match(Set dst (LoadKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldr $dst, $mem\t# class" %}

  ins_encode(aarch64_enc_ldr(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Narrow Klass Pointer
instruct loadNKlass(iRegNNoSp dst, memory mem)
%{
  match(Set dst (LoadNKlass mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw $dst, $mem\t# compressed class ptr" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}

// Load Float
instruct loadF(vRegF dst, memory mem)
%{
  match(Set dst (LoadF mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrs $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_ldrs(dst, mem) );

  ins_pipe(pipe_class_memory);
%}

// Load Double
instruct loadD(vRegD dst, memory mem)
%{
  match(Set dst (LoadD mem));
  predicate(!needs_acquiring_load(n));

  ins_cost(4 * INSN_COST);
  format %{ "ldrd $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_ldrd(dst, mem) );

  ins_pipe(pipe_class_memory);
%}
8079
8080
// Load Int Constant
instruct loadConI(iRegINoSp dst, immI src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# int" %}

  ins_encode( aarch64_enc_movw_imm(dst, src) );

  ins_pipe(ialu_imm);
%}

// Load Long Constant
instruct loadConL(iRegLNoSp dst, immL src)
%{
  match(Set dst src);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long" %}

  ins_encode( aarch64_enc_mov_imm(dst, src) );

  ins_pipe(ialu_imm);
%}

// Load Pointer Constant
// (may expand to several mov/movk instructions, hence the higher cost)

instruct loadConP(iRegPNoSp dst, immP con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{
    "mov  $dst, $con\t# ptr\n\t"
  %}

  ins_encode(aarch64_enc_mov_p(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Null Pointer Constant

instruct loadConP0(iRegPNoSp dst, immP0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $con\t# NULL ptr" %}

  ins_encode(aarch64_enc_mov_p0(dst, con));

  ins_pipe(ialu_imm);
%}
8136
// Load Pointer Constant One

instruct loadConP1(iRegPNoSp dst, immP_1 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  // Fixed: annotation said "# NULL ptr" (copy-paste from loadConP0);
  // this rule materializes the constant-one pointer.
  format %{ "mov $dst, $con\t# ptr 1" %}

  ins_encode(aarch64_enc_mov_p1(dst, con));

  ins_pipe(ialu_imm);
%}
8150
// Load Poll Page Constant
// (address of the safepoint polling page, formed with adr)

instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr $dst, $con\t# Poll Page Ptr" %}

  ins_encode(aarch64_enc_mov_poll_page(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Byte Map Base Constant
// (card-table byte map base for GC barriers)

instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "adr $dst, $con\t# Byte Map Base" %}

  ins_encode(aarch64_enc_mov_byte_map_base(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Pointer Constant

instruct loadConN(iRegNNoSp dst, immN con)
%{
  match(Set dst con);

  ins_cost(INSN_COST * 4);
  format %{ "mov $dst, $con\t# compressed ptr" %}

  ins_encode(aarch64_enc_mov_n(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Null Pointer Constant

instruct loadConN0(iRegNNoSp dst, immN0 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $con\t# compressed NULL ptr" %}

  ins_encode(aarch64_enc_mov_n0(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Narrow Klass Constant

instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov $dst, $con\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_mov_nk(dst, con));

  ins_pipe(ialu_imm);
%}

// Load Packed Float Constant
// (constants encodable as an fmov immediate avoid a constant-table load)

instruct loadConF_packed(vRegF dst, immFPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST * 4);
  format %{ "fmovs $dst, $con"%}
  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
  %}

  ins_pipe(fp_imm_s);
%}

// Load Float Constant
// (general case: load from the constant table)

instruct loadConF(vRegF dst, immF con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 4);

  format %{
    "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
  %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_s);
%}

// Load Packed Double Constant

instruct loadConD_packed(vRegD dst, immDPacked con) %{
  match(Set dst con);
  ins_cost(INSN_COST);
  format %{ "fmovd $dst, $con"%}
  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
  %}

  ins_pipe(fp_imm_d);
%}
8264
// Load Double Constant
// (general case: load from the constant table)

instruct loadConD(vRegD dst, immD con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 5);
  // Fixed: annotation said "float=$con" (copy-paste from loadConF).
  format %{
    "ldrd $dst, [$constantaddress]\t# load from constant table: double=$con\n\t"
  %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_d);
%}
8281
// Store Instructions

// Store CMS card-mark Immediate
// (card mark may elide the preceding StoreStore when the predicate holds)
instruct storeimmCM0(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));
  predicate(unnecessary_storestore(n));

  ins_cost(INSN_COST);
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}

// Store CMS card-mark Immediate with intervening StoreStore
// needed when using CMS with no conditional card marking
instruct storeimmCM0_ordered(immI0 zero, memory mem)
%{
  match(Set mem (StoreCM mem zero));

  ins_cost(INSN_COST * 2);
  format %{ "dmb ishst"
            "\n\tstrb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0_ordered(mem));

  ins_pipe(istore_mem);
%}

// Store Byte
instruct storeB(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreB mem src));
  // Only match plain (non-releasing) stores; volatile stores are matched
  // by separate rules. Same pattern throughout this section.
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strb $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb(src, mem));

  ins_pipe(istore_reg_mem);
%}
8327
// Store Byte Zero
instruct storeimmB0(immI0 zero, memory mem)
%{
  match(Set mem (StoreB mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Fixed: annotation said "strb rscractch2" (typo); the strb0 encoding
  // stores zr, consistent with storeimmCM0/storeimmC0 above and below.
  format %{ "strb zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}
8340
// Store Char/Short
instruct storeC(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreC mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh $src, $mem\t# short" %}

  ins_encode(aarch64_enc_strh(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Char/Short Zero
instruct storeimmC0(immI0 zero, memory mem)
%{
  match(Set mem (StoreC mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh zr, $mem\t# short" %}

  ins_encode(aarch64_enc_strh0(mem));

  ins_pipe(istore_mem);
%}

// Store Integer

instruct storeI(iRegIorL2I src, memory mem)
%{
  match(Set mem(StoreI mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw $src, $mem\t# int" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Integer Zero
instruct storeimmI0(immI0 zero, memory mem)
%{
  match(Set mem(StoreI mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw zr, $mem\t# int" %}

  ins_encode(aarch64_enc_strw0(mem));

  ins_pipe(istore_mem);
%}
8395
// Store Long (64 bit signed)
instruct storeL(iRegL src, memory mem)
%{
  match(Set mem (StoreL mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Fixed: disassembly annotation said "# int" for a 64-bit store.
  format %{ "str $src, $mem\t# long" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}
8409
// Store Long Zero (64 bit signed)
instruct storeimmL0(immL0 zero, memory mem)
%{
  match(Set mem (StoreL mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  // Fixed: disassembly annotation said "# int" for a 64-bit store.
  format %{ "str zr, $mem\t# long" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}
8423
// Store Pointer
instruct storeP(iRegP src, memory mem)
%{
  match(Set mem (StoreP mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Null Pointer
instruct storeimmP0(immP0 zero, memory mem)
%{
  match(Set mem (StoreP mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str zr, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}

// Store Compressed Pointer
instruct storeN(iRegN src, memory mem)
%{
  match(Set mem (StoreN mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Compressed Null Pointer
// Uses rheapbase as the zero source: when both narrow oop and narrow
// klass bases are NULL, the heapbase register holds zero.
instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
%{
  match(Set mem (StoreN mem zero));
  predicate(Universe::narrow_oop_base() == NULL &&
            Universe::narrow_klass_base() == NULL &&
            (!needs_releasing_store(n)));

  ins_cost(INSN_COST);
  format %{ "strw rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}

  ins_encode(aarch64_enc_strw(heapbase, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Float
instruct storeF(vRegF src, memory mem)
%{
  match(Set mem (StoreF mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strs $src, $mem\t# float" %}

  ins_encode( aarch64_enc_strs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
instruct storeD(vRegD src, memory mem)
%{
  match(Set mem (StoreD mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strd $src, $mem\t# double" %}

  ins_encode( aarch64_enc_strd(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Klass Pointer
instruct storeNKlass(iRegN src, memory mem)
%{
  predicate(!needs_releasing_store(n));
  match(Set mem (StoreNKlass mem src));

  ins_cost(INSN_COST);
  format %{ "strw $src, $mem\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// TODO
// implement storeImmD0 and storeDImmPacked

// prefetch instructions
// Must be safe to execute with invalid address (cannot fault).

instruct prefetchalloc( memory mem ) %{
  match(PrefetchAllocation mem);

  ins_cost(INSN_COST);
  format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}

  ins_encode( aarch64_enc_prefetchw(mem) );

  ins_pipe(iload_prefetch);
%}
8542
// ---------------- volatile loads and stores ----------------

// Volatile loads are implemented with the load-acquire (ldar*) forms,
// so the required ordering comes from the instruction itself and no
// separate barrier is emitted. All of them serialize on pipe_serial
// and use the sync_memory (indirect, no offset) addressing form that
// ldar requires.

// Load Byte (8 bit signed)
instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit signed) into long
instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned)
instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned) into long
instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short (16 bit signed)
instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarshw $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarshw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short/Char (16 bit unsigned)
instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarhw $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarhw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short/Char (16 bit unsigned) into long
instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarh $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarh(dst, mem));

  ins_pipe(pipe_serial);
%}
8634
// Load Short/Char (16 bit signed) into long
instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(VOLATILE_REF_COST);
  // Fix: the format previously printed "ldarh" (the zero-extending
  // form) while the encoding emits ldarsh (sign-extending); make the
  // debug disassembly match the code actually generated.
  format %{ "ldarsh $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarsh(dst, mem));

  ins_pipe(pipe_serial);
%}
8647
// Load Integer (32 bit signed)
instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadI mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Integer (32 bit unsigned) into long
// (LoadI + ConvI2L masked with 0xFFFFFFFF; ldarw already zeroes the
// upper 32 bits, so a single instruction covers the whole subtree)
instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}
8673
// Load Long (64 bit signed)
instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(VOLATILE_REF_COST);
  // Fix: the disassembly comment previously said "# int" even though
  // this is the 64-bit ldar; annotate it as a long load.
  format %{ "ldar $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}
8686
// Load Pointer
instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadP mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Compressed Pointer
instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadN mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Float
// (ldar into a scratch GPR then fmov to the FP register — see the
// aarch64_enc_fldars encoding; there is no FP load-acquire form)
instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadF mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldars $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_fldars(dst, mem) );

  ins_pipe(pipe_serial);
%}

// Load Double
instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadD mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldard $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_fldard(dst, mem) );

  ins_pipe(pipe_serial);
%}

// Volatile stores use the store-release (stlr*) forms, the mirror of
// the ldar* loads above: ordering is built into the instruction.

// Store Byte
instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreB mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrb $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_stlrb(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Char/Short
instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreC mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrh $src, $mem\t# short" %}

  ins_encode(aarch64_enc_stlrh(src, mem));

  ins_pipe(pipe_class_memory);
%}
8764
// Store Integer

// Release-store of a 32-bit int (stlrw); release semantics make a
// trailing barrier unnecessary.
instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  // Fix: restore the conventional space after "Set mem" to match every
  // sibling store rule (cosmetic; ADLC parses both forms identically).
  match(Set mem (StoreI mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw $src, $mem\t# int" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}
8778
// Store Long (64 bit signed)
instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreL mem src));

  ins_cost(VOLATILE_REF_COST);
  // Fix: the disassembly comment previously said "# int" for this
  // 64-bit release store; annotate it as a long store.
  format %{ "stlr $src, $mem\t# long" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}
8791
// Store Pointer
instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreP mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Pointer
instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreN mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Float
// (fmov to a scratch GPR then stlr — see aarch64_enc_fstlrs; there is
// no FP store-release form)
instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreF mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrs $src, $mem\t# float" %}

  ins_encode( aarch64_enc_fstlrs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreD mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrd $src, $mem\t# double" %}

  ins_encode( aarch64_enc_fstlrd(src, mem) );

  ins_pipe(pipe_class_memory);
%}
8846
// ---------------- end of volatile loads and stores ----------------

// ============================================================================
// BSWAP Instructions
//
// Byte-reversal rules backed by the rev/rev16 family. The 16-bit
// signed variant needs a follow-up sign-extension because rev16w only
// swaps bytes within each halfword.

instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesI src));

  ins_cost(INSN_COST);
  format %{ "revw $dst, $src" %}

  ins_encode %{
    __ revw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
  match(Set dst (ReverseBytesL src));

  ins_cost(INSN_COST);
  format %{ "rev $dst, $src" %}

  ins_encode %{
    __ rev(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesUS src));

  ins_cost(INSN_COST);
  format %{ "rev16w $dst, $src" %}

  ins_encode %{
    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesS src));

  ins_cost(INSN_COST);
  format %{ "rev16w $dst, $src\n\t"
            "sbfmw $dst, $dst, #0, #15" %}

  ins_encode %{
    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
    // sbfmw dst, dst, #0, #15 sign-extends the swapped low halfword
    __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
  %}

  ins_pipe(ialu_reg);
%}
8905
8906 // ============================================================================
8907 // Zero Count Instructions
8908
8909 instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
8910 match(Set dst (CountLeadingZerosI src));
8911
8912 ins_cost(INSN_COST);
8913 format %{ "clzw $dst, $src" %}
8914 ins_encode %{
8915 __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
8916 %}
8917
8918 ins_pipe(ialu_reg);
8919 %}
8920
8921 instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
8922 match(Set dst (CountLeadingZerosL src));
8923
8924 ins_cost(INSN_COST);
8925 format %{ "clz $dst, $src" %}
8926 ins_encode %{
8927 __ clz(as_Register($dst$$reg), as_Register($src$$reg));
8928 %}
8929
8930 ins_pipe(ialu_reg);
8931 %}
8932
8933 instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
8934 match(Set dst (CountTrailingZerosI src));
8935
8936 ins_cost(INSN_COST * 2);
8937 format %{ "rbitw $dst, $src\n\t"
8938 "clzw $dst, $dst" %}
8939 ins_encode %{
8940 __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
8941 __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
8942 %}
8943
8944 ins_pipe(ialu_reg);
8945 %}
8946
8947 instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
8948 match(Set dst (CountTrailingZerosL src));
8949
8950 ins_cost(INSN_COST * 2);
8951 format %{ "rbit $dst, $src\n\t"
8952 "clz $dst, $dst" %}
8953 ins_encode %{
8954 __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
8955 __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
8956 %}
8957
8958 ins_pipe(ialu_reg);
8959 %}
8960
//---------- Population Count Instructions -------------------------------------
//
// There is no scalar popcount on AArch64; the value is moved into a
// SIMD register, counted per byte with CNT, summed with ADDV, and
// moved back — hence the large ins_cost.

instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "movw $src, $src\n\t"
            "mov $tmp, $src\t# vector (1D)\n\t"
            "cnt $tmp, $tmp\t# vector (8B)\n\t"
            "addv $tmp, $tmp\t# vector (8B)\n\t"
            "mov $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    // NOTE(review): this rewrites $src in place without declaring a
    // USE_KILL/TEMP effect on src. movw src,src only zeroes the upper
    // 32 bits, which are dead for an int value, so it appears benign —
    // but confirm against the register allocator's assumptions.
    __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Memory-operand form: load the 32-bit value straight into the SIMD
// register (ldrs) and count there, skipping the GPR round trip.
instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrs $tmp, $mem\n\t"
            "cnt $tmp, $tmp\t# vector (8B)\n\t"
            "addv $tmp, $tmp\t# vector (8B)\n\t"
            "mov $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
              as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "mov $tmp, $src\t# vector (1D)\n\t"
            "cnt $tmp, $tmp\t# vector (8B)\n\t"
            "addv $tmp, $tmp\t# vector (8B)\n\t"
            "mov $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Memory-operand form of popCountL (ldrd straight into SIMD).
instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrd $tmp, $mem\n\t"
            "cnt $tmp, $tmp\t# vector (8B)\n\t"
            "addv $tmp, $tmp\t# vector (8B)\n\t"
            "mov $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
              as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
9050
9051 // ============================================================================
9052 // MemBar Instruction
9053
9054 instruct load_fence() %{
9055 match(LoadFence);
9056 ins_cost(VOLATILE_REF_COST);
9057
9058 format %{ "load_fence" %}
9059
9060 ins_encode %{
9061 __ membar(Assembler::LoadLoad|Assembler::LoadStore);
9062 %}
9063 ins_pipe(pipe_serial);
9064 %}
9065
9066 instruct unnecessary_membar_acquire() %{
9067 predicate(unnecessary_acquire(n));
9068 match(MemBarAcquire);
9069 ins_cost(0);
9070
9071 format %{ "membar_acquire (elided)" %}
9072
9073 ins_encode %{
9074 __ block_comment("membar_acquire (elided)");
9075 %}
9076
9077 ins_pipe(pipe_class_empty);
9078 %}
9079
9080 instruct membar_acquire() %{
9081 match(MemBarAcquire);
9082 ins_cost(VOLATILE_REF_COST);
9083
9084 format %{ "membar_acquire" %}
9085
9086 ins_encode %{
9087 __ block_comment("membar_acquire");
9088 __ membar(Assembler::LoadLoad|Assembler::LoadStore);
9089 %}
9090
9091 ins_pipe(pipe_serial);
9092 %}
9093
9094
9095 instruct membar_acquire_lock() %{
9096 match(MemBarAcquireLock);
9097 ins_cost(VOLATILE_REF_COST);
9098
9099 format %{ "membar_acquire_lock (elided)" %}
9100
9101 ins_encode %{
9102 __ block_comment("membar_acquire_lock (elided)");
9103 %}
9104
9105 ins_pipe(pipe_serial);
9106 %}
9107
9108 instruct store_fence() %{
9109 match(StoreFence);
9110 ins_cost(VOLATILE_REF_COST);
9111
9112 format %{ "store_fence" %}
9113
9114 ins_encode %{
9115 __ membar(Assembler::LoadStore|Assembler::StoreStore);
9116 %}
9117 ins_pipe(pipe_serial);
9118 %}
9119
9120 instruct unnecessary_membar_release() %{
9121 predicate(unnecessary_release(n));
9122 match(MemBarRelease);
9123 ins_cost(0);
9124
9125 format %{ "membar_release (elided)" %}
9126
9127 ins_encode %{
9128 __ block_comment("membar_release (elided)");
9129 %}
9130 ins_pipe(pipe_serial);
9131 %}
9132
9133 instruct membar_release() %{
9134 match(MemBarRelease);
9135 ins_cost(VOLATILE_REF_COST);
9136
9137 format %{ "membar_release" %}
9138
9139 ins_encode %{
9140 __ block_comment("membar_release");
9141 __ membar(Assembler::LoadStore|Assembler::StoreStore);
9142 %}
9143 ins_pipe(pipe_serial);
9144 %}
9145
9146 instruct membar_storestore() %{
9147 match(MemBarStoreStore);
9148 ins_cost(VOLATILE_REF_COST);
9149
9150 format %{ "MEMBAR-store-store" %}
9151
9152 ins_encode %{
9153 __ membar(Assembler::StoreStore);
9154 %}
9155 ins_pipe(pipe_serial);
9156 %}
9157
9158 instruct membar_release_lock() %{
9159 match(MemBarReleaseLock);
9160 ins_cost(VOLATILE_REF_COST);
9161
9162 format %{ "membar_release_lock (elided)" %}
9163
9164 ins_encode %{
9165 __ block_comment("membar_release_lock (elided)");
9166 %}
9167
9168 ins_pipe(pipe_serial);
9169 %}
9170
9171 instruct unnecessary_membar_volatile() %{
9172 predicate(unnecessary_volatile(n));
9173 match(MemBarVolatile);
9174 ins_cost(0);
9175
9176 format %{ "membar_volatile (elided)" %}
9177
9178 ins_encode %{
9179 __ block_comment("membar_volatile (elided)");
9180 %}
9181
9182 ins_pipe(pipe_serial);
9183 %}
9184
9185 instruct membar_volatile() %{
9186 match(MemBarVolatile);
9187 ins_cost(VOLATILE_REF_COST*100);
9188
9189 format %{ "membar_volatile" %}
9190
9191 ins_encode %{
9192 __ block_comment("membar_volatile");
9193 __ membar(Assembler::StoreLoad);
9194 %}
9195
9196 ins_pipe(pipe_serial);
9197 %}
9198
9199 // ============================================================================
9200 // Cast/Convert Instructions
9201
9202 instruct castX2P(iRegPNoSp dst, iRegL src) %{
9203 match(Set dst (CastX2P src));
9204
9205 ins_cost(INSN_COST);
9206 format %{ "mov $dst, $src\t# long -> ptr" %}
9207
9208 ins_encode %{
9209 if ($dst$$reg != $src$$reg) {
9210 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
9211 }
9212 %}
9213
9214 ins_pipe(ialu_reg);
9215 %}
9216
9217 instruct castP2X(iRegLNoSp dst, iRegP src) %{
9218 match(Set dst (CastP2X src));
9219
9220 ins_cost(INSN_COST);
9221 format %{ "mov $dst, $src\t# ptr -> long" %}
9222
9223 ins_encode %{
9224 if ($dst$$reg != $src$$reg) {
9225 __ mov(as_Register($dst$$reg), as_Register($src$$reg));
9226 }
9227 %}
9228
9229 ins_pipe(ialu_reg);
9230 %}
9231
9232 // Convert oop into int for vectors alignment masking
9233 instruct convP2I(iRegINoSp dst, iRegP src) %{
9234 match(Set dst (ConvL2I (CastP2X src)));
9235
9236 ins_cost(INSN_COST);
9237 format %{ "movw $dst, $src\t# ptr -> int" %}
9238 ins_encode %{
9239 __ movw($dst$$Register, $src$$Register);
9240 %}
9241
9242 ins_pipe(ialu_reg);
9243 %}
9244
// Convert compressed oop into int for vectors alignment masking
// in case of 32bit oops (heap < 4Gb).
// Only valid when the narrow-oop shift is zero, so the compressed
// bits are the low 32 bits of the decoded address.
instruct convN2I(iRegINoSp dst, iRegN src)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  ins_cost(INSN_COST);
  // Fix: the format previously read "mov dst, $src" — the missing '$'
  // made the destination print literally as "dst"; also name the
  // actual 32-bit move (movw) that the encoding emits.
  format %{ "movw $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
9260
9261
9262 // Convert oop pointer into compressed form
9263 instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
9264 predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
9265 match(Set dst (EncodeP src));
9266 effect(KILL cr);
9267 ins_cost(INSN_COST * 3);
9268 format %{ "encode_heap_oop $dst, $src" %}
9269 ins_encode %{
9270 Register s = $src$$Register;
9271 Register d = $dst$$Register;
9272 __ encode_heap_oop(d, s);
9273 %}
9274 ins_pipe(ialu_reg);
9275 %}
9276
9277 instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
9278 predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
9279 match(Set dst (EncodeP src));
9280 ins_cost(INSN_COST * 3);
9281 format %{ "encode_heap_oop_not_null $dst, $src" %}
9282 ins_encode %{
9283 __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
9284 %}
9285 ins_pipe(ialu_reg);
9286 %}
9287
9288 instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
9289 predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
9290 n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
9291 match(Set dst (DecodeN src));
9292 ins_cost(INSN_COST * 3);
9293 format %{ "decode_heap_oop $dst, $src" %}
9294 ins_encode %{
9295 Register s = $src$$Register;
9296 Register d = $dst$$Register;
9297 __ decode_heap_oop(d, s);
9298 %}
9299 ins_pipe(ialu_reg);
9300 %}
9301
9302 instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
9303 predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
9304 n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
9305 match(Set dst (DecodeN src));
9306 ins_cost(INSN_COST * 3);
9307 format %{ "decode_heap_oop_not_null $dst, $src" %}
9308 ins_encode %{
9309 Register s = $src$$Register;
9310 Register d = $dst$$Register;
9311 __ decode_heap_oop_not_null(d, s);
9312 %}
9313 ins_pipe(ialu_reg);
9314 %}
9315
9316 // n.b. AArch64 implementations of encode_klass_not_null and
9317 // decode_klass_not_null do not modify the flags register so, unlike
9318 // Intel, we don't kill CR as a side effect here
9319
9320 instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
9321 match(Set dst (EncodePKlass src));
9322
9323 ins_cost(INSN_COST * 3);
9324 format %{ "encode_klass_not_null $dst,$src" %}
9325
9326 ins_encode %{
9327 Register src_reg = as_Register($src$$reg);
9328 Register dst_reg = as_Register($dst$$reg);
9329 __ encode_klass_not_null(dst_reg, src_reg);
9330 %}
9331
9332 ins_pipe(ialu_reg);
9333 %}
9334
9335 instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
9336 match(Set dst (DecodeNKlass src));
9337
9338 ins_cost(INSN_COST * 3);
9339 format %{ "decode_klass_not_null $dst,$src" %}
9340
9341 ins_encode %{
9342 Register src_reg = as_Register($src$$reg);
9343 Register dst_reg = as_Register($dst$$reg);
9344 if (dst_reg != src_reg) {
9345 __ decode_klass_not_null(dst_reg, src_reg);
9346 } else {
9347 __ decode_klass_not_null(dst_reg);
9348 }
9349 %}
9350
9351 ins_pipe(ialu_reg);
9352 %}
9353
9354 instruct checkCastPP(iRegPNoSp dst)
9355 %{
9356 match(Set dst (CheckCastPP dst));
9357
9358 size(0);
9359 format %{ "# checkcastPP of $dst" %}
9360 ins_encode(/* empty encoding */);
9361 ins_pipe(pipe_class_empty);
9362 %}
9363
9364 instruct castPP(iRegPNoSp dst)
9365 %{
9366 match(Set dst (CastPP dst));
9367
9368 size(0);
9369 format %{ "# castPP of $dst" %}
9370 ins_encode(/* empty encoding */);
9371 ins_pipe(pipe_class_empty);
9372 %}
9373
9374 instruct castII(iRegI dst)
9375 %{
9376 match(Set dst (CastII dst));
9377
9378 size(0);
9379 format %{ "# castII of $dst" %}
9380 ins_encode(/* empty encoding */);
9381 ins_cost(0);
9382 ins_pipe(pipe_class_empty);
9383 %}
9384
9385 // ============================================================================
9386 // Atomic operation instructions
9387 //
9388 // Intel and SPARC both implement Ideal Node LoadPLocked and
9389 // Store{PIL}Conditional instructions using a normal load for the
9390 // LoadPLocked and a CAS for the Store{PIL}Conditional.
9391 //
9392 // The ideal code appears only to use LoadPLocked/StorePLocked as a
9393 // pair to lock object allocations from Eden space when not using
9394 // TLABs.
9395 //
9396 // There does not appear to be a Load{IL}Locked Ideal Node and the
9397 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
9398 // and to use StoreIConditional only for 32-bit and StoreLConditional
9399 // only for 64-bit.
9400 //
9401 // We implement LoadPLocked and StorePLocked instructions using,
9402 // respectively the AArch64 hw load-exclusive and store-conditional
9403 // instructions. Whereas we must implement each of
9404 // Store{IL}Conditional using a CAS which employs a pair of
9405 // instructions comprising a load-exclusive followed by a
9406 // store-conditional.
9407
9408
9409 // Locked-load (linked load) of the current heap-top
9410 // used when updating the eden heap top
9411 // implemented using ldaxr on AArch64
9412
9413 instruct loadPLocked(iRegPNoSp dst, indirect mem)
9414 %{
9415 match(Set dst (LoadPLocked mem));
9416
9417 ins_cost(VOLATILE_REF_COST);
9418
9419 format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}
9420
9421 ins_encode(aarch64_enc_ldaxr(dst, mem));
9422
9423 ins_pipe(pipe_serial);
9424 %}
9425
9426 // Conditional-store of the updated heap-top.
9427 // Used during allocation of the shared heap.
9428 // Sets flag (EQ) on success.
9429 // implemented using stlxr on AArch64.
9430
9431 instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
9432 %{
9433 match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
9434
9435 ins_cost(VOLATILE_REF_COST);
9436
9437 // TODO
9438 // do we need to do a store-conditional release or can we just use a
9439 // plain store-conditional?
9440
9441 format %{
9442 "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release"
9443 "cmpw rscratch1, zr\t# EQ on successful write"
9444 %}
9445
9446 ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));
9447
9448 ins_pipe(pipe_serial);
9449 %}
9450
9451
9452 // storeLConditional is used by PhaseMacroExpand::expand_lock_node
9453 // when attempting to rebias a lock towards the current thread. We
9454 // must use the acquire form of cmpxchg in order to guarantee acquire
9455 // semantics in this case.
9456 instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
9457 %{
9458 match(Set cr (StoreLConditional mem (Binary oldval newval)));
9459
9460 ins_cost(VOLATILE_REF_COST);
9461
9462 format %{
9463 "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
9464 "cmpw rscratch1, zr\t# EQ on successful write"
9465 %}
9466
9467 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));
9468
9469 ins_pipe(pipe_slow);
9470 %}
9471
9472 // storeIConditional also has acquire semantics, for no better reason
9473 // than matching storeLConditional. At the time of writing this
9474 // comment storeIConditional was not used anywhere by AArch64.
9475 instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
9476 %{
9477 match(Set cr (StoreIConditional mem (Binary oldval newval)));
9478
9479 ins_cost(VOLATILE_REF_COST);
9480
9481 format %{
9482 "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
9483 "cmpw rscratch1, zr\t# EQ on successful write"
9484 %}
9485
9486 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));
9487
9488 ins_pipe(pipe_slow);
9489 %}
9490
// standard CompareAndSwapX when we are using barriers
// these have higher priority than the rules selected by a predicate

// XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
// can't match them

// Each rule emits a cmpxchg sequence followed by cset (via
// aarch64_enc_cset_eq) to materialize the boolean result in $res.

instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
             aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
             aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
             aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
             aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// alternative CompareAndSwapX when we are eliding barriers
// (predicate proves an acquiring load-exclusive suffices; note the
// halved ins_cost so these win over the standard rules when legal)

instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
             aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
             aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
             aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
             aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
9646
9647
9648 // ---------------------------------------------------------------------
9649
9650
9651 // BEGIN This section of the file is automatically generated. Do not edit --------------
9652
9653 // Sundry CAS operations. Note that release is always true,
9654 // regardless of the memory ordering of the CAS. This is because we
9655 // need the volatile case to be sequentially consistent but there is
9656 // no trailing StoreLoad barrier emitted by C2. Unfortunately we
9657 // can't check the type of memory ordering here, so we always emit a
9658 // STLXR.
9659
9660 // This section is generated from aarch64_ad_cas.m4
9661
9662
9663
9664 instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
9665 match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
9666 ins_cost(2 * VOLATILE_REF_COST);
9667 effect(TEMP_DEF res, KILL cr);
9668 format %{
9669 "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
9670 %}
9671 ins_encode %{
9672 __ uxtbw(rscratch2, $oldval$$Register);
9673 __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
9674 Assembler::byte, /*acquire*/ false, /*release*/ true,
9675 /*weak*/ false, $res$$Register);
9676 __ sxtbw($res$$Register, $res$$Register);
9677 %}
9678 ins_pipe(pipe_slow);
9679 %}
9680
// Strong CAS of a short: returns the OLD value in $res (sign-extended).
// NOTE(review): format annotation fixed -- this is a strong CAS
// (/*weak*/ false below); "(short, weak)" was copied from the weak
// variants.  Generated from aarch64_ad_cas.m4; mirror the fix there.
instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    // Sub-word CAS: zero-extend oldval for the 32-bit compare, then
    // sign-extend the returned halfword into a proper int.
    __ uxthw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxthw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9697
// Strong CAS of an int: returns the OLD value in $res.
// NOTE(review): format annotation fixed -- this is a strong CAS
// (/*weak*/ false below); "(int, weak)" was copied from the weak
// variants.  Generated from aarch64_ad_cas.m4; mirror the fix there.
instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9712
// Strong CAS of a long: returns the OLD value in $res.
// NOTE(review): format annotation fixed -- this is a strong CAS
// (/*weak*/ false below); "(long, weak)" was copied from the weak
// variants.  Generated from aarch64_ad_cas.m4; mirror the fix there.
instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9727
// Strong CAS of a narrow oop: returns the OLD value in $res.
// NOTE(review): format annotation fixed -- this is a strong CAS
// (/*weak*/ false below); "(narrow oop, weak)" was copied from the
// weak variants.  Generated from aarch64_ad_cas.m4; mirror the fix there.
instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9742
// Strong CAS of a pointer: returns the OLD value in $res.
// NOTE(review): format annotation fixed -- this is a strong CAS
// (/*weak*/ false below); "(ptr, weak)" was copied from the weak
// variants.  Generated from aarch64_ad_cas.m4; mirror the fix there.
instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
9757
// Weak CAS of a byte: $res <- 1 on success, 0 on failure.  Weak means
// the store-exclusive may fail spuriously (/*weak*/ true below) and the
// caller is expected to retry.
instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    // Sub-word CAS: zero-extend oldval into rscratch2 for the 32-bit
    // compare; the loaded value itself is discarded (noreg).
    __ uxtbw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
9775
// Weak CAS of a short: $res <- 1 on success, 0 on failure (may fail
// spuriously; caller retries).
instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    // Sub-word CAS: zero-extend oldval for the 32-bit compare; loaded
    // value discarded (noreg).
    __ uxthw(rscratch2, $oldval$$Register);
    __ cmpxchg($mem$$Register, rscratch2, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
9793
// Weak CAS of an int: $res <- 1 on success, 0 on failure (may fail
// spuriously; caller retries).
instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
9810
// Weak CAS of a long: $res <- 1 on success, 0 on failure.  n.b. $res is
// an int register (the boolean result), not a long.
instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
9827
// Weak CAS of a narrow oop: $res <- 1 on success, 0 on failure.
instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
9844
// Weak CAS of a pointer: $res <- 1 on success, 0 on failure.  n.b. $res
// is an int register (the boolean result), not a pointer.
instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
9861
9862 // END This section of the file is automatically generated. Do not edit --------------
9863 // ---------------------------------------------------------------------
9864
// Atomic exchange of an int: swap $newv into [$mem], previous value in
// $prev.
instruct get_and_setI(indirect mem, iRegINoSp newv, iRegI prev) %{
  match(Set prev (GetAndSetI mem newv));
  format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9873
// Atomic exchange of a long: swap $newv into [$mem], previous value in
// $prev.
instruct get_and_setL(indirect mem, iRegLNoSp newv, iRegL prev) %{
  match(Set prev (GetAndSetL mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9882
// Atomic exchange of a narrow oop (32-bit xchg): previous value in
// $prev.
instruct get_and_setN(indirect mem, iRegNNoSp newv, iRegI prev) %{
  match(Set prev (GetAndSetN mem newv));
  format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9891
// Atomic exchange of a pointer: previous value in $prev.
instruct get_and_setP(indirect mem, iRegPNoSp newv, iRegP prev) %{
  match(Set prev (GetAndSetP mem newv));
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9900
9901
// Atomic fetch-and-add (long, register increment): $newval receives the
// value fetched from [$mem] before the add.
instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9911
// Atomic add (long) where the fetched value is unused: cheaper variant
// selected when the ideal node's result is dead (see predicate).
instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    // noreg: discard the previous value.
    __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9922
// Atomic fetch-and-add (long, immediate increment).
instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9932
// Atomic add (long, immediate increment) with dead result.
instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9943
// Atomic fetch-and-add (int, register increment).
instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9953
// Atomic add (int, register increment) with dead result.
instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9964
// Atomic fetch-and-add (int, immediate increment).
instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  match(Set newval (GetAndAddI mem incr));
  ins_cost(INSN_COST * 10);
  format %{ "get_and_addI $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9974
// Atomic add (int, immediate increment) with dead result.
instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(INSN_COST * 9);
  format %{ "get_and_addI [$mem], $incr" %}
  ins_encode %{
    __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
9985
9986 // Manifest a CmpL result in an integer register.
9987 // (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
// Three-way long compare (reg, reg):
//   $dst = -1 if $src1 < $src2, 0 if equal, 1 if $src1 > $src2.
instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  // format %{ "CmpL3 $dst, $src1, $src2" %}
  ins_encode %{
    // csetw gives 0/1 for eq/ne; cnegw then negates the 1 when src1 <
    // src2, yielding the -1/0/1 encoding.
    __ cmp($src1$$Register, $src2$$Register);
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
10008
// Three-way long compare (reg, add/sub immediate):
//   $dst = -1 if $src1 < $src2, 0 if equal, 1 if $src1 > $src2.
instruct cmpL3_reg_imm(iRegINoSp dst, iRegL src1, immLAddSub src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    // The add/sub immediate field is unsigned, so a negative constant
    // is compared by adding its negation instead of subtracting it.
    int32_t con = (int32_t)$src2$$constant;
     if (con < 0) {
      __ adds(zr, $src1$$Register, -con);
    } else {
      __ subs(zr, $src1$$Register, con);
    }
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
10033
10034 // ============================================================================
10035 // Conditional Move Instructions
10036
// n.b. we have identical rules for both a signed compare op (cmpOp)
// and an unsigned compare op (cmpOpU). it would be nice if we could
// define an op class which merged both inputs and use it to type the
// argument to a single rule. unfortunately this fails because the
// opclass does not live up to the COND_INTER interface of its
// component operands. When the generic code tries to negate the
// operand it ends up running the generic MachOper::negate method
// which throws a ShouldNotHappen. So, we have to provide two flavours
// of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
10046
// Conditional move, int, signed compare.
// n.b. csel operands are reversed relative to the match rule: $src2 is
// selected when $cmp holds, otherwise $src1.
instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10062
// Conditional move, int, unsigned compare (same encoding as the signed
// rule; only the operand/flag register classes differ).
instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10078
10079 // special cases where one arg is zero
10080
10081 // n.b. this is selected in preference to the rule above because it
10082 // avoids loading constant 0 into a source register
10083
10084 // TODO
10085 // we ought only to be able to cull one of these variants as the ideal
10086 // transforms ought always to order the zero consistently (to left/right?)
10087
// Conditional move, int, signed compare, first arg zero: uses zr
// instead of materializing constant 0 in a register.
instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10103
// Conditional move, int, unsigned compare, first arg zero.
instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10119
// Conditional move, int, signed compare, second arg zero.
instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10135
// Conditional move, int, unsigned compare, second arg zero.
instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10151
10152 // special case for creating a boolean 0 or 1
10153
10154 // n.b. this is selected in preference to the rule above because it
10155 // avoids loading constants 0 and 1 into a source register
10156
// Boolean materialization: CMove of constants 1/0 collapses to a single
// csincw (zr, zr): yields 0 when $cmp holds, 1 otherwise -- no source
// registers needed.
instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
10175
// Boolean materialization, unsigned compare variant of the rule above.
instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
10194
// Conditional move, long, signed compare ($src2 selected when $cmp
// holds, else $src1).
instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10210
// Conditional move, long, unsigned compare.
instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10226
10227 // special cases where one arg is zero
10228
// Conditional move, long, signed compare, second arg zero (uses zr).
instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10244
// Conditional move, long, unsigned compare, second arg zero.
instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10260
// Conditional move, long, signed compare, first arg zero.
instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10276
// Conditional move, long, unsigned compare, first arg zero.
instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10292
// Conditional move, pointer, signed compare.
instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10308
// Conditional move, pointer, unsigned compare.
instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10324
10325 // special cases where one arg is zero
10326
// Conditional move, pointer, signed compare, second arg null (zr).
instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10342
// Conditional move, pointer, unsigned compare, second arg null (zr).
instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10358
// Conditional move, pointer, signed compare, first arg null (zr).
instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10374
// Conditional move, pointer, unsigned compare, first arg null (zr).
instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10390
// Conditional move, compressed pointer (32-bit cselw), signed compare.
instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10406
// Conditional move, compressed pointer (32-bit cselw), unsigned compare.
// NOTE(review): format annotation fixed -- this is the unsigned variant
// (cmpOpU/rFlagsRegU); the "# signed" text was copied from
// cmovN_reg_reg above.  All other U-variants in this file say
// "unsigned".
instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
10422
10423 // special cases where one arg is zero
10424
// Conditional move, compressed pointer, signed compare, second arg
// null (zr).
instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10440
// Conditional move, compressed pointer, unsigned compare, second arg
// null (zr).
instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10456
// Conditional move, compressed pointer, signed compare, first arg
// null (zr).
instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10472
// Conditional move, compressed pointer, unsigned compare, first arg
// null (zr).
instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
10488
// Conditional move, float, signed compare: fcsels selects $src2 when
// $cmp holds, else $src1 (operands reversed, as for the integer rules).
instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
10506
// Conditional move, float, unsigned compare.
instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
10524
// Conditional move, double, signed compare.
// NOTE(review): format annotation fixed -- this rule operates on
// doubles (vRegD, CMoveD, fcseld); the old text said "cmove float",
// copied from cmovF_reg.
instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
10542
// Conditional move, double, unsigned compare.
// NOTE(review): format annotation fixed -- operates on doubles (vRegD,
// CMoveD, fcseld); the old text said "cmove float".
instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
10560
10561 // ============================================================================
10562 // Arithmetic Instructions
10563 //
10564
10565 // Integer Addition
10566
10567 // TODO
10568 // these currently employ operations which do not set CR and hence are
10569 // not flagged as killing CR but we would like to isolate the cases
10570 // where we want to set flags from those where we don't. need to work
10571 // out how to do that.
10572
// Integer add, register + register.
instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
10587
// Integer add, register + add/sub immediate.
instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
10601
// Integer add of a narrowed long, register + add/sub immediate: the
// ConvL2I is free because addw only reads the low 32 bits of src1.
instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
  match(Set dst (AddI (ConvL2I src1) src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
10615
10616 // Pointer Addition
// Pointer Addition
instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
10631
// Pointer addition with a sign-extended int offset: the ConvI2L is
// folded into the add's sxtw extension.
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
  match(Set dst (AddP src1 (ConvI2L src2)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}
10646
// Pointer addition with a shifted long index: the LShiftL is folded
// into the address mode (base + index << scale) via lea.
instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL src2 scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::lsl($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10661
// Pointer plus scaled int index: both the ConvI2L and the shift are
// folded into a single sxtw-scaled addressing mode via lea.
instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::sxtw($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10676
// Sign-extend an int to long and left-shift in one sbfiz.
// lsb = scale & 63; width = min(32, 64 - lsb) since only the low 32
// bits of src are significant after ConvI2L (the clamp keeps
// lsb + width <= 64 as sbfiz requires).
instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
  match(Set dst (LShiftL (ConvI2L src) scale));

  ins_cost(INSN_COST);
  format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}

  ins_encode %{
    __ sbfiz(as_Register($dst$$reg),
          as_Register($src$$reg),
          $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
  %}

  ins_pipe(ialu_reg_shift);
%}
10691
10692 // Pointer Immediate Addition
10693 // n.b. this needs to be more expensive than using an indirect memory
10694 // operand
// Pointer plus add/sub-encodable immediate.
instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
10708
10709 // Long Addition
// 64-bit register-register add.
instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
10725
// Long Immediate Addition. No constant pool entries required.
// 64-bit add of register and add/sub-encodable immediate.
instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
10740
10741 // Integer Subtraction
// 32-bit register-register subtract.
instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
10756
10757 // Immediate Subtraction
// 32-bit subtract of add/sub-encodable immediate from register.
instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
10771
10772 // Long Subtraction
// 64-bit register-register subtract.
instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
10788
// Long Immediate Subtraction. No constant pool entries required.
// 64-bit subtract of add/sub-encodable immediate from register.
instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  // Note: the debug format string previously read "sub$dst" (missing
  // space between mnemonic and first operand); fixed to match the
  // other add/sub immediate rules.
  format %{ "sub $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
10803
10804 // Integer Negation (special case for sub)
10805
// 32-bit negate: matches (0 - src) and emits negw.
instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
  match(Set dst (SubI zero src));

  ins_cost(INSN_COST);
  format %{ "negw $dst, $src\t# int" %}

  ins_encode %{
    __ negw(as_Register($dst$$reg),
            as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
10819
10820 // Long Negation
10821
// 64-bit negate: matches (0 - src) and emits neg.
instruct negL_reg(iRegLNoSp dst, iRegIorL2I src, immL0 zero, rFlagsReg cr) %{
  match(Set dst (SubL zero src));

  ins_cost(INSN_COST);
  format %{ "neg $dst, $src\t# long" %}

  ins_encode %{
    __ neg(as_Register($dst$$reg),
           as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
10835
10836 // Integer Multiply
10837
// 32-bit register-register multiply.
instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulI src1 src2));

  ins_cost(INSN_COST * 3);
  format %{ "mulw  $dst, $src1, $src2" %}

  ins_encode %{
    __ mulw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}
10852
// Widening 32x32->64 signed multiply: both ConvI2L nodes are folded
// into a single smull instruction.
instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));

  ins_cost(INSN_COST * 3);
  format %{ "smull  $dst, $src1, $src2" %}

  ins_encode %{
    __ smull(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}
10867
10868 // Long Multiply
10869
// 64-bit register-register multiply.
instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (MulL src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "mul  $dst, $src1, $src2" %}

  ins_encode %{
    __ mul(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
10884
// High 64 bits of a signed 64x64 multiply (MulHiL) via smulh.
instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
%{
  match(Set dst (MulHiL src1 src2));

  ins_cost(INSN_COST * 7);
  // Fixed debug format: removed stray trailing comma after $src2.
  format %{ "smulh $dst, $src1, $src2\t# mulhi" %}

  ins_encode %{
    __ smulh(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
10900
10901 // Combined Integer Multiply & Add/Sub
10902
// 32-bit multiply-add: dst = src3 + src1 * src2.
instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (AddI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // Format fixed to "maddw": the encoding emits the 32-bit maddw, not
  // the 64-bit madd the old string suggested.
  format %{ "maddw $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ maddw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
10918
// 32-bit multiply-subtract: dst = src3 - src1 * src2.
instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  match(Set dst (SubI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // Format fixed to "msubw": the encoding emits the 32-bit msubw, not
  // the 64-bit msub the old string suggested.
  format %{ "msubw $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msubw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
10934
10935 // Combined Long Multiply & Add/Sub
10936
// 64-bit multiply-add: dst = src3 + src1 * src2.
instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (AddL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "madd  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ madd(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
10952
// 64-bit multiply-subtract: dst = src3 - src1 * src2.
instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  match(Set dst (SubL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "msub  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msub(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
10968
10969 // Integer Divide
10970
// 32-bit signed divide via shared encoding aarch64_enc_divw.
instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (DivI src1 src2));

  ins_cost(INSN_COST * 19);
  format %{ "sdivw  $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_divw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
10980
// (src >> 31) >>> 31 extracts just the sign bit, so collapse to a
// single logical shift right by 31.
instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
  match(Set dst (URShiftI (RShiftI src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, $div1" %}
  ins_encode %{
    __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
  %}
  ins_pipe(ialu_reg_shift);
%}
10990
// Rounding step of signed divide-by-power-of-two: src + (src < 0 ? 1 : 0),
// i.e. src plus its sign bit, folded into one addw with an LSR-shifted
// second operand.
instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
  match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "addw $dst, $src, LSR $div1" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 31);
  %}
  ins_pipe(ialu_reg);
%}
11004
11005 // Long Divide
11006
// 64-bit signed divide via shared encoding aarch64_enc_div.
instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (DivL src1 src2));

  ins_cost(INSN_COST * 35);
  format %{ "sdiv   $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_div(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
11016
// (src >> 63) >>> 63 extracts just the sign bit, so collapse to a
// single logical shift right by 63.
instruct signExtractL(iRegLNoSp dst, iRegL src1, immL_63 div1, immL_63 div2) %{
  match(Set dst (URShiftL (RShiftL src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, $div1" %}
  ins_encode %{
    __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63);
  %}
  ins_pipe(ialu_reg_shift);
%}
11026
// Long variant of div2Round: src + (src < 0 ? 1 : 0), folded into one
// add with an LSR-shifted second operand.
instruct div2RoundL(iRegLNoSp dst, iRegL src, immL_63 div1, immL_63 div2) %{
  match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "add $dst, $src, $div1" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 63);
  %}
  ins_pipe(ialu_reg);
%}
11040
11041 // Integer Remainder
11042
// 32-bit remainder: sdivw then msubw (dst = src1 - (src1/src2)*src2),
// emitted by the shared encoding aarch64_enc_modw.
instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (ModI src1 src2));

  ins_cost(INSN_COST * 22);
  // Fixed debug format: second line previously read
  // "msubw($dst, ..." with a stray unbalanced parenthesis.
  format %{ "sdivw  rscratch1, $src1, $src2\n\t"
            "msubw  $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_modw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
11053
11054 // Long Remainder
11055
// 64-bit remainder: sdiv then msub (dst = src1 - (src1/src2)*src2),
// emitted by the shared encoding aarch64_enc_mod.
instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (ModL src1 src2));

  ins_cost(INSN_COST * 38);
  // Fixed debug format: stray unbalanced parenthesis removed and the
  // line separator made "\n\t" for consistency with modI.
  format %{ "sdiv   rscratch1, $src1, $src2\n\t"
            "msub   $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_mod(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
11066
11067 // Integer Shifts
11068
11069 // Shift Left Register
// 32-bit shift left by register (lslvw masks the shift count mod 32
// in hardware, matching Java semantics).
instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
11084
11085 // Shift Left Immediate
// 32-bit shift left by immediate; count masked to 0..31 as Java requires.
instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lslw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11100
11101 // Shift Right Logical Register
// 32-bit logical shift right by register.
instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
11116
11117 // Shift Right Logical Immediate
// 32-bit logical shift right by immediate; count masked to 0..31.
instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lsrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11132
11133 // Shift Right Arithmetic Register
// 32-bit arithmetic shift right by register.
instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
11148
11149 // Shift Right Arithmetic Immediate
// 32-bit arithmetic shift right by immediate; count masked to 0..31.
instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11164
11165 // Combined Int Mask and Right Shift (using UBFM)
11166 // TODO
11167
11168 // Long Shifts
11169
11170 // Shift Left Register
// 64-bit shift left by register (lslv masks the shift count mod 64
// in hardware, matching Java semantics).
instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
11185
11186 // Shift Left Immediate
// 64-bit shift left by immediate; count masked to 0..63 as Java requires.
instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsl(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11201
11202 // Shift Right Logical Register
// 64-bit logical shift right by register.
instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
11217
11218 // Shift Right Logical Immediate
// 64-bit logical shift right by immediate; count masked to 0..63.
instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11233
11234 // A special-case pattern for card table stores.
// Logical shift right of a pointer reinterpreted as a long (CastP2X);
// special-cased so card-table address computation needs no extra move.
instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
  match(Set dst (URShiftL (CastP2X src1) src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11249
11250 // Shift Right Arithmetic Register
// 64-bit arithmetic shift right by register.
instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
11265
11266 // Shift Right Arithmetic Immediate
// 64-bit arithmetic shift right by immediate; count masked to 0..63.
instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ asr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
11281
11282 // BEGIN This section of the file is automatically generated. Do not edit --------------
11283
11284 instruct regL_not_reg(iRegLNoSp dst,
11285 iRegL src1, immL_M1 m1,
11286 rFlagsReg cr) %{
11287 match(Set dst (XorL src1 m1));
11288 ins_cost(INSN_COST);
11289 format %{ "eon $dst, $src1, zr" %}
11290
11291 ins_encode %{
11292 __ eon(as_Register($dst$$reg),
11293 as_Register($src1$$reg),
11294 zr,
11295 Assembler::LSL, 0);
11296 %}
11297
11298 ins_pipe(ialu_reg);
11299 %}
11300 instruct regI_not_reg(iRegINoSp dst,
11301 iRegIorL2I src1, immI_M1 m1,
11302 rFlagsReg cr) %{
11303 match(Set dst (XorI src1 m1));
11304 ins_cost(INSN_COST);
11305 format %{ "eonw $dst, $src1, zr" %}
11306
11307 ins_encode %{
11308 __ eonw(as_Register($dst$$reg),
11309 as_Register($src1$$reg),
11310 zr,
11311 Assembler::LSL, 0);
11312 %}
11313
11314 ins_pipe(ialu_reg);
11315 %}
11316
11317 instruct AndI_reg_not_reg(iRegINoSp dst,
11318 iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
11319 rFlagsReg cr) %{
11320 match(Set dst (AndI src1 (XorI src2 m1)));
11321 ins_cost(INSN_COST);
11322 format %{ "bicw $dst, $src1, $src2" %}
11323
11324 ins_encode %{
11325 __ bicw(as_Register($dst$$reg),
11326 as_Register($src1$$reg),
11327 as_Register($src2$$reg),
11328 Assembler::LSL, 0);
11329 %}
11330
11331 ins_pipe(ialu_reg_reg);
11332 %}
11333
11334 instruct AndL_reg_not_reg(iRegLNoSp dst,
11335 iRegL src1, iRegL src2, immL_M1 m1,
11336 rFlagsReg cr) %{
11337 match(Set dst (AndL src1 (XorL src2 m1)));
11338 ins_cost(INSN_COST);
11339 format %{ "bic $dst, $src1, $src2" %}
11340
11341 ins_encode %{
11342 __ bic(as_Register($dst$$reg),
11343 as_Register($src1$$reg),
11344 as_Register($src2$$reg),
11345 Assembler::LSL, 0);
11346 %}
11347
11348 ins_pipe(ialu_reg_reg);
11349 %}
11350
11351 instruct OrI_reg_not_reg(iRegINoSp dst,
11352 iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
11353 rFlagsReg cr) %{
11354 match(Set dst (OrI src1 (XorI src2 m1)));
11355 ins_cost(INSN_COST);
11356 format %{ "ornw $dst, $src1, $src2" %}
11357
11358 ins_encode %{
11359 __ ornw(as_Register($dst$$reg),
11360 as_Register($src1$$reg),
11361 as_Register($src2$$reg),
11362 Assembler::LSL, 0);
11363 %}
11364
11365 ins_pipe(ialu_reg_reg);
11366 %}
11367
11368 instruct OrL_reg_not_reg(iRegLNoSp dst,
11369 iRegL src1, iRegL src2, immL_M1 m1,
11370 rFlagsReg cr) %{
11371 match(Set dst (OrL src1 (XorL src2 m1)));
11372 ins_cost(INSN_COST);
11373 format %{ "orn $dst, $src1, $src2" %}
11374
11375 ins_encode %{
11376 __ orn(as_Register($dst$$reg),
11377 as_Register($src1$$reg),
11378 as_Register($src2$$reg),
11379 Assembler::LSL, 0);
11380 %}
11381
11382 ins_pipe(ialu_reg_reg);
11383 %}
11384
11385 instruct XorI_reg_not_reg(iRegINoSp dst,
11386 iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
11387 rFlagsReg cr) %{
11388 match(Set dst (XorI m1 (XorI src2 src1)));
11389 ins_cost(INSN_COST);
11390 format %{ "eonw $dst, $src1, $src2" %}
11391
11392 ins_encode %{
11393 __ eonw(as_Register($dst$$reg),
11394 as_Register($src1$$reg),
11395 as_Register($src2$$reg),
11396 Assembler::LSL, 0);
11397 %}
11398
11399 ins_pipe(ialu_reg_reg);
11400 %}
11401
11402 instruct XorL_reg_not_reg(iRegLNoSp dst,
11403 iRegL src1, iRegL src2, immL_M1 m1,
11404 rFlagsReg cr) %{
11405 match(Set dst (XorL m1 (XorL src2 src1)));
11406 ins_cost(INSN_COST);
11407 format %{ "eon $dst, $src1, $src2" %}
11408
11409 ins_encode %{
11410 __ eon(as_Register($dst$$reg),
11411 as_Register($src1$$reg),
11412 as_Register($src2$$reg),
11413 Assembler::LSL, 0);
11414 %}
11415
11416 ins_pipe(ialu_reg_reg);
11417 %}
11418
11419 instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
11420 iRegIorL2I src1, iRegIorL2I src2,
11421 immI src3, immI_M1 src4, rFlagsReg cr) %{
11422 match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
11423 ins_cost(1.9 * INSN_COST);
11424 format %{ "bicw $dst, $src1, $src2, LSR $src3" %}
11425
11426 ins_encode %{
11427 __ bicw(as_Register($dst$$reg),
11428 as_Register($src1$$reg),
11429 as_Register($src2$$reg),
11430 Assembler::LSR,
11431 $src3$$constant & 0x1f);
11432 %}
11433
11434 ins_pipe(ialu_reg_reg_shift);
11435 %}
11436
11437 instruct AndL_reg_URShift_not_reg(iRegLNoSp dst,
11438 iRegL src1, iRegL src2,
11439 immI src3, immL_M1 src4, rFlagsReg cr) %{
11440 match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
11441 ins_cost(1.9 * INSN_COST);
11442 format %{ "bic $dst, $src1, $src2, LSR $src3" %}
11443
11444 ins_encode %{
11445 __ bic(as_Register($dst$$reg),
11446 as_Register($src1$$reg),
11447 as_Register($src2$$reg),
11448 Assembler::LSR,
11449 $src3$$constant & 0x3f);
11450 %}
11451
11452 ins_pipe(ialu_reg_reg_shift);
11453 %}
11454
11455 instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
11456 iRegIorL2I src1, iRegIorL2I src2,
11457 immI src3, immI_M1 src4, rFlagsReg cr) %{
11458 match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
11459 ins_cost(1.9 * INSN_COST);
11460 format %{ "bicw $dst, $src1, $src2, ASR $src3" %}
11461
11462 ins_encode %{
11463 __ bicw(as_Register($dst$$reg),
11464 as_Register($src1$$reg),
11465 as_Register($src2$$reg),
11466 Assembler::ASR,
11467 $src3$$constant & 0x1f);
11468 %}
11469
11470 ins_pipe(ialu_reg_reg_shift);
11471 %}
11472
11473 instruct AndL_reg_RShift_not_reg(iRegLNoSp dst,
11474 iRegL src1, iRegL src2,
11475 immI src3, immL_M1 src4, rFlagsReg cr) %{
11476 match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
11477 ins_cost(1.9 * INSN_COST);
11478 format %{ "bic $dst, $src1, $src2, ASR $src3" %}
11479
11480 ins_encode %{
11481 __ bic(as_Register($dst$$reg),
11482 as_Register($src1$$reg),
11483 as_Register($src2$$reg),
11484 Assembler::ASR,
11485 $src3$$constant & 0x3f);
11486 %}
11487
11488 ins_pipe(ialu_reg_reg_shift);
11489 %}
11490
11491 instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
11492 iRegIorL2I src1, iRegIorL2I src2,
11493 immI src3, immI_M1 src4, rFlagsReg cr) %{
11494 match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
11495 ins_cost(1.9 * INSN_COST);
11496 format %{ "bicw $dst, $src1, $src2, LSL $src3" %}
11497
11498 ins_encode %{
11499 __ bicw(as_Register($dst$$reg),
11500 as_Register($src1$$reg),
11501 as_Register($src2$$reg),
11502 Assembler::LSL,
11503 $src3$$constant & 0x1f);
11504 %}
11505
11506 ins_pipe(ialu_reg_reg_shift);
11507 %}
11508
11509 instruct AndL_reg_LShift_not_reg(iRegLNoSp dst,
11510 iRegL src1, iRegL src2,
11511 immI src3, immL_M1 src4, rFlagsReg cr) %{
11512 match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
11513 ins_cost(1.9 * INSN_COST);
11514 format %{ "bic $dst, $src1, $src2, LSL $src3" %}
11515
11516 ins_encode %{
11517 __ bic(as_Register($dst$$reg),
11518 as_Register($src1$$reg),
11519 as_Register($src2$$reg),
11520 Assembler::LSL,
11521 $src3$$constant & 0x3f);
11522 %}
11523
11524 ins_pipe(ialu_reg_reg_shift);
11525 %}
11526
11527 instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
11528 iRegIorL2I src1, iRegIorL2I src2,
11529 immI src3, immI_M1 src4, rFlagsReg cr) %{
11530 match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
11531 ins_cost(1.9 * INSN_COST);
11532 format %{ "eonw $dst, $src1, $src2, LSR $src3" %}
11533
11534 ins_encode %{
11535 __ eonw(as_Register($dst$$reg),
11536 as_Register($src1$$reg),
11537 as_Register($src2$$reg),
11538 Assembler::LSR,
11539 $src3$$constant & 0x1f);
11540 %}
11541
11542 ins_pipe(ialu_reg_reg_shift);
11543 %}
11544
11545 instruct XorL_reg_URShift_not_reg(iRegLNoSp dst,
11546 iRegL src1, iRegL src2,
11547 immI src3, immL_M1 src4, rFlagsReg cr) %{
11548 match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
11549 ins_cost(1.9 * INSN_COST);
11550 format %{ "eon $dst, $src1, $src2, LSR $src3" %}
11551
11552 ins_encode %{
11553 __ eon(as_Register($dst$$reg),
11554 as_Register($src1$$reg),
11555 as_Register($src2$$reg),
11556 Assembler::LSR,
11557 $src3$$constant & 0x3f);
11558 %}
11559
11560 ins_pipe(ialu_reg_reg_shift);
11561 %}
11562
11563 instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
11564 iRegIorL2I src1, iRegIorL2I src2,
11565 immI src3, immI_M1 src4, rFlagsReg cr) %{
11566 match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
11567 ins_cost(1.9 * INSN_COST);
11568 format %{ "eonw $dst, $src1, $src2, ASR $src3" %}
11569
11570 ins_encode %{
11571 __ eonw(as_Register($dst$$reg),
11572 as_Register($src1$$reg),
11573 as_Register($src2$$reg),
11574 Assembler::ASR,
11575 $src3$$constant & 0x1f);
11576 %}
11577
11578 ins_pipe(ialu_reg_reg_shift);
11579 %}
11580
11581 instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
11582 iRegL src1, iRegL src2,
11583 immI src3, immL_M1 src4, rFlagsReg cr) %{
11584 match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
11585 ins_cost(1.9 * INSN_COST);
11586 format %{ "eon $dst, $src1, $src2, ASR $src3" %}
11587
11588 ins_encode %{
11589 __ eon(as_Register($dst$$reg),
11590 as_Register($src1$$reg),
11591 as_Register($src2$$reg),
11592 Assembler::ASR,
11593 $src3$$constant & 0x3f);
11594 %}
11595
11596 ins_pipe(ialu_reg_reg_shift);
11597 %}
11598
11599 instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
11600 iRegIorL2I src1, iRegIorL2I src2,
11601 immI src3, immI_M1 src4, rFlagsReg cr) %{
11602 match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
11603 ins_cost(1.9 * INSN_COST);
11604 format %{ "eonw $dst, $src1, $src2, LSL $src3" %}
11605
11606 ins_encode %{
11607 __ eonw(as_Register($dst$$reg),
11608 as_Register($src1$$reg),
11609 as_Register($src2$$reg),
11610 Assembler::LSL,
11611 $src3$$constant & 0x1f);
11612 %}
11613
11614 ins_pipe(ialu_reg_reg_shift);
11615 %}
11616
11617 instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
11618 iRegL src1, iRegL src2,
11619 immI src3, immL_M1 src4, rFlagsReg cr) %{
11620 match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
11621 ins_cost(1.9 * INSN_COST);
11622 format %{ "eon $dst, $src1, $src2, LSL $src3" %}
11623
11624 ins_encode %{
11625 __ eon(as_Register($dst$$reg),
11626 as_Register($src1$$reg),
11627 as_Register($src2$$reg),
11628 Assembler::LSL,
11629 $src3$$constant & 0x3f);
11630 %}
11631
11632 ins_pipe(ialu_reg_reg_shift);
11633 %}
11634
11635 instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
11636 iRegIorL2I src1, iRegIorL2I src2,
11637 immI src3, immI_M1 src4, rFlagsReg cr) %{
11638 match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
11639 ins_cost(1.9 * INSN_COST);
11640 format %{ "ornw $dst, $src1, $src2, LSR $src3" %}
11641
11642 ins_encode %{
11643 __ ornw(as_Register($dst$$reg),
11644 as_Register($src1$$reg),
11645 as_Register($src2$$reg),
11646 Assembler::LSR,
11647 $src3$$constant & 0x1f);
11648 %}
11649
11650 ins_pipe(ialu_reg_reg_shift);
11651 %}
11652
11653 instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
11654 iRegL src1, iRegL src2,
11655 immI src3, immL_M1 src4, rFlagsReg cr) %{
11656 match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
11657 ins_cost(1.9 * INSN_COST);
11658 format %{ "orn $dst, $src1, $src2, LSR $src3" %}
11659
11660 ins_encode %{
11661 __ orn(as_Register($dst$$reg),
11662 as_Register($src1$$reg),
11663 as_Register($src2$$reg),
11664 Assembler::LSR,
11665 $src3$$constant & 0x3f);
11666 %}
11667
11668 ins_pipe(ialu_reg_reg_shift);
11669 %}
11670
11671 instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
11672 iRegIorL2I src1, iRegIorL2I src2,
11673 immI src3, immI_M1 src4, rFlagsReg cr) %{
11674 match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
11675 ins_cost(1.9 * INSN_COST);
11676 format %{ "ornw $dst, $src1, $src2, ASR $src3" %}
11677
11678 ins_encode %{
11679 __ ornw(as_Register($dst$$reg),
11680 as_Register($src1$$reg),
11681 as_Register($src2$$reg),
11682 Assembler::ASR,
11683 $src3$$constant & 0x1f);
11684 %}
11685
11686 ins_pipe(ialu_reg_reg_shift);
11687 %}
11688
11689 instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
11690 iRegL src1, iRegL src2,
11691 immI src3, immL_M1 src4, rFlagsReg cr) %{
11692 match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
11693 ins_cost(1.9 * INSN_COST);
11694 format %{ "orn $dst, $src1, $src2, ASR $src3" %}
11695
11696 ins_encode %{
11697 __ orn(as_Register($dst$$reg),
11698 as_Register($src1$$reg),
11699 as_Register($src2$$reg),
11700 Assembler::ASR,
11701 $src3$$constant & 0x3f);
11702 %}
11703
11704 ins_pipe(ialu_reg_reg_shift);
11705 %}
11706
11707 instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
11708 iRegIorL2I src1, iRegIorL2I src2,
11709 immI src3, immI_M1 src4, rFlagsReg cr) %{
11710 match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
11711 ins_cost(1.9 * INSN_COST);
11712 format %{ "ornw $dst, $src1, $src2, LSL $src3" %}
11713
11714 ins_encode %{
11715 __ ornw(as_Register($dst$$reg),
11716 as_Register($src1$$reg),
11717 as_Register($src2$$reg),
11718 Assembler::LSL,
11719 $src3$$constant & 0x1f);
11720 %}
11721
11722 ins_pipe(ialu_reg_reg_shift);
11723 %}
11724
11725 instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
11726 iRegL src1, iRegL src2,
11727 immI src3, immL_M1 src4, rFlagsReg cr) %{
11728 match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
11729 ins_cost(1.9 * INSN_COST);
11730 format %{ "orn $dst, $src1, $src2, LSL $src3" %}
11731
11732 ins_encode %{
11733 __ orn(as_Register($dst$$reg),
11734 as_Register($src1$$reg),
11735 as_Register($src2$$reg),
11736 Assembler::LSL,
11737 $src3$$constant & 0x3f);
11738 %}
11739
11740 ins_pipe(ialu_reg_reg_shift);
11741 %}
11742
// ---- ALU operation with constant-shifted second operand --------------------
// Each instruct below folds a constant shift (LSR/ASR/LSL) of the second
// input into the shifted-register form of a single AArch64 ALU instruction
// (and/eor/orr/add/sub).  The 32-bit "w" variants mask the shift amount with
// 0x1f, the 64-bit variants with 0x3f, matching the ideal shift semantics.
// NOTE(review): rFlagsReg cr appears as an operand but no effect is declared
// and the emitted instructions do not set flags — presumably harmless; verify
// against the ADLC conventions used elsewhere in this file.
11743 instruct AndI_reg_URShift_reg(iRegINoSp dst,
11744 iRegIorL2I src1, iRegIorL2I src2,
11745 immI src3, rFlagsReg cr) %{
11746 match(Set dst (AndI src1 (URShiftI src2 src3)));
11747
11748 ins_cost(1.9 * INSN_COST);
11749 format %{ "andw $dst, $src1, $src2, LSR $src3" %}
11750
11751 ins_encode %{
11752 __ andw(as_Register($dst$$reg),
11753 as_Register($src1$$reg),
11754 as_Register($src2$$reg),
11755 Assembler::LSR,
11756 $src3$$constant & 0x1f);
11757 %}
11758
11759 ins_pipe(ialu_reg_reg_shift);
11760 %}
11761
11762 instruct AndL_reg_URShift_reg(iRegLNoSp dst,
11763 iRegL src1, iRegL src2,
11764 immI src3, rFlagsReg cr) %{
11765 match(Set dst (AndL src1 (URShiftL src2 src3)));
11766
11767 ins_cost(1.9 * INSN_COST);
11768 format %{ "andr $dst, $src1, $src2, LSR $src3" %}
11769
11770 ins_encode %{
11771 __ andr(as_Register($dst$$reg),
11772 as_Register($src1$$reg),
11773 as_Register($src2$$reg),
11774 Assembler::LSR,
11775 $src3$$constant & 0x3f);
11776 %}
11777
11778 ins_pipe(ialu_reg_reg_shift);
11779 %}
11780
11781 instruct AndI_reg_RShift_reg(iRegINoSp dst,
11782 iRegIorL2I src1, iRegIorL2I src2,
11783 immI src3, rFlagsReg cr) %{
11784 match(Set dst (AndI src1 (RShiftI src2 src3)));
11785
11786 ins_cost(1.9 * INSN_COST);
11787 format %{ "andw $dst, $src1, $src2, ASR $src3" %}
11788
11789 ins_encode %{
11790 __ andw(as_Register($dst$$reg),
11791 as_Register($src1$$reg),
11792 as_Register($src2$$reg),
11793 Assembler::ASR,
11794 $src3$$constant & 0x1f);
11795 %}
11796
11797 ins_pipe(ialu_reg_reg_shift);
11798 %}
11799
11800 instruct AndL_reg_RShift_reg(iRegLNoSp dst,
11801 iRegL src1, iRegL src2,
11802 immI src3, rFlagsReg cr) %{
11803 match(Set dst (AndL src1 (RShiftL src2 src3)));
11804
11805 ins_cost(1.9 * INSN_COST);
11806 format %{ "andr $dst, $src1, $src2, ASR $src3" %}
11807
11808 ins_encode %{
11809 __ andr(as_Register($dst$$reg),
11810 as_Register($src1$$reg),
11811 as_Register($src2$$reg),
11812 Assembler::ASR,
11813 $src3$$constant & 0x3f);
11814 %}
11815
11816 ins_pipe(ialu_reg_reg_shift);
11817 %}
11818
11819 instruct AndI_reg_LShift_reg(iRegINoSp dst,
11820 iRegIorL2I src1, iRegIorL2I src2,
11821 immI src3, rFlagsReg cr) %{
11822 match(Set dst (AndI src1 (LShiftI src2 src3)));
11823
11824 ins_cost(1.9 * INSN_COST);
11825 format %{ "andw $dst, $src1, $src2, LSL $src3" %}
11826
11827 ins_encode %{
11828 __ andw(as_Register($dst$$reg),
11829 as_Register($src1$$reg),
11830 as_Register($src2$$reg),
11831 Assembler::LSL,
11832 $src3$$constant & 0x1f);
11833 %}
11834
11835 ins_pipe(ialu_reg_reg_shift);
11836 %}
11837
11838 instruct AndL_reg_LShift_reg(iRegLNoSp dst,
11839 iRegL src1, iRegL src2,
11840 immI src3, rFlagsReg cr) %{
11841 match(Set dst (AndL src1 (LShiftL src2 src3)));
11842
11843 ins_cost(1.9 * INSN_COST);
11844 format %{ "andr $dst, $src1, $src2, LSL $src3" %}
11845
11846 ins_encode %{
11847 __ andr(as_Register($dst$$reg),
11848 as_Register($src1$$reg),
11849 as_Register($src2$$reg),
11850 Assembler::LSL,
11851 $src3$$constant & 0x3f);
11852 %}
11853
11854 ins_pipe(ialu_reg_reg_shift);
11855 %}
11856
// --- Xor with shifted second operand (eorw / eor) ---
11857 instruct XorI_reg_URShift_reg(iRegINoSp dst,
11858 iRegIorL2I src1, iRegIorL2I src2,
11859 immI src3, rFlagsReg cr) %{
11860 match(Set dst (XorI src1 (URShiftI src2 src3)));
11861
11862 ins_cost(1.9 * INSN_COST);
11863 format %{ "eorw $dst, $src1, $src2, LSR $src3" %}
11864
11865 ins_encode %{
11866 __ eorw(as_Register($dst$$reg),
11867 as_Register($src1$$reg),
11868 as_Register($src2$$reg),
11869 Assembler::LSR,
11870 $src3$$constant & 0x1f);
11871 %}
11872
11873 ins_pipe(ialu_reg_reg_shift);
11874 %}
11875
11876 instruct XorL_reg_URShift_reg(iRegLNoSp dst,
11877 iRegL src1, iRegL src2,
11878 immI src3, rFlagsReg cr) %{
11879 match(Set dst (XorL src1 (URShiftL src2 src3)));
11880
11881 ins_cost(1.9 * INSN_COST);
11882 format %{ "eor $dst, $src1, $src2, LSR $src3" %}
11883
11884 ins_encode %{
11885 __ eor(as_Register($dst$$reg),
11886 as_Register($src1$$reg),
11887 as_Register($src2$$reg),
11888 Assembler::LSR,
11889 $src3$$constant & 0x3f);
11890 %}
11891
11892 ins_pipe(ialu_reg_reg_shift);
11893 %}
11894
11895 instruct XorI_reg_RShift_reg(iRegINoSp dst,
11896 iRegIorL2I src1, iRegIorL2I src2,
11897 immI src3, rFlagsReg cr) %{
11898 match(Set dst (XorI src1 (RShiftI src2 src3)));
11899
11900 ins_cost(1.9 * INSN_COST);
11901 format %{ "eorw $dst, $src1, $src2, ASR $src3" %}
11902
11903 ins_encode %{
11904 __ eorw(as_Register($dst$$reg),
11905 as_Register($src1$$reg),
11906 as_Register($src2$$reg),
11907 Assembler::ASR,
11908 $src3$$constant & 0x1f);
11909 %}
11910
11911 ins_pipe(ialu_reg_reg_shift);
11912 %}
11913
11914 instruct XorL_reg_RShift_reg(iRegLNoSp dst,
11915 iRegL src1, iRegL src2,
11916 immI src3, rFlagsReg cr) %{
11917 match(Set dst (XorL src1 (RShiftL src2 src3)));
11918
11919 ins_cost(1.9 * INSN_COST);
11920 format %{ "eor $dst, $src1, $src2, ASR $src3" %}
11921
11922 ins_encode %{
11923 __ eor(as_Register($dst$$reg),
11924 as_Register($src1$$reg),
11925 as_Register($src2$$reg),
11926 Assembler::ASR,
11927 $src3$$constant & 0x3f);
11928 %}
11929
11930 ins_pipe(ialu_reg_reg_shift);
11931 %}
11932
11933 instruct XorI_reg_LShift_reg(iRegINoSp dst,
11934 iRegIorL2I src1, iRegIorL2I src2,
11935 immI src3, rFlagsReg cr) %{
11936 match(Set dst (XorI src1 (LShiftI src2 src3)));
11937
11938 ins_cost(1.9 * INSN_COST);
11939 format %{ "eorw $dst, $src1, $src2, LSL $src3" %}
11940
11941 ins_encode %{
11942 __ eorw(as_Register($dst$$reg),
11943 as_Register($src1$$reg),
11944 as_Register($src2$$reg),
11945 Assembler::LSL,
11946 $src3$$constant & 0x1f);
11947 %}
11948
11949 ins_pipe(ialu_reg_reg_shift);
11950 %}
11951
11952 instruct XorL_reg_LShift_reg(iRegLNoSp dst,
11953 iRegL src1, iRegL src2,
11954 immI src3, rFlagsReg cr) %{
11955 match(Set dst (XorL src1 (LShiftL src2 src3)));
11956
11957 ins_cost(1.9 * INSN_COST);
11958 format %{ "eor $dst, $src1, $src2, LSL $src3" %}
11959
11960 ins_encode %{
11961 __ eor(as_Register($dst$$reg),
11962 as_Register($src1$$reg),
11963 as_Register($src2$$reg),
11964 Assembler::LSL,
11965 $src3$$constant & 0x3f);
11966 %}
11967
11968 ins_pipe(ialu_reg_reg_shift);
11969 %}
11970
// --- Or with shifted second operand (orrw / orr) ---
11971 instruct OrI_reg_URShift_reg(iRegINoSp dst,
11972 iRegIorL2I src1, iRegIorL2I src2,
11973 immI src3, rFlagsReg cr) %{
11974 match(Set dst (OrI src1 (URShiftI src2 src3)));
11975
11976 ins_cost(1.9 * INSN_COST);
11977 format %{ "orrw $dst, $src1, $src2, LSR $src3" %}
11978
11979 ins_encode %{
11980 __ orrw(as_Register($dst$$reg),
11981 as_Register($src1$$reg),
11982 as_Register($src2$$reg),
11983 Assembler::LSR,
11984 $src3$$constant & 0x1f);
11985 %}
11986
11987 ins_pipe(ialu_reg_reg_shift);
11988 %}
11989
11990 instruct OrL_reg_URShift_reg(iRegLNoSp dst,
11991 iRegL src1, iRegL src2,
11992 immI src3, rFlagsReg cr) %{
11993 match(Set dst (OrL src1 (URShiftL src2 src3)));
11994
11995 ins_cost(1.9 * INSN_COST);
11996 format %{ "orr $dst, $src1, $src2, LSR $src3" %}
11997
11998 ins_encode %{
11999 __ orr(as_Register($dst$$reg),
12000 as_Register($src1$$reg),
12001 as_Register($src2$$reg),
12002 Assembler::LSR,
12003 $src3$$constant & 0x3f);
12004 %}
12005
12006 ins_pipe(ialu_reg_reg_shift);
12007 %}
12008
12009 instruct OrI_reg_RShift_reg(iRegINoSp dst,
12010 iRegIorL2I src1, iRegIorL2I src2,
12011 immI src3, rFlagsReg cr) %{
12012 match(Set dst (OrI src1 (RShiftI src2 src3)));
12013
12014 ins_cost(1.9 * INSN_COST);
12015 format %{ "orrw $dst, $src1, $src2, ASR $src3" %}
12016
12017 ins_encode %{
12018 __ orrw(as_Register($dst$$reg),
12019 as_Register($src1$$reg),
12020 as_Register($src2$$reg),
12021 Assembler::ASR,
12022 $src3$$constant & 0x1f);
12023 %}
12024
12025 ins_pipe(ialu_reg_reg_shift);
12026 %}
12027
12028 instruct OrL_reg_RShift_reg(iRegLNoSp dst,
12029 iRegL src1, iRegL src2,
12030 immI src3, rFlagsReg cr) %{
12031 match(Set dst (OrL src1 (RShiftL src2 src3)));
12032
12033 ins_cost(1.9 * INSN_COST);
12034 format %{ "orr $dst, $src1, $src2, ASR $src3" %}
12035
12036 ins_encode %{
12037 __ orr(as_Register($dst$$reg),
12038 as_Register($src1$$reg),
12039 as_Register($src2$$reg),
12040 Assembler::ASR,
12041 $src3$$constant & 0x3f);
12042 %}
12043
12044 ins_pipe(ialu_reg_reg_shift);
12045 %}
12046
12047 instruct OrI_reg_LShift_reg(iRegINoSp dst,
12048 iRegIorL2I src1, iRegIorL2I src2,
12049 immI src3, rFlagsReg cr) %{
12050 match(Set dst (OrI src1 (LShiftI src2 src3)));
12051
12052 ins_cost(1.9 * INSN_COST);
12053 format %{ "orrw $dst, $src1, $src2, LSL $src3" %}
12054
12055 ins_encode %{
12056 __ orrw(as_Register($dst$$reg),
12057 as_Register($src1$$reg),
12058 as_Register($src2$$reg),
12059 Assembler::LSL,
12060 $src3$$constant & 0x1f);
12061 %}
12062
12063 ins_pipe(ialu_reg_reg_shift);
12064 %}
12065
12066 instruct OrL_reg_LShift_reg(iRegLNoSp dst,
12067 iRegL src1, iRegL src2,
12068 immI src3, rFlagsReg cr) %{
12069 match(Set dst (OrL src1 (LShiftL src2 src3)));
12070
12071 ins_cost(1.9 * INSN_COST);
12072 format %{ "orr $dst, $src1, $src2, LSL $src3" %}
12073
12074 ins_encode %{
12075 __ orr(as_Register($dst$$reg),
12076 as_Register($src1$$reg),
12077 as_Register($src2$$reg),
12078 Assembler::LSL,
12079 $src3$$constant & 0x3f);
12080 %}
12081
12082 ins_pipe(ialu_reg_reg_shift);
12083 %}
12084
// --- Add with shifted second operand (addw / add) ---
12085 instruct AddI_reg_URShift_reg(iRegINoSp dst,
12086 iRegIorL2I src1, iRegIorL2I src2,
12087 immI src3, rFlagsReg cr) %{
12088 match(Set dst (AddI src1 (URShiftI src2 src3)));
12089
12090 ins_cost(1.9 * INSN_COST);
12091 format %{ "addw $dst, $src1, $src2, LSR $src3" %}
12092
12093 ins_encode %{
12094 __ addw(as_Register($dst$$reg),
12095 as_Register($src1$$reg),
12096 as_Register($src2$$reg),
12097 Assembler::LSR,
12098 $src3$$constant & 0x1f);
12099 %}
12100
12101 ins_pipe(ialu_reg_reg_shift);
12102 %}
12103
12104 instruct AddL_reg_URShift_reg(iRegLNoSp dst,
12105 iRegL src1, iRegL src2,
12106 immI src3, rFlagsReg cr) %{
12107 match(Set dst (AddL src1 (URShiftL src2 src3)));
12108
12109 ins_cost(1.9 * INSN_COST);
12110 format %{ "add $dst, $src1, $src2, LSR $src3" %}
12111
12112 ins_encode %{
12113 __ add(as_Register($dst$$reg),
12114 as_Register($src1$$reg),
12115 as_Register($src2$$reg),
12116 Assembler::LSR,
12117 $src3$$constant & 0x3f);
12118 %}
12119
12120 ins_pipe(ialu_reg_reg_shift);
12121 %}
12122
12123 instruct AddI_reg_RShift_reg(iRegINoSp dst,
12124 iRegIorL2I src1, iRegIorL2I src2,
12125 immI src3, rFlagsReg cr) %{
12126 match(Set dst (AddI src1 (RShiftI src2 src3)));
12127
12128 ins_cost(1.9 * INSN_COST);
12129 format %{ "addw $dst, $src1, $src2, ASR $src3" %}
12130
12131 ins_encode %{
12132 __ addw(as_Register($dst$$reg),
12133 as_Register($src1$$reg),
12134 as_Register($src2$$reg),
12135 Assembler::ASR,
12136 $src3$$constant & 0x1f);
12137 %}
12138
12139 ins_pipe(ialu_reg_reg_shift);
12140 %}
12141
12142 instruct AddL_reg_RShift_reg(iRegLNoSp dst,
12143 iRegL src1, iRegL src2,
12144 immI src3, rFlagsReg cr) %{
12145 match(Set dst (AddL src1 (RShiftL src2 src3)));
12146
12147 ins_cost(1.9 * INSN_COST);
12148 format %{ "add $dst, $src1, $src2, ASR $src3" %}
12149
12150 ins_encode %{
12151 __ add(as_Register($dst$$reg),
12152 as_Register($src1$$reg),
12153 as_Register($src2$$reg),
12154 Assembler::ASR,
12155 $src3$$constant & 0x3f);
12156 %}
12157
12158 ins_pipe(ialu_reg_reg_shift);
12159 %}
12160
12161 instruct AddI_reg_LShift_reg(iRegINoSp dst,
12162 iRegIorL2I src1, iRegIorL2I src2,
12163 immI src3, rFlagsReg cr) %{
12164 match(Set dst (AddI src1 (LShiftI src2 src3)));
12165
12166 ins_cost(1.9 * INSN_COST);
12167 format %{ "addw $dst, $src1, $src2, LSL $src3" %}
12168
12169 ins_encode %{
12170 __ addw(as_Register($dst$$reg),
12171 as_Register($src1$$reg),
12172 as_Register($src2$$reg),
12173 Assembler::LSL,
12174 $src3$$constant & 0x1f);
12175 %}
12176
12177 ins_pipe(ialu_reg_reg_shift);
12178 %}
12179
12180 instruct AddL_reg_LShift_reg(iRegLNoSp dst,
12181 iRegL src1, iRegL src2,
12182 immI src3, rFlagsReg cr) %{
12183 match(Set dst (AddL src1 (LShiftL src2 src3)));
12184
12185 ins_cost(1.9 * INSN_COST);
12186 format %{ "add $dst, $src1, $src2, LSL $src3" %}
12187
12188 ins_encode %{
12189 __ add(as_Register($dst$$reg),
12190 as_Register($src1$$reg),
12191 as_Register($src2$$reg),
12192 Assembler::LSL,
12193 $src3$$constant & 0x3f);
12194 %}
12195
12196 ins_pipe(ialu_reg_reg_shift);
12197 %}
12198
// --- Sub with shifted second operand (subw / sub) ---
12199 instruct SubI_reg_URShift_reg(iRegINoSp dst,
12200 iRegIorL2I src1, iRegIorL2I src2,
12201 immI src3, rFlagsReg cr) %{
12202 match(Set dst (SubI src1 (URShiftI src2 src3)));
12203
12204 ins_cost(1.9 * INSN_COST);
12205 format %{ "subw $dst, $src1, $src2, LSR $src3" %}
12206
12207 ins_encode %{
12208 __ subw(as_Register($dst$$reg),
12209 as_Register($src1$$reg),
12210 as_Register($src2$$reg),
12211 Assembler::LSR,
12212 $src3$$constant & 0x1f);
12213 %}
12214
12215 ins_pipe(ialu_reg_reg_shift);
12216 %}
12217
12218 instruct SubL_reg_URShift_reg(iRegLNoSp dst,
12219 iRegL src1, iRegL src2,
12220 immI src3, rFlagsReg cr) %{
12221 match(Set dst (SubL src1 (URShiftL src2 src3)));
12222
12223 ins_cost(1.9 * INSN_COST);
12224 format %{ "sub $dst, $src1, $src2, LSR $src3" %}
12225
12226 ins_encode %{
12227 __ sub(as_Register($dst$$reg),
12228 as_Register($src1$$reg),
12229 as_Register($src2$$reg),
12230 Assembler::LSR,
12231 $src3$$constant & 0x3f);
12232 %}
12233
12234 ins_pipe(ialu_reg_reg_shift);
12235 %}
12236
12237 instruct SubI_reg_RShift_reg(iRegINoSp dst,
12238 iRegIorL2I src1, iRegIorL2I src2,
12239 immI src3, rFlagsReg cr) %{
12240 match(Set dst (SubI src1 (RShiftI src2 src3)));
12241
12242 ins_cost(1.9 * INSN_COST);
12243 format %{ "subw $dst, $src1, $src2, ASR $src3" %}
12244
12245 ins_encode %{
12246 __ subw(as_Register($dst$$reg),
12247 as_Register($src1$$reg),
12248 as_Register($src2$$reg),
12249 Assembler::ASR,
12250 $src3$$constant & 0x1f);
12251 %}
12252
12253 ins_pipe(ialu_reg_reg_shift);
12254 %}
12255
12256 instruct SubL_reg_RShift_reg(iRegLNoSp dst,
12257 iRegL src1, iRegL src2,
12258 immI src3, rFlagsReg cr) %{
12259 match(Set dst (SubL src1 (RShiftL src2 src3)));
12260
12261 ins_cost(1.9 * INSN_COST);
12262 format %{ "sub $dst, $src1, $src2, ASR $src3" %}
12263
12264 ins_encode %{
12265 __ sub(as_Register($dst$$reg),
12266 as_Register($src1$$reg),
12267 as_Register($src2$$reg),
12268 Assembler::ASR,
12269 $src3$$constant & 0x3f);
12270 %}
12271
12272 ins_pipe(ialu_reg_reg_shift);
12273 %}
12274
12275 instruct SubI_reg_LShift_reg(iRegINoSp dst,
12276 iRegIorL2I src1, iRegIorL2I src2,
12277 immI src3, rFlagsReg cr) %{
12278 match(Set dst (SubI src1 (LShiftI src2 src3)));
12279
12280 ins_cost(1.9 * INSN_COST);
12281 format %{ "subw $dst, $src1, $src2, LSL $src3" %}
12282
12283 ins_encode %{
12284 __ subw(as_Register($dst$$reg),
12285 as_Register($src1$$reg),
12286 as_Register($src2$$reg),
12287 Assembler::LSL,
12288 $src3$$constant & 0x1f);
12289 %}
12290
12291 ins_pipe(ialu_reg_reg_shift);
12292 %}
12293
12294 instruct SubL_reg_LShift_reg(iRegLNoSp dst,
12295 iRegL src1, iRegL src2,
12296 immI src3, rFlagsReg cr) %{
12297 match(Set dst (SubL src1 (LShiftL src2 src3)));
12298
12299 ins_cost(1.9 * INSN_COST);
12300 format %{ "sub $dst, $src1, $src2, LSL $src3" %}
12301
12302 ins_encode %{
12303 __ sub(as_Register($dst$$reg),
12304 as_Register($src1$$reg),
12305 as_Register($src2$$reg),
12306 Assembler::LSL,
12307 $src3$$constant & 0x3f);
12308 %}
12309
12310 ins_pipe(ialu_reg_reg_shift);
12311 %}
12312
12313
12314
12315 // Shift Left followed by Shift Right.
12316 // This idiom is used by the compiler for the i2b bytecode etc.
12317 instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
12318 %{
12319 match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
12320 // Make sure we are not going to exceed what sbfm can do.
12321 predicate((unsigned int)n->in(2)->get_int() <= 63
12322 && (unsigned int)n->in(1)->in(2)->get_int() <= 63);
12323
12324 ins_cost(INSN_COST * 2);
12325 format %{ "sbfm $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
12326 ins_encode %{
12327 int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
// Map (src << lshift) >> rshift onto a single signed bitfield move:
// s selects the top source bit of the field, r is the (mod-64) rotation.
12328 int s = 63 - lshift;
12329 int r = (rshift - lshift) & 63;
12330 __ sbfm(as_Register($dst$$reg),
12331 as_Register($src$$reg),
12332 r, s);
12333 %}
12334
12335 ins_pipe(ialu_reg_shift);
12336 %}
12337
12338 // Shift Left followed by Shift Right.
12339 // This idiom is used by the compiler for the i2b bytecode etc.
12340 instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
12341 %{
12342 match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
12343 // Make sure we are not going to exceed what sbfmw can do.
12344 predicate((unsigned int)n->in(2)->get_int() <= 31
12345 && (unsigned int)n->in(1)->in(2)->get_int() <= 31);
12346
12347 ins_cost(INSN_COST * 2);
12348 format %{ "sbfmw $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
12349 ins_encode %{
12350 int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
// 32-bit analogue of sbfmL: field top bit and rotation computed mod 32.
12351 int s = 31 - lshift;
12352 int r = (rshift - lshift) & 31;
12353 __ sbfmw(as_Register($dst$$reg),
12354 as_Register($src$$reg),
12355 r, s);
12356 %}
12357
12358 ins_pipe(ialu_reg_shift);
12359 %}
12360
12361 // Shift Left followed by Shift Right.
12362 // This idiom is used by the compiler for the i2b bytecode etc.
12363 instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
12364 %{
12365 match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
12366 // Make sure we are not going to exceed what ubfm can do.
12367 predicate((unsigned int)n->in(2)->get_int() <= 63
12368 && (unsigned int)n->in(1)->in(2)->get_int() <= 63);
12369
12370 ins_cost(INSN_COST * 2);
12371 format %{ "ubfm $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
12372 ins_encode %{
12373 int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
// Same r/s derivation as sbfmL, but unsigned (zero-extending) bitfield move.
12374 int s = 63 - lshift;
12375 int r = (rshift - lshift) & 63;
12376 __ ubfm(as_Register($dst$$reg),
12377 as_Register($src$$reg),
12378 r, s);
12379 %}
12380
12381 ins_pipe(ialu_reg_shift);
12382 %}
12383
12384 // Shift Left followed by Shift Right.
12385 // This idiom is used by the compiler for the i2b bytecode etc.
12386 instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
12387 %{
12388 match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
12389 // Make sure we are not going to exceed what ubfmw can do.
12390 predicate((unsigned int)n->in(2)->get_int() <= 31
12391 && (unsigned int)n->in(1)->in(2)->get_int() <= 31);
12392
12393 ins_cost(INSN_COST * 2);
12394 format %{ "ubfmw $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
12395 ins_encode %{
12396 int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
// 32-bit analogue of ubfmL: unsigned bitfield move, mod-32 arithmetic.
12397 int s = 31 - lshift;
12398 int r = (rshift - lshift) & 31;
12399 __ ubfmw(as_Register($dst$$reg),
12400 as_Register($src$$reg),
12401 r, s);
12402 %}
12403
12404 ins_pipe(ialu_reg_shift);
12405 %}
12406 // Bitfield extract with shift & mask
12407
// (src >>> rshift) & mask  -->  ubfxw dst, src, rshift, log2(mask+1).
// immI_bitmask constrains mask so that mask+1 is a power of two, which makes
// exact_log2(mask+1) the field width in bits.
// NOTE(review): the format string omits $rshift, so disassembly comments show
// only dst/src/mask — cosmetic only; the encoding below uses rshift correctly.
12408 instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
12409 %{
12410 match(Set dst (AndI (URShiftI src rshift) mask));
12411
12412 ins_cost(INSN_COST);
12413 format %{ "ubfxw $dst, $src, $mask" %}
12414 ins_encode %{
12415 int rshift = $rshift$$constant;
12416 long mask = $mask$$constant;
12417 int width = exact_log2(mask+1);
12418 __ ubfxw(as_Register($dst$$reg),
12419 as_Register($src$$reg), rshift, width);
12420 %}
12421 ins_pipe(ialu_reg_shift);
12422 %}
// 64-bit variant of ubfxwI: (src >>> rshift) & mask  -->  ubfx.
// NOTE(review): format string omits $rshift here as well (cosmetic only).
12423 instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
12424 %{
12425 match(Set dst (AndL (URShiftL src rshift) mask));
12426
12427 ins_cost(INSN_COST);
12428 format %{ "ubfx $dst, $src, $mask" %}
12429 ins_encode %{
12430 int rshift = $rshift$$constant;
12431 long mask = $mask$$constant;
12432 int width = exact_log2(mask+1);
12433 __ ubfx(as_Register($dst$$reg),
12434 as_Register($src$$reg), rshift, width);
12435 %}
12436 ins_pipe(ialu_reg_shift);
12437 %}
12438
12439 // We can use ubfx when extending an And with a mask when we know mask
12440 // is positive. We know that because immI_bitmask guarantees it.
// ConvI2L of a masked unsigned shift: the 64-bit ubfx zero-extends the
// extracted field, which is exactly the i2l semantics for a non-negative value.
12441 instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
12442 %{
12443 match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));
12444
12445 ins_cost(INSN_COST * 2);
12446 format %{ "ubfx $dst, $src, $mask" %}
12447 ins_encode %{
12448 int rshift = $rshift$$constant;
12449 long mask = $mask$$constant;
12450 int width = exact_log2(mask+1);
12451 __ ubfx(as_Register($dst$$reg),
12452 as_Register($src$$reg), rshift, width);
12453 %}
12454 ins_pipe(ialu_reg_shift);
12455 %}
12456
12457 // Rotations
12458
// (src1 << lshift) | (src2 >>> rshift) with lshift + rshift == 64 is a
// 64-bit extract (EXTR); when src1 == src2 this is a rotate-right by rshift.
// The predicate enforces the complementary-shift condition on the constants.
12459 instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
12460 %{
12461 match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
12462 predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));
12463
12464 ins_cost(INSN_COST);
12465 format %{ "extr $dst, $src1, $src2, #$rshift" %}
12466
12467 ins_encode %{
12468 __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
12469 $rshift$$constant & 63);
12470 %}
12471 ins_pipe(ialu_reg_reg_extr);
12472 %}
12473
// 32-bit analogue of extrOrL; shifts must sum to 32 (mod 32).
// NOTE(review): format prints "extr" though extrw is emitted (cosmetic only).
12474 instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
12475 %{
12476 match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
12477 predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));
12478
12479 ins_cost(INSN_COST);
12480 format %{ "extr $dst, $src1, $src2, #$rshift" %}
12481
12482 ins_encode %{
12483 __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
12484 $rshift$$constant & 31);
12485 %}
12486 ins_pipe(ialu_reg_reg_extr);
12487 %}
12488
// Same extract idiom as extrOrL but matched through AddL: with complementary
// shifts the two fields cannot overlap, so Add and Or produce identical bits.
12489 instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
12490 %{
12491 match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
12492 predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));
12493
12494 ins_cost(INSN_COST);
12495 format %{ "extr $dst, $src1, $src2, #$rshift" %}
12496
12497 ins_encode %{
12498 __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
12499 $rshift$$constant & 63);
12500 %}
12501 ins_pipe(ialu_reg_reg_extr);
12502 %}
12503
// 32-bit analogue of extrAddL.
// NOTE(review): format prints "extr" though extrw is emitted (cosmetic only).
12504 instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
12505 %{
12506 match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
12507 predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));
12508
12509 ins_cost(INSN_COST);
12510 format %{ "extr $dst, $src1, $src2, #$rshift" %}
12511
12512 ins_encode %{
12513 __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
12514 $rshift$$constant & 31);
12515 %}
12516 ins_pipe(ialu_reg_reg_extr);
12517 %}
12518
12519
12520 // rol expander
12521
// No match rule: used only via expand %{ %} from the rol matchers below.
// rol(x, n) is implemented as rorv(x, -n); the negation goes through
// rscratch1, which this encoding clobbers.
12522 instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
12523 %{
12524 effect(DEF dst, USE src, USE shift);
12525
12526 format %{ "rol $dst, $src, $shift" %}
12527 ins_cost(INSN_COST * 3);
12528 ins_encode %{
12529 __ subw(rscratch1, zr, as_Register($shift$$reg));
12530 __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
12531 rscratch1);
12532 %}
12533 ins_pipe(ialu_reg_reg_vshift);
12534 %}
12535
12536 // rol expander
12537
// 32-bit rotate-left expander: rolw(x, n) == rorvw(x, -n); clobbers rscratch1.
12538 instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
12539 %{
12540 effect(DEF dst, USE src, USE shift);
12541
12542 format %{ "rol $dst, $src, $shift" %}
12543 ins_cost(INSN_COST * 3);
12544 ins_encode %{
12545 __ subw(rscratch1, zr, as_Register($shift$$reg));
12546 __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
12547 rscratch1);
12548 %}
12549 ins_pipe(ialu_reg_reg_vshift);
12550 %}
12551
// Matchers for the rotate-left-by-variable idiom
//   (x << n) | (x >>> (C - n))  with C == 64/32 or C == 0,
// all expanding to the rol expanders above.  C == 0 works because shift
// amounts are taken mod the operand width on AArch64.
12552 instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
12553 %{
12554 match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));
12555
12556 expand %{
12557 rolL_rReg(dst, src, shift, cr);
12558 %}
12559 %}
12560
12561 instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
12562 %{
12563 match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));
12564
12565 expand %{
12566 rolL_rReg(dst, src, shift, cr);
12567 %}
12568 %}
12569
12570 instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
12571 %{
12572 match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));
12573
12574 expand %{
12575 rolI_rReg(dst, src, shift, cr);
12576 %}
12577 %}
12578
12579 instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
12580 %{
12581 match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));
12582
12583 expand %{
12584 rolI_rReg(dst, src, shift, cr);
12585 %}
12586 %}
12587
12588 // ror expander
12589
// No match rule: used only via expand from the ror matchers below.
// Maps directly onto the hardware variable rotate-right, no scratch needed.
12590 instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
12591 %{
12592 effect(DEF dst, USE src, USE shift);
12593
12594 format %{ "ror $dst, $src, $shift" %}
12595 ins_cost(INSN_COST);
12596 ins_encode %{
12597 __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
12598 as_Register($shift$$reg));
12599 %}
12600 ins_pipe(ialu_reg_reg_vshift);
12601 %}
12602
12603 // ror expander
12604
// 32-bit variable rotate-right expander (rorvw), used by the matchers below.
12605 instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
12606 %{
12607 effect(DEF dst, USE src, USE shift);
12608
12609 format %{ "ror $dst, $src, $shift" %}
12610 ins_cost(INSN_COST);
12611 ins_encode %{
12612 __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
12613 as_Register($shift$$reg));
12614 %}
12615 ins_pipe(ialu_reg_reg_vshift);
12616 %}
12617
// Matchers for rotate-right-by-variable:
//   (x >>> n) | (x << (C - n))  with C == 64/32 or C == 0,
// expanding to the ror expanders above (shift taken mod operand width).
12618 instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
12619 %{
12620 match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));
12621
12622 expand %{
12623 rorL_rReg(dst, src, shift, cr);
12624 %}
12625 %}
12626
12627 instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
12628 %{
12629 match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));
12630
12631 expand %{
12632 rorL_rReg(dst, src, shift, cr);
12633 %}
12634 %}
12635
12636 instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
12637 %{
12638 match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));
12639
12640 expand %{
12641 rorI_rReg(dst, src, shift, cr);
12642 %}
12643 %}
12644
12645 instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
12646 %{
12647 match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));
12648
12649 expand %{
12650 rorI_rReg(dst, src, shift, cr);
12651 %}
12652 %}
12653
12654 // Add/subtract (extended)
12655
// long + (long)int: fold the i2l into the add's sxtw extended-register form.
12656 instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
12657 %{
12658 match(Set dst (AddL src1 (ConvI2L src2)));
12659 ins_cost(INSN_COST);
12660 format %{ "add $dst, $src1, sxtw $src2" %}
12661
12662 ins_encode %{
12663 __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12664 as_Register($src2$$reg), ext::sxtw);
12665 %}
12666 ins_pipe(ialu_reg_reg);
12667 %};
12668
// long - (long)int: fold the i2l into the sub's sxtw extended-register form.
12669 instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
12670 %{
12671 match(Set dst (SubL src1 (ConvI2L src2)));
12672 ins_cost(INSN_COST);
12673 format %{ "sub $dst, $src1, sxtw $src2" %}
12674
12675 ins_encode %{
12676 __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
12677 as_Register($src2$$reg), ext::sxtw);
12678 %}
12679 ins_pipe(ialu_reg_reg);
12680 %};
12681
12682
12683 instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
12684 %{
12685 match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
12686 ins_cost(INSN_COST);
12687 format %{ "add $dst, $src1, sxth $src2" %}
12688
12689 ins_encode %{
12690 __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12691 as_Register($src2$$reg), ext::sxth);
12692 %}
12693 ins_pipe(ialu_reg_reg);
12694 %}
12695
12696 instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
12697 %{
12698 match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
12699 ins_cost(INSN_COST);
12700 format %{ "add $dst, $src1, sxtb $src2" %}
12701
12702 ins_encode %{
12703 __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12704 as_Register($src2$$reg), ext::sxtb);
12705 %}
12706 ins_pipe(ialu_reg_reg);
12707 %}
12708
12709 instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
12710 %{
12711 match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
12712 ins_cost(INSN_COST);
12713 format %{ "add $dst, $src1, uxtb $src2" %}
12714
12715 ins_encode %{
12716 __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12717 as_Register($src2$$reg), ext::uxtb);
12718 %}
12719 ins_pipe(ialu_reg_reg);
12720 %}
12721
12722 instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
12723 %{
12724 match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
12725 ins_cost(INSN_COST);
12726 format %{ "add $dst, $src1, sxth $src2" %}
12727
12728 ins_encode %{
12729 __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12730 as_Register($src2$$reg), ext::sxth);
12731 %}
12732 ins_pipe(ialu_reg_reg);
12733 %}
12734
12735 instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
12736 %{
12737 match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
12738 ins_cost(INSN_COST);
12739 format %{ "add $dst, $src1, sxtw $src2" %}
12740
12741 ins_encode %{
12742 __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12743 as_Register($src2$$reg), ext::sxtw);
12744 %}
12745 ins_pipe(ialu_reg_reg);
12746 %}
12747
12748 instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
12749 %{
12750 match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
12751 ins_cost(INSN_COST);
12752 format %{ "add $dst, $src1, sxtb $src2" %}
12753
12754 ins_encode %{
12755 __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12756 as_Register($src2$$reg), ext::sxtb);
12757 %}
12758 ins_pipe(ialu_reg_reg);
12759 %}
12760
12761 instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
12762 %{
12763 match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
12764 ins_cost(INSN_COST);
12765 format %{ "add $dst, $src1, uxtb $src2" %}
12766
12767 ins_encode %{
12768 __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12769 as_Register($src2$$reg), ext::uxtb);
12770 %}
12771 ins_pipe(ialu_reg_reg);
12772 %}
12773
12774
12775 instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
12776 %{
12777 match(Set dst (AddI src1 (AndI src2 mask)));
12778 ins_cost(INSN_COST);
12779 format %{ "addw $dst, $src1, $src2, uxtb" %}
12780
12781 ins_encode %{
12782 __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
12783 as_Register($src2$$reg), ext::uxtb);
12784 %}
12785 ins_pipe(ialu_reg_reg);
12786 %}
12787
12788 instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
12789 %{
12790 match(Set dst (AddI src1 (AndI src2 mask)));
12791 ins_cost(INSN_COST);
12792 format %{ "addw $dst, $src1, $src2, uxth" %}
12793
12794 ins_encode %{
12795 __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
12796 as_Register($src2$$reg), ext::uxth);
12797 %}
12798 ins_pipe(ialu_reg_reg);
12799 %}
12800
12801 instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
12802 %{
12803 match(Set dst (AddL src1 (AndL src2 mask)));
12804 ins_cost(INSN_COST);
12805 format %{ "add $dst, $src1, $src2, uxtb" %}
12806
12807 ins_encode %{
12808 __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12809 as_Register($src2$$reg), ext::uxtb);
12810 %}
12811 ins_pipe(ialu_reg_reg);
12812 %}
12813
12814 instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
12815 %{
12816 match(Set dst (AddL src1 (AndL src2 mask)));
12817 ins_cost(INSN_COST);
12818 format %{ "add $dst, $src1, $src2, uxth" %}
12819
12820 ins_encode %{
12821 __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12822 as_Register($src2$$reg), ext::uxth);
12823 %}
12824 ins_pipe(ialu_reg_reg);
12825 %}
12826
12827 instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
12828 %{
12829 match(Set dst (AddL src1 (AndL src2 mask)));
12830 ins_cost(INSN_COST);
12831 format %{ "add $dst, $src1, $src2, uxtw" %}
12832
12833 ins_encode %{
12834 __ add(as_Register($dst$$reg), as_Register($src1$$reg),
12835 as_Register($src2$$reg), ext::uxtw);
12836 %}
12837 ins_pipe(ialu_reg_reg);
12838 %}
12839
// ---- Sub with zero extension expressed as an AND mask ----------------------
// Mirror of the AddExt*_and patterns above, folding the mask into the
// extended-register form of sub/subw.
12840 instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
12841 %{
12842 match(Set dst (SubI src1 (AndI src2 mask)));
12843 ins_cost(INSN_COST);
12844 format %{ "subw $dst, $src1, $src2, uxtb" %}
12845
12846 ins_encode %{
12847 __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
12848 as_Register($src2$$reg), ext::uxtb);
12849 %}
12850 ins_pipe(ialu_reg_reg);
12851 %}
12852
12853 instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
12854 %{
12855 match(Set dst (SubI src1 (AndI src2 mask)));
12856 ins_cost(INSN_COST);
12857 format %{ "subw $dst, $src1, $src2, uxth" %}
12858
12859 ins_encode %{
12860 __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
12861 as_Register($src2$$reg), ext::uxth);
12862 %}
12863 ins_pipe(ialu_reg_reg);
12864 %}
12865
12866 instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
12867 %{
12868 match(Set dst (SubL src1 (AndL src2 mask)));
12869 ins_cost(INSN_COST);
12870 format %{ "sub $dst, $src1, $src2, uxtb" %}
12871
12872 ins_encode %{
12873 __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
12874 as_Register($src2$$reg), ext::uxtb);
12875 %}
12876 ins_pipe(ialu_reg_reg);
12877 %}
12878
12879 instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
12880 %{
12881 match(Set dst (SubL src1 (AndL src2 mask)));
12882 ins_cost(INSN_COST);
12883 format %{ "sub $dst, $src1, $src2, uxth" %}
12884
12885 ins_encode %{
12886 __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
12887 as_Register($src2$$reg), ext::uxth);
12888 %}
12889 ins_pipe(ialu_reg_reg);
12890 %}
12891
12892 instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
12893 %{
12894 match(Set dst (SubL src1 (AndL src2 mask)));
12895 ins_cost(INSN_COST);
12896 format %{ "sub $dst, $src1, $src2, uxtw" %}
12897
12898 ins_encode %{
12899 __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
12900 as_Register($src2$$reg), ext::uxtw);
12901 %}
12902 ins_pipe(ialu_reg_reg);
12903 %}
12904
12905 // END This section of the file is automatically generated. Do not edit --------------
12906
12907 // ============================================================================
12908 // Floating Point Arithmetic Instructions
12909
// Scalar FP add/sub/mul.  Each rule matches one ideal node and emits a single
// AArch64 FP data-processing instruction on the fp_dop_reg_reg_[sd] pipeline.

// Float add: AddF -> fadds
instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (AddF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fadds $dst, $src1, $src2" %}

  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Double add: AddD -> faddd
instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (AddD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "faddd $dst, $src1, $src2" %}

  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}

// Float subtract: SubF -> fsubs
instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (SubF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubs $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Double subtract: SubD -> fsubd
instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (SubD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubd $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}

// Float multiply: MulF -> fmuls (costed slightly higher than add/sub)
instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (MulF src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuls $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}

// Double multiply: MulD -> fmuld
instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (MulD src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuld $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
12999
// Fused multiply-add family.  All rules are guarded by UseFMA and map the
// ideal FmaF/FmaD node (with optional NegF/NegD on the inputs) onto the four
// AArch64 fused ops: fmadd (a + n*m), fmsub (a - n*m), fnmadd (-a - n*m),
// fnmsub (-a + n*m).

// src1 * src2 + src3
instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary src1 src2)));

  format %{ "fmadds $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmadds(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// src1 * src2 + src3
instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary src1 src2)));

  format %{ "fmaddd $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmaddd(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 + src3
// Two match rules: the negation may appear on either multiplicand.
instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary (NegF src1) src2)));
  match(Set dst (FmaF src3 (Binary src1 (NegF src2))));

  format %{ "fmsubs $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmsubs(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 + src3
instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary (NegD src1) src2)));
  match(Set dst (FmaD src3 (Binary src1 (NegD src2))));

  format %{ "fmsubd $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmsubd(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 - src3
instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2)));
  match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2))));

  format %{ "fnmadds $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmadds(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// -src1 * src2 - src3
instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2)));
  match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2))));

  format %{ "fnmaddd $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmaddd(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// src1 * src2 - src3
// NOTE(review): the 'zero' operand is referenced by neither the match rule
// nor the encoding — presumably a leftover from an earlier pattern; confirm
// before removing (operand lists are part of the rule's matcher signature).
instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary src1 src2)));

  format %{ "fnmsubs $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmsubs(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}

// src1 * src2 - src3
// NOTE(review): unused 'zero' operand here as well — see mnsubF_reg_reg.
instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary src1 src2)));

  format %{ "fnmsubd $dst, $src1, $src2, $src3" %}

  ins_encode %{
    // n.b. insn name should be fnmsubd (the assembler helper is spelled
    // fnmsub for the double-precision form)
    __ fnmsub(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
13140
13141
// Float divide: DivF -> fdivs on the single-precision divide pipeline.
instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (DivF src1 src2));

  ins_cost(INSN_COST * 18);
  format %{ "fdivs $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_s);
%}

// Double divide: DivD -> fdivd; costed higher than the float form.
instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (DivD src1 src2));

  ins_cost(INSN_COST * 32);
  format %{ "fdivd $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_d);
%}
13171
// Float negate: NegF -> fnegs.
instruct negF_reg_reg(vRegF dst, vRegF src) %{
  match(Set dst (NegF src));

  ins_cost(INSN_COST * 3);
  // Format string fixed to name the actual emitted instruction, fnegs
  // (it previously read "fneg"; compare negD_reg_reg's "fnegd").
  format %{ "fnegs $dst, $src" %}

  ins_encode %{
    __ fnegs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}
13185
// Double negate: NegD -> fnegd.
instruct negD_reg_reg(vRegD dst, vRegD src) %{
  match(Set dst (NegD src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegd $dst, $src" %}

  ins_encode %{
    __ fnegd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
13199
// Float absolute value: AbsF -> fabss.
instruct absF_reg(vRegF dst, vRegF src) %{
  match(Set dst (AbsF src));

  ins_cost(INSN_COST * 3);
  format %{ "fabss $dst, $src" %}
  ins_encode %{
    __ fabss(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}

// Double absolute value: AbsD -> fabsd.
instruct absD_reg(vRegD dst, vRegD src) %{
  match(Set dst (AbsD src));

  ins_cost(INSN_COST * 3);
  format %{ "fabsd $dst, $src" %}
  ins_encode %{
    __ fabsd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
13225
// Double square root: SqrtD -> fsqrtd.
instruct sqrtD_reg(vRegD dst, vRegD src) %{
  match(Set dst (SqrtD src));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrtd $dst, $src" %}
  ins_encode %{
    __ fsqrtd(as_FloatRegister($dst$$reg),
              as_FloatRegister($src$$reg));
  %}

  // Fixed pipeline class: this is a double-precision op and must use
  // fp_div_d (it previously used fp_div_s, swapped with sqrtF_reg; compare
  // divF_reg_reg -> fp_div_s vs divD_reg_reg -> fp_div_d).
  ins_pipe(fp_div_d);
%}
13238
// Float square root.  Matches the ConvD2F(SqrtD(ConvF2D x)) idiom the ideal
// graph uses for a single-precision sqrt and emits one fsqrts.
instruct sqrtF_reg(vRegF dst, vRegF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrts $dst, $src" %}
  ins_encode %{
    __ fsqrts(as_FloatRegister($dst$$reg),
              as_FloatRegister($src$$reg));
  %}

  // Fixed pipeline class: single-precision op, so fp_div_s (it previously
  // used fp_div_d, swapped with sqrtD_reg).
  ins_pipe(fp_div_s);
%}
13251
13252 // ============================================================================
13253 // Logical Instructions
13254
13255 // Integer Logical Instructions
13256
13257 // And Instructions
13258
13259
// Int AND (register-register): AndI -> andw.
// NOTE(review): 'cr' is declared in the operand list but no effect() clause
// references it, and andw does not set flags — confirm whether it is needed.
instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
13274
// Int AND (register-logical immediate): AndI -> andw with an immILog constant.
instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  // Format string fixed: the encoding emits andw (non-flag-setting), so the
  // format must not claim "andsw".
  format %{ "andw $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13289
// Or Instructions

// Int OR (register-register): OrI -> orrw.
instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Int OR (register-logical immediate): OrI -> orrw.
instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Xor Instructions

// Int XOR (register-register): XorI -> eorw.
instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Int XOR (register-logical immediate): XorI -> eorw.
instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13353
13354 // Long Logical Instructions
13355 // TODO
13356
// Long logical ops.  Format comments fixed from "# int" to "# long": these
// rules match the 64-bit AndL/OrL/XorL nodes, and the format string is what
// appears in debug/PrintOptoAssembly output.

// Long AND (register-register): AndL -> andr.
instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Long AND (register-logical immediate): AndL -> andr.
instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  format %{ "and $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Or Instructions

// Long OR (register-register): OrL -> orr.
instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Long OR (register-logical immediate): OrL -> orr.
instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (OrL src1 src2));

  format %{ "orr $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}

// Xor Instructions

// Long XOR (register-register): XorL -> eor.
instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (XorL src1 src2));

  format %{ "eor $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Long XOR (register-logical immediate): XorL -> eor.
instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (XorL src1 src2));

  ins_cost(INSN_COST);
  format %{ "eor $dst, $src1, $src2\t# long" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
13450
// Sign-extend int to long: ConvI2L -> sbfm (the sxtw alias form).
instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(INSN_COST);
  format %{ "sxtw $dst, $src\t# i2l" %}
  ins_encode %{
    __ sbfm($dst$$Register, $src$$Register, 0, 31);
  %}
  ins_pipe(ialu_reg_shift);
%}

// this pattern occurs in bigmath arithmetic
// Zero-extend int to long: matches the (AndL (ConvI2L x) 0xFFFFFFFF) idiom
// and emits a single ubfm (uxtw) instead of extend-then-mask.
instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  ins_cost(INSN_COST);
  format %{ "ubfm $dst, $src, 0, 31\t# ui2l" %}
  ins_encode %{
    __ ubfm($dst$$Register, $src$$Register, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}

// Truncate long to int: ConvL2I -> movw (32-bit move zeroes the upper word).
instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
  match(Set dst (ConvL2I src));

  ins_cost(INSN_COST);
  format %{ "movw $dst, $src \t// l2i" %}

  ins_encode %{
    __ movw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Int to boolean (0/1): compare against zero, then cset on NE.
// Clobbers the flags, hence KILL cr.
instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmpw $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmpw(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}

// Pointer to boolean (0/1): 64-bit compare against zero, then cset on NE.
instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{
    "cmp $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmp(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}

// Double to float: ConvD2F -> fcvtd assembler helper.
instruct convD2F_reg(vRegF dst, vRegD src) %{
  match(Set dst (ConvD2F src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtd $dst, $src \t// d2f" %}

  ins_encode %{
    __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2f);
%}

// Float to double: ConvF2D -> fcvts assembler helper.
instruct convF2D_reg(vRegD dst, vRegF src) %{
  match(Set dst (ConvF2D src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvts $dst, $src \t// f2d" %}

  ins_encode %{
    __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2d);
%}

// Float to int: ConvF2I -> fcvtzsw (signed, round toward zero, 32-bit dst).
instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
  match(Set dst (ConvF2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzsw $dst, $src \t// f2i" %}

  ins_encode %{
    __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);
%}

// Float to long: ConvF2L -> fcvtzs (signed, round toward zero, 64-bit dst).
instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
  match(Set dst (ConvF2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzs $dst, $src \t// f2l" %}

  ins_encode %{
    __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2l);
%}

// Int to float: ConvI2F -> scvtfws (signed convert from 32-bit GPR).
instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
  match(Set dst (ConvI2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfws $dst, $src \t// i2f" %}

  ins_encode %{
    __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2f);
%}

// Long to float: ConvL2F -> scvtfs (signed convert from 64-bit GPR).
instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
  match(Set dst (ConvL2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfs $dst, $src \t// l2f" %}

  ins_encode %{
    __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2f);
%}

// Double to int: ConvD2I -> fcvtzdw.
instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
  match(Set dst (ConvD2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzdw $dst, $src \t// d2i" %}

  ins_encode %{
    __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2i);
%}

// Double to long: ConvD2L -> fcvtzd.
instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
  match(Set dst (ConvD2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzd $dst, $src \t// d2l" %}

  ins_encode %{
    __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);
%}

// Int to double: ConvI2D -> scvtfwd.
instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
  match(Set dst (ConvI2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfwd $dst, $src \t// i2d" %}

  ins_encode %{
    __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2d);
%}

// Long to double: ConvL2D -> scvtfd.
instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
  match(Set dst (ConvL2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfd $dst, $src \t// l2d" %}

  ins_encode %{
    __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2d);
%}
13655
// stack <-> reg and reg <-> reg shuffles with no conversion
// These reinterpret the raw bits (MoveF2I etc.); the stack variants go via a
// spill slot addressed off sp using the operand's $disp.

// Load a float spill slot's bits into a GPR.
instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}

  ins_encode %{
    __ ldrw($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Load an int spill slot's bits into an FP register.
instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Load a double spill slot's bits into a GPR.
instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}

  ins_encode %{
    __ ldr($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}

// Load a long spill slot's bits into an FP register.
instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Store an FP register's bits to an int spill slot.
instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}

  ins_encode %{
    __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}

// Store a GPR's bits to a float spill slot.
instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}

  ins_encode %{
    __ strw($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
13765
// Store an FP register's bits to a long spill slot.
instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  // Format operand order fixed: the encoding stores $src into the $dst stack
  // slot (it previously printed "strd $dst, $src"; compare the sibling
  // MoveF2I_reg_stack rule, which prints "strs $src, $dst").
  format %{ "strd $src, $dst\t# MoveD2L_reg_stack" %}

  ins_encode %{
    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
13783
// Store a GPR's bits to a double spill slot.
instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}

  ins_encode %{
    __ str($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}

// Bit-move FP -> GPR (32-bit): MoveF2I -> fmovs.
instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}

  ins_encode %{
    __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);

%}

// Bit-move GPR -> FP (32-bit): MoveI2F -> fmovs.
instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}

  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_i2f);

%}

// Bit-move FP -> GPR (64-bit): MoveD2L -> fmovd.
instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}

  ins_encode %{
    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);

%}

// Bit-move GPR -> FP (64-bit): MoveL2D -> fmovd.
instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}

  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_l2d);

%}
13873
13874 // ============================================================================
13875 // clearing of an array
13876
// ClearArray with a variable word count: delegates to the zero_words stub
// path.  cnt/base are pinned to r11/r10 and destroyed (USE_KILL).
instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, $cnt$$Register);
  %}

  ins_pipe(pipe_class_memory);
%}

// ClearArray with a constant word count.
// NOTE(review): 'tmp' (r11) is declared TEMP but not referenced in the
// encoding — presumably reserved because the constant-count zero_words
// helper clobbers it internally; confirm against MacroAssembler::zero_words.
instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, iRegL_R11 tmp, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL base, TEMP tmp);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
  %}

  ins_pipe(pipe_class_memory);
%}
13906
13907 // ============================================================================
13908 // Overflow Math Instructions
13909
13910 instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
13911 %{
13912 match(Set cr (OverflowAddI op1 op2));
13913
13914 format %{ "cmnw $op1, $op2\t# overflow check int" %}
13915 ins_cost(INSN_COST);
13916 ins_encode %{
13917 __ cmnw($op1$$Register, $op2$$Register);
13918 %}
13919
13920 ins_pipe(icmp_reg_reg);
13921 %}
13922
13923 instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
13924 %{
13925 match(Set cr (OverflowAddI op1 op2));
13926
13927 format %{ "cmnw $op1, $op2\t# overflow check int" %}
13928 ins_cost(INSN_COST);
13929 ins_encode %{
13930 __ cmnw($op1$$Register, $op2$$constant);
13931 %}
13932
13933 ins_pipe(icmp_reg_imm);
13934 %}
13935
13936 instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
13937 %{
13938 match(Set cr (OverflowAddL op1 op2));
13939
13940 format %{ "cmn $op1, $op2\t# overflow check long" %}
13941 ins_cost(INSN_COST);
13942 ins_encode %{
13943 __ cmn($op1$$Register, $op2$$Register);
13944 %}
13945
13946 ins_pipe(icmp_reg_reg);
13947 %}
13948
13949 instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
13950 %{
13951 match(Set cr (OverflowAddL op1 op2));
13952
13953 format %{ "cmn $op1, $op2\t# overflow check long" %}
13954 ins_cost(INSN_COST);
13955 ins_encode %{
13956 __ cmn($op1$$Register, $op2$$constant);
13957 %}
13958
13959 ins_pipe(icmp_reg_imm);
13960 %}
13961
13962 instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
13963 %{
13964 match(Set cr (OverflowSubI op1 op2));
13965
13966 format %{ "cmpw $op1, $op2\t# overflow check int" %}
13967 ins_cost(INSN_COST);
13968 ins_encode %{
13969 __ cmpw($op1$$Register, $op2$$Register);
13970 %}
13971
13972 ins_pipe(icmp_reg_reg);
13973 %}
13974
13975 instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
13976 %{
13977 match(Set cr (OverflowSubI op1 op2));
13978
13979 format %{ "cmpw $op1, $op2\t# overflow check int" %}
13980 ins_cost(INSN_COST);
13981 ins_encode %{
13982 __ cmpw($op1$$Register, $op2$$constant);
13983 %}
13984
13985 ins_pipe(icmp_reg_imm);
13986 %}
13987
13988 instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
13989 %{
13990 match(Set cr (OverflowSubL op1 op2));
13991
13992 format %{ "cmp $op1, $op2\t# overflow check long" %}
13993 ins_cost(INSN_COST);
13994 ins_encode %{
13995 __ cmp($op1$$Register, $op2$$Register);
13996 %}
13997
13998 ins_pipe(icmp_reg_reg);
13999 %}
14000
14001 instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
14002 %{
14003 match(Set cr (OverflowSubL op1 op2));
14004
14005 format %{ "cmp $op1, $op2\t# overflow check long" %}
14006 ins_cost(INSN_COST);
14007 ins_encode %{
14008 __ cmp($op1$$Register, $op2$$constant);
14009 %}
14010
14011 ins_pipe(icmp_reg_imm);
14012 %}
14013
14014 instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
14015 %{
14016 match(Set cr (OverflowSubI zero op1));
14017
14018 format %{ "cmpw zr, $op1\t# overflow check int" %}
14019 ins_cost(INSN_COST);
14020 ins_encode %{
14021 __ cmpw(zr, $op1$$Register);
14022 %}
14023
14024 ins_pipe(icmp_reg_imm);
14025 %}
14026
14027 instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
14028 %{
14029 match(Set cr (OverflowSubL zero op1));
14030
14031 format %{ "cmp zr, $op1\t# overflow check long" %}
14032 ins_cost(INSN_COST);
14033 ins_encode %{
14034 __ cmp(zr, $op1$$Register);
14035 %}
14036
14037 ins_pipe(icmp_reg_imm);
14038 %}
14039
14040 instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
14041 %{
14042 match(Set cr (OverflowMulI op1 op2));
14043
14044 format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
14045 "cmp rscratch1, rscratch1, sxtw\n\t"
14046 "movw rscratch1, #0x80000000\n\t"
14047 "cselw rscratch1, rscratch1, zr, NE\n\t"
14048 "cmpw rscratch1, #1" %}
14049 ins_cost(5 * INSN_COST);
14050 ins_encode %{
14051 __ smull(rscratch1, $op1$$Register, $op2$$Register);
14052 __ subs(zr, rscratch1, rscratch1, ext::sxtw); // NE => overflow
14053 __ movw(rscratch1, 0x80000000); // Develop 0 (EQ),
14054 __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
14055 __ cmpw(rscratch1, 1); // 0x80000000 - 1 => VS
14056 %}
14057
14058 ins_pipe(pipe_slow);
14059 %}
14060
// Fused form of overflowMulI_reg feeding an If directly: skips the
// flag-materialization tail and branches on the NE/EQ result of the
// sign-extension compare.  Restricted to overflow/no_overflow tests;
// VS maps to NE (overflowed), VC maps to EQ.
instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp rscratch1, rscratch1, sxtw\n\t"
            "b$cmp $labl" %}
  ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
14082
// Overflow check for long multiply.  MUL gives product bits 0..63 in
// rscratch1, SMULH gives bits 64..127 in rscratch2.  The multiply
// overflowed iff the high half is not the sign extension of the low
// half, i.e. rscratch2 != (rscratch1 ASR #63).  The tail converts that
// NE result into the V flag (0x80000000 - 1 sets VS) so consumers can
// test overflow/no_overflow on $cr.
instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));

  format %{ "mul rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp rscratch2, rscratch1, ASR #63\n\t"
            "movw rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw rscratch1, #1" %}
  ins_cost(6 * INSN_COST);
  ins_encode %{
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    // Fix: the shift must be #63 (the sign bit of the low 64-bit half),
    // not #31.  With ASR #31 the high half was compared against product
    // bits 31..94, which both misses genuine overflows and reports
    // spurious ones (e.g. (1L << 31) * (1L << 31) = 2^62 fits in a long
    // yet would have been flagged as overflow).
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ movw(rscratch1, 0x80000000);                      // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE);   // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                               // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}
14105
// Fused form of overflowMulL_reg feeding an If directly: branches on
// the NE/EQ result of the sign-extension compare instead of
// materializing the V flag.  Restricted to overflow/no_overflow tests;
// VS maps to NE (overflowed), VC maps to EQ.
instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "mul rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp rscratch2, rscratch1, ASR #63\n\t"
            "b$cmp $labl" %}
  ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    // Fix: shift must be #63, not #31 — the high 64 bits must equal the
    // sign extension of the low 64 bits (see overflowMulL_reg).
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
14129
14130 // ============================================================================
14131 // Compare Instructions
14132
// Signed int compare, register-register: CMPW sets the flags.
instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Signed int compare against zero.
instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpI op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, 0" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Signed int compare against an add/sub-encodable immediate (single
// instruction).
instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Signed int compare against an arbitrary immediate.  Costed higher
// than the AddSub form because the constant may need to be
// materialized first (hence the general cmpw_imm encoding).
instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14188
14189 // Unsigned compare Instructions; really, same as signed compare
14190 // except it should only be used to feed an If or a CMovI which takes a
14191 // cmpOpU.
14192
// Unsigned int compare, register-register.  The emitted CMPW is
// identical to the signed case; the difference is the result operand
// (rFlagsRegU) so only cmpOpU consumers interpret the flags.
instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Unsigned int compare against zero.
instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpU op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, #0\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned int compare against an add/sub-encodable immediate.
instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Unsigned int compare against an arbitrary immediate (constant may
// need materializing, hence the higher cost).
instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14248
// Signed long compare, register-register: 64-bit CMP sets the flags.
instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Signed long compare against zero.
// NOTE(review): two oddities worth confirming — (1) the constant
// operand is immI0 (int zero) on a CmpL match; other long rules use
// immL0; (2) the format prints "tst" but the encoding is a compare
// against #0 (subtract), not a TST.
instruct compL_reg_immI0(rFlagsReg cr, iRegL op1, immI0 zero)
%{
  match(Set cr (CmpL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}

// Signed long compare against an add/sub-encodable immediate.
instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}

// Signed long compare against an arbitrary immediate (constant may
// need materializing, hence the higher cost).
instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
14304
// Pointer compare, register-register.  Pointers compare unsigned,
// hence rFlagsRegU.
instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp $op1, $op2\t // ptr" %}

  ins_encode(aarch64_enc_cmpp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Compressed-pointer (narrow oop) compare, register-register.
instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp $op1, $op2\t // compressed ptr" %}

  ins_encode(aarch64_enc_cmpn(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Pointer null test.
instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
%{
  match(Set cr (CmpP op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp $op1, 0\t // ptr" %}

  ins_encode(aarch64_enc_testp(op1));

  ins_pipe(icmp_reg_imm);
%}

// Compressed-pointer null test.
instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
%{
  match(Set cr (CmpN op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp $op1, 0\t // compressed ptr" %}

  ins_encode(aarch64_enc_testn(op1));

  ins_pipe(icmp_reg_imm);
%}
14360
14361 // FP comparisons
14362 //
14363 // n.b. CmpF/CmpD set a normal flags reg which then gets compared
14364 // using normal cmpOp. See declaration of rFlagsReg for details.
14365
// Float compare, register-register: FCMPS sets the flags (see the
// section comment above — CmpF writes a normal rFlagsReg).
instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, $src2" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}
14379
// Float compare against the immediate 0.0 (FCMPS with-zero form).
instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, 0.0" %}

  ins_encode %{
    // Fix: use the plain literal 0.0 — the Java-style 'D' suffix
    // (0.0D) is not a valid C++ floating suffix and is rejected by
    // conforming compilers (e.g. clang) when the generated
    // ad_aarch64.cpp is compiled.
    __ fcmps(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
14393 // FROM HERE
14394
// Double compare, register-register: FCMPD sets the flags.
instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, $src2" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}
14408
// Double compare against the immediate 0.0 (FCMPD with-zero form).
instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, 0.0" %}

  ins_encode %{
    // Fix: use the plain literal 0.0 — the Java-style 'D' suffix
    // (0.0D) is not a valid C++ floating suffix and is rejected by
    // conforming compilers (e.g. clang).
    __ fcmpd(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
14422
// Three-way float compare (CmpF3): produces -1/0/1 in $dst.
// fcmps sets the flags; CSINV yields 0 on EQ else -1; CSNEG then keeps
// -1 when less-than-or-unordered and negates it to +1 otherwise.
instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  // Fix: format text had unbalanced, pseudo-call syntax
  // ("csinvw($dst, ..., eq") — print plain assembler syntax instead.
  format %{ "fcmps $src1, $src2\n\t"
            "csinvw $dst, zr, zr, eq\n\t"
            "csnegw $dst, $dst, $dst, lt"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    // (Removed a dead "Label done; ... __ bind(done);" pair — nothing
    // ever branched to it.)
  %}

  ins_pipe(pipe_class_default);

%}
14450
// Three-way double compare (CmpD3): produces -1/0/1 in $dst.
// Same CSINV/CSNEG selection scheme as compF3_reg_reg.
instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  // Fix: format text had unbalanced, pseudo-call syntax — print plain
  // assembler syntax instead.
  format %{ "fcmpd $src1, $src2\n\t"
            "csinvw $dst, zr, zr, eq\n\t"
            "csnegw $dst, $dst, $dst, lt"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    // (Removed a dead "Label done; ... __ bind(done);" pair — nothing
    // ever branched to it.)
  %}
  ins_pipe(pipe_class_default);

%}
14477
// Three-way float compare against 0.0: produces -1/0/1 in $dst using
// the same CSINV/CSNEG scheme as compF3_reg_reg.
instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  // Fix: format text had unbalanced, pseudo-call syntax — print plain
  // assembler syntax instead.
  format %{ "fcmps $src1, 0.0\n\t"
            "csinvw $dst, zr, zr, eq\n\t"
            "csnegw $dst, $dst, $dst, lt"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    // Fix: 0.0 instead of the non-standard C++ literal 0.0D.
    __ fcmps(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    // (Removed a dead "Label done; ... __ bind(done);" pair — nothing
    // ever branched to it.)
  %}

  ins_pipe(pipe_class_default);

%}
14504
// Three-way double compare against 0.0: produces -1/0/1 in $dst using
// the same CSINV/CSNEG scheme as compD3_reg_reg.
instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  // Fix: format text had unbalanced, pseudo-call syntax — print plain
  // assembler syntax instead.
  format %{ "fcmpd $src1, 0.0\n\t"
            "csinvw $dst, zr, zr, eq\n\t"
            "csnegw $dst, $dst, $dst, lt"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    // Fix: 0.0 instead of the non-standard C++ literal 0.0D.
    __ fcmpd(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
    // (Removed a dead "Label done; ... __ bind(done);" pair — nothing
    // ever branched to it.)
  %}
  ins_pipe(pipe_class_default);

%}
14530
// CmpLTMask: produce all-ones (-1) in $dst when p < q (signed), else
// zero.  CSETW gives 0/1 for the LT condition; negating turns 1 into
// -1 (the mask).
instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(3 * INSN_COST);

  format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
            "csetw $dst, lt\n\t"
            "subw $dst, zr, $dst"
  %}

  ins_encode %{
    __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
    __ csetw(as_Register($dst$$reg), Assembler::LT);
    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// CmpLTMask against zero: an arithmetic shift right by 31 replicates
// the sign bit, giving -1 for negative inputs and 0 otherwise in a
// single instruction (no flags needed despite the KILL).
instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask src zero));
  effect(KILL cr);

  ins_cost(INSN_COST);

  format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
14567
14568 // ============================================================================
14569 // Max and Min
14570
// Signed int minimum: compare, then conditionally select src1 when
// src1 < src2.  size(8) = exactly two 4-byte instructions.
instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MinI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 lt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::LT);
  %}

  ins_pipe(ialu_reg_reg);
%}
// FROM HERE

// Signed int maximum: same shape as minI_rReg but selects src1 on GT.
instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MaxI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1 $src2\t signed int\n\t"
    "cselw $dst, $src1, $src2 gt\t"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::GT);
  %}

  ins_pipe(ialu_reg_reg);
%}
14621
14622 // ============================================================================
14623 // Branch Instructions
14624
14625 // Direct Branch.
14626 instruct branch(label lbl)
14627 %{
14628 match(Goto);
14629
14630 effect(USE lbl);
14631
14632 ins_cost(BRANCH_COST);
14633 format %{ "b $lbl" %}
14634
14635 ins_encode(aarch64_enc_b(lbl));
14636
14637 ins_pipe(pipe_branch);
14638 %}
14639
14640 // Conditional Near Branch
14641 instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
14642 %{
14643 // Same match rule as `branchConFar'.
14644 match(If cmp cr);
14645
14646 effect(USE lbl);
14647
14648 ins_cost(BRANCH_COST);
14649 // If set to 1 this indicates that the current instruction is a
14650 // short variant of a long branch. This avoids using this
14651 // instruction in first-pass matching. It will then only be used in
14652 // the `Shorten_branches' pass.
14653 // ins_short_branch(1);
14654 format %{ "b$cmp $lbl" %}
14655
14656 ins_encode(aarch64_enc_br_con(cmp, lbl));
14657
14658 ins_pipe(pipe_branch_cond);
14659 %}
14660
14661 // Conditional Near Branch Unsigned
14662 instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
14663 %{
14664 // Same match rule as `branchConFar'.
14665 match(If cmp cr);
14666
14667 effect(USE lbl);
14668
14669 ins_cost(BRANCH_COST);
14670 // If set to 1 this indicates that the current instruction is a
14671 // short variant of a long branch. This avoids using this
14672 // instruction in first-pass matching. It will then only be used in
14673 // the `Shorten_branches' pass.
14674 // ins_short_branch(1);
14675 format %{ "b$cmp $lbl\t# unsigned" %}
14676
14677 ins_encode(aarch64_enc_br_conU(cmp, lbl));
14678
14679 ins_pipe(pipe_branch_cond);
14680 %}
14681
14682 // Make use of CBZ and CBNZ. These instructions, as well as being
14683 // shorter than (cmp; branch), have the additional benefit of not
14684 // killing the flags.
14685
// Compare-with-zero fused into the branch: EQ selects CBZW, NE selects
// CBNZW.  Flags are not modified (see section comment above).
instruct cmpI_imm0_branch(cmpOpEqNe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Long variant: CBZ/CBNZ on the full 64-bit register.
instruct cmpL_imm0_branch(cmpOpEqNe cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Pointer null-test fused into the branch.
instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Narrow-oop null-test fused into the branch (32-bit CBZW/CBNZW).
instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpN op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Null test of a decoded narrow oop: a narrow oop is null iff its
// encoded 32-bit form is zero, so the DecodeN can be elided and the
// test done with CBZW/CBNZW on the narrow register.
instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP (DecodeN oop) zero));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp $oop, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($oop$$Register, *L);
    else
      __ cbnzw($oop$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14770
// Unsigned int compare-with-zero fused into the branch.
// NOTE(review): the condition mapping tests EQ and LS (both true only
// when the value is zero, since an unsigned value is never below
// zero); the exact set of conditions accepted depends on the
// cmpOpUEqNeLtGe operand's encoding, which is defined elsewhere in
// this file — confirm the lt/ge codes really map onto LS/HI here.
instruct cmpUI_imm0_branch(cmpOpUEqNeLtGe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpU op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14787
// Unsigned long compare-with-zero fused into the branch (64-bit
// CBZ/CBNZ).
// NOTE(review): this matches CmpU (the int unsigned compare node) with
// an iRegL/immL0 operand pair — confirm this is intended and that
// ADLC type-matches it; a CmpUL node would be the natural long form.
instruct cmpUL_imm0_branch(cmpOpUEqNeLtGe cmp, iRegL op1, immL0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpU op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14804
14805 // Test bit and Branch
14806
14807 // Patterns for short (< 32KiB) variants
// Sign test of a long fused into a test-bit branch: x < 0 iff bit 63
// is set (TBNZ), x >= 0 iff it is clear (TBZ).  Short variant,
// selected by Shorten_branches when the target is within tbz range.
instruct cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Int variant of the sign-test branch: tests bit 31.
instruct cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Single-bit test of a long ((x & 2^k) ==/!= 0) fused into TBZ/TBNZ.
// The predicate admits only power-of-two masks so exact_log2 is valid.
instruct cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Int variant of the single-bit test branch.
instruct cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
14873
14874 // And far variants
14875 instruct far_cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
14876 match(If cmp (CmpL op1 op2));
14877 effect(USE labl);
14878
14879 ins_cost(BRANCH_COST);
14880 format %{ "cb$cmp $op1, $labl # long" %}
14881 ins_encode %{
14882 Label* L = $labl$$label;
14883 Assembler::Condition cond =
14884 ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
14885 __ tbr(cond, $op1$$Register, 63, *L, /*far*/true);
14886 %}
14887 ins_pipe(pipe_cmp_branch);
14888 %}
14889
14890 instruct far_cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
14891 match(If cmp (CmpI op1 op2));
14892 effect(USE labl);
14893
14894 ins_cost(BRANCH_COST);
14895 format %{ "cb$cmp $op1, $labl # int" %}
14896 ins_encode %{
14897 Label* L = $labl$$label;
14898 Assembler::Condition cond =
14899 ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
14900 __ tbr(cond, $op1$$Register, 31, *L, /*far*/true);
14901 %}
14902 ins_pipe(pipe_cmp_branch);
14903 %}
14904
14905 instruct far_cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
14906 match(If cmp (CmpL (AndL op1 op2) op3));
14907 predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
14908 effect(USE labl);
14909
14910 ins_cost(BRANCH_COST);
14911 format %{ "tb$cmp $op1, $op2, $labl" %}
14912 ins_encode %{
14913 Label* L = $labl$$label;
14914 Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
14915 int bit = exact_log2($op2$$constant);
14916 __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
14917 %}
14918 ins_pipe(pipe_cmp_branch);
14919 %}
14920
14921 instruct far_cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
14922 match(If cmp (CmpI (AndI op1 op2) op3));
14923 predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
14924 effect(USE labl);
14925
14926 ins_cost(BRANCH_COST);
14927 format %{ "tb$cmp $op1, $op2, $labl" %}
14928 ins_encode %{
14929 Label* L = $labl$$label;
14930 Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
14931 int bit = exact_log2($op2$$constant);
14932 __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
14933 %}
14934 ins_pipe(pipe_cmp_branch);
14935 %}
14936
14937 // Test bits
14938
// Long mask test ((x & imm) cmp 0) as a single TST, available only
// when the mask is encodable as a 64-bit logical immediate.
instruct cmpL_and(cmpOp cmp, iRegL op1, immL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/false, n->in(1)->in(2)->get_long()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
14951
// Int mask test ((x & imm) cmp 0) as a single TSTW, available only
// when the mask is encodable as a 32-bit logical immediate.
instruct cmpI_and(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/true, n->in(1)->in(2)->get_int()));

  ins_cost(INSN_COST);
  // Fix: the format previously printed "tst" while the encoding emits
  // the 32-bit tstw — now consistent with cmpI_and_reg below.
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}
14964
// Long mask test with a register mask: TST sets the flags.
instruct cmpL_and_reg(cmpOp cmp, iRegL op1, iRegL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Int mask test with a register mask: TSTW sets the flags.
instruct cmpI_and_reg(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
14986
14987
14988 // Conditional Far Branch
14989 // Conditional Far Branch Unsigned
14990 // TODO: fixme
14991
14992 // counted loop end branch near
// Counted loop back-branch (signed condition), near form; shares the
// conditional-branch encoding with branchCon.
instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch);
%}

// Counted loop back-branch, unsigned condition, near form.
instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch);
%}
15025
15026 // counted loop end branch far
15027 // counted loop end branch far unsigned
15028 // TODO: fixme
15029
15030 // ============================================================================
15031 // inlined locking and unlocking
15032
// Inline monitor enter: the fast-lock encoding sets the flags to
// report success/failure; tmp and tmp2 are clobbered scratch.
instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP tmp2);

  // TODO
  // identify correct cost
  ins_cost(5 * INSN_COST);
  format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}

  ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}

// Inline monitor exit: counterpart of cmpFastLock.
instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, TEMP tmp2);

  ins_cost(5 * INSN_COST);
  format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}

  ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}
15060
15061
15062 // ============================================================================
15063 // Safepoint Instructions
15064
15065 // TODO
15066 // provide a near and far version of this code
15067
// Safepoint poll: load from the polling page; the load faults when the
// page is protected, trapping the thread into the safepoint handler.
instruct safePoint(iRegP poll)
%{
  match(SafePoint poll);

  format %{
    "ldrw zr, [$poll]\t# Safepoint: poll for GC"
  %}
  ins_encode %{
    __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
  %}
  ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
%}
15080
15081
15082 // ============================================================================
15083 // Procedure Call/Return Instructions
15084
15085 // Call Java Static Instruction
15086
// Call Java Static Instruction: direct call to a resolved Java method,
// followed by the standard call epilogue encoding.
instruct CallStaticJavaDirect(method meth)
%{
  match(CallStaticJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "call,static $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_static_call(meth),
              aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}

// TO HERE

// Call Java Dynamic Instruction: virtual/interface dispatch through an
// inline cache.
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL,dynamic $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_dynamic_call(meth),
              aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Instruction: transition from compiled Java into the VM
// runtime.
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Instruction: leaf call (no Java frame walk); same
// encoding as CallRuntimeDirect.
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Instruction: leaf call that does not touch FP state;
// same encoding as the other runtime calls.
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
15172
15173 // Tail Call; Jump from runtime stub to Java code.
15174 // Also known as an 'interprocedural jump'.
15175 // Target of jump will eventually return to caller.
15176 // TailJump below removes the return address.
15177 instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
15178 %{
15179 match(TailCall jump_target method_oop);
15180
15181 ins_cost(CALL_COST);
15182
15183 format %{ "br $jump_target\t# $method_oop holds method oop" %}
15184
15185 ins_encode(aarch64_enc_tail_call(jump_target));
15186
15187 ins_pipe(pipe_class_call);
15188 %}
15189
15190 instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
15191 %{
15192 match(TailJump jump_target ex_oop);
15193
15194 ins_cost(CALL_COST);
15195
15196 format %{ "br $jump_target\t# $ex_oop holds exception oop" %}
15197
15198 ins_encode(aarch64_enc_tail_jmp(jump_target));
15199
15200 ins_pipe(pipe_class_call);
15201 %}
15202
15203 // Create exception oop: created by stack-crawling runtime code.
15204 // Created exception is now available to this handler, and is setup
15205 // just prior to jumping to this handler. No code emitted.
15206 // TODO check
15207 // should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
15208 instruct CreateException(iRegP_R0 ex_oop)
15209 %{
15210 match(Set ex_oop (CreateEx));
15211
15212 format %{ " -- \t// exception oop; no code emitted" %}
15213
15214 size(0);
15215
15216 ins_encode( /*empty*/ );
15217
15218 ins_pipe(pipe_class_empty);
15219 %}
15220
15221 // Rethrow exception: The exception oop will come in the first
15222 // argument position. Then JUMP (not call) to the rethrow stub code.
15223 instruct RethrowException() %{
15224 match(Rethrow);
15225 ins_cost(CALL_COST);
15226
15227 format %{ "b rethrow_stub" %}
15228
15229 ins_encode( aarch64_enc_rethrow() );
15230
15231 ins_pipe(pipe_class_call);
15232 %}
15233
15234
15235 // Return Instruction
15236 // epilog node loads ret address into lr as part of frame pop
15237 instruct Ret()
15238 %{
15239 match(Return);
15240
15241 format %{ "ret\t// return register" %}
15242
15243 ins_encode( aarch64_enc_ret() );
15244
15245 ins_pipe(pipe_branch);
15246 %}
15247
15248 // Die now.
15249 instruct ShouldNotReachHere() %{
15250 match(Halt);
15251
15252 ins_cost(CALL_COST);
15253 format %{ "ShouldNotReachHere" %}
15254
15255 ins_encode %{
15256 // TODO
15257 // implement proper trap call here
15258 __ brk(999);
15259 %}
15260
15261 ins_pipe(pipe_class_default);
15262 %}
15263
15264 // ============================================================================
15265 // Partial Subtype Check
15266 //
// Scan the secondary-supers (superklass) array for an instance of the
// superklass.  Set a hidden
15268 // internal cache on a hit (cache is checked with exposed code in
15269 // gen_subtype_check()). Return NZ for a miss or zero for a hit. The
15270 // encoding ALSO sets flags.
15271
15272 instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
15273 %{
15274 match(Set result (PartialSubtypeCheck sub super));
15275 effect(KILL cr, KILL temp);
15276
15277 ins_cost(1100); // slightly larger than the next version
15278 format %{ "partialSubtypeCheck $result, $sub, $super" %}
15279
15280 ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));
15281
15282 opcode(0x1); // Force zero of result reg on hit
15283
15284 ins_pipe(pipe_class_memory);
15285 %}
15286
15287 instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
15288 %{
15289 match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
15290 effect(KILL temp, KILL result);
15291
15292 ins_cost(1100); // slightly larger than the next version
15293 format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}
15294
15295 ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));
15296
15297 opcode(0x0); // Don't zero result reg on hit
15298
15299 ins_pipe(pipe_class_memory);
15300 %}
15301
15302 instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
15303 iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
15304 %{
15305 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
15306 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15307 effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15308
15309 format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # KILL $tmp1" %}
15310 ins_encode %{
15311 // Count is in 8-bit bytes; non-Compact chars are 16 bits.
15312 __ string_compare($str1$$Register, $str2$$Register,
15313 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15314 $tmp1$$Register,
15315 fnoreg, fnoreg, StrIntrinsicNode::UU);
15316 %}
15317 ins_pipe(pipe_class_memory);
15318 %}
15319
15320 instruct string_compareL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
15321 iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
15322 %{
15323 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
15324 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15325 effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
15326
15327 format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # KILL $tmp1" %}
15328 ins_encode %{
15329 __ string_compare($str1$$Register, $str2$$Register,
15330 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15331 $tmp1$$Register,
15332 fnoreg, fnoreg, StrIntrinsicNode::LL);
15333 %}
15334 ins_pipe(pipe_class_memory);
15335 %}
15336
15337 instruct string_compareUL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
15338 iRegI_R0 result, vRegD vtmp1, vRegD vtmp2, iRegP_R10 tmp1, rFlagsReg cr)
15339 %{
15340 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
15341 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15342 effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP vtmp1, TEMP vtmp2, KILL cr);
15343
15344 format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # KILL $tmp1" %}
15345 ins_encode %{
15346 __ string_compare($str1$$Register, $str2$$Register,
15347 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15348 $tmp1$$Register,
15349 $vtmp1$$FloatRegister, $vtmp2$$FloatRegister, StrIntrinsicNode::UL);
15350 %}
15351 ins_pipe(pipe_class_memory);
15352 %}
15353
15354 instruct string_compareLU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
15355 iRegI_R0 result, vRegD vtmp1, vRegD vtmp2, iRegP_R10 tmp1, rFlagsReg cr)
15356 %{
15357 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
15358 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
15359 effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP vtmp1, TEMP vtmp2, KILL cr);
15360
15361 format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # KILL $tmp1" %}
15362 ins_encode %{
15363 __ string_compare($str1$$Register, $str2$$Register,
15364 $cnt1$$Register, $cnt2$$Register, $result$$Register,
15365 $tmp1$$Register,
15366 $vtmp1$$FloatRegister, $vtmp2$$FloatRegister, StrIntrinsicNode::LU);
15367 %}
15368 ins_pipe(pipe_class_memory);
15369 %}
15370
15371 instruct string_indexofUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
15372 iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
15373 %{
15374 predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
15375 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15376 effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
15377 TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
15378 format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %}
15379
15380 ins_encode %{
15381 __ string_indexof($str1$$Register, $str2$$Register,
15382 $cnt1$$Register, $cnt2$$Register,
15383 $tmp1$$Register, $tmp2$$Register,
15384 $tmp3$$Register, $tmp4$$Register,
15385 -1, $result$$Register, StrIntrinsicNode::UU);
15386 %}
15387 ins_pipe(pipe_class_memory);
15388 %}
15389
15390 instruct string_indexofLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
15391 iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
15392 %{
15393 predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
15394 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15395 effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
15396 TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
15397 format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %}
15398
15399 ins_encode %{
15400 __ string_indexof($str1$$Register, $str2$$Register,
15401 $cnt1$$Register, $cnt2$$Register,
15402 $tmp1$$Register, $tmp2$$Register,
15403 $tmp3$$Register, $tmp4$$Register,
15404 -1, $result$$Register, StrIntrinsicNode::LL);
15405 %}
15406 ins_pipe(pipe_class_memory);
15407 %}
15408
15409 instruct string_indexofUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
15410 iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
15411 %{
15412 predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
15413 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15414 effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
15415 TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
15416 format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %}
15417
15418 ins_encode %{
15419 __ string_indexof($str1$$Register, $str2$$Register,
15420 $cnt1$$Register, $cnt2$$Register,
15421 $tmp1$$Register, $tmp2$$Register,
15422 $tmp3$$Register, $tmp4$$Register,
15423 -1, $result$$Register, StrIntrinsicNode::UL);
15424 %}
15425 ins_pipe(pipe_class_memory);
15426 %}
15427
15428 instruct string_indexofLU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
15429 iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
15430 %{
15431 predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LU);
15432 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
15433 effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
15434 TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
15435 format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LU)" %}
15436
15437 ins_encode %{
15438 __ string_indexof($str1$$Register, $str2$$Register,
15439 $cnt1$$Register, $cnt2$$Register,
15440 $tmp1$$Register, $tmp2$$Register,
15441 $tmp3$$Register, $tmp4$$Register,
15442 -1, $result$$Register, StrIntrinsicNode::LU);
15443 %}
15444 ins_pipe(pipe_class_memory);
15445 %}
15446
15447 instruct string_indexof_conUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
15448 immI_le_4 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
15449 iRegI tmp3, iRegI tmp4, rFlagsReg cr)
15450 %{
15451 predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
15452 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15453 effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
15454 TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
15455 format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %}
15456
15457 ins_encode %{
15458 int icnt2 = (int)$int_cnt2$$constant;
15459 __ string_indexof($str1$$Register, $str2$$Register,
15460 $cnt1$$Register, zr,
15461 $tmp1$$Register, $tmp2$$Register,
15462 $tmp3$$Register, $tmp4$$Register,
15463 icnt2, $result$$Register, StrIntrinsicNode::UU);
15464 %}
15465 ins_pipe(pipe_class_memory);
15466 %}
15467
15468 instruct string_indexof_conLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
15469 immI_le_4 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
15470 iRegI tmp3, iRegI tmp4, rFlagsReg cr)
15471 %{
15472 predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
15473 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15474 effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
15475 TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
15476 format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %}
15477
15478 ins_encode %{
15479 int icnt2 = (int)$int_cnt2$$constant;
15480 __ string_indexof($str1$$Register, $str2$$Register,
15481 $cnt1$$Register, zr,
15482 $tmp1$$Register, $tmp2$$Register,
15483 $tmp3$$Register, $tmp4$$Register,
15484 icnt2, $result$$Register, StrIntrinsicNode::LL);
15485 %}
15486 ins_pipe(pipe_class_memory);
15487 %}
15488
15489 instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
15490 immI_1 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
15491 iRegI tmp3, iRegI tmp4, rFlagsReg cr)
15492 %{
15493 predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
15494 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15495 effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
15496 TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
15497 format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %}
15498
15499 ins_encode %{
15500 int icnt2 = (int)$int_cnt2$$constant;
15501 __ string_indexof($str1$$Register, $str2$$Register,
15502 $cnt1$$Register, zr,
15503 $tmp1$$Register, $tmp2$$Register,
15504 $tmp3$$Register, $tmp4$$Register,
15505 icnt2, $result$$Register, StrIntrinsicNode::UL);
15506 %}
15507 ins_pipe(pipe_class_memory);
15508 %}
15509
15510 instruct string_indexof_conLU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
15511 immI_1 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
15512 iRegI tmp3, iRegI tmp4, rFlagsReg cr)
15513 %{
15514 predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LU);
15515 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
15516 effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
15517 TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
15518 format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LU)" %}
15519
15520 ins_encode %{
15521 int icnt2 = (int)$int_cnt2$$constant;
15522 __ string_indexof($str1$$Register, $str2$$Register,
15523 $cnt1$$Register, zr,
15524 $tmp1$$Register, $tmp2$$Register,
15525 $tmp3$$Register, $tmp4$$Register,
15526 icnt2, $result$$Register, StrIntrinsicNode::LU);
15527 %}
15528 ins_pipe(pipe_class_memory);
15529 %}
15530
15531 instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
15532 iRegI_R0 result, iRegI tmp1, iRegI tmp2,
15533 iRegI tmp3, rFlagsReg cr)
15534 %{
15535 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
15536 effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
15537 TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
15538
15539 format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result" %}
15540
15541 ins_encode %{
15542 __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register,
15543 $result$$Register, $tmp1$$Register, $tmp2$$Register,
15544 $tmp3$$Register);
15545 %}
15546 ins_pipe(pipe_class_memory);
15547 %}
15548
15549 instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
15550 iRegI_R0 result, rFlagsReg cr)
15551 %{
15552 predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
15553 match(Set result (StrEquals (Binary str1 str2) cnt));
15554 effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);
15555
15556 format %{ "String Equals $str1,$str2,$cnt -> $result" %}
15557 ins_encode %{
15558 // Count is in 8-bit bytes; non-Compact chars are 16 bits.
15559 __ arrays_equals($str1$$Register, $str2$$Register,
15560 $result$$Register, $cnt$$Register,
15561 1, /*is_string*/true);
15562 %}
15563 ins_pipe(pipe_class_memory);
15564 %}
15565
15566 instruct string_equalsU(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
15567 iRegI_R0 result, rFlagsReg cr)
15568 %{
15569 predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
15570 match(Set result (StrEquals (Binary str1 str2) cnt));
15571 effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);
15572
15573 format %{ "String Equals $str1,$str2,$cnt -> $result" %}
15574 ins_encode %{
15575 // Count is in 8-bit bytes; non-Compact chars are 16 bits.
15576 __ asrw($cnt$$Register, $cnt$$Register, 1);
15577 __ arrays_equals($str1$$Register, $str2$$Register,
15578 $result$$Register, $cnt$$Register,
15579 2, /*is_string*/true);
15580 %}
15581 ins_pipe(pipe_class_memory);
15582 %}
15583
15584 instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
15585 iRegP_R10 tmp, rFlagsReg cr)
15586 %{
15587 predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
15588 match(Set result (AryEq ary1 ary2));
15589 effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);
15590
15591 format %{ "Array Equals $ary1,ary2 -> $result // KILL $tmp" %}
15592 ins_encode %{
15593 __ arrays_equals($ary1$$Register, $ary2$$Register,
15594 $result$$Register, $tmp$$Register,
15595 1, /*is_string*/false);
15596 %}
15597 ins_pipe(pipe_class_memory);
15598 %}
15599
15600 instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
15601 iRegP_R10 tmp, rFlagsReg cr)
15602 %{
15603 predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
15604 match(Set result (AryEq ary1 ary2));
15605 effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);
15606
15607 format %{ "Array Equals $ary1,ary2 -> $result // KILL $tmp" %}
15608 ins_encode %{
15609 __ arrays_equals($ary1$$Register, $ary2$$Register,
15610 $result$$Register, $tmp$$Register,
15611 2, /*is_string*/false);
15612 %}
15613 ins_pipe(pipe_class_memory);
15614 %}
15615
15616
15617 // fast char[] to byte[] compression
15618 instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
15619 vRegD_V0 tmp1, vRegD_V1 tmp2,
15620 vRegD_V2 tmp3, vRegD_V3 tmp4,
15621 iRegI_R0 result, rFlagsReg cr)
15622 %{
15623 match(Set result (StrCompressedCopy src (Binary dst len)));
15624 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15625
15626 format %{ "String Compress $src,$dst -> $result // KILL R1, R2, R3, R4" %}
15627 ins_encode %{
15628 __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
15629 $tmp1$$FloatRegister, $tmp2$$FloatRegister,
15630 $tmp3$$FloatRegister, $tmp4$$FloatRegister,
15631 $result$$Register);
15632 %}
15633 ins_pipe( pipe_slow );
15634 %}
15635
15636 // fast byte[] to char[] inflation
15637 instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len,
15638 vRegD tmp1, vRegD tmp2, vRegD tmp3, iRegP_R3 tmp4, rFlagsReg cr)
15639 %{
15640 match(Set dummy (StrInflatedCopy src (Binary dst len)));
15641 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
15642
15643 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
15644 ins_encode %{
15645 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
15646 $tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister, $tmp4$$Register);
15647 %}
15648 ins_pipe(pipe_class_memory);
15649 %}
15650
15651 // encode char[] to byte[] in ISO_8859_1
15652 instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
15653 vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
15654 vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
15655 iRegI_R0 result, rFlagsReg cr)
15656 %{
15657 match(Set result (EncodeISOArray src (Binary dst len)));
15658 effect(USE_KILL src, USE_KILL dst, USE_KILL len,
15659 KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);
15660
15661 format %{ "Encode array $src,$dst,$len -> $result" %}
15662 ins_encode %{
15663 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
15664 $result$$Register, $Vtmp1$$FloatRegister, $Vtmp2$$FloatRegister,
15665 $Vtmp3$$FloatRegister, $Vtmp4$$FloatRegister);
15666 %}
15667 ins_pipe( pipe_class_memory );
15668 %}
15669
15670 // ============================================================================
15671 // This name is KNOWN by the ADLC and cannot be changed.
15672 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
15673 // for this guy.
15674 instruct tlsLoadP(thread_RegP dst)
15675 %{
15676 match(Set dst (ThreadLocal));
15677
15678 ins_cost(0);
15679
15680 format %{ " -- \t// $dst=Thread::current(), empty" %}
15681
15682 size(0);
15683
15684 ins_encode( /*empty*/ );
15685
15686 ins_pipe(pipe_class_empty);
15687 %}
15688
15689 // ====================VECTOR INSTRUCTIONS=====================================
15690
15691 // Load vector (32 bits)
15692 instruct loadV4(vecD dst, vmem4 mem)
15693 %{
15694 predicate(n->as_LoadVector()->memory_size() == 4);
15695 match(Set dst (LoadVector mem));
15696 ins_cost(4 * INSN_COST);
15697 format %{ "ldrs $dst,$mem\t# vector (32 bits)" %}
15698 ins_encode( aarch64_enc_ldrvS(dst, mem) );
15699 ins_pipe(vload_reg_mem64);
15700 %}
15701
15702 // Load vector (64 bits)
15703 instruct loadV8(vecD dst, vmem8 mem)
15704 %{
15705 predicate(n->as_LoadVector()->memory_size() == 8);
15706 match(Set dst (LoadVector mem));
15707 ins_cost(4 * INSN_COST);
15708 format %{ "ldrd $dst,$mem\t# vector (64 bits)" %}
15709 ins_encode( aarch64_enc_ldrvD(dst, mem) );
15710 ins_pipe(vload_reg_mem64);
15711 %}
15712
15713 // Load Vector (128 bits)
15714 instruct loadV16(vecX dst, vmem16 mem)
15715 %{
15716 predicate(n->as_LoadVector()->memory_size() == 16);
15717 match(Set dst (LoadVector mem));
15718 ins_cost(4 * INSN_COST);
15719 format %{ "ldrq $dst,$mem\t# vector (128 bits)" %}
15720 ins_encode( aarch64_enc_ldrvQ(dst, mem) );
15721 ins_pipe(vload_reg_mem128);
15722 %}
15723
15724 // Store Vector (32 bits)
15725 instruct storeV4(vecD src, vmem4 mem)
15726 %{
15727 predicate(n->as_StoreVector()->memory_size() == 4);
15728 match(Set mem (StoreVector mem src));
15729 ins_cost(4 * INSN_COST);
15730 format %{ "strs $mem,$src\t# vector (32 bits)" %}
15731 ins_encode( aarch64_enc_strvS(src, mem) );
15732 ins_pipe(vstore_reg_mem64);
15733 %}
15734
15735 // Store Vector (64 bits)
15736 instruct storeV8(vecD src, vmem8 mem)
15737 %{
15738 predicate(n->as_StoreVector()->memory_size() == 8);
15739 match(Set mem (StoreVector mem src));
15740 ins_cost(4 * INSN_COST);
15741 format %{ "strd $mem,$src\t# vector (64 bits)" %}
15742 ins_encode( aarch64_enc_strvD(src, mem) );
15743 ins_pipe(vstore_reg_mem64);
15744 %}
15745
15746 // Store Vector (128 bits)
15747 instruct storeV16(vecX src, vmem16 mem)
15748 %{
15749 predicate(n->as_StoreVector()->memory_size() == 16);
15750 match(Set mem (StoreVector mem src));
15751 ins_cost(4 * INSN_COST);
15752 format %{ "strq $mem,$src\t# vector (128 bits)" %}
15753 ins_encode( aarch64_enc_strvQ(src, mem) );
15754 ins_pipe(vstore_reg_mem128);
15755 %}
15756
15757 instruct replicate8B(vecD dst, iRegIorL2I src)
15758 %{
15759 predicate(n->as_Vector()->length() == 4 ||
15760 n->as_Vector()->length() == 8);
15761 match(Set dst (ReplicateB src));
15762 ins_cost(INSN_COST);
15763 format %{ "dup $dst, $src\t# vector (8B)" %}
15764 ins_encode %{
15765 __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
15766 %}
15767 ins_pipe(vdup_reg_reg64);
15768 %}
15769
15770 instruct replicate16B(vecX dst, iRegIorL2I src)
15771 %{
15772 predicate(n->as_Vector()->length() == 16);
15773 match(Set dst (ReplicateB src));
15774 ins_cost(INSN_COST);
15775 format %{ "dup $dst, $src\t# vector (16B)" %}
15776 ins_encode %{
15777 __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
15778 %}
15779 ins_pipe(vdup_reg_reg128);
15780 %}
15781
15782 instruct replicate8B_imm(vecD dst, immI con)
15783 %{
15784 predicate(n->as_Vector()->length() == 4 ||
15785 n->as_Vector()->length() == 8);
15786 match(Set dst (ReplicateB con));
15787 ins_cost(INSN_COST);
15788 format %{ "movi $dst, $con\t# vector(8B)" %}
15789 ins_encode %{
15790 __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
15791 %}
15792 ins_pipe(vmovi_reg_imm64);
15793 %}
15794
15795 instruct replicate16B_imm(vecX dst, immI con)
15796 %{
15797 predicate(n->as_Vector()->length() == 16);
15798 match(Set dst (ReplicateB con));
15799 ins_cost(INSN_COST);
15800 format %{ "movi $dst, $con\t# vector(16B)" %}
15801 ins_encode %{
15802 __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
15803 %}
15804 ins_pipe(vmovi_reg_imm128);
15805 %}
15806
15807 instruct replicate4S(vecD dst, iRegIorL2I src)
15808 %{
15809 predicate(n->as_Vector()->length() == 2 ||
15810 n->as_Vector()->length() == 4);
15811 match(Set dst (ReplicateS src));
15812 ins_cost(INSN_COST);
15813 format %{ "dup $dst, $src\t# vector (4S)" %}
15814 ins_encode %{
15815 __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
15816 %}
15817 ins_pipe(vdup_reg_reg64);
15818 %}
15819
15820 instruct replicate8S(vecX dst, iRegIorL2I src)
15821 %{
15822 predicate(n->as_Vector()->length() == 8);
15823 match(Set dst (ReplicateS src));
15824 ins_cost(INSN_COST);
15825 format %{ "dup $dst, $src\t# vector (8S)" %}
15826 ins_encode %{
15827 __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
15828 %}
15829 ins_pipe(vdup_reg_reg128);
15830 %}
15831
15832 instruct replicate4S_imm(vecD dst, immI con)
15833 %{
15834 predicate(n->as_Vector()->length() == 2 ||
15835 n->as_Vector()->length() == 4);
15836 match(Set dst (ReplicateS con));
15837 ins_cost(INSN_COST);
15838 format %{ "movi $dst, $con\t# vector(4H)" %}
15839 ins_encode %{
15840 __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
15841 %}
15842 ins_pipe(vmovi_reg_imm64);
15843 %}
15844
15845 instruct replicate8S_imm(vecX dst, immI con)
15846 %{
15847 predicate(n->as_Vector()->length() == 8);
15848 match(Set dst (ReplicateS con));
15849 ins_cost(INSN_COST);
15850 format %{ "movi $dst, $con\t# vector(8H)" %}
15851 ins_encode %{
15852 __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
15853 %}
15854 ins_pipe(vmovi_reg_imm128);
15855 %}
15856
15857 instruct replicate2I(vecD dst, iRegIorL2I src)
15858 %{
15859 predicate(n->as_Vector()->length() == 2);
15860 match(Set dst (ReplicateI src));
15861 ins_cost(INSN_COST);
15862 format %{ "dup $dst, $src\t# vector (2I)" %}
15863 ins_encode %{
15864 __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
15865 %}
15866 ins_pipe(vdup_reg_reg64);
15867 %}
15868
15869 instruct replicate4I(vecX dst, iRegIorL2I src)
15870 %{
15871 predicate(n->as_Vector()->length() == 4);
15872 match(Set dst (ReplicateI src));
15873 ins_cost(INSN_COST);
15874 format %{ "dup $dst, $src\t# vector (4I)" %}
15875 ins_encode %{
15876 __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
15877 %}
15878 ins_pipe(vdup_reg_reg128);
15879 %}
15880
15881 instruct replicate2I_imm(vecD dst, immI con)
15882 %{
15883 predicate(n->as_Vector()->length() == 2);
15884 match(Set dst (ReplicateI con));
15885 ins_cost(INSN_COST);
15886 format %{ "movi $dst, $con\t# vector(2I)" %}
15887 ins_encode %{
15888 __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
15889 %}
15890 ins_pipe(vmovi_reg_imm64);
15891 %}
15892
15893 instruct replicate4I_imm(vecX dst, immI con)
15894 %{
15895 predicate(n->as_Vector()->length() == 4);
15896 match(Set dst (ReplicateI con));
15897 ins_cost(INSN_COST);
15898 format %{ "movi $dst, $con\t# vector(4I)" %}
15899 ins_encode %{
15900 __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
15901 %}
15902 ins_pipe(vmovi_reg_imm128);
15903 %}
15904
15905 instruct replicate2L(vecX dst, iRegL src)
15906 %{
15907 predicate(n->as_Vector()->length() == 2);
15908 match(Set dst (ReplicateL src));
15909 ins_cost(INSN_COST);
15910 format %{ "dup $dst, $src\t# vector (2L)" %}
15911 ins_encode %{
15912 __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
15913 %}
15914 ins_pipe(vdup_reg_reg128);
15915 %}
15916
15917 instruct replicate2L_zero(vecX dst, immI0 zero)
15918 %{
15919 predicate(n->as_Vector()->length() == 2);
15920 match(Set dst (ReplicateI zero));
15921 ins_cost(INSN_COST);
15922 format %{ "movi $dst, $zero\t# vector(4I)" %}
15923 ins_encode %{
15924 __ eor(as_FloatRegister($dst$$reg), __ T16B,
15925 as_FloatRegister($dst$$reg),
15926 as_FloatRegister($dst$$reg));
15927 %}
15928 ins_pipe(vmovi_reg_imm128);
15929 %}
15930
15931 instruct replicate2F(vecD dst, vRegF src)
15932 %{
15933 predicate(n->as_Vector()->length() == 2);
15934 match(Set dst (ReplicateF src));
15935 ins_cost(INSN_COST);
15936 format %{ "dup $dst, $src\t# vector (2F)" %}
15937 ins_encode %{
15938 __ dup(as_FloatRegister($dst$$reg), __ T2S,
15939 as_FloatRegister($src$$reg));
15940 %}
15941 ins_pipe(vdup_reg_freg64);
15942 %}
15943
15944 instruct replicate4F(vecX dst, vRegF src)
15945 %{
15946 predicate(n->as_Vector()->length() == 4);
15947 match(Set dst (ReplicateF src));
15948 ins_cost(INSN_COST);
15949 format %{ "dup $dst, $src\t# vector (4F)" %}
15950 ins_encode %{
15951 __ dup(as_FloatRegister($dst$$reg), __ T4S,
15952 as_FloatRegister($src$$reg));
15953 %}
15954 ins_pipe(vdup_reg_freg128);
15955 %}
15956
15957 instruct replicate2D(vecX dst, vRegD src)
15958 %{
15959 predicate(n->as_Vector()->length() == 2);
15960 match(Set dst (ReplicateD src));
15961 ins_cost(INSN_COST);
15962 format %{ "dup $dst, $src\t# vector (2D)" %}
15963 ins_encode %{
15964 __ dup(as_FloatRegister($dst$$reg), __ T2D,
15965 as_FloatRegister($src$$reg));
15966 %}
15967 ins_pipe(vdup_reg_dreg128);
15968 %}
15969
15970 // ====================REDUCTION ARITHMETIC====================================
15971
15972 instruct reduce_add2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp, iRegI tmp2)
15973 %{
15974 match(Set dst (AddReductionVI src1 src2));
15975 ins_cost(INSN_COST);
15976 effect(TEMP tmp, TEMP tmp2);
15977 format %{ "umov $tmp, $src2, S, 0\n\t"
15978 "umov $tmp2, $src2, S, 1\n\t"
15979 "addw $dst, $src1, $tmp\n\t"
15980 "addw $dst, $dst, $tmp2\t add reduction2i"
15981 %}
15982 ins_encode %{
15983 __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
15984 __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ S, 1);
15985 __ addw($dst$$Register, $src1$$Register, $tmp$$Register);
15986 __ addw($dst$$Register, $dst$$Register, $tmp2$$Register);
15987 %}
15988 ins_pipe(pipe_class_default);
15989 %}
15990
15991 instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
15992 %{
15993 match(Set dst (AddReductionVI src1 src2));
15994 ins_cost(INSN_COST);
15995 effect(TEMP tmp, TEMP tmp2);
15996 format %{ "addv $tmp, T4S, $src2\n\t"
15997 "umov $tmp2, $tmp, S, 0\n\t"
15998 "addw $dst, $tmp2, $src1\t add reduction4i"
15999 %}
16000 ins_encode %{
16001 __ addv(as_FloatRegister($tmp$$reg), __ T4S,
16002 as_FloatRegister($src2$$reg));
16003 __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
16004 __ addw($dst$$Register, $tmp2$$Register, $src1$$Register);
16005 %}
16006 ins_pipe(pipe_class_default);
16007 %}
16008
// Multiply-reduce a packed 2I vector times a scalar into a scalar int:
// extract each lane with umov and fold it into $dst with scalar muls.
// Fix: dropped the stray trailing "\n\t" from the format string (the
// sibling reduce_add2I ends without one); it emitted a dangling blank
// continuation line in -XX:+PrintOptoAssembly listings.
instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "mul   $dst, $tmp, $src1\n\t"
            "umov  $tmp, $src2, S, 1\n\t"
            "mul   $dst, $tmp, $dst\t mul reduction2i"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp$$Register, $src1$$Register);
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16027
// Multiply-reduce a packed 4I vector times a scalar: fold the upper
// 64 bits onto the lower with ins + vector mulv (pairwise products),
// then extract the two remaining lanes and finish with scalar muls.
// Fix: dropped the stray trailing "\n\t" from the format string (the
// sibling reduce_add4I ends without one); it emitted a dangling blank
// continuation line in -XX:+PrintOptoAssembly listings.
instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  format %{ "ins   $tmp, $src2, 0, 1\n\t"
            "mul   $tmp, $tmp, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "mul   $dst, $tmp2, $src1\n\t"
            "umov  $tmp2, $tmp, S, 1\n\t"
            "mul   $dst, $tmp2, $dst\t mul reduction4i"
  %}
  ins_encode %{
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ mulv(as_FloatRegister($tmp$$reg), __ T2S,
            as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp2$$Register, $src1$$Register);
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp2$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
16052
// reduce_add2F: float add-reduction of a 2-lane vector folded with a scalar.
// dst = (src1 + src2[0]) + src2[1]; sequential adds preserve FP ordering.
instruct reduce_add2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\t add reduction2f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    // Bring lane 1 down to lane 0 of tmp so scalar fadds can reach it.
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16072
// reduce_add4F: float add-reduction of a 4-lane vector folded with a scalar.
// Adds lanes one at a time (not addv) to keep strict left-to-right FP order.
instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins $tmp, S, $src2, 0, 2\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins $tmp, S, $src2, 0, 3\n\t"
            "fadds $dst, $dst, $tmp\t add reduction4f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    // Each ins copies the next source lane into lane 0 of tmp for scalar fadds.
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16104
// reduce_mul2F: float multiply-reduction of a 2-lane vector folded with a scalar.
// dst = (src1 * src2[0]) * src2[1]
instruct reduce_mul2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Fixed disassembly annotation: this is a 2-lane MUL reduction,
  // not "add reduction4f".
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction2f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    // Bring lane 1 down to lane 0 of tmp so scalar fmuls can reach it.
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16124
// reduce_mul4F: float multiply-reduction of a 4-lane vector folded with a scalar.
// Multiplies lanes one at a time to keep strict left-to-right FP order.
instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Fixed disassembly annotation: this is a MUL reduction, not "add".
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins $tmp, S, $src2, 0, 2\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins $tmp, S, $src2, 0, 3\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction4f"
  %}
  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    // Each ins copies the next source lane into lane 0 of tmp for scalar fmuls.
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16156
// reduce_add2D: double add-reduction of a 2-lane vector folded with a scalar.
// dst = (src1 + src2[0]) + src2[1]
instruct reduce_add2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "faddd $dst, $src1, $src2\n\t"
            "ins $tmp, D, $src2, 0, 1\n\t"
            "faddd $dst, $dst, $tmp\t add reduction2d"
  %}
  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    // Bring lane 1 down to lane 0 of tmp so scalar faddd can reach it.
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16176
// reduce_mul2D: double multiply-reduction of a 2-lane vector folded with a scalar.
// dst = (src1 * src2[0]) * src2[1]
instruct reduce_mul2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Fixed disassembly annotation: this is a MUL reduction, not "add".
  format %{ "fmuld $dst, $src1, $src2\n\t"
            "ins $tmp, D, $src2, 0, 1\n\t"
            "fmuld $dst, $dst, $tmp\t mul reduction2d"
  %}
  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    // Bring lane 1 down to lane 0 of tmp so scalar fmuld can reach it.
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
16196
16197 // ====================VECTOR ARITHMETIC=======================================
16198
16199 // --------------------------------- ADD --------------------------------------
16200
// vadd8B: element-wise byte add on a 64-bit vector (also covers 4B vectors).
instruct vadd8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
16215
// vadd16B: element-wise byte add on a 128-bit vector.
instruct vadd16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
16229
// vadd4S: element-wise short add on a 64-bit vector (also covers 2S vectors).
instruct vadd4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
16244
// vadd8S: element-wise short add on a 128-bit vector.
instruct vadd8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
16258
// vadd2I: element-wise int add on a 64-bit vector.
instruct vadd2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
16272
// vadd4I: element-wise int add on a 128-bit vector.
instruct vadd4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
16286
// vadd2L: element-wise long add on a 128-bit vector.
instruct vadd2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
16300
// vadd2F: element-wise float add on a 64-bit vector.
instruct vadd2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}
16314
// vadd4F: element-wise float add on a 128-bit vector.
instruct vadd4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
16328
// vadd2D: element-wise double add on a 128-bit vector.
instruct vadd2D(vecX dst, vecX src1, vecX src2)
%{
  // Guard on vector length for consistency with every other 2D rule
  // (vsub2D, vmul2D, vdiv2D all carry this predicate).
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
16341
16342 // --------------------------------- SUB --------------------------------------
16343
// vsub8B: element-wise byte subtract on a 64-bit vector (also covers 4B).
instruct vsub8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
16358
// vsub16B: element-wise byte subtract on a 128-bit vector.
instruct vsub16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
16372
// vsub4S: element-wise short subtract on a 64-bit vector (also covers 2S).
instruct vsub4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
16387
// vsub8S: element-wise short subtract on a 128-bit vector.
instruct vsub8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
16401
// vsub2I: element-wise int subtract on a 64-bit vector.
instruct vsub2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
16415
// vsub4I: element-wise int subtract on a 128-bit vector.
instruct vsub4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
16429
// vsub2L: element-wise long subtract on a 128-bit vector.
instruct vsub2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
16443
// vsub2F: element-wise float subtract on a 64-bit vector.
instruct vsub2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}
16457
// vsub4F: element-wise float subtract on a 128-bit vector.
instruct vsub4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
16471
// vsub2D: element-wise double subtract on a 128-bit vector.
instruct vsub2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
16485
16486 // --------------------------------- MUL --------------------------------------
16487
// vmul4S: element-wise short multiply on a 64-bit vector (also covers 2S).
instruct vmul4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}
16502
// vmul8S: element-wise short multiply on a 128-bit vector.
instruct vmul8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}
16516
// vmul2I: element-wise int multiply on a 64-bit vector.
instruct vmul2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}
16530
// vmul4I: element-wise int multiply on a 128-bit vector.
instruct vmul4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}
16544
// vmul2F: element-wise float multiply on a 64-bit vector.
instruct vmul2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}
16558
// vmul4F: element-wise float multiply on a 128-bit vector.
instruct vmul4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16572
// vmul2D: element-wise double multiply on a 128-bit vector.
instruct vmul2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16586
16587 // --------------------------------- MLA --------------------------------------
16588
// vmla4S: fused multiply-accumulate, shorts, 64-bit vector (also covers 2S).
// Matches dst += src1 * src2 (AddVS of a MulVS into dst).
instruct vmla4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}
16603
// vmla8S: fused multiply-accumulate, shorts, 128-bit vector (dst += src1*src2).
instruct vmla8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
16617
// vmla2I: fused multiply-accumulate, ints, 64-bit vector (dst += src1*src2).
instruct vmla2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}
16631
// vmla4I: fused multiply-accumulate, ints, 128-bit vector (dst += src1*src2).
instruct vmla4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
16645
16646 // --------------------------------- MLS --------------------------------------
16647
// vmls4S: fused multiply-subtract, shorts, 64-bit vector (also covers 2S).
// Matches dst -= src1 * src2 (SubVS of a MulVS from dst).
instruct vmls4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}
16662
// vmls8S: fused multiply-subtract, shorts, 128-bit vector (dst -= src1*src2).
instruct vmls8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
16676
// vmls2I: fused multiply-subtract, ints, 64-bit vector (dst -= src1*src2).
instruct vmls2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}
16690
// vmls4I: fused multiply-subtract, ints, 128-bit vector (dst -= src1*src2).
instruct vmls4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
16704
16705 // --------------------------------- DIV --------------------------------------
16706
// vdiv2F: element-wise float divide on a 64-bit vector.
instruct vdiv2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}
16720
// vdiv4F: element-wise float divide on a 128-bit vector.
instruct vdiv4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16734
// vdiv2D: element-wise double divide on a 128-bit vector.
instruct vdiv2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16748
16749 // --------------------------------- SQRT -------------------------------------
16750
// vsqrt2D: element-wise double square root on a 128-bit vector.
// NOTE(review): unlike its siblings this rule declares no ins_cost —
// presumably intentional (default cost applies); confirm before changing.
instruct vsqrt2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "fsqrt $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ fsqrt(as_FloatRegister($dst$$reg), __ T2D,
             as_FloatRegister($src$$reg));
  %}
  ins_pipe(vsqrt_fp128);
%}
16762
16763 // --------------------------------- ABS --------------------------------------
16764
// vabs2F: element-wise float absolute value on a 64-bit vector.
instruct vabs2F(vecD dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AbsVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs $dst,$src\t# vector (2S)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp64);
%}
16777
// vabs4F: element-wise float absolute value on a 128-bit vector.
instruct vabs4F(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AbsVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs $dst,$src\t# vector (4S)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
16790
// vabs2D: element-wise double absolute value on a 128-bit vector.
instruct vabs2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AbsVD src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs $dst,$src\t# vector (2D)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
16803
16804 // --------------------------------- NEG --------------------------------------
16805
// vneg2F: element-wise float negation on a 64-bit vector.
instruct vneg2F(vecD dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (NegVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg $dst,$src\t# vector (2S)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp64);
%}
16818
// vneg4F: element-wise float negation on a 128-bit vector.
instruct vneg4F(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (NegVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg $dst,$src\t# vector (4S)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
16831
// vneg2D: element-wise double negation on a 128-bit vector.
instruct vneg2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (NegVD src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg $dst,$src\t# vector (2D)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
16844
16845 // --------------------------------- AND --------------------------------------
16846
// vand8B: bitwise AND on a 64-bit vector (predicate is byte-length based
// since the operation is type-agnostic).
instruct vand8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    // andr: assembler name for SIMD AND ("and" clashes with the C++ keyword).
    __ andr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
16861
// vand16B: bitwise AND on a 128-bit vector.
instruct vand16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
16875
16876 // --------------------------------- OR ---------------------------------------
16877
// vor8B: bitwise OR on a 64-bit vector.
instruct vor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  // Fixed disassembly annotation: the emitted instruction is orr, not and
  // (copy/paste from vand8B); matches vor16B's format.
  format %{ "orr $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T8B,
           as_FloatRegister($src1$$reg),
           as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
16892
// vor16B: bitwise OR on a 128-bit vector.
instruct vor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($src1$$reg),
           as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
16906
16907 // --------------------------------- XOR --------------------------------------
16908
// vxor8B: bitwise XOR on a 64-bit vector.
instruct vxor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    // eor: AArch64 mnemonic for exclusive-or.
    __ eor(as_FloatRegister($dst$$reg), __ T8B,
           as_FloatRegister($src1$$reg),
           as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
16923
// vxor16B: bitwise XOR on a 128-bit vector.
instruct vxor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($src1$$reg),
           as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
16937
16938 // ------------------------------ Shift ---------------------------------------
16939
// vshiftcntL: broadcast a scalar left-shift count into every vector lane.
instruct vshiftcntL(vecX dst, iRegIorL2I cnt) %{
  match(Set dst (LShiftCntV cnt));
  format %{ "dup $dst, $cnt\t# shift count (vecX)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
16948
16949 // Right shifts on aarch64 SIMD are implemented as left shift by -ve amount
// vshiftcntR: broadcast a right-shift count, then negate each lane —
// AArch64 SIMD variable right shifts are left shifts by a negative amount.
instruct vshiftcntR(vecX dst, iRegIorL2I cnt) %{
  match(Set dst (RShiftCntV cnt));
  format %{ "dup $dst, $cnt\t# shift count (vecX)\n\tneg $dst, $dst\t T16B" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
    __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
16959
// vsll8B: variable shift of bytes, 64-bit vector. sshl handles both left and
// (arithmetic) right shifts since the right-shift count is pre-negated by
// vshiftcntR — hence the dual match clauses.
instruct vsll8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
16974
// vsll16B: variable shift of bytes, 128-bit vector (see vsll8B for why both
// left and right shifts map onto sshl).
instruct vsll16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
16988
// vsrl8B: variable logical (unsigned) right shift of bytes, 64-bit vector.
// ushl with the pre-negated count from vshiftcntR performs the right shift.
instruct vsrl8B(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
17002
// vsrl16B: variable logical (unsigned) right shift of bytes, 128-bit vector.
instruct vsrl16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushl $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17015
// vsll8B_imm: left shift of bytes by an immediate, 64-bit vector.
instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    // Java masks int shift counts to 0..31 (& 31); a byte shifted left by
    // 8 or more is all zeros, which shl cannot encode, so emit eor src,src
    // to zero the destination instead.
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}
17035
// Vector shift left by immediate for 16 byte lanes.
// Counts >= 8 (after Java's &31 masking) zero the destination via
// EOR src,src instead of an out-of-range shift immediate.
instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17054
// Vector arithmetic right shift by immediate for byte lanes, 64-bit vector.
// Counts >= 8 saturate to 7 (an arithmetic shift by >= lane width just
// replicates the sign bit, which sshr #7 achieves).
// NOTE(review): the value handed to sshr is pre-transformed as -sh & 7,
// i.e. (8 - sh) & 7 -- this matches an sshr() helper that expects the raw
// immh:immb encoding payload rather than the shift amount; confirm against
// the assembler definition before changing.
instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) sh = 7;
    sh = -sh & 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T8B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}
17070
// Vector arithmetic right shift by immediate for 16 byte lanes.
// Counts >= 8 saturate to 7 (sign fill); the shift is passed pre-encoded
// as -sh & 7 -- see the NOTE on vsra8B_imm.
instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) sh = 15;
    sh = -sh & 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}
17085
// Vector logical right shift by immediate for byte lanes, 64-bit vector.
// Counts >= 8 zero the destination via EOR src,src (a logical shift by the
// lane width or more yields 0). For in-range counts the shift is passed
// pre-encoded as -sh & 7 -- see the NOTE on vsra8B_imm.
instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), -sh & 7);
    }
  %}
  ins_pipe(vshift64_imm);
%}
17105
// Vector logical right shift by immediate for 16 byte lanes.
// Counts >= 8 produce zero via EOR src,src; in-range counts are passed
// pre-encoded as -sh & 7 -- see the NOTE on vsra8B_imm.
instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), -sh & 7);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17124
// Vector shift (register) for short lanes, 64-bit vector (4H; the
// length==2 predicate arm also covers 2-short vectors).
// SSHL serves both shift directions via the sign of the per-lane count
// (see vsll8B for the RShiftV negated-count assumption).
instruct vsll4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
17139
// Vector shift (register) for 8 short lanes (128-bit vector); SSHL serves
// both directions via the sign of the per-lane count.
instruct vsll8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17153
// Vector logical right shift by register for short lanes, 64-bit vector.
// USHL shifts right when the per-lane count is negative (see vsrl8B).
instruct vsrl4S(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
17167
// Vector logical right shift by register for 8 short lanes (see vsrl8B for
// the USHL negated-count convention).
instruct vsrl8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17180
// Vector shift left by immediate for short lanes, 64-bit vector.
// Counts >= 16 (after Java's &31 masking) shift everything out of a 16-bit
// lane, so the destination is zeroed with EOR src,src (byte arrangement is
// fine for a bitwise clear).
instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}
17200
// Vector shift left by immediate for 8 short lanes.
// Counts >= 16 zero the destination via EOR src,src instead of an
// out-of-range hardware immediate.
instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17219
// Vector arithmetic right shift by immediate for short lanes, 64-bit
// vector. Counts >= 16 saturate to 15 (sign fill); the shift is passed
// pre-encoded as -sh & 15 -- see the NOTE on vsra8B_imm for the encoding
// convention.
instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) sh = 15;
    sh = -sh & 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T4H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}
17235
// Vector arithmetic right shift by immediate for 8 short lanes.
// Counts >= 16 saturate to 15 (sign fill); shift passed pre-encoded as
// -sh & 15 (see vsra8B_imm).
instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) sh = 15;
    sh = -sh & 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T8H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}
17250
// Vector logical right shift by immediate for short lanes, 64-bit vector.
// Counts >= 16 yield zero (EOR src,src); in-range counts are passed
// pre-encoded as -sh & 15 (see vsra8B_imm).
instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), -sh & 15);
    }
  %}
  ins_pipe(vshift64_imm);
%}
17270
// Vector logical right shift by immediate for 8 short lanes.
// Counts >= 16 yield zero (EOR src,src); in-range counts passed
// pre-encoded as -sh & 15 (see vsra8B_imm).
instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant & 31;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), -sh & 15);
    }
  %}
  ins_pipe(vshift128_imm);
%}
17289
// Vector shift (register) for 2 int lanes (64-bit vector); SSHL serves
// both directions via the sign of the per-lane count (see vsll8B).
instruct vsll2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
17303
// Vector shift (register) for 4 int lanes (128-bit vector); SSHL serves
// both directions via the sign of the per-lane count (see vsll8B).
instruct vsll4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17317
// Vector logical right shift by register for 2 int lanes (see vsrl8B for
// the USHL negated-count convention).
instruct vsrl2I(vecD dst, vecD src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}
17330
// Vector logical right shift by register for 4 int lanes (see vsrl8B for
// the USHL negated-count convention).
instruct vsrl4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17343
// Vector shift left by immediate for 2 int lanes. The &31 mask matches
// Java's shift-count semantics exactly, and 31 is the maximum valid lane
// shift, so no out-of-range case exists (unlike the byte/short rules).
instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}
17356
// Vector shift left by immediate for 4 int lanes; &31 matches Java's
// count masking, so every masked count is in range for a 32-bit lane.
instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}
17369
// Vector arithmetic right shift by immediate for 2 int lanes.
// The shift is passed pre-encoded as -count & 31 -- see the NOTE on
// vsra8B_imm for the immh:immb encoding convention.
instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}
17382
// Vector arithmetic right shift by immediate for 4 int lanes; shift passed
// pre-encoded as -count & 31 (see vsra8B_imm).
instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}
17395
// Vector logical right shift by immediate for 2 int lanes; shift passed
// pre-encoded as -count & 31 (see vsra8B_imm).
instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift64_imm);
%}
17408
// Vector logical right shift by immediate for 4 int lanes; shift passed
// pre-encoded as -count & 31 (see vsra8B_imm).
instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 31);
  %}
  ins_pipe(vshift128_imm);
%}
17421
// Vector shift (register) for 2 long lanes (128-bit vector); SSHL serves
// both directions via the sign of the per-lane count (see vsll8B).
instruct vsll2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshl $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17435
// Vector logical right shift by register for 2 long lanes (see vsrl8B for
// the USHL negated-count convention).
instruct vsrl2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushl $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ ushl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
17448
// Vector shift left by immediate for 2 long lanes. The &63 mask matches
// Java's long shift-count semantics, so every masked count is in range.
instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "shl $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}
17461
// Vector arithmetic right shift by immediate for 2 long lanes; shift
// passed pre-encoded as -count & 63 (see the NOTE on vsra8B_imm).
instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshr $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}
17474
// Vector logical right shift by immediate for 2 long lanes; shift passed
// pre-encoded as -count & 63 (see the NOTE on vsra8B_imm).
instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushr $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            -(int)$shift$$constant & 63);
  %}
  ins_pipe(vshift128_imm);
%}
17487
17488 //----------PEEPHOLE RULES-----------------------------------------------------
17489 // These must follow all instruction definitions as they use the names
17490 // defined in the instructions definitions.
17491 //
17492 // peepmatch ( root_instr_name [preceding_instruction]* );
17493 //
17494 // peepconstraint %{
17495 // (instruction_number.operand_name relational_op instruction_number.operand_name
17496 // [, ...] );
17497 // // instruction numbers are zero-based using left to right order in peepmatch
17498 //
17499 // peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
17500 // // provide an instruction_number.operand_name for each operand that appears
17501 // // in the replacement instruction's match rule
17502 //
17503 // ---------VM FLAGS---------------------------------------------------------
17504 //
17505 // All peephole optimizations can be turned off using -XX:-OptoPeephole
17506 //
17507 // Each peephole rule is given an identifying number starting with zero and
17508 // increasing by one in the order seen by the parser. An individual peephole
17509 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
17510 // on the command-line.
17511 //
17512 // ---------CURRENT LIMITATIONS----------------------------------------------
17513 //
17514 // Only match adjacent instructions in same basic block
17515 // Only equality constraints
17516 // Only constraints between operands, not (0.dest_reg == RAX_enc)
17517 // Only one replacement instruction
17518 //
17519 // ---------EXAMPLE----------------------------------------------------------
17520 //
17521 // // pertinent parts of existing instructions in architecture description
17522 // instruct movI(iRegINoSp dst, iRegI src)
17523 // %{
17524 // match(Set dst (CopyI src));
17525 // %}
17526 //
17527 // instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
17528 // %{
17529 // match(Set dst (AddI dst src));
17530 // effect(KILL cr);
17531 // %}
17532 //
17533 // // Change (inc mov) to lea
17534 // peephole %{
// // increment preceded by register-register move
17536 // peepmatch ( incI_iReg movI );
17537 // // require that the destination register of the increment
17538 // // match the destination register of the move
17539 // peepconstraint ( 0.dst == 1.dst );
17540 // // construct a replacement instruction that sets
17541 // // the destination to ( move's source register + one )
17542 // peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
17543 // %}
17544 //
17545
17546 // Implementation no longer uses movX instructions since
17547 // machine-independent system no longer uses CopyX nodes.
17548 //
17549 // peephole
17550 // %{
17551 // peepmatch (incI_iReg movI);
17552 // peepconstraint (0.dst == 1.dst);
17553 // peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
17554 // %}
17555
17556 // peephole
17557 // %{
17558 // peepmatch (decI_iReg movI);
17559 // peepconstraint (0.dst == 1.dst);
17560 // peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
17561 // %}
17562
17563 // peephole
17564 // %{
17565 // peepmatch (addI_iReg_imm movI);
17566 // peepconstraint (0.dst == 1.dst);
17567 // peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
17568 // %}
17569
17570 // peephole
17571 // %{
17572 // peepmatch (incL_iReg movL);
17573 // peepconstraint (0.dst == 1.dst);
17574 // peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
17575 // %}
17576
17577 // peephole
17578 // %{
17579 // peepmatch (decL_iReg movL);
17580 // peepconstraint (0.dst == 1.dst);
17581 // peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
17582 // %}
17583
17584 // peephole
17585 // %{
17586 // peepmatch (addL_iReg_imm movL);
17587 // peepconstraint (0.dst == 1.dst);
17588 // peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
17589 // %}
17590
17591 // peephole
17592 // %{
17593 // peepmatch (addP_iReg_imm movP);
17594 // peepconstraint (0.dst == 1.dst);
17595 // peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
17596 // %}
17597
17598 // // Change load of spilled value to only a spill
17599 // instruct storeI(memory mem, iRegI src)
17600 // %{
17601 // match(Set mem (StoreI mem src));
17602 // %}
17603 //
17604 // instruct loadI(iRegINoSp dst, memory mem)
17605 // %{
17606 // match(Set dst (LoadI mem));
17607 // %}
17608 //
17609
17610 //----------SMARTSPILL RULES---------------------------------------------------
17611 // These must follow all instruction definitions as they use the names
17612 // defined in the instructions definitions.
17613
17614 // Local Variables:
17615 // mode: c++
17616 // End: