//
// Copyright (c) 2011, 2017, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers.  512-bit registers, i.e. 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics, array copy
// stubs and superword operations (see UseSSE42Intrinsics, UseXMMForArrayCopy
// and UseSuperword flags).
// For pre-EVEX enabled architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
// For EVEX enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
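//
// Reading of a definition, using the first reg_def below as the example:
//
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
//
// declares word (a) of xmm0 as Save-On-Call for both the VM and the C
// calling convention, spilled as a Float (Op_RegF), with encoding 0,
// backed by the first 32-bit VMReg slot of xmm0.  XMM0b through XMM0p
// name the remaining fifteen words via as_VMReg()->next(1)..next(15),
// so a full 512-bit value occupies all sixteen slots of one register.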
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//              (only their lower 128 bits; XMM16-XMM31 are volatile)
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#endif // _LP64

#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                  ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                  ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                   );
// flags allocation class should be last.
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre evex float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15
#endif
                           );

// Class for evex float registers
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7
#ifdef _LP64
                        ,XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31
#endif
                         );

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
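//
// Note: each reg_class_dynamic (float_reg above is the first) selects
// between its two static classes based on the given predicate: when
// VM_Version::supports_evex() is true the *_evex variant (XMM0-XMM31) is
// in effect, otherwise the *_legacy variant (XMM0-XMM15, or XMM0-XMM7
// without _LP64).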
// Class for pre evex double registers
reg_class double_reg_legacy(XMM0, XMM0b,
                            XMM1, XMM1b,
                            XMM2, XMM2b,
                            XMM3, XMM3b,
                            XMM4, XMM4b,
                            XMM5, XMM5b,
                            XMM6, XMM6b,
                            XMM7, XMM7b
#ifdef _LP64
                           ,XMM8, XMM8b,
                            XMM9, XMM9b,
                            XMM10, XMM10b,
                            XMM11, XMM11b,
                            XMM12, XMM12b,
                            XMM13, XMM13b,
                            XMM14, XMM14b,
                            XMM15, XMM15b
#endif
                            );

// Class for evex double registers
reg_class double_reg_evex(XMM0, XMM0b,
                          XMM1, XMM1b,
                          XMM2, XMM2b,
                          XMM3, XMM3b,
                          XMM4, XMM4b,
                          XMM5, XMM5b,
                          XMM6, XMM6b,
                          XMM7, XMM7b
#ifdef _LP64
                         ,XMM8, XMM8b,
                          XMM9, XMM9b,
                          XMM10, XMM10b,
                          XMM11, XMM11b,
                          XMM12, XMM12b,
                          XMM13, XMM13b,
                          XMM14, XMM14b,
                          XMM15, XMM15b,
                          XMM16, XMM16b,
                          XMM17, XMM17b,
                          XMM18, XMM18b,
                          XMM19, XMM19b,
                          XMM20, XMM20b,
                          XMM21, XMM21b,
                          XMM22, XMM22b,
                          XMM23, XMM23b,
                          XMM24, XMM24b,
                          XMM25, XMM25b,
                          XMM26, XMM26b,
                          XMM27, XMM27b,
                          XMM28, XMM28b,
                          XMM29, XMM29b,
                          XMM30, XMM30b,
                          XMM31, XMM31b
#endif
                          );

reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre evex 32bit vector registers
reg_class vectors_reg_legacy(XMM0,
                             XMM1,
                             XMM2,
                             XMM3,
                             XMM4,
                             XMM5,
                             XMM6,
                             XMM7
#ifdef _LP64
                            ,XMM8,
                             XMM9,
                             XMM10,
                             XMM11,
                             XMM12,
                             XMM13,
                             XMM14,
                             XMM15
#endif
                             );

// Class for evex 32bit vector registers
reg_class vectors_reg_evex(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15,
                           XMM16,
                           XMM17,
                           XMM18,
                           XMM19,
                           XMM20,
                           XMM21,
                           XMM22,
                           XMM23,
                           XMM24,
                           XMM25,
                           XMM26,
                           XMM27,
                           XMM28,
                           XMM29,
                           XMM30,
                           XMM31
#endif
                           );

reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre evex 64bit vector registers
reg_class vectord_reg_legacy(XMM0, XMM0b,
                             XMM1, XMM1b,
                             XMM2, XMM2b,
                             XMM3, XMM3b,
                             XMM4, XMM4b,
                             XMM5, XMM5b,
                             XMM6, XMM6b,
                             XMM7, XMM7b
#ifdef _LP64
                            ,XMM8, XMM8b,
                             XMM9, XMM9b,
                             XMM10, XMM10b,
                             XMM11, XMM11b,
                             XMM12, XMM12b,
                             XMM13, XMM13b,
                             XMM14, XMM14b,
                             XMM15, XMM15b
#endif
                             );

// Class for evex 64bit vector registers
reg_class vectord_reg_evex(XMM0, XMM0b,
                           XMM1, XMM1b,
                           XMM2, XMM2b,
                           XMM3, XMM3b,
                           XMM4, XMM4b,
                           XMM5, XMM5b,
                           XMM6, XMM6b,
                           XMM7, XMM7b
#ifdef _LP64
                          ,XMM8, XMM8b,
                           XMM9, XMM9b,
                           XMM10, XMM10b,
                           XMM11, XMM11b,
                           XMM12, XMM12b,
                           XMM13, XMM13b,
                           XMM14, XMM14b,
                           XMM15, XMM15b,
                           XMM16, XMM16b,
                           XMM17, XMM17b,
                           XMM18, XMM18b,
                           XMM19, XMM19b,
                           XMM20, XMM20b,
                           XMM21, XMM21b,
                           XMM22, XMM22b,
                           XMM23, XMM23b,
                           XMM24, XMM24b,
                           XMM25, XMM25b,
                           XMM26, XMM26b,
                           XMM27, XMM27b,
                           XMM28, XMM28b,
                           XMM29, XMM29b,
                           XMM30, XMM30b,
                           XMM31, XMM31b
#endif
                           );

reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre evex 128bit vector registers
reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
                             XMM1, XMM1b, XMM1c, XMM1d,
                             XMM2, XMM2b, XMM2c, XMM2d,
                             XMM3, XMM3b, XMM3c, XMM3d,
                             XMM4, XMM4b, XMM4c, XMM4d,
                             XMM5, XMM5b, XMM5c, XMM5d,
                             XMM6, XMM6b, XMM6c, XMM6d,
                             XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d,
                             XMM9, XMM9b, XMM9c, XMM9d,
                             XMM10, XMM10b, XMM10c, XMM10d,
                             XMM11, XMM11b, XMM11c, XMM11d,
                             XMM12, XMM12b, XMM12c, XMM12d,
                             XMM13, XMM13b, XMM13c, XMM13d,
                             XMM14, XMM14b, XMM14c, XMM14d,
                             XMM15, XMM15b, XMM15c, XMM15d
#endif
                             );

// Class for evex 128bit vector registers
reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
                           XMM1, XMM1b, XMM1c, XMM1d,
                           XMM2, XMM2b, XMM2c, XMM2d,
                           XMM3, XMM3b, XMM3c, XMM3d,
                           XMM4, XMM4b, XMM4c, XMM4d,
                           XMM5, XMM5b, XMM5c, XMM5d,
                           XMM6, XMM6b, XMM6c, XMM6d,
                           XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d,
                           XMM9, XMM9b, XMM9c, XMM9d,
                           XMM10, XMM10b, XMM10c, XMM10d,
                           XMM11, XMM11b, XMM11c, XMM11d,
                           XMM12, XMM12b, XMM12c, XMM12d,
                           XMM13, XMM13b, XMM13c, XMM13d,
                           XMM14, XMM14b, XMM14c, XMM14d,
                           XMM15, XMM15b, XMM15c, XMM15d,
                           XMM16, XMM16b, XMM16c, XMM16d,
                           XMM17, XMM17b, XMM17c, XMM17d,
                           XMM18, XMM18b, XMM18c, XMM18d,
                           XMM19, XMM19b, XMM19c, XMM19d,
                           XMM20, XMM20b, XMM20c, XMM20d,
                           XMM21, XMM21b, XMM21c, XMM21d,
                           XMM22, XMM22b, XMM22c, XMM22d,
                           XMM23, XMM23b, XMM23c, XMM23d,
                           XMM24, XMM24b, XMM24c, XMM24d,
                           XMM25, XMM25b, XMM25c, XMM25d,
                           XMM26, XMM26b, XMM26c, XMM26d,
                           XMM27, XMM27b, XMM27c, XMM27d,
                           XMM28, XMM28b, XMM28c, XMM28d,
                           XMM29, XMM29b, XMM29c, XMM29d,
                           XMM30, XMM30b, XMM30c, XMM30d,
                           XMM31, XMM31b, XMM31c, XMM31d
#endif
                           );

reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for pre evex 256bit vector registers
reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                             XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                             XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                             XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                             XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                             XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                             XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                             XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                             XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                             );

// Class for evex 256bit vector registers
reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
#endif
                           );

reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );

// Class for all 512bit vector registers
reg_class vectorz_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                      XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                      XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                      XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                      XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                      XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                      XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                      XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                     ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                      XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                     ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                      XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                      XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                      XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                      XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                      XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                      XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                      XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                      XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                      XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                      XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                      XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                      XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                      XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                      XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                      XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                      );
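//
// The xmmN_reg/ymmN_reg/zmmN_reg classes below are singletons covering the
// low 4 words (128 bits), low 8 words (256 bits) and all 16 words (512 bits)
// of a single register, so an operand can be pinned to one particular
// XMM register.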
XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p); 1077 1078 reg_class xmm2_reg(XMM2, XMM2b, XMM2c, XMM2d); 1079 reg_class ymm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h); 1080 reg_class zmm2_reg(XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p); 1081 1082 reg_class xmm3_reg(XMM3, XMM3b, XMM3c, XMM3d); 1083 reg_class ymm3_reg(XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h); 1084 reg_class zmm3_reg(XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p); 1085 1086 reg_class xmm4_reg(XMM4, XMM4b, XMM4c, XMM4d); 1087 reg_class ymm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h); 1088 reg_class zmm4_reg(XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p); 1089 1090 reg_class xmm5_reg(XMM5, XMM5b, XMM5c, XMM5d); 1091 reg_class ymm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h); 1092 reg_class zmm5_reg(XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p); 1093 1094 reg_class xmm6_reg(XMM6, XMM6b, XMM6c, XMM6d); 1095 reg_class ymm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h); 1096 reg_class zmm6_reg(XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p); 1097 1098 reg_class xmm7_reg(XMM7, XMM7b, XMM7c, XMM7d); 1099 reg_class ymm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h); 1100 reg_class zmm7_reg(XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p); 1101 1102 #ifdef _LP64 1103 1104 reg_class xmm8_reg(XMM8, XMM8b, XMM8c, XMM8d); 1105 reg_class ymm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h); 1106 reg_class zmm8_reg(XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p); 1107 1108 reg_class xmm9_reg(XMM9, XMM9b, XMM9c, XMM9d); 1109 reg_class ymm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h); 1110 reg_class zmm9_reg(XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p); 1111 1112 reg_class xmm10_reg(XMM10, XMM10b, XMM10c, XMM10d); 1113 reg_class ymm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h); 1114 reg_class zmm10_reg(XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p); 1115 1116 reg_class xmm11_reg(XMM11, XMM11b, XMM11c, XMM11d); 1117 reg_class ymm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h); 1118 reg_class zmm11_reg(XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p); 1119 1120 reg_class xmm12_reg(XMM12, XMM12b, XMM12c, XMM12d); 1121 reg_class ymm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h); 1122 reg_class zmm12_reg(XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p); 1123 1124 reg_class xmm13_reg(XMM13, XMM13b, XMM13c, XMM13d); 1125 reg_class ymm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h); 1126 reg_class zmm13_reg(XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p); 1127 1128 reg_class xmm14_reg(XMM14, XMM14b, XMM14c, XMM14d); 1129 
reg_class ymm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h); 1130 reg_class zmm14_reg(XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p); 1131 1132 reg_class xmm15_reg(XMM15, XMM15b, XMM15c, XMM15d); 1133 reg_class ymm15_reg(XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h); 1134 reg_class zmm15_reg(XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p); 1135 1136 reg_class xmm16_reg(XMM16, XMM16b, XMM16c, XMM16d); 1137 reg_class ymm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h); 1138 reg_class zmm16_reg(XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p); 1139 1140 reg_class xmm17_reg(XMM17, XMM17b, XMM17c, XMM17d); 1141 reg_class ymm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h); 1142 reg_class zmm17_reg(XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p); 1143 1144 reg_class xmm18_reg(XMM18, XMM18b, XMM18c, XMM18d); 1145 reg_class ymm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h); 1146 reg_class zmm18_reg(XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p); 1147 1148 reg_class xmm19_reg(XMM19, XMM19b, XMM19c, XMM19d); 1149 reg_class ymm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h); 1150 reg_class zmm19_reg(XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p); 1151 1152 reg_class xmm20_reg(XMM20, XMM20b, XMM20c, XMM20d); 1153 reg_class ymm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h); 1154 reg_class zmm20_reg(XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p); 1155 1156 reg_class xmm21_reg(XMM21, XMM21b, XMM21c, XMM21d); 1157 reg_class ymm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h); 1158 reg_class zmm21_reg(XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p); 1159 1160 reg_class xmm22_reg(XMM22, XMM22b, XMM22c, XMM22d); 1161 reg_class ymm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h); 1162 reg_class zmm22_reg(XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p); 1163 1164 reg_class xmm23_reg(XMM23, XMM23b, XMM23c, XMM23d); 1165 reg_class ymm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h); 1166 reg_class zmm23_reg(XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p); 1167 1168 reg_class xmm24_reg(XMM24, XMM24b, XMM24c, XMM24d); 1169 reg_class ymm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h); 1170 reg_class zmm24_reg(XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p); 1171 1172 reg_class xmm25_reg(XMM25, XMM25b, XMM25c, XMM25d); 1173 reg_class ymm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h); 1174 reg_class zmm25_reg(XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, 
                    XMM25m, XMM25n, XMM25o, XMM25p);

reg_class xmm26_reg(XMM26, XMM26b, XMM26c, XMM26d);
reg_class ymm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h);
reg_class zmm26_reg(XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p);

reg_class xmm27_reg(XMM27, XMM27b, XMM27c, XMM27d);
reg_class ymm27_reg(XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h);
reg_class zmm27_reg(XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p);

reg_class xmm28_reg(XMM28, XMM28b, XMM28c, XMM28d);
reg_class ymm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h);
reg_class zmm28_reg(XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p);

reg_class xmm29_reg(XMM29, XMM29b, XMM29c, XMM29d);
reg_class ymm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h);
reg_class zmm29_reg(XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p);

reg_class xmm30_reg(XMM30, XMM30b, XMM30c, XMM30d);
reg_class ymm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h);
reg_class zmm30_reg(XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p);

reg_class xmm31_reg(XMM31, XMM31b, XMM31c, XMM31d);
reg_class ymm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h);
reg_class zmm31_reg(XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p);

#endif

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description

source_hpp %{
// Header information of the source block.
// Method declarations/definitions which are used outside
// the ad-scope can conveniently be defined here.
//
// To keep related declarations/definitions/uses close together,
// we switch between source %{ }% and source_hpp %{ }% freely as needed.

class NativeJump;

class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // Number of relocations needed by a call trampoline stub.
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
  }

#ifdef _LP64
  static uint size_deopt_handler() {
    // three 5-byte instructions
    return 15;
  }
#else
  static uint size_deopt_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return 5 + NativeJump::instruction_size; // pushl(); jmp;
  }
#endif
};

%} // end source_hpp

source %{

#include "opto/addnode.hpp"

// Emit exception handler code.
// Stuff framesize into a register and call a VM stub routine.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}

// Emit deopt handler code.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

#ifdef _LP64
  address the_pc = (address) __ pc();
  Label next;
  // Push "the_pc" on the stack without destroying any registers,
  // as they may all be live.

  // push address of "next"
  __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
  __ bind(next);
  // adjust it so it matches "the_pc"
  __ subptr(Address(rsp, 0), __ offset() - offset);
#else
  InternalAddress here(__ pc());
  __ pushptr(here.addr());
#endif

  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}


//=============================================================================

// Float masks come from different places depending on platform.
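// All of these point at memory holding the IEEE-754 sign-bit patterns used
// by the Abs/Neg instructs below: 0x7FFFFFFF / 0x7FFFFFFFFFFFFFFF to clear
// the sign bit (AbsF/AbsD) and 0x80000000 / 0x8000000000000000 to flip it
// (NegF/NegD).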
#ifdef _LP64
static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
#else
static address float_signmask()  { return (address)float_signmask_pool; }
static address float_signflip()  { return (address)float_signflip_pool; }
static address double_signmask() { return (address)double_signmask_pool; }
static address double_signflip() { return (address)double_signflip_pool; }
#endif


const bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode))
    return false;

  bool ret_value = true;
  switch (opcode) {
    case Op_PopCountI:
    case Op_PopCountL:
      if (!UsePopCountInstruction)
        ret_value = false;
      break;
    case Op_PopCountVI:
      if (!UsePopCountInstruction || !VM_Version::supports_vpopcntdq())
        ret_value = false;
      break;
    case Op_MulVI:
      if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
        ret_value = false;
      break;
    case Op_MulVL:
    case Op_MulReductionVL:
      if (VM_Version::supports_avx512dq() == false)
        ret_value = false;
      break;
    case Op_AddReductionVL:
      if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here
        ret_value = false;
      break;
    case Op_AddReductionVI:
      if (UseSSE < 3) // requires at least SSE3
        ret_value = false;
      break;
    case Op_MulReductionVI:
      if (UseSSE < 4) // requires at least SSE4
        ret_value = false;
      break;
    case Op_AddReductionVF:
    case Op_AddReductionVD:
    case Op_MulReductionVF:
    case Op_MulReductionVD:
      if (UseSSE < 1) // requires at least SSE
        ret_value = false;
      break;
    case Op_SqrtVD:
    case Op_SqrtVF:
      if (UseAVX < 1) // enabled for AVX only
        ret_value = false;
      break;
    case Op_CompareAndSwapL:
#ifdef _LP64
    case Op_CompareAndSwapP:
#endif
      if (!VM_Version::supports_cx8())
        ret_value = false;
      break;
    case Op_CMoveVF:
    case Op_CMoveVD:
      if (UseAVX < 1 || UseAVX > 2)
        ret_value = false;
      break;
    case Op_StrIndexOf:
      if (!UseSSE42Intrinsics)
        ret_value = false;
      break;
    case Op_StrIndexOfChar:
      if (!UseSSE42Intrinsics)
        ret_value = false;
      break;
    case Op_OnSpinWait:
      if (VM_Version::supports_on_spin_wait() == false)
        ret_value = false;
      break;
  }

  return ret_value;  // By default, match rules are supported.
}

const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
  // Identify extra cases that we might want to provide match rules for,
  // e.g. Op_* vector nodes and other intrinsics, while guarding with vlen.
  bool ret_value = match_rule_supported(opcode);
  if (ret_value) {
    switch (opcode) {
      case Op_AddVB:
      case Op_SubVB:
        if ((vlen == 64) && (VM_Version::supports_avx512bw() == false))
          ret_value = false;
        break;
      case Op_URShiftVS:
      case Op_RShiftVS:
      case Op_LShiftVS:
      case Op_MulVS:
      case Op_AddVS:
      case Op_SubVS:
        if ((vlen == 32) && (VM_Version::supports_avx512bw() == false))
          ret_value = false;
        break;
      case Op_CMoveVF:
        if (vlen != 8)
          ret_value = false;
        break;
      case Op_CMoveVD:
        if (vlen != 4)
          ret_value = false;
        break;
    }
  }

  return ret_value;  // By default, match rules are supported.
}

const bool Matcher::has_predicated_vectors(void) {
  bool ret_value = false;
  if (UseAVX > 2) {
    ret_value = VM_Version::supports_avx512vl();
  }

  return ret_value;
}

const int Matcher::float_pressure(int default_pressure_threshold) {
  int float_pressure_threshold = default_pressure_threshold;
#ifdef _LP64
  if (UseAVX > 2) {
    // Increase pressure threshold on machines with AVX3 which have
    // 2x more XMM registers.
    float_pressure_threshold = default_pressure_threshold * 2;
  }
#endif
  return float_pressure_threshold;
}

// Max vector size in bytes. 0 if not supported.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  if (UseSSE < 2) return 0;
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  // AVX2/EVEX supports 512bit vectors for all types.
  int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
    size = (UseAVX > 2) ? 64 : 32;
  // Use flag to limit vector size.
  size = MIN2(size, (int)MaxVectorSize);
  // Minimum 2 values in vector (or 4 for bytes).
  switch (bt) {
  case T_DOUBLE:
  case T_LONG:
    if (size < 16) return 0;
    break;
  case T_FLOAT:
  case T_INT:
    if (size < 8) return 0;
    break;
  case T_BOOLEAN:
    if (size < 4) return 0;
    break;
  case T_CHAR:
    if (size < 4) return 0;
    break;
  case T_BYTE:
    if (size < 4) return 0;
    break;
  case T_SHORT:
    if (size < 4) return 0;
    break;
  default:
    ShouldNotReachHere();
  }
  return size;
}

// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
const int Matcher::min_vector_size(const BasicType bt) {
  int max_size = max_vector_size(bt);
  // Min size which can be loaded into vector is 4 bytes.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  return MIN2(size, max_size);
}

// Vector ideal reg corresponding to specified size in bytes
const uint Matcher::vector_ideal_reg(int size) {
  assert(MaxVectorSize >= size, "");
  switch(size) {
    case  4: return Op_VecS;
    case  8: return Op_VecD;
    case 16: return Op_VecX;
    case 32: return Op_VecY;
    case 64: return Op_VecZ;
  }
  ShouldNotReachHere();
  return 0;
}
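// Worked example, assuming UseAVX == 2 and MaxVectorSize >= 32:
//   vector_width_in_bytes(T_INT) = (1 << 2) * 8 = 32 bytes,
//   max_vector_size(T_INT)       = 32 / 4      = 8 ints (an Op_VecY vector),
//   min_vector_size(T_INT)       = 2 ints.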
// Only lowest bits of xmm reg are used for vector shift count.
const uint Matcher::vector_shift_count_ideal_reg(int size) {
  return Op_VecS;
}

// x86 supports misaligned vector store/load.
const bool Matcher::misaligned_vectors_ok() {
  return !AlignVector; // can be changed by flag
}

// x86 AES instructions are compatible with SunJCE expanded
// keys, hence we do not need to pass the original key to stubs.
const bool Matcher::pass_original_key_for_aes() {
  return false;
}


const bool Matcher::convi2l_type_required = true;

// Check for shift by small constant as well.
static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
  if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
      shift->in(2)->get_int() <= 3 &&
      // Are there other uses besides address expressions?
      !matcher->is_visited(shift)) {
    address_visited.set(shift->_idx); // Flag as address_visited
    mstack.push(shift->in(2), Matcher::Visit);
    Node *conv = shift->in(1);
#ifdef _LP64
    // Allow the Matcher to match the rule which bypasses
    // the ConvI2L operation for an array index on LP64
    // if the index value is positive.
    if (conv->Opcode() == Op_ConvI2L &&
        conv->as_Type()->type()->is_long()->_lo >= 0 &&
        // Are there other uses besides address expressions?
        !matcher->is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Matcher::Pre_Visit);
    } else
#endif
      mstack.push(conv, Matcher::Pre_Visit);
    return true;
  }
  return false;
}

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions, or compute them
// into registers?
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  Node *off = m->in(AddPNode::Offset);
  if (off->is_Con()) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    Node *adr = m->in(AddPNode::Address);

    // Intel can handle 2 adds in addressing mode.
    // AtomicAdd is not an addressing expression;
    // it is cheap to find by looking for a screwy base.
    if (adr->is_AddP() &&
        !adr->in(AddPNode::Base)->is_top() &&
        // Are there other uses besides address expressions?
        !is_visited(adr)) {
      address_visited.set(adr->_idx); // Flag as address_visited
      Node *shift = adr->in(AddPNode::Offset);
      if (!clone_shift(shift, this, mstack, address_visited)) {
        mstack.push(shift, Pre_Visit);
      }
      mstack.push(adr->in(AddPNode::Address), Pre_Visit);
      mstack.push(adr->in(AddPNode::Base), Pre_Visit);
    } else {
      mstack.push(adr, Pre_Visit);
    }

    // Clone X+offset as it also folds into most addressing expressions.
    mstack.push(off, Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  } else if (clone_shift(off, this, mstack, address_visited)) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}

void Compile::reshape_address(AddPNode* addp) {
}

// Helper methods for MachSpillCopyNode::implementation().
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st) {
  // In the 64-bit VM, size calculation is very complex, so emitting
  // instructions into a scratch buffer is used to get the size there.
  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
  assert(ireg == Op_VecS || // 32bit vector
         (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
         (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
         "no non-adjacent vector moves");
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
    case Op_VecS: // copy whole register
    case Op_VecD:
    case Op_VecX:
      __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    case Op_VecY:
      __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
      break;
    case Op_VecZ:
      __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
      break;
    default:
      ShouldNotReachHere();
    }
    int size = __ offset() - offset;
#ifdef ASSERT
    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
    assert(!do_size || size == 4, "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    switch (ireg) {
    case Op_VecS:
    case Op_VecD:
    case Op_VecX:
      st->print("movdqu  %s,%s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
      break;
    case Op_VecY:
    case Op_VecZ:
      st->print("vmovdqu %s,%s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  return (UseAVX > 2) ? 6 : 4;
}

static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st) {
  // In the 64-bit VM, size calculation is very complex, so emitting
  // instructions into a scratch buffer is used to get the size there.
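  // (e.g. reloading a 16-byte Op_VecX slot emits movdqu xmm, [rsp + offset];
  //  a 64-byte Op_VecZ slot uses evmovdquq -- see the switches below.)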
1692 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 1693 if (cbuf) { 1694 MacroAssembler _masm(cbuf); 1695 int offset = __ offset(); 1696 if (is_load) { 1697 switch (ireg) { 1698 case Op_VecS: 1699 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1700 break; 1701 case Op_VecD: 1702 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1703 break; 1704 case Op_VecX: 1705 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1706 break; 1707 case Op_VecY: 1708 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 1709 break; 1710 case Op_VecZ: 1711 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 1712 break; 1713 default: 1714 ShouldNotReachHere(); 1715 } 1716 } else { // store 1717 switch (ireg) { 1718 case Op_VecS: 1719 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1720 break; 1721 case Op_VecD: 1722 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1723 break; 1724 case Op_VecX: 1725 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1726 break; 1727 case Op_VecY: 1728 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 1729 break; 1730 case Op_VecZ: 1731 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 1732 break; 1733 default: 1734 ShouldNotReachHere(); 1735 } 1736 } 1737 int size = __ offset() - offset; 1738 #ifdef ASSERT 1739 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4); 1740 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 
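    // The 5 fixed bytes below are 2 (SIMD/VEX prefix) + 1 (opcode)
    // + 1 (ModRM) + 1 (SIB; an rsp-relative address always needs a SIB
    // byte); offset_size then covers the displacement and, when
    // UseAVX > 2, the wider EVEX prefix.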
    assert(!do_size || size == (5+offset_size), "incorrect size calculation");
#endif
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    if (is_load) {
      switch (ireg) {
      case Op_VecS:
        st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecD:
        st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecX:
        st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
        break;
      default:
        ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
      case Op_VecS:
        st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecD:
        st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecX:
        st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
        break;
      default:
        ShouldNotReachHere();
      }
    }
#endif
  }
  bool is_single_byte = false;
  int vec_len = 0;
  if ((UseAVX > 2) && (stack_offset != 0)) {
    int tuple_type = Assembler::EVEX_FVM;
    int input_size = Assembler::EVEX_32bit;
    switch (ireg) {
    case Op_VecS:
      tuple_type = Assembler::EVEX_T1S;
      break;
    case Op_VecD:
      tuple_type = Assembler::EVEX_T1S;
      input_size = Assembler::EVEX_64bit;
      break;
    case Op_VecX:
      break;
    case Op_VecY:
      vec_len = 1;
      break;
    case Op_VecZ:
      vec_len = 2;
      break;
    }
    is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0);
  }
  int offset_size = 0;
  int size = 5;
  if (UseAVX > 2) {
    if (VM_Version::supports_avx512novl() && (vec_len == 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) {
      offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
    } else {
      offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
      size += 2; // Need an additional two bytes for EVEX encoding
    }
  } else {
    offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
  }
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+offset_size;
}

static inline jint replicate4_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 32 bits.
  assert(width == 1 || width == 2, "only byte or short types here");
  int bit_width = width * 8;
  jint val = con;
  val &= (1 << bit_width) - 1;  // mask off sign bits
  while(bit_width < 32) {
    val |= (val << bit_width);
    bit_width <<= 1;
  }
  return val;
}

static inline jlong replicate8_imm(int con, int width) {
  // Load a constant of "width" (in bytes) and replicate it to fill 64 bits.
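  // e.g. replicate8_imm(0x1234, 2) yields 0x1234123412341234, just as
  // replicate4_imm(0xAB, 1) above yields 0xABABABAB.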
1843 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); 1844 int bit_width = width * 8; 1845 jlong val = con; 1846 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits 1847 while(bit_width < 64) { 1848 val |= (val << bit_width); 1849 bit_width <<= 1; 1850 } 1851 return val; 1852 } 1853 1854 #ifndef PRODUCT 1855 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 1856 st->print("nop \t# %d bytes pad for loops and calls", _count); 1857 } 1858 #endif 1859 1860 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 1861 MacroAssembler _masm(&cbuf); 1862 __ nop(_count); 1863 } 1864 1865 uint MachNopNode::size(PhaseRegAlloc*) const { 1866 return _count; 1867 } 1868 1869 #ifndef PRODUCT 1870 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 1871 st->print("# breakpoint"); 1872 } 1873 #endif 1874 1875 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 1876 MacroAssembler _masm(&cbuf); 1877 __ int3(); 1878 } 1879 1880 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 1881 return MachNode::size(ra_); 1882 } 1883 1884 %} 1885 1886 encode %{ 1887 1888 enc_class call_epilog %{ 1889 if (VerifyStackAtCalls) { 1890 // Check that stack depth is unchanged: find majik cookie on stack 1891 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 1892 MacroAssembler _masm(&cbuf); 1893 Label L; 1894 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 1895 __ jccb(Assembler::equal, L); 1896 // Die if stack mismatch 1897 __ int3(); 1898 __ bind(L); 1899 } 1900 %} 1901 1902 %} 1903 1904 1905 //----------OPERANDS----------------------------------------------------------- 1906 // Operand definitions must precede instruction definitions for correct parsing 1907 // in the ADLC because operands constitute user defined types which are used in 1908 // instruction definitions. 1909 1910 // This one generically applies only for evex, so only one version 1911 operand vecZ() %{ 1912 constraint(ALLOC_IN_RC(vectorz_reg)); 1913 match(VecZ); 1914 1915 format %{ %} 1916 interface(REG_INTER); 1917 %} 1918 1919 // Comparison Code for FP conditional move 1920 operand cmpOp_vcmppd() %{ 1921 match(Bool); 1922 1923 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 1924 n->as_Bool()->_test._test != BoolTest::no_overflow); 1925 format %{ "" %} 1926 interface(COND_INTER) %{ 1927 equal (0x0, "eq"); 1928 less (0x1, "lt"); 1929 less_equal (0x2, "le"); 1930 not_equal (0xC, "ne"); 1931 greater_equal(0xD, "ge"); 1932 greater (0xE, "gt"); 1933 //TODO cannot compile (adlc breaks) without two next lines with error: 1934 // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{ 1935 // equal' for overflow. 
1936 overflow (0x20, "o"); // not really supported by the instruction 1937 no_overflow (0x21, "no"); // not really supported by the instruction 1938 %} 1939 %} 1940 1941 1942 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 1943 1944 // ============================================================================ 1945 1946 instruct ShouldNotReachHere() %{ 1947 match(Halt); 1948 format %{ "ud2\t# ShouldNotReachHere" %} 1949 ins_encode %{ 1950 __ ud2(); 1951 %} 1952 ins_pipe(pipe_slow); 1953 %} 1954 1955 // =================================EVEX special=============================== 1956 1957 instruct setMask(rRegI dst, rRegI src) %{ 1958 predicate(Matcher::has_predicated_vectors()); 1959 match(Set dst (SetVectMaskI src)); 1960 effect(TEMP dst); 1961 format %{ "setvectmask $dst, $src" %} 1962 ins_encode %{ 1963 __ setvectmask($dst$$Register, $src$$Register); 1964 %} 1965 ins_pipe(pipe_slow); 1966 %} 1967 1968 // ============================================================================ 1969 1970 instruct addF_reg(regF dst, regF src) %{ 1971 predicate((UseSSE>=1) && (UseAVX == 0)); 1972 match(Set dst (AddF dst src)); 1973 1974 format %{ "addss $dst, $src" %} 1975 ins_cost(150); 1976 ins_encode %{ 1977 __ addss($dst$$XMMRegister, $src$$XMMRegister); 1978 %} 1979 ins_pipe(pipe_slow); 1980 %} 1981 1982 instruct addF_mem(regF dst, memory src) %{ 1983 predicate((UseSSE>=1) && (UseAVX == 0)); 1984 match(Set dst (AddF dst (LoadF src))); 1985 1986 format %{ "addss $dst, $src" %} 1987 ins_cost(150); 1988 ins_encode %{ 1989 __ addss($dst$$XMMRegister, $src$$Address); 1990 %} 1991 ins_pipe(pipe_slow); 1992 %} 1993 1994 instruct addF_imm(regF dst, immF con) %{ 1995 predicate((UseSSE>=1) && (UseAVX == 0)); 1996 match(Set dst (AddF dst con)); 1997 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1998 ins_cost(150); 1999 ins_encode %{ 2000 __ addss($dst$$XMMRegister, $constantaddress($con)); 2001 %} 2002 ins_pipe(pipe_slow); 2003 %} 2004 2005 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2006 predicate(UseAVX > 0); 2007 match(Set dst (AddF src1 src2)); 2008 2009 format %{ "vaddss $dst, $src1, $src2" %} 2010 ins_cost(150); 2011 ins_encode %{ 2012 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2013 %} 2014 ins_pipe(pipe_slow); 2015 %} 2016 2017 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 2018 predicate(UseAVX > 0); 2019 match(Set dst (AddF src1 (LoadF src2))); 2020 2021 format %{ "vaddss $dst, $src1, $src2" %} 2022 ins_cost(150); 2023 ins_encode %{ 2024 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2025 %} 2026 ins_pipe(pipe_slow); 2027 %} 2028 2029 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2030 predicate(UseAVX > 0); 2031 match(Set dst (AddF src con)); 2032 2033 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2034 ins_cost(150); 2035 ins_encode %{ 2036 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2037 %} 2038 ins_pipe(pipe_slow); 2039 %} 2040 2041 instruct addD_reg(regD dst, regD src) %{ 2042 predicate((UseSSE>=2) && (UseAVX == 0)); 2043 match(Set dst (AddD dst src)); 2044 2045 format %{ "addsd $dst, $src" %} 2046 ins_cost(150); 2047 ins_encode %{ 2048 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2049 %} 2050 ins_pipe(pipe_slow); 2051 %} 2052 2053 instruct addD_mem(regD dst, memory src) %{ 2054 predicate((UseSSE>=2) && (UseAVX == 0)); 2055 match(Set dst (AddD dst (LoadD src))); 2056 2057 
format %{ "addsd $dst, $src" %} 2058 ins_cost(150); 2059 ins_encode %{ 2060 __ addsd($dst$$XMMRegister, $src$$Address); 2061 %} 2062 ins_pipe(pipe_slow); 2063 %} 2064 2065 instruct addD_imm(regD dst, immD con) %{ 2066 predicate((UseSSE>=2) && (UseAVX == 0)); 2067 match(Set dst (AddD dst con)); 2068 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2069 ins_cost(150); 2070 ins_encode %{ 2071 __ addsd($dst$$XMMRegister, $constantaddress($con)); 2072 %} 2073 ins_pipe(pipe_slow); 2074 %} 2075 2076 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 2077 predicate(UseAVX > 0); 2078 match(Set dst (AddD src1 src2)); 2079 2080 format %{ "vaddsd $dst, $src1, $src2" %} 2081 ins_cost(150); 2082 ins_encode %{ 2083 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2084 %} 2085 ins_pipe(pipe_slow); 2086 %} 2087 2088 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 2089 predicate(UseAVX > 0); 2090 match(Set dst (AddD src1 (LoadD src2))); 2091 2092 format %{ "vaddsd $dst, $src1, $src2" %} 2093 ins_cost(150); 2094 ins_encode %{ 2095 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2096 %} 2097 ins_pipe(pipe_slow); 2098 %} 2099 2100 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 2101 predicate(UseAVX > 0); 2102 match(Set dst (AddD src con)); 2103 2104 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2105 ins_cost(150); 2106 ins_encode %{ 2107 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2108 %} 2109 ins_pipe(pipe_slow); 2110 %} 2111 2112 instruct subF_reg(regF dst, regF src) %{ 2113 predicate((UseSSE>=1) && (UseAVX == 0)); 2114 match(Set dst (SubF dst src)); 2115 2116 format %{ "subss $dst, $src" %} 2117 ins_cost(150); 2118 ins_encode %{ 2119 __ subss($dst$$XMMRegister, $src$$XMMRegister); 2120 %} 2121 ins_pipe(pipe_slow); 2122 %} 2123 2124 instruct subF_mem(regF dst, memory src) %{ 2125 predicate((UseSSE>=1) && (UseAVX == 0)); 2126 match(Set dst (SubF dst (LoadF src))); 2127 2128 format %{ "subss $dst, $src" %} 2129 ins_cost(150); 2130 ins_encode %{ 2131 __ subss($dst$$XMMRegister, $src$$Address); 2132 %} 2133 ins_pipe(pipe_slow); 2134 %} 2135 2136 instruct subF_imm(regF dst, immF con) %{ 2137 predicate((UseSSE>=1) && (UseAVX == 0)); 2138 match(Set dst (SubF dst con)); 2139 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2140 ins_cost(150); 2141 ins_encode %{ 2142 __ subss($dst$$XMMRegister, $constantaddress($con)); 2143 %} 2144 ins_pipe(pipe_slow); 2145 %} 2146 2147 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 2148 predicate(UseAVX > 0); 2149 match(Set dst (SubF src1 src2)); 2150 2151 format %{ "vsubss $dst, $src1, $src2" %} 2152 ins_cost(150); 2153 ins_encode %{ 2154 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2155 %} 2156 ins_pipe(pipe_slow); 2157 %} 2158 2159 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 2160 predicate(UseAVX > 0); 2161 match(Set dst (SubF src1 (LoadF src2))); 2162 2163 format %{ "vsubss $dst, $src1, $src2" %} 2164 ins_cost(150); 2165 ins_encode %{ 2166 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2167 %} 2168 ins_pipe(pipe_slow); 2169 %} 2170 2171 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 2172 predicate(UseAVX > 0); 2173 match(Set dst (SubF src con)); 2174 2175 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2176 ins_cost(150); 2177 ins_encode %{ 2178 
__ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2179 %} 2180 ins_pipe(pipe_slow); 2181 %} 2182 2183 instruct subD_reg(regD dst, regD src) %{ 2184 predicate((UseSSE>=2) && (UseAVX == 0)); 2185 match(Set dst (SubD dst src)); 2186 2187 format %{ "subsd $dst, $src" %} 2188 ins_cost(150); 2189 ins_encode %{ 2190 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 2191 %} 2192 ins_pipe(pipe_slow); 2193 %} 2194 2195 instruct subD_mem(regD dst, memory src) %{ 2196 predicate((UseSSE>=2) && (UseAVX == 0)); 2197 match(Set dst (SubD dst (LoadD src))); 2198 2199 format %{ "subsd $dst, $src" %} 2200 ins_cost(150); 2201 ins_encode %{ 2202 __ subsd($dst$$XMMRegister, $src$$Address); 2203 %} 2204 ins_pipe(pipe_slow); 2205 %} 2206 2207 instruct subD_imm(regD dst, immD con) %{ 2208 predicate((UseSSE>=2) && (UseAVX == 0)); 2209 match(Set dst (SubD dst con)); 2210 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2211 ins_cost(150); 2212 ins_encode %{ 2213 __ subsd($dst$$XMMRegister, $constantaddress($con)); 2214 %} 2215 ins_pipe(pipe_slow); 2216 %} 2217 2218 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 2219 predicate(UseAVX > 0); 2220 match(Set dst (SubD src1 src2)); 2221 2222 format %{ "vsubsd $dst, $src1, $src2" %} 2223 ins_cost(150); 2224 ins_encode %{ 2225 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2226 %} 2227 ins_pipe(pipe_slow); 2228 %} 2229 2230 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 2231 predicate(UseAVX > 0); 2232 match(Set dst (SubD src1 (LoadD src2))); 2233 2234 format %{ "vsubsd $dst, $src1, $src2" %} 2235 ins_cost(150); 2236 ins_encode %{ 2237 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2238 %} 2239 ins_pipe(pipe_slow); 2240 %} 2241 2242 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 2243 predicate(UseAVX > 0); 2244 match(Set dst (SubD src con)); 2245 2246 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2247 ins_cost(150); 2248 ins_encode %{ 2249 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2250 %} 2251 ins_pipe(pipe_slow); 2252 %} 2253 2254 instruct mulF_reg(regF dst, regF src) %{ 2255 predicate((UseSSE>=1) && (UseAVX == 0)); 2256 match(Set dst (MulF dst src)); 2257 2258 format %{ "mulss $dst, $src" %} 2259 ins_cost(150); 2260 ins_encode %{ 2261 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 2262 %} 2263 ins_pipe(pipe_slow); 2264 %} 2265 2266 instruct mulF_mem(regF dst, memory src) %{ 2267 predicate((UseSSE>=1) && (UseAVX == 0)); 2268 match(Set dst (MulF dst (LoadF src))); 2269 2270 format %{ "mulss $dst, $src" %} 2271 ins_cost(150); 2272 ins_encode %{ 2273 __ mulss($dst$$XMMRegister, $src$$Address); 2274 %} 2275 ins_pipe(pipe_slow); 2276 %} 2277 2278 instruct mulF_imm(regF dst, immF con) %{ 2279 predicate((UseSSE>=1) && (UseAVX == 0)); 2280 match(Set dst (MulF dst con)); 2281 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2282 ins_cost(150); 2283 ins_encode %{ 2284 __ mulss($dst$$XMMRegister, $constantaddress($con)); 2285 %} 2286 ins_pipe(pipe_slow); 2287 %} 2288 2289 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 2290 predicate(UseAVX > 0); 2291 match(Set dst (MulF src1 src2)); 2292 2293 format %{ "vmulss $dst, $src1, $src2" %} 2294 ins_cost(150); 2295 ins_encode %{ 2296 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2297 %} 2298 ins_pipe(pipe_slow); 2299 %} 2300 2301 instruct mulF_reg_mem(regF dst, regF 
src1, memory src2) %{ 2302 predicate(UseAVX > 0); 2303 match(Set dst (MulF src1 (LoadF src2))); 2304 2305 format %{ "vmulss $dst, $src1, $src2" %} 2306 ins_cost(150); 2307 ins_encode %{ 2308 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2309 %} 2310 ins_pipe(pipe_slow); 2311 %} 2312 2313 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 2314 predicate(UseAVX > 0); 2315 match(Set dst (MulF src con)); 2316 2317 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2318 ins_cost(150); 2319 ins_encode %{ 2320 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2321 %} 2322 ins_pipe(pipe_slow); 2323 %} 2324 2325 instruct mulD_reg(regD dst, regD src) %{ 2326 predicate((UseSSE>=2) && (UseAVX == 0)); 2327 match(Set dst (MulD dst src)); 2328 2329 format %{ "mulsd $dst, $src" %} 2330 ins_cost(150); 2331 ins_encode %{ 2332 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 2333 %} 2334 ins_pipe(pipe_slow); 2335 %} 2336 2337 instruct mulD_mem(regD dst, memory src) %{ 2338 predicate((UseSSE>=2) && (UseAVX == 0)); 2339 match(Set dst (MulD dst (LoadD src))); 2340 2341 format %{ "mulsd $dst, $src" %} 2342 ins_cost(150); 2343 ins_encode %{ 2344 __ mulsd($dst$$XMMRegister, $src$$Address); 2345 %} 2346 ins_pipe(pipe_slow); 2347 %} 2348 2349 instruct mulD_imm(regD dst, immD con) %{ 2350 predicate((UseSSE>=2) && (UseAVX == 0)); 2351 match(Set dst (MulD dst con)); 2352 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2353 ins_cost(150); 2354 ins_encode %{ 2355 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 2356 %} 2357 ins_pipe(pipe_slow); 2358 %} 2359 2360 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 2361 predicate(UseAVX > 0); 2362 match(Set dst (MulD src1 src2)); 2363 2364 format %{ "vmulsd $dst, $src1, $src2" %} 2365 ins_cost(150); 2366 ins_encode %{ 2367 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2368 %} 2369 ins_pipe(pipe_slow); 2370 %} 2371 2372 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 2373 predicate(UseAVX > 0); 2374 match(Set dst (MulD src1 (LoadD src2))); 2375 2376 format %{ "vmulsd $dst, $src1, $src2" %} 2377 ins_cost(150); 2378 ins_encode %{ 2379 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2380 %} 2381 ins_pipe(pipe_slow); 2382 %} 2383 2384 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 2385 predicate(UseAVX > 0); 2386 match(Set dst (MulD src con)); 2387 2388 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2389 ins_cost(150); 2390 ins_encode %{ 2391 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2392 %} 2393 ins_pipe(pipe_slow); 2394 %} 2395 2396 instruct divF_reg(regF dst, regF src) %{ 2397 predicate((UseSSE>=1) && (UseAVX == 0)); 2398 match(Set dst (DivF dst src)); 2399 2400 format %{ "divss $dst, $src" %} 2401 ins_cost(150); 2402 ins_encode %{ 2403 __ divss($dst$$XMMRegister, $src$$XMMRegister); 2404 %} 2405 ins_pipe(pipe_slow); 2406 %} 2407 2408 instruct divF_mem(regF dst, memory src) %{ 2409 predicate((UseSSE>=1) && (UseAVX == 0)); 2410 match(Set dst (DivF dst (LoadF src))); 2411 2412 format %{ "divss $dst, $src" %} 2413 ins_cost(150); 2414 ins_encode %{ 2415 __ divss($dst$$XMMRegister, $src$$Address); 2416 %} 2417 ins_pipe(pipe_slow); 2418 %} 2419 2420 instruct divF_imm(regF dst, immF con) %{ 2421 predicate((UseSSE>=1) && (UseAVX == 0)); 2422 match(Set dst (DivF dst con)); 2423 format %{ "divss $dst, 
[$constantaddress]\t# load from constant table: float=$con" %} 2424 ins_cost(150); 2425 ins_encode %{ 2426 __ divss($dst$$XMMRegister, $constantaddress($con)); 2427 %} 2428 ins_pipe(pipe_slow); 2429 %} 2430 2431 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 2432 predicate(UseAVX > 0); 2433 match(Set dst (DivF src1 src2)); 2434 2435 format %{ "vdivss $dst, $src1, $src2" %} 2436 ins_cost(150); 2437 ins_encode %{ 2438 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2439 %} 2440 ins_pipe(pipe_slow); 2441 %} 2442 2443 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 2444 predicate(UseAVX > 0); 2445 match(Set dst (DivF src1 (LoadF src2))); 2446 2447 format %{ "vdivss $dst, $src1, $src2" %} 2448 ins_cost(150); 2449 ins_encode %{ 2450 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2451 %} 2452 ins_pipe(pipe_slow); 2453 %} 2454 2455 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 2456 predicate(UseAVX > 0); 2457 match(Set dst (DivF src con)); 2458 2459 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2460 ins_cost(150); 2461 ins_encode %{ 2462 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2463 %} 2464 ins_pipe(pipe_slow); 2465 %} 2466 2467 instruct divD_reg(regD dst, regD src) %{ 2468 predicate((UseSSE>=2) && (UseAVX == 0)); 2469 match(Set dst (DivD dst src)); 2470 2471 format %{ "divsd $dst, $src" %} 2472 ins_cost(150); 2473 ins_encode %{ 2474 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 2475 %} 2476 ins_pipe(pipe_slow); 2477 %} 2478 2479 instruct divD_mem(regD dst, memory src) %{ 2480 predicate((UseSSE>=2) && (UseAVX == 0)); 2481 match(Set dst (DivD dst (LoadD src))); 2482 2483 format %{ "divsd $dst, $src" %} 2484 ins_cost(150); 2485 ins_encode %{ 2486 __ divsd($dst$$XMMRegister, $src$$Address); 2487 %} 2488 ins_pipe(pipe_slow); 2489 %} 2490 2491 instruct divD_imm(regD dst, immD con) %{ 2492 predicate((UseSSE>=2) && (UseAVX == 0)); 2493 match(Set dst (DivD dst con)); 2494 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2495 ins_cost(150); 2496 ins_encode %{ 2497 __ divsd($dst$$XMMRegister, $constantaddress($con)); 2498 %} 2499 ins_pipe(pipe_slow); 2500 %} 2501 2502 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 2503 predicate(UseAVX > 0); 2504 match(Set dst (DivD src1 src2)); 2505 2506 format %{ "vdivsd $dst, $src1, $src2" %} 2507 ins_cost(150); 2508 ins_encode %{ 2509 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2510 %} 2511 ins_pipe(pipe_slow); 2512 %} 2513 2514 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 2515 predicate(UseAVX > 0); 2516 match(Set dst (DivD src1 (LoadD src2))); 2517 2518 format %{ "vdivsd $dst, $src1, $src2" %} 2519 ins_cost(150); 2520 ins_encode %{ 2521 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2522 %} 2523 ins_pipe(pipe_slow); 2524 %} 2525 2526 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 2527 predicate(UseAVX > 0); 2528 match(Set dst (DivD src con)); 2529 2530 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2531 ins_cost(150); 2532 ins_encode %{ 2533 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2534 %} 2535 ins_pipe(pipe_slow); 2536 %} 2537 2538 instruct absF_reg(regF dst) %{ 2539 predicate((UseSSE>=1) && (UseAVX == 0)); 2540 match(Set dst (AbsF dst)); 2541 ins_cost(150); 2542 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" 
%}
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg_reg(regF dst, regF src) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

#ifdef _LP64
instruct absF_reg_reg_evex(regF dst, regF src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg_reg_evex_special(regF dst, regF src1, regF src2) %{
  predicate(VM_Version::supports_avx512novl());
  match(Set dst (AbsF src1));
  effect(TEMP src2);
  ins_cost(150);
  format %{ "vabsss $dst, $src1, $src2, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vabsss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#else // _LP64
instruct absF_reg_reg_evex(regF dst, regF src) %{
  predicate(UseAVX > 2);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#endif

instruct absD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg(regD dst, regD src) %{
  predicate(VM_Version::supports_avxonly());
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

#ifdef _LP64
instruct absD_reg_reg_evex(regD dst, regD src) %{
  predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg_evex_special(regD dst, regD src1, regD src2) %{
  predicate(VM_Version::supports_avx512novl());
  match(Set dst (AbsD src1));
  effect(TEMP src2);
  ins_cost(150);
  format %{ "vabssd $dst, $src1, $src2, [0x7fffffffffffffff]\t# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vabssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#else // _LP64
instruct absD_reg_reg_evex(regD dst, regD src) %{
  predicate(UseAVX > 2);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
#endif

instruct negF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg_reg(regF dst, regF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg_reg(regD dst, regD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_reg(regF dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF src));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_mem(regF dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF (LoadF src)));

  format %{ "sqrtss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtF_imm(regF dst, immF con) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF con));

  format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_reg(regD dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));

  format %{ "sqrtsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct sqrtD_mem(regD dst, memory src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD (LoadD src)));

  format %{ "sqrtsd $dst,
$src" %} 2775 ins_cost(150); 2776 ins_encode %{ 2777 __ sqrtsd($dst$$XMMRegister, $src$$Address); 2778 %} 2779 ins_pipe(pipe_slow); 2780 %} 2781 2782 instruct sqrtD_imm(regD dst, immD con) %{ 2783 predicate(UseSSE>=2); 2784 match(Set dst (SqrtD con)); 2785 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2786 ins_cost(150); 2787 ins_encode %{ 2788 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 2789 %} 2790 ins_pipe(pipe_slow); 2791 %} 2792 2793 instruct onspinwait() %{ 2794 match(OnSpinWait); 2795 ins_cost(200); 2796 2797 format %{ 2798 $$template 2799 if (os::is_MP()) { 2800 $$emit$$"pause\t! membar_onspinwait" 2801 } else { 2802 $$emit$$"MEMBAR-onspinwait ! (empty encoding)" 2803 } 2804 %} 2805 ins_encode %{ 2806 __ pause(); 2807 %} 2808 ins_pipe(pipe_slow); 2809 %} 2810 2811 // a * b + c 2812 instruct fmaD_reg(regD a, regD b, regD c) %{ 2813 predicate(UseFMA); 2814 match(Set c (FmaD c (Binary a b))); 2815 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 2816 ins_cost(150); 2817 ins_encode %{ 2818 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 2819 %} 2820 ins_pipe( pipe_slow ); 2821 %} 2822 2823 // a * b + c 2824 instruct fmaF_reg(regF a, regF b, regF c) %{ 2825 predicate(UseFMA); 2826 match(Set c (FmaF c (Binary a b))); 2827 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 2828 ins_cost(150); 2829 ins_encode %{ 2830 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 2831 %} 2832 ins_pipe( pipe_slow ); 2833 %} 2834 2835 // ====================VECTOR INSTRUCTIONS===================================== 2836 2837 // Load vectors (4 bytes long) 2838 instruct loadV4(vecS dst, memory mem) %{ 2839 predicate(n->as_LoadVector()->memory_size() == 4); 2840 match(Set dst (LoadVector mem)); 2841 ins_cost(125); 2842 format %{ "movd $dst,$mem\t! load vector (4 bytes)" %} 2843 ins_encode %{ 2844 __ movdl($dst$$XMMRegister, $mem$$Address); 2845 %} 2846 ins_pipe( pipe_slow ); 2847 %} 2848 2849 // Load vectors (8 bytes long) 2850 instruct loadV8(vecD dst, memory mem) %{ 2851 predicate(n->as_LoadVector()->memory_size() == 8); 2852 match(Set dst (LoadVector mem)); 2853 ins_cost(125); 2854 format %{ "movq $dst,$mem\t! load vector (8 bytes)" %} 2855 ins_encode %{ 2856 __ movq($dst$$XMMRegister, $mem$$Address); 2857 %} 2858 ins_pipe( pipe_slow ); 2859 %} 2860 2861 // Load vectors (16 bytes long) 2862 instruct loadV16(vecX dst, memory mem) %{ 2863 predicate(n->as_LoadVector()->memory_size() == 16); 2864 match(Set dst (LoadVector mem)); 2865 ins_cost(125); 2866 format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %} 2867 ins_encode %{ 2868 __ movdqu($dst$$XMMRegister, $mem$$Address); 2869 %} 2870 ins_pipe( pipe_slow ); 2871 %} 2872 2873 // Load vectors (32 bytes long) 2874 instruct loadV32(vecY dst, memory mem) %{ 2875 predicate(n->as_LoadVector()->memory_size() == 32); 2876 match(Set dst (LoadVector mem)); 2877 ins_cost(125); 2878 format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %} 2879 ins_encode %{ 2880 __ vmovdqu($dst$$XMMRegister, $mem$$Address); 2881 %} 2882 ins_pipe( pipe_slow ); 2883 %} 2884 2885 // Load vectors (64 bytes long) 2886 instruct loadV64_dword(vecZ dst, memory mem) %{ 2887 predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() <= 4); 2888 match(Set dst (LoadVector mem)); 2889 ins_cost(125); 2890 format %{ "vmovdqul $dst k0,$mem\t! 
load vector (64 bytes)" %} 2891 ins_encode %{ 2892 int vector_len = 2; 2893 __ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len); 2894 %} 2895 ins_pipe( pipe_slow ); 2896 %} 2897 2898 // Load vectors (64 bytes long) 2899 instruct loadV64_qword(vecZ dst, memory mem) %{ 2900 predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() > 4); 2901 match(Set dst (LoadVector mem)); 2902 ins_cost(125); 2903 format %{ "vmovdquq $dst k0,$mem\t! load vector (64 bytes)" %} 2904 ins_encode %{ 2905 int vector_len = 2; 2906 __ evmovdquq($dst$$XMMRegister, $mem$$Address, vector_len); 2907 %} 2908 ins_pipe( pipe_slow ); 2909 %} 2910 2911 // Store vectors 2912 instruct storeV4(memory mem, vecS src) %{ 2913 predicate(n->as_StoreVector()->memory_size() == 4); 2914 match(Set mem (StoreVector mem src)); 2915 ins_cost(145); 2916 format %{ "movd $mem,$src\t! store vector (4 bytes)" %} 2917 ins_encode %{ 2918 __ movdl($mem$$Address, $src$$XMMRegister); 2919 %} 2920 ins_pipe( pipe_slow ); 2921 %} 2922 2923 instruct storeV8(memory mem, vecD src) %{ 2924 predicate(n->as_StoreVector()->memory_size() == 8); 2925 match(Set mem (StoreVector mem src)); 2926 ins_cost(145); 2927 format %{ "movq $mem,$src\t! store vector (8 bytes)" %} 2928 ins_encode %{ 2929 __ movq($mem$$Address, $src$$XMMRegister); 2930 %} 2931 ins_pipe( pipe_slow ); 2932 %} 2933 2934 instruct storeV16(memory mem, vecX src) %{ 2935 predicate(n->as_StoreVector()->memory_size() == 16); 2936 match(Set mem (StoreVector mem src)); 2937 ins_cost(145); 2938 format %{ "movdqu $mem,$src\t! store vector (16 bytes)" %} 2939 ins_encode %{ 2940 __ movdqu($mem$$Address, $src$$XMMRegister); 2941 %} 2942 ins_pipe( pipe_slow ); 2943 %} 2944 2945 instruct storeV32(memory mem, vecY src) %{ 2946 predicate(n->as_StoreVector()->memory_size() == 32); 2947 match(Set mem (StoreVector mem src)); 2948 ins_cost(145); 2949 format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %} 2950 ins_encode %{ 2951 __ vmovdqu($mem$$Address, $src$$XMMRegister); 2952 %} 2953 ins_pipe( pipe_slow ); 2954 %} 2955 2956 instruct storeV64_dword(memory mem, vecZ src) %{ 2957 predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() <= 4); 2958 match(Set mem (StoreVector mem src)); 2959 ins_cost(145); 2960 format %{ "vmovdqul $mem k0,$src\t! store vector (64 bytes)" %} 2961 ins_encode %{ 2962 int vector_len = 2; 2963 __ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len); 2964 %} 2965 ins_pipe( pipe_slow ); 2966 %} 2967 2968 instruct storeV64_qword(memory mem, vecZ src) %{ 2969 predicate(n->as_StoreVector()->memory_size() == 64 && n->as_StoreVector()->element_size() > 4); 2970 match(Set mem (StoreVector mem src)); 2971 ins_cost(145); 2972 format %{ "vmovdquq $mem k0,$src\t! store vector (64 bytes)" %} 2973 ins_encode %{ 2974 int vector_len = 2; 2975 __ evmovdquq($mem$$Address, $src$$XMMRegister, vector_len); 2976 %} 2977 ins_pipe( pipe_slow ); 2978 %} 2979 2980 // ====================LEGACY REPLICATE======================================= 2981 2982 instruct Repl4B_mem(vecS dst, memory mem) %{ 2983 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 2984 match(Set dst (ReplicateB (LoadB mem))); 2985 format %{ "punpcklbw $dst,$mem\n\t" 2986 "pshuflw $dst,$dst,0x00\t! 
replicate4B" %} 2987 ins_encode %{ 2988 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 2989 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2990 %} 2991 ins_pipe( pipe_slow ); 2992 %} 2993 2994 instruct Repl8B_mem(vecD dst, memory mem) %{ 2995 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 2996 match(Set dst (ReplicateB (LoadB mem))); 2997 format %{ "punpcklbw $dst,$mem\n\t" 2998 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 2999 ins_encode %{ 3000 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3001 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3002 %} 3003 ins_pipe( pipe_slow ); 3004 %} 3005 3006 instruct Repl16B(vecX dst, rRegI src) %{ 3007 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3008 match(Set dst (ReplicateB src)); 3009 format %{ "movd $dst,$src\n\t" 3010 "punpcklbw $dst,$dst\n\t" 3011 "pshuflw $dst,$dst,0x00\n\t" 3012 "punpcklqdq $dst,$dst\t! replicate16B" %} 3013 ins_encode %{ 3014 __ movdl($dst$$XMMRegister, $src$$Register); 3015 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3016 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3017 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3018 %} 3019 ins_pipe( pipe_slow ); 3020 %} 3021 3022 instruct Repl16B_mem(vecX dst, memory mem) %{ 3023 predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3024 match(Set dst (ReplicateB (LoadB mem))); 3025 format %{ "punpcklbw $dst,$mem\n\t" 3026 "pshuflw $dst,$dst,0x00\n\t" 3027 "punpcklqdq $dst,$dst\t! replicate16B" %} 3028 ins_encode %{ 3029 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3030 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3031 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3032 %} 3033 ins_pipe( pipe_slow ); 3034 %} 3035 3036 instruct Repl32B(vecY dst, rRegI src) %{ 3037 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3038 match(Set dst (ReplicateB src)); 3039 format %{ "movd $dst,$src\n\t" 3040 "punpcklbw $dst,$dst\n\t" 3041 "pshuflw $dst,$dst,0x00\n\t" 3042 "punpcklqdq $dst,$dst\n\t" 3043 "vinserti128_high $dst,$dst\t! replicate32B" %} 3044 ins_encode %{ 3045 __ movdl($dst$$XMMRegister, $src$$Register); 3046 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3047 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3048 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3049 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3050 %} 3051 ins_pipe( pipe_slow ); 3052 %} 3053 3054 instruct Repl32B_mem(vecY dst, memory mem) %{ 3055 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3056 match(Set dst (ReplicateB (LoadB mem))); 3057 format %{ "punpcklbw $dst,$mem\n\t" 3058 "pshuflw $dst,$dst,0x00\n\t" 3059 "punpcklqdq $dst,$dst\n\t" 3060 "vinserti128_high $dst,$dst\t! replicate32B" %} 3061 ins_encode %{ 3062 __ punpcklbw($dst$$XMMRegister, $mem$$Address); 3063 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3064 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3065 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3066 %} 3067 ins_pipe( pipe_slow ); 3068 %} 3069 3070 instruct Repl16B_imm(vecX dst, immI con) %{ 3071 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3072 match(Set dst (ReplicateB con)); 3073 format %{ "movq $dst,[$constantaddress]\n\t" 3074 "punpcklqdq $dst,$dst\t! 
replicate16B($con)" %} 3075 ins_encode %{ 3076 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3077 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3078 %} 3079 ins_pipe( pipe_slow ); 3080 %} 3081 3082 instruct Repl32B_imm(vecY dst, immI con) %{ 3083 predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw()); 3084 match(Set dst (ReplicateB con)); 3085 format %{ "movq $dst,[$constantaddress]\n\t" 3086 "punpcklqdq $dst,$dst\n\t" 3087 "vinserti128_high $dst,$dst\t! lreplicate32B($con)" %} 3088 ins_encode %{ 3089 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3090 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3091 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3092 %} 3093 ins_pipe( pipe_slow ); 3094 %} 3095 3096 instruct Repl4S(vecD dst, rRegI src) %{ 3097 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw()); 3098 match(Set dst (ReplicateS src)); 3099 format %{ "movd $dst,$src\n\t" 3100 "pshuflw $dst,$dst,0x00\t! replicate4S" %} 3101 ins_encode %{ 3102 __ movdl($dst$$XMMRegister, $src$$Register); 3103 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3104 %} 3105 ins_pipe( pipe_slow ); 3106 %} 3107 3108 instruct Repl4S_mem(vecD dst, memory mem) %{ 3109 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3110 match(Set dst (ReplicateS (LoadS mem))); 3111 format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %} 3112 ins_encode %{ 3113 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3114 %} 3115 ins_pipe( pipe_slow ); 3116 %} 3117 3118 instruct Repl8S(vecX dst, rRegI src) %{ 3119 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3120 match(Set dst (ReplicateS src)); 3121 format %{ "movd $dst,$src\n\t" 3122 "pshuflw $dst,$dst,0x00\n\t" 3123 "punpcklqdq $dst,$dst\t! replicate8S" %} 3124 ins_encode %{ 3125 __ movdl($dst$$XMMRegister, $src$$Register); 3126 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3127 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3128 %} 3129 ins_pipe( pipe_slow ); 3130 %} 3131 3132 instruct Repl8S_mem(vecX dst, memory mem) %{ 3133 predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw()); 3134 match(Set dst (ReplicateS (LoadS mem))); 3135 format %{ "pshuflw $dst,$mem,0x00\n\t" 3136 "punpcklqdq $dst,$dst\t! replicate8S" %} 3137 ins_encode %{ 3138 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3139 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3140 %} 3141 ins_pipe( pipe_slow ); 3142 %} 3143 3144 instruct Repl8S_imm(vecX dst, immI con) %{ 3145 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw()); 3146 match(Set dst (ReplicateS con)); 3147 format %{ "movq $dst,[$constantaddress]\n\t" 3148 "punpcklqdq $dst,$dst\t! replicate8S($con)" %} 3149 ins_encode %{ 3150 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3151 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3152 %} 3153 ins_pipe( pipe_slow ); 3154 %} 3155 3156 instruct Repl16S(vecY dst, rRegI src) %{ 3157 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3158 match(Set dst (ReplicateS src)); 3159 format %{ "movd $dst,$src\n\t" 3160 "pshuflw $dst,$dst,0x00\n\t" 3161 "punpcklqdq $dst,$dst\n\t" 3162 "vinserti128_high $dst,$dst\t! 
replicate16S" %} 3163 ins_encode %{ 3164 __ movdl($dst$$XMMRegister, $src$$Register); 3165 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3166 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3167 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3168 %} 3169 ins_pipe( pipe_slow ); 3170 %} 3171 3172 instruct Repl16S_mem(vecY dst, memory mem) %{ 3173 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3174 match(Set dst (ReplicateS (LoadS mem))); 3175 format %{ "pshuflw $dst,$mem,0x00\n\t" 3176 "punpcklqdq $dst,$dst\n\t" 3177 "vinserti128_high $dst,$dst\t! replicate16S" %} 3178 ins_encode %{ 3179 __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00); 3180 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3181 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3182 %} 3183 ins_pipe( pipe_slow ); 3184 %} 3185 3186 instruct Repl16S_imm(vecY dst, immI con) %{ 3187 predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw()); 3188 match(Set dst (ReplicateS con)); 3189 format %{ "movq $dst,[$constantaddress]\n\t" 3190 "punpcklqdq $dst,$dst\n\t" 3191 "vinserti128_high $dst,$dst\t! replicate16S($con)" %} 3192 ins_encode %{ 3193 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 3194 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3195 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3196 %} 3197 ins_pipe( pipe_slow ); 3198 %} 3199 3200 instruct Repl4I(vecX dst, rRegI src) %{ 3201 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3202 match(Set dst (ReplicateI src)); 3203 format %{ "movd $dst,$src\n\t" 3204 "pshufd $dst,$dst,0x00\t! replicate4I" %} 3205 ins_encode %{ 3206 __ movdl($dst$$XMMRegister, $src$$Register); 3207 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3208 %} 3209 ins_pipe( pipe_slow ); 3210 %} 3211 3212 instruct Repl4I_mem(vecX dst, memory mem) %{ 3213 predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3214 match(Set dst (ReplicateI (LoadI mem))); 3215 format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %} 3216 ins_encode %{ 3217 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3218 %} 3219 ins_pipe( pipe_slow ); 3220 %} 3221 3222 instruct Repl8I(vecY dst, rRegI src) %{ 3223 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3224 match(Set dst (ReplicateI src)); 3225 format %{ "movd $dst,$src\n\t" 3226 "pshufd $dst,$dst,0x00\n\t" 3227 "vinserti128_high $dst,$dst\t! replicate8I" %} 3228 ins_encode %{ 3229 __ movdl($dst$$XMMRegister, $src$$Register); 3230 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3231 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3232 %} 3233 ins_pipe( pipe_slow ); 3234 %} 3235 3236 instruct Repl8I_mem(vecY dst, memory mem) %{ 3237 predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl()); 3238 match(Set dst (ReplicateI (LoadI mem))); 3239 format %{ "pshufd $dst,$mem,0x00\n\t" 3240 "vinserti128_high $dst,$dst\t! replicate8I" %} 3241 ins_encode %{ 3242 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00); 3243 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 3244 %} 3245 ins_pipe( pipe_slow ); 3246 %} 3247 3248 instruct Repl4I_imm(vecX dst, immI con) %{ 3249 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3250 match(Set dst (ReplicateI con)); 3251 format %{ "movq $dst,[$constantaddress]\t! 
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS (LoadS mem)));
  format %{ "pshuflw $dst,$mem,0x00\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate16S" %}
  ins_encode %{
    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16S_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate16S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate4I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate4I" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\n\t"
            "vinserti128_high $dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "pshufd $dst,$mem,0x00\n\t"
            "vinserti128_high $dst,$dst\t! replicate8I" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4I_imm(vecX dst, immI con) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t"
            "punpcklqdq $dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8I_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Long could be loaded into xmm register directly from memory.
instruct Repl2L_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\t! replicate2L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar to be vector
#ifdef _LP64
instruct Repl4L(vecY dst, rRegL src) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  format %{ "movdq $dst,$src\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdq($dst$$XMMRegister, $src$$Register);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "movdl $dst,$src.lo\n\t"
            "movdl $tmp,$src.hi\n\t"
            "punpckldq $dst,$tmp\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

instruct Repl4L_imm(vecY dst, immL con) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress($con));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4L_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateL (LoadL mem)));
  format %{ "movq $dst,$mem\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128_high $dst,$dst\t! replicate4L" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $mem$$Address);
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2F_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate2F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl4F_mem(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\t! replicate4F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F(vecY dst, regF src) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF src));
  format %{ "pshufd $dst,$src,0x00\n\t"
            "vinsertf128_high $dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8F_mem(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
  match(Set dst (ReplicateF (LoadF mem)));
  format %{ "pshufd $dst,$mem,0x00\n\t"
            "vinsertf128_high $dst,$dst\t! replicate8F" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl2F_zero(vecD dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 2 && UseAVX < 3);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate2F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4F_zero(vecX dst, immF0 zero) %{
  predicate(n->as_Vector()->length() == 4 && UseAVX < 3);
  match(Set dst (ReplicateF zero));
  format %{ "xorps $dst,$dst\t! replicate4F zero" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}
replicate8F zero" %} 3419 ins_encode %{ 3420 int vector_len = 1; 3421 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3422 %} 3423 ins_pipe( fpu_reg_reg ); 3424 %} 3425 3426 instruct Repl2D_mem(vecX dst, memory mem) %{ 3427 predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl()); 3428 match(Set dst (ReplicateD (LoadD mem))); 3429 format %{ "pshufd $dst,$mem,0x44\t! replicate2D" %} 3430 ins_encode %{ 3431 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3432 %} 3433 ins_pipe( pipe_slow ); 3434 %} 3435 3436 instruct Repl4D(vecY dst, regD src) %{ 3437 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3438 match(Set dst (ReplicateD src)); 3439 format %{ "pshufd $dst,$src,0x44\n\t" 3440 "vinsertf128_high $dst,$dst\t! replicate4D" %} 3441 ins_encode %{ 3442 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 3443 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3444 %} 3445 ins_pipe( pipe_slow ); 3446 %} 3447 3448 instruct Repl4D_mem(vecY dst, memory mem) %{ 3449 predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl()); 3450 match(Set dst (ReplicateD (LoadD mem))); 3451 format %{ "pshufd $dst,$mem,0x44\n\t" 3452 "vinsertf128_high $dst,$dst\t! replicate4D" %} 3453 ins_encode %{ 3454 __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44); 3455 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 3456 %} 3457 ins_pipe( pipe_slow ); 3458 %} 3459 3460 // Replicate double (8 byte) scalar zero to be vector 3461 instruct Repl2D_zero(vecX dst, immD0 zero) %{ 3462 predicate(n->as_Vector()->length() == 2 && UseAVX < 3); 3463 match(Set dst (ReplicateD zero)); 3464 format %{ "xorpd $dst,$dst\t! replicate2D zero" %} 3465 ins_encode %{ 3466 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 3467 %} 3468 ins_pipe( fpu_reg_reg ); 3469 %} 3470 3471 instruct Repl4D_zero(vecY dst, immD0 zero) %{ 3472 predicate(n->as_Vector()->length() == 4 && UseAVX < 3); 3473 match(Set dst (ReplicateD zero)); 3474 format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %} 3475 ins_encode %{ 3476 int vector_len = 1; 3477 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3478 %} 3479 ins_pipe( fpu_reg_reg ); 3480 %} 3481 3482 // ====================GENERIC REPLICATE========================================== 3483 3484 // Replicate byte scalar to be vector 3485 instruct Repl4B(vecS dst, rRegI src) %{ 3486 predicate(n->as_Vector()->length() == 4); 3487 match(Set dst (ReplicateB src)); 3488 format %{ "movd $dst,$src\n\t" 3489 "punpcklbw $dst,$dst\n\t" 3490 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 3491 ins_encode %{ 3492 __ movdl($dst$$XMMRegister, $src$$Register); 3493 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3494 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3495 %} 3496 ins_pipe( pipe_slow ); 3497 %} 3498 3499 instruct Repl8B(vecD dst, rRegI src) %{ 3500 predicate(n->as_Vector()->length() == 8); 3501 match(Set dst (ReplicateB src)); 3502 format %{ "movd $dst,$src\n\t" 3503 "punpcklbw $dst,$dst\n\t" 3504 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 3505 ins_encode %{ 3506 __ movdl($dst$$XMMRegister, $src$$Register); 3507 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 3508 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 3509 %} 3510 ins_pipe( pipe_slow ); 3511 %} 3512 3513 // Replicate byte scalar immediate to be vector by loading from const table. 
instruct Repl4B_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\t! replicate8B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
  %}
  ins_pipe( pipe_slow );
%}

// Replicate byte scalar zero to be vector
instruct Repl4B_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate4B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8B_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate8B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16B_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB zero));
  format %{ "pxor $dst,$dst\t! replicate16B zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl32B_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar to be vector
instruct Repl2S(vecS dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS src));
  format %{ "movd $dst,$src\n\t"
            "pshuflw $dst,$dst,0x00\t! replicate2S" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
instruct Repl2S_imm(vecS dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS con));
  format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  format %{ "movq $dst,[$constantaddress]\t! replicate4S($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate char/short (2 byte) scalar zero to be vector
instruct Repl2S_zero(vecS dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate2S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4S_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate4S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8S_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS zero));
  format %{ "pxor $dst,$dst\t! replicate8S zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl16S_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar to be vector
instruct Repl2I(vecD dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  format %{ "movd $dst,$src\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Integer could be loaded into xmm register directly from memory.
instruct Repl2I_mem(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI (LoadI mem)));
  format %{ "movd $dst,$mem\n\t"
            "pshufd $dst,$dst,0x00\t! replicate2I" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $mem$$Address);
    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
instruct Repl2I_imm(vecD dst, immI con) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
  %}
  ins_pipe( fpu_reg_reg );
%}

// Replicate integer (4 byte) scalar zero to be vector
instruct Repl2I_zero(vecD dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate2I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct Repl8I_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateI zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %}
  ins_encode %{
    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( fpu_reg_reg );
%}
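
// On 32-bit VMs (the #else // _LP64 branches below) a long occupies a
// register pair, so these forms first assemble the 64-bit value inside the
// XMM register: movdl the low half, movdl the high half into $tmp,
// punpckldq them together, and only then broadcast.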
replicate2I" %} 3696 ins_encode %{ 3697 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3698 %} 3699 ins_pipe( fpu_reg_reg ); 3700 %} 3701 3702 instruct Repl4I_zero(vecX dst, immI0 zero) %{ 3703 predicate(n->as_Vector()->length() == 4); 3704 match(Set dst (ReplicateI zero)); 3705 format %{ "pxor $dst,$dst\t! replicate4I zero)" %} 3706 ins_encode %{ 3707 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3708 %} 3709 ins_pipe( fpu_reg_reg ); 3710 %} 3711 3712 instruct Repl8I_zero(vecY dst, immI0 zero) %{ 3713 predicate(n->as_Vector()->length() == 8); 3714 match(Set dst (ReplicateI zero)); 3715 format %{ "vpxor $dst,$dst,$dst\t! replicate8I zero" %} 3716 ins_encode %{ 3717 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 3718 int vector_len = 1; 3719 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3720 %} 3721 ins_pipe( fpu_reg_reg ); 3722 %} 3723 3724 // Replicate long (8 byte) scalar to be vector 3725 #ifdef _LP64 3726 instruct Repl2L(vecX dst, rRegL src) %{ 3727 predicate(n->as_Vector()->length() == 2); 3728 match(Set dst (ReplicateL src)); 3729 format %{ "movdq $dst,$src\n\t" 3730 "punpcklqdq $dst,$dst\t! replicate2L" %} 3731 ins_encode %{ 3732 __ movdq($dst$$XMMRegister, $src$$Register); 3733 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3734 %} 3735 ins_pipe( pipe_slow ); 3736 %} 3737 #else // _LP64 3738 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{ 3739 predicate(n->as_Vector()->length() == 2); 3740 match(Set dst (ReplicateL src)); 3741 effect(TEMP dst, USE src, TEMP tmp); 3742 format %{ "movdl $dst,$src.lo\n\t" 3743 "movdl $tmp,$src.hi\n\t" 3744 "punpckldq $dst,$tmp\n\t" 3745 "punpcklqdq $dst,$dst\t! replicate2L"%} 3746 ins_encode %{ 3747 __ movdl($dst$$XMMRegister, $src$$Register); 3748 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 3749 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 3750 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3751 %} 3752 ins_pipe( pipe_slow ); 3753 %} 3754 #endif // _LP64 3755 3756 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 3757 instruct Repl2L_imm(vecX dst, immL con) %{ 3758 predicate(n->as_Vector()->length() == 2); 3759 match(Set dst (ReplicateL con)); 3760 format %{ "movq $dst,[$constantaddress]\n\t" 3761 "punpcklqdq $dst,$dst\t! replicate2L($con)" %} 3762 ins_encode %{ 3763 __ movq($dst$$XMMRegister, $constantaddress($con)); 3764 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 3765 %} 3766 ins_pipe( pipe_slow ); 3767 %} 3768 3769 // Replicate long (8 byte) scalar zero to be vector 3770 instruct Repl2L_zero(vecX dst, immL0 zero) %{ 3771 predicate(n->as_Vector()->length() == 2); 3772 match(Set dst (ReplicateL zero)); 3773 format %{ "pxor $dst,$dst\t! replicate2L zero" %} 3774 ins_encode %{ 3775 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3776 %} 3777 ins_pipe( fpu_reg_reg ); 3778 %} 3779 3780 instruct Repl4L_zero(vecY dst, immL0 zero) %{ 3781 predicate(n->as_Vector()->length() == 4); 3782 match(Set dst (ReplicateL zero)); 3783 format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %} 3784 ins_encode %{ 3785 // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it). 

instruct Repl4B_mem_evex(vecS dst, memory mem) %{
  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate4B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl8B_mem_evex(vecD dst, memory mem) %{
  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate8B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_evex(vecX dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "vpbroadcastb $dst,$src\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl16B_mem_evex(vecX dst, memory mem) %{
  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate16B" %}
  ins_encode %{
    int vector_len = 0;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_evex(vecY dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB src));
  format %{ "vpbroadcastb $dst,$src\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl32B_mem_evex(vecY dst, memory mem) %{
  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate32B" %}
  ins_encode %{
    int vector_len = 1;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_evex(vecZ dst, rRegI src) %{
  predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB src));
  format %{ "vpbroadcastb $dst,$src\t! upper replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct Repl64B_mem_evex(vecZ dst, memory mem) %{
  predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw());
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %}
  ins_encode %{
    int vector_len = 2;
    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
replicate32B" %} 3885 ins_encode %{ 3886 int vector_len = 1; 3887 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3888 %} 3889 ins_pipe( pipe_slow ); 3890 %} 3891 3892 instruct Repl64B_evex(vecZ dst, rRegI src) %{ 3893 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 3894 match(Set dst (ReplicateB src)); 3895 format %{ "vpbroadcastb $dst,$src\t! upper replicate64B" %} 3896 ins_encode %{ 3897 int vector_len = 2; 3898 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len); 3899 %} 3900 ins_pipe( pipe_slow ); 3901 %} 3902 3903 instruct Repl64B_mem_evex(vecZ dst, memory mem) %{ 3904 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 3905 match(Set dst (ReplicateB (LoadB mem))); 3906 format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %} 3907 ins_encode %{ 3908 int vector_len = 2; 3909 __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len); 3910 %} 3911 ins_pipe( pipe_slow ); 3912 %} 3913 3914 instruct Repl16B_imm_evex(vecX dst, immI con) %{ 3915 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 3916 match(Set dst (ReplicateB con)); 3917 format %{ "movq $dst,[$constantaddress]\n\t" 3918 "vpbroadcastb $dst,$dst\t! replicate16B" %} 3919 ins_encode %{ 3920 int vector_len = 0; 3921 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3922 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3923 %} 3924 ins_pipe( pipe_slow ); 3925 %} 3926 3927 instruct Repl32B_imm_evex(vecY dst, immI con) %{ 3928 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw()); 3929 match(Set dst (ReplicateB con)); 3930 format %{ "movq $dst,[$constantaddress]\n\t" 3931 "vpbroadcastb $dst,$dst\t! replicate32B" %} 3932 ins_encode %{ 3933 int vector_len = 1; 3934 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3935 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3936 %} 3937 ins_pipe( pipe_slow ); 3938 %} 3939 3940 instruct Repl64B_imm_evex(vecZ dst, immI con) %{ 3941 predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw()); 3942 match(Set dst (ReplicateB con)); 3943 format %{ "movq $dst,[$constantaddress]\n\t" 3944 "vpbroadcastb $dst,$dst\t! upper replicate64B" %} 3945 ins_encode %{ 3946 int vector_len = 2; 3947 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1))); 3948 __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3949 %} 3950 ins_pipe( pipe_slow ); 3951 %} 3952 3953 instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{ 3954 predicate(n->as_Vector()->length() == 64 && UseAVX > 2); 3955 match(Set dst (ReplicateB zero)); 3956 format %{ "vpxor $dst k0,$dst,$dst\t! replicate64B zero" %} 3957 ins_encode %{ 3958 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 3959 int vector_len = 2; 3960 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 3961 %} 3962 ins_pipe( fpu_reg_reg ); 3963 %} 3964 3965 instruct Repl4S_evex(vecD dst, rRegI src) %{ 3966 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 3967 match(Set dst (ReplicateS src)); 3968 format %{ "vpbroadcastw $dst,$src\t! 
replicate4S" %} 3969 ins_encode %{ 3970 int vector_len = 0; 3971 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 3972 %} 3973 ins_pipe( pipe_slow ); 3974 %} 3975 3976 instruct Repl4S_mem_evex(vecD dst, memory mem) %{ 3977 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw()); 3978 match(Set dst (ReplicateS (LoadS mem))); 3979 format %{ "vpbroadcastw $dst,$mem\t! replicate4S" %} 3980 ins_encode %{ 3981 int vector_len = 0; 3982 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 3983 %} 3984 ins_pipe( pipe_slow ); 3985 %} 3986 3987 instruct Repl8S_evex(vecX dst, rRegI src) %{ 3988 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 3989 match(Set dst (ReplicateS src)); 3990 format %{ "vpbroadcastw $dst,$src\t! replicate8S" %} 3991 ins_encode %{ 3992 int vector_len = 0; 3993 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 3994 %} 3995 ins_pipe( pipe_slow ); 3996 %} 3997 3998 instruct Repl8S_mem_evex(vecX dst, memory mem) %{ 3999 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4000 match(Set dst (ReplicateS (LoadS mem))); 4001 format %{ "vpbroadcastw $dst,$mem\t! replicate8S" %} 4002 ins_encode %{ 4003 int vector_len = 0; 4004 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4005 %} 4006 ins_pipe( pipe_slow ); 4007 %} 4008 4009 instruct Repl16S_evex(vecY dst, rRegI src) %{ 4010 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4011 match(Set dst (ReplicateS src)); 4012 format %{ "vpbroadcastw $dst,$src\t! replicate16S" %} 4013 ins_encode %{ 4014 int vector_len = 1; 4015 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4016 %} 4017 ins_pipe( pipe_slow ); 4018 %} 4019 4020 instruct Repl16S_mem_evex(vecY dst, memory mem) %{ 4021 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4022 match(Set dst (ReplicateS (LoadS mem))); 4023 format %{ "vpbroadcastw $dst,$mem\t! replicate16S" %} 4024 ins_encode %{ 4025 int vector_len = 1; 4026 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4027 %} 4028 ins_pipe( pipe_slow ); 4029 %} 4030 4031 instruct Repl32S_evex(vecZ dst, rRegI src) %{ 4032 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4033 match(Set dst (ReplicateS src)); 4034 format %{ "vpbroadcastw $dst,$src\t! replicate32S" %} 4035 ins_encode %{ 4036 int vector_len = 2; 4037 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len); 4038 %} 4039 ins_pipe( pipe_slow ); 4040 %} 4041 4042 instruct Repl32S_mem_evex(vecZ dst, memory mem) %{ 4043 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4044 match(Set dst (ReplicateS (LoadS mem))); 4045 format %{ "vpbroadcastw $dst,$mem\t! replicate32S" %} 4046 ins_encode %{ 4047 int vector_len = 2; 4048 __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len); 4049 %} 4050 ins_pipe( pipe_slow ); 4051 %} 4052 4053 instruct Repl8S_imm_evex(vecX dst, immI con) %{ 4054 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw()); 4055 match(Set dst (ReplicateS con)); 4056 format %{ "movq $dst,[$constantaddress]\n\t" 4057 "vpbroadcastw $dst,$dst\t! 
replicate8S" %} 4058 ins_encode %{ 4059 int vector_len = 0; 4060 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4061 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4062 %} 4063 ins_pipe( pipe_slow ); 4064 %} 4065 4066 instruct Repl16S_imm_evex(vecY dst, immI con) %{ 4067 predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw()); 4068 match(Set dst (ReplicateS con)); 4069 format %{ "movq $dst,[$constantaddress]\n\t" 4070 "vpbroadcastw $dst,$dst\t! replicate16S" %} 4071 ins_encode %{ 4072 int vector_len = 1; 4073 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4074 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4075 %} 4076 ins_pipe( pipe_slow ); 4077 %} 4078 4079 instruct Repl32S_imm_evex(vecZ dst, immI con) %{ 4080 predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw()); 4081 match(Set dst (ReplicateS con)); 4082 format %{ "movq $dst,[$constantaddress]\n\t" 4083 "vpbroadcastw $dst,$dst\t! replicate32S" %} 4084 ins_encode %{ 4085 int vector_len = 2; 4086 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 4087 __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4088 %} 4089 ins_pipe( pipe_slow ); 4090 %} 4091 4092 instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{ 4093 predicate(n->as_Vector()->length() == 32 && UseAVX > 2); 4094 match(Set dst (ReplicateS zero)); 4095 format %{ "vpxor $dst k0,$dst,$dst\t! replicate32S zero" %} 4096 ins_encode %{ 4097 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4098 int vector_len = 2; 4099 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4100 %} 4101 ins_pipe( fpu_reg_reg ); 4102 %} 4103 4104 instruct Repl4I_evex(vecX dst, rRegI src) %{ 4105 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4106 match(Set dst (ReplicateI src)); 4107 format %{ "vpbroadcastd $dst,$src\t! replicate4I" %} 4108 ins_encode %{ 4109 int vector_len = 0; 4110 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4111 %} 4112 ins_pipe( pipe_slow ); 4113 %} 4114 4115 instruct Repl4I_mem_evex(vecX dst, memory mem) %{ 4116 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4117 match(Set dst (ReplicateI (LoadI mem))); 4118 format %{ "vpbroadcastd $dst,$mem\t! replicate4I" %} 4119 ins_encode %{ 4120 int vector_len = 0; 4121 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4122 %} 4123 ins_pipe( pipe_slow ); 4124 %} 4125 4126 instruct Repl8I_evex(vecY dst, rRegI src) %{ 4127 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4128 match(Set dst (ReplicateI src)); 4129 format %{ "vpbroadcastd $dst,$src\t! replicate8I" %} 4130 ins_encode %{ 4131 int vector_len = 1; 4132 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4133 %} 4134 ins_pipe( pipe_slow ); 4135 %} 4136 4137 instruct Repl8I_mem_evex(vecY dst, memory mem) %{ 4138 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4139 match(Set dst (ReplicateI (LoadI mem))); 4140 format %{ "vpbroadcastd $dst,$mem\t! 
replicate8I" %} 4141 ins_encode %{ 4142 int vector_len = 1; 4143 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4144 %} 4145 ins_pipe( pipe_slow ); 4146 %} 4147 4148 instruct Repl16I_evex(vecZ dst, rRegI src) %{ 4149 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4150 match(Set dst (ReplicateI src)); 4151 format %{ "vpbroadcastd $dst,$src\t! replicate16I" %} 4152 ins_encode %{ 4153 int vector_len = 2; 4154 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len); 4155 %} 4156 ins_pipe( pipe_slow ); 4157 %} 4158 4159 instruct Repl16I_mem_evex(vecZ dst, memory mem) %{ 4160 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4161 match(Set dst (ReplicateI (LoadI mem))); 4162 format %{ "vpbroadcastd $dst,$mem\t! replicate16I" %} 4163 ins_encode %{ 4164 int vector_len = 2; 4165 __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len); 4166 %} 4167 ins_pipe( pipe_slow ); 4168 %} 4169 4170 instruct Repl4I_imm_evex(vecX dst, immI con) %{ 4171 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4172 match(Set dst (ReplicateI con)); 4173 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4174 "vpbroadcastd $dst,$dst\t! replicate4I" %} 4175 ins_encode %{ 4176 int vector_len = 0; 4177 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4178 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4179 %} 4180 ins_pipe( pipe_slow ); 4181 %} 4182 4183 instruct Repl8I_imm_evex(vecY dst, immI con) %{ 4184 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4185 match(Set dst (ReplicateI con)); 4186 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 4187 "vpbroadcastd $dst,$dst\t! replicate8I" %} 4188 ins_encode %{ 4189 int vector_len = 1; 4190 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4191 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4192 %} 4193 ins_pipe( pipe_slow ); 4194 %} 4195 4196 instruct Repl16I_imm_evex(vecZ dst, immI con) %{ 4197 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4198 match(Set dst (ReplicateI con)); 4199 format %{ "movq $dst,[$constantaddress]\t! replicate16I($con)\n\t" 4200 "vpbroadcastd $dst,$dst\t! replicate16I" %} 4201 ins_encode %{ 4202 int vector_len = 2; 4203 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 4204 __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4205 %} 4206 ins_pipe( pipe_slow ); 4207 %} 4208 4209 instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{ 4210 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4211 match(Set dst (ReplicateI zero)); 4212 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16I zero" %} 4213 ins_encode %{ 4214 // Use vxorpd since AVX does not have vpxor for 512-bit (AVX2 will have it). 4215 int vector_len = 2; 4216 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4217 %} 4218 ins_pipe( fpu_reg_reg ); 4219 %} 4220 4221 // Replicate long (8 byte) scalar to be vector 4222 #ifdef _LP64 4223 instruct Repl4L_evex(vecY dst, rRegL src) %{ 4224 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4225 match(Set dst (ReplicateL src)); 4226 format %{ "vpbroadcastq $dst,$src\t! 
replicate4L" %} 4227 ins_encode %{ 4228 int vector_len = 1; 4229 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4230 %} 4231 ins_pipe( pipe_slow ); 4232 %} 4233 4234 instruct Repl8L_evex(vecZ dst, rRegL src) %{ 4235 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4236 match(Set dst (ReplicateL src)); 4237 format %{ "vpbroadcastq $dst,$src\t! replicate8L" %} 4238 ins_encode %{ 4239 int vector_len = 2; 4240 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len); 4241 %} 4242 ins_pipe( pipe_slow ); 4243 %} 4244 #else // _LP64 4245 instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{ 4246 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4247 match(Set dst (ReplicateL src)); 4248 effect(TEMP dst, USE src, TEMP tmp); 4249 format %{ "movdl $dst,$src.lo\n\t" 4250 "movdl $tmp,$src.hi\n\t" 4251 "punpckldq $dst,$tmp\n\t" 4252 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4253 ins_encode %{ 4254 int vector_len = 1; 4255 __ movdl($dst$$XMMRegister, $src$$Register); 4256 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4257 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4258 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4259 %} 4260 ins_pipe( pipe_slow ); 4261 %} 4262 4263 instruct Repl8L_evex(vecZ dst, eRegL src, regD tmp) %{ 4264 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4265 match(Set dst (ReplicateL src)); 4266 effect(TEMP dst, USE src, TEMP tmp); 4267 format %{ "movdl $dst,$src.lo\n\t" 4268 "movdl $tmp,$src.hi\n\t" 4269 "punpckldq $dst,$tmp\n\t" 4270 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4271 ins_encode %{ 4272 int vector_len = 2; 4273 __ movdl($dst$$XMMRegister, $src$$Register); 4274 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4275 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4276 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4277 %} 4278 ins_pipe( pipe_slow ); 4279 %} 4280 #endif // _LP64 4281 4282 instruct Repl4L_imm_evex(vecY dst, immL con) %{ 4283 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4284 match(Set dst (ReplicateL con)); 4285 format %{ "movq $dst,[$constantaddress]\n\t" 4286 "vpbroadcastq $dst,$dst\t! replicate4L" %} 4287 ins_encode %{ 4288 int vector_len = 1; 4289 __ movq($dst$$XMMRegister, $constantaddress($con)); 4290 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4291 %} 4292 ins_pipe( pipe_slow ); 4293 %} 4294 4295 instruct Repl8L_imm_evex(vecZ dst, immL con) %{ 4296 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4297 match(Set dst (ReplicateL con)); 4298 format %{ "movq $dst,[$constantaddress]\n\t" 4299 "vpbroadcastq $dst,$dst\t! replicate8L" %} 4300 ins_encode %{ 4301 int vector_len = 2; 4302 __ movq($dst$$XMMRegister, $constantaddress($con)); 4303 __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4304 %} 4305 ins_pipe( pipe_slow ); 4306 %} 4307 4308 instruct Repl2L_mem_evex(vecX dst, memory mem) %{ 4309 predicate(n->as_Vector()->length() == 2 && VM_Version::supports_avx512vl()); 4310 match(Set dst (ReplicateL (LoadL mem))); 4311 format %{ "vpbroadcastd $dst,$mem\t! 
replicate2L" %} 4312 ins_encode %{ 4313 int vector_len = 0; 4314 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4315 %} 4316 ins_pipe( pipe_slow ); 4317 %} 4318 4319 instruct Repl4L_mem_evex(vecY dst, memory mem) %{ 4320 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4321 match(Set dst (ReplicateL (LoadL mem))); 4322 format %{ "vpbroadcastd $dst,$mem\t! replicate4L" %} 4323 ins_encode %{ 4324 int vector_len = 1; 4325 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4326 %} 4327 ins_pipe( pipe_slow ); 4328 %} 4329 4330 instruct Repl8L_mem_evex(vecZ dst, memory mem) %{ 4331 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4332 match(Set dst (ReplicateL (LoadL mem))); 4333 format %{ "vpbroadcastd $dst,$mem\t! replicate8L" %} 4334 ins_encode %{ 4335 int vector_len = 2; 4336 __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len); 4337 %} 4338 ins_pipe( pipe_slow ); 4339 %} 4340 4341 instruct Repl8L_zero_evex(vecZ dst, immL0 zero) %{ 4342 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4343 match(Set dst (ReplicateL zero)); 4344 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8L zero" %} 4345 ins_encode %{ 4346 // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it). 4347 int vector_len = 2; 4348 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4349 %} 4350 ins_pipe( fpu_reg_reg ); 4351 %} 4352 4353 instruct Repl8F_evex(vecY dst, regF src) %{ 4354 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4355 match(Set dst (ReplicateF src)); 4356 format %{ "vbroadcastss $dst,$src\t! replicate8F" %} 4357 ins_encode %{ 4358 int vector_len = 1; 4359 __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4360 %} 4361 ins_pipe( pipe_slow ); 4362 %} 4363 4364 instruct Repl8F_mem_evex(vecY dst, memory mem) %{ 4365 predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl()); 4366 match(Set dst (ReplicateF (LoadF mem))); 4367 format %{ "vbroadcastss $dst,$mem\t! replicate8F" %} 4368 ins_encode %{ 4369 int vector_len = 1; 4370 __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4371 %} 4372 ins_pipe( pipe_slow ); 4373 %} 4374 4375 instruct Repl16F_evex(vecZ dst, regF src) %{ 4376 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4377 match(Set dst (ReplicateF src)); 4378 format %{ "vbroadcastss $dst,$src\t! replicate16F" %} 4379 ins_encode %{ 4380 int vector_len = 2; 4381 __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4382 %} 4383 ins_pipe( pipe_slow ); 4384 %} 4385 4386 instruct Repl16F_mem_evex(vecZ dst, memory mem) %{ 4387 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4388 match(Set dst (ReplicateF (LoadF mem))); 4389 format %{ "vbroadcastss $dst,$mem\t! replicate16F" %} 4390 ins_encode %{ 4391 int vector_len = 2; 4392 __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len); 4393 %} 4394 ins_pipe( pipe_slow ); 4395 %} 4396 4397 instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{ 4398 predicate(n->as_Vector()->length() == 2 && UseAVX > 2); 4399 match(Set dst (ReplicateF zero)); 4400 format %{ "vpxor $dst k0,$dst,$dst\t! 
replicate2F zero" %} 4401 ins_encode %{ 4402 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4403 int vector_len = 2; 4404 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4405 %} 4406 ins_pipe( fpu_reg_reg ); 4407 %} 4408 4409 instruct Repl4F_zero_evex(vecX dst, immF0 zero) %{ 4410 predicate(n->as_Vector()->length() == 4 && UseAVX > 2); 4411 match(Set dst (ReplicateF zero)); 4412 format %{ "vpxor $dst k0,$dst,$dst\t! replicate4F zero" %} 4413 ins_encode %{ 4414 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4415 int vector_len = 2; 4416 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4417 %} 4418 ins_pipe( fpu_reg_reg ); 4419 %} 4420 4421 instruct Repl8F_zero_evex(vecY dst, immF0 zero) %{ 4422 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4423 match(Set dst (ReplicateF zero)); 4424 format %{ "vpxor $dst k0,$dst,$dst\t! replicate8F zero" %} 4425 ins_encode %{ 4426 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4427 int vector_len = 2; 4428 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4429 %} 4430 ins_pipe( fpu_reg_reg ); 4431 %} 4432 4433 instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{ 4434 predicate(n->as_Vector()->length() == 16 && UseAVX > 2); 4435 match(Set dst (ReplicateF zero)); 4436 format %{ "vpxor $dst k0,$dst,$dst\t! replicate16F zero" %} 4437 ins_encode %{ 4438 // Use vpxor in place of vxorps since EVEX has a constriant on dq for vxorps: this is a 512-bit operation 4439 int vector_len = 2; 4440 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4441 %} 4442 ins_pipe( fpu_reg_reg ); 4443 %} 4444 4445 instruct Repl4D_evex(vecY dst, regD src) %{ 4446 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4447 match(Set dst (ReplicateD src)); 4448 format %{ "vbroadcastsd $dst,$src\t! replicate4D" %} 4449 ins_encode %{ 4450 int vector_len = 1; 4451 __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4452 %} 4453 ins_pipe( pipe_slow ); 4454 %} 4455 4456 instruct Repl4D_mem_evex(vecY dst, memory mem) %{ 4457 predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl()); 4458 match(Set dst (ReplicateD (LoadD mem))); 4459 format %{ "vbroadcastsd $dst,$mem\t! replicate4D" %} 4460 ins_encode %{ 4461 int vector_len = 1; 4462 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4463 %} 4464 ins_pipe( pipe_slow ); 4465 %} 4466 4467 instruct Repl8D_evex(vecZ dst, regD src) %{ 4468 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4469 match(Set dst (ReplicateD src)); 4470 format %{ "vbroadcastsd $dst,$src\t! replicate8D" %} 4471 ins_encode %{ 4472 int vector_len = 2; 4473 __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); 4474 %} 4475 ins_pipe( pipe_slow ); 4476 %} 4477 4478 instruct Repl8D_mem_evex(vecZ dst, memory mem) %{ 4479 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4480 match(Set dst (ReplicateD (LoadD mem))); 4481 format %{ "vbroadcastsd $dst,$mem\t! 
replicate8D" %} 4482 ins_encode %{ 4483 int vector_len = 2; 4484 __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len); 4485 %} 4486 ins_pipe( pipe_slow ); 4487 %} 4488 4489 instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{ 4490 predicate(n->as_Vector()->length() == 2 && UseAVX > 2); 4491 match(Set dst (ReplicateD zero)); 4492 format %{ "vpxor $dst k0,$dst,$dst\t! replicate2D zero" %} 4493 ins_encode %{ 4494 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4495 int vector_len = 2; 4496 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4497 %} 4498 ins_pipe( fpu_reg_reg ); 4499 %} 4500 4501 instruct Repl4D_zero_evex(vecY dst, immD0 zero) %{ 4502 predicate(n->as_Vector()->length() == 4 && UseAVX > 2); 4503 match(Set dst (ReplicateD zero)); 4504 format %{ "vpxor $dst k0,$dst,$dst\t! replicate4D zero" %} 4505 ins_encode %{ 4506 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4507 int vector_len = 2; 4508 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4509 %} 4510 ins_pipe( fpu_reg_reg ); 4511 %} 4512 4513 instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{ 4514 predicate(n->as_Vector()->length() == 8 && UseAVX > 2); 4515 match(Set dst (ReplicateD zero)); 4516 format %{ "vpxor $dst k0,$dst,$dst,vect512\t! replicate8D zero" %} 4517 ins_encode %{ 4518 // Use vpxor in place of vxorpd since EVEX has a constriant on dq for vxorpd: this is a 512-bit operation 4519 int vector_len = 2; 4520 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len); 4521 %} 4522 ins_pipe( fpu_reg_reg ); 4523 %} 4524 4525 // ====================REDUCTION ARITHMETIC======================================= 4526 4527 instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4528 predicate(UseSSE > 2 && UseAVX == 0); 4529 match(Set dst (AddReductionVI src1 src2)); 4530 effect(TEMP tmp2, TEMP tmp); 4531 format %{ "movdqu $tmp2,$src2\n\t" 4532 "phaddd $tmp2,$tmp2\n\t" 4533 "movd $tmp,$src1\n\t" 4534 "paddd $tmp,$tmp2\n\t" 4535 "movd $dst,$tmp\t! add reduction2I" %} 4536 ins_encode %{ 4537 __ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister); 4538 __ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister); 4539 __ movdl($tmp$$XMMRegister, $src1$$Register); 4540 __ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister); 4541 __ movdl($dst$$Register, $tmp$$XMMRegister); 4542 %} 4543 ins_pipe( pipe_slow ); 4544 %} 4545 4546 instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4547 predicate(VM_Version::supports_avxonly()); 4548 match(Set dst (AddReductionVI src1 src2)); 4549 effect(TEMP tmp, TEMP tmp2); 4550 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4551 "movd $tmp2,$src1\n\t" 4552 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4553 "movd $dst,$tmp2\t! 
add reduction2I" %} 4554 ins_encode %{ 4555 int vector_len = 0; 4556 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4557 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4558 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4559 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4560 %} 4561 ins_pipe( pipe_slow ); 4562 %} 4563 4564 instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 4565 predicate(UseAVX > 2); 4566 match(Set dst (AddReductionVI src1 src2)); 4567 effect(TEMP tmp, TEMP tmp2); 4568 format %{ "pshufd $tmp2,$src2,0x1\n\t" 4569 "vpaddd $tmp,$src2,$tmp2\n\t" 4570 "movd $tmp2,$src1\n\t" 4571 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4572 "movd $dst,$tmp2\t! add reduction2I" %} 4573 ins_encode %{ 4574 int vector_len = 0; 4575 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 4576 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4577 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4578 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4579 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4580 %} 4581 ins_pipe( pipe_slow ); 4582 %} 4583 4584 instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4585 predicate(UseSSE > 2 && UseAVX == 0); 4586 match(Set dst (AddReductionVI src1 src2)); 4587 effect(TEMP tmp, TEMP tmp2); 4588 format %{ "movdqu $tmp,$src2\n\t" 4589 "phaddd $tmp,$tmp\n\t" 4590 "phaddd $tmp,$tmp\n\t" 4591 "movd $tmp2,$src1\n\t" 4592 "paddd $tmp2,$tmp\n\t" 4593 "movd $dst,$tmp2\t! add reduction4I" %} 4594 ins_encode %{ 4595 __ movdqu($tmp$$XMMRegister, $src2$$XMMRegister); 4596 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 4597 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 4598 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4599 __ paddd($tmp2$$XMMRegister, $tmp$$XMMRegister); 4600 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4601 %} 4602 ins_pipe( pipe_slow ); 4603 %} 4604 4605 instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4606 predicate(VM_Version::supports_avxonly()); 4607 match(Set dst (AddReductionVI src1 src2)); 4608 effect(TEMP tmp, TEMP tmp2); 4609 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4610 "vphaddd $tmp,$tmp,$tmp\n\t" 4611 "movd $tmp2,$src1\n\t" 4612 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4613 "movd $dst,$tmp2\t! add reduction4I" %} 4614 ins_encode %{ 4615 int vector_len = 0; 4616 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4617 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len); 4618 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4619 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, vector_len); 4620 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4621 %} 4622 ins_pipe( pipe_slow ); 4623 %} 4624 4625 instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 4626 predicate(UseAVX > 2); 4627 match(Set dst (AddReductionVI src1 src2)); 4628 effect(TEMP tmp, TEMP tmp2); 4629 format %{ "pshufd $tmp2,$src2,0xE\n\t" 4630 "vpaddd $tmp,$src2,$tmp2\n\t" 4631 "pshufd $tmp2,$tmp,0x1\n\t" 4632 "vpaddd $tmp,$tmp,$tmp2\n\t" 4633 "movd $tmp2,$src1\n\t" 4634 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4635 "movd $dst,$tmp2\t! 
add reduction4I" %} 4636 ins_encode %{ 4637 int vector_len = 0; 4638 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 4639 __ vpaddd($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4640 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4641 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4642 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4643 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4644 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4645 %} 4646 ins_pipe( pipe_slow ); 4647 %} 4648 4649 instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4650 predicate(VM_Version::supports_avxonly()); 4651 match(Set dst (AddReductionVI src1 src2)); 4652 effect(TEMP tmp, TEMP tmp2); 4653 format %{ "vphaddd $tmp,$src2,$src2\n\t" 4654 "vphaddd $tmp,$tmp,$tmp2\n\t" 4655 "vextracti128_high $tmp2,$tmp\n\t" 4656 "vpaddd $tmp,$tmp,$tmp2\n\t" 4657 "movd $tmp2,$src1\n\t" 4658 "vpaddd $tmp2,$tmp2,$tmp\n\t" 4659 "movd $dst,$tmp2\t! add reduction8I" %} 4660 ins_encode %{ 4661 int vector_len = 1; 4662 __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len); 4663 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4664 __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister); 4665 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 4666 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4667 __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4668 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4669 %} 4670 ins_pipe( pipe_slow ); 4671 %} 4672 4673 instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 4674 predicate(UseAVX > 2); 4675 match(Set dst (AddReductionVI src1 src2)); 4676 effect(TEMP tmp, TEMP tmp2); 4677 format %{ "vextracti128_high $tmp,$src2\n\t" 4678 "vpaddd $tmp,$tmp,$src2\n\t" 4679 "pshufd $tmp2,$tmp,0xE\n\t" 4680 "vpaddd $tmp,$tmp,$tmp2\n\t" 4681 "pshufd $tmp2,$tmp,0x1\n\t" 4682 "vpaddd $tmp,$tmp,$tmp2\n\t" 4683 "movd $tmp2,$src1\n\t" 4684 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4685 "movd $dst,$tmp2\t! add reduction8I" %} 4686 ins_encode %{ 4687 int vector_len = 0; 4688 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 4689 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 4690 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 4691 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4692 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 4693 __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4694 __ movdl($tmp2$$XMMRegister, $src1$$Register); 4695 __ vpaddd($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 4696 __ movdl($dst$$Register, $tmp2$$XMMRegister); 4697 %} 4698 ins_pipe( pipe_slow ); 4699 %} 4700 4701 instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 4702 predicate(UseAVX > 2); 4703 match(Set dst (AddReductionVI src1 src2)); 4704 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 4705 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 4706 "vpaddd $tmp3,$tmp3,$src2\n\t" 4707 "vextracti128_high $tmp,$tmp3\n\t" 4708 "vpaddd $tmp,$tmp,$tmp3\n\t" 4709 "pshufd $tmp2,$tmp,0xE\n\t" 4710 "vpaddd $tmp,$tmp,$tmp2\n\t" 4711 "pshufd $tmp2,$tmp,0x1\n\t" 4712 "vpaddd $tmp,$tmp,$tmp2\n\t" 4713 "movd $tmp2,$src1\n\t" 4714 "vpaddd $tmp2,$tmp,$tmp2\n\t" 4715 "movd $dst,$tmp2\t! 
#ifdef _LP64
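// The long-vector reductions are LP64-only because movdq moves 64-bit
// values between a general register (rRegL) and an XMM register, a REX.W
// encoding that exists only in 64-bit mode.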
add reduction8L" %} 4788 ins_encode %{ 4789 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 4790 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 4791 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 4792 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4793 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 4794 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4795 __ movdq($tmp$$XMMRegister, $src1$$Register); 4796 __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 4797 __ movdq($dst$$Register, $tmp2$$XMMRegister); 4798 %} 4799 ins_pipe( pipe_slow ); 4800 %} 4801 #endif 4802 4803 instruct rsadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 4804 predicate(UseSSE >= 1 && UseAVX == 0); 4805 match(Set dst (AddReductionVF dst src2)); 4806 effect(TEMP dst, TEMP tmp); 4807 format %{ "addss $dst,$src2\n\t" 4808 "pshufd $tmp,$src2,0x01\n\t" 4809 "addss $dst,$tmp\t! add reduction2F" %} 4810 ins_encode %{ 4811 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 4812 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4813 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4814 %} 4815 ins_pipe( pipe_slow ); 4816 %} 4817 4818 instruct rvadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 4819 predicate(UseAVX > 0); 4820 match(Set dst (AddReductionVF dst src2)); 4821 effect(TEMP dst, TEMP tmp); 4822 format %{ "vaddss $dst,$dst,$src2\n\t" 4823 "pshufd $tmp,$src2,0x01\n\t" 4824 "vaddss $dst,$dst,$tmp\t! add reduction2F" %} 4825 ins_encode %{ 4826 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4827 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4828 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4829 %} 4830 ins_pipe( pipe_slow ); 4831 %} 4832 4833 instruct rsadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 4834 predicate(UseSSE >= 1 && UseAVX == 0); 4835 match(Set dst (AddReductionVF dst src2)); 4836 effect(TEMP dst, TEMP tmp); 4837 format %{ "addss $dst,$src2\n\t" 4838 "pshufd $tmp,$src2,0x01\n\t" 4839 "addss $dst,$tmp\n\t" 4840 "pshufd $tmp,$src2,0x02\n\t" 4841 "addss $dst,$tmp\n\t" 4842 "pshufd $tmp,$src2,0x03\n\t" 4843 "addss $dst,$tmp\t! add reduction4F" %} 4844 ins_encode %{ 4845 __ addss($dst$$XMMRegister, $src2$$XMMRegister); 4846 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4847 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4848 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4849 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4850 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4851 __ addss($dst$$XMMRegister, $tmp$$XMMRegister); 4852 %} 4853 ins_pipe( pipe_slow ); 4854 %} 4855 4856 instruct rvadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 4857 predicate(UseAVX > 0); 4858 match(Set dst (AddReductionVF dst src2)); 4859 effect(TEMP tmp, TEMP dst); 4860 format %{ "vaddss $dst,dst,$src2\n\t" 4861 "pshufd $tmp,$src2,0x01\n\t" 4862 "vaddss $dst,$dst,$tmp\n\t" 4863 "pshufd $tmp,$src2,0x02\n\t" 4864 "vaddss $dst,$dst,$tmp\n\t" 4865 "pshufd $tmp,$src2,0x03\n\t" 4866 "vaddss $dst,$dst,$tmp\t! 
add reduction4F" %} 4867 ins_encode %{ 4868 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4869 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4870 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4871 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4872 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4873 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4874 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4875 %} 4876 ins_pipe( pipe_slow ); 4877 %} 4878 4879 instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ 4880 predicate(UseAVX > 0); 4881 match(Set dst (AddReductionVF dst src2)); 4882 effect(TEMP tmp, TEMP dst, TEMP tmp2); 4883 format %{ "vaddss $dst,$dst,$src2\n\t" 4884 "pshufd $tmp,$src2,0x01\n\t" 4885 "vaddss $dst,$dst,$tmp\n\t" 4886 "pshufd $tmp,$src2,0x02\n\t" 4887 "vaddss $dst,$dst,$tmp\n\t" 4888 "pshufd $tmp,$src2,0x03\n\t" 4889 "vaddss $dst,$dst,$tmp\n\t" 4890 "vextractf128_high $tmp2,$src2\n\t" 4891 "vaddss $dst,$dst,$tmp2\n\t" 4892 "pshufd $tmp,$tmp2,0x01\n\t" 4893 "vaddss $dst,$dst,$tmp\n\t" 4894 "pshufd $tmp,$tmp2,0x02\n\t" 4895 "vaddss $dst,$dst,$tmp\n\t" 4896 "pshufd $tmp,$tmp2,0x03\n\t" 4897 "vaddss $dst,$dst,$tmp\t! add reduction8F" %} 4898 ins_encode %{ 4899 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 4900 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 4901 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4902 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 4903 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4904 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 4905 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4906 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 4907 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 4908 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 4909 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4910 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 4911 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4912 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 4913 __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 4914 %} 4915 ins_pipe( pipe_slow ); 4916 %} 4917 4918 instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ 4919 predicate(UseAVX > 2); 4920 match(Set dst (AddReductionVF dst src2)); 4921 effect(TEMP tmp, TEMP dst, TEMP tmp2); 4922 format %{ "vaddss $dst,$dst,$src2\n\t" 4923 "pshufd $tmp,$src2,0x01\n\t" 4924 "vaddss $dst,$dst,$tmp\n\t" 4925 "pshufd $tmp,$src2,0x02\n\t" 4926 "vaddss $dst,$dst,$tmp\n\t" 4927 "pshufd $tmp,$src2,0x03\n\t" 4928 "vaddss $dst,$dst,$tmp\n\t" 4929 "vextractf32x4 $tmp2,$src2,0x1\n\t" 4930 "vaddss $dst,$dst,$tmp2\n\t" 4931 "pshufd $tmp,$tmp2,0x01\n\t" 4932 "vaddss $dst,$dst,$tmp\n\t" 4933 "pshufd $tmp,$tmp2,0x02\n\t" 4934 "vaddss $dst,$dst,$tmp\n\t" 4935 "pshufd $tmp,$tmp2,0x03\n\t" 4936 "vaddss $dst,$dst,$tmp\n\t" 4937 "vextractf32x4 $tmp2,$src2,0x2\n\t" 4938 "vaddss $dst,$dst,$tmp2\n\t" 4939 "pshufd $tmp,$tmp2,0x01\n\t" 4940 "vaddss $dst,$dst,$tmp\n\t" 4941 "pshufd $tmp,$tmp2,0x02\n\t" 4942 "vaddss $dst,$dst,$tmp\n\t" 4943 "pshufd $tmp,$tmp2,0x03\n\t" 4944 "vaddss $dst,$dst,$tmp\n\t" 4945 "vextractf32x4 $tmp2,$src2,0x3\n\t" 4946 "vaddss $dst,$dst,$tmp2\n\t" 4947 "pshufd $tmp,$tmp2,0x01\n\t" 4948 "vaddss $dst,$dst,$tmp\n\t" 4949 "pshufd 
instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (AddReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vaddss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vaddss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vaddss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vaddss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vaddss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vaddss $dst,$dst,$tmp\t! add reduction16F" %}
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (AddReductionVD dst src2));
  effect(TEMP tmp, TEMP dst);
  format %{ "addsd $dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "addsd $dst,$tmp\t! add reduction2D" %}
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ addsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

add reduction2D" %} 5011 ins_encode %{ 5012 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5013 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5014 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5015 %} 5016 ins_pipe( pipe_slow ); 5017 %} 5018 5019 instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ 5020 predicate(UseAVX > 0); 5021 match(Set dst (AddReductionVD dst src2)); 5022 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5023 format %{ "vaddsd $dst,$dst,$src2\n\t" 5024 "pshufd $tmp,$src2,0xE\n\t" 5025 "vaddsd $dst,$dst,$tmp\n\t" 5026 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5027 "vaddsd $dst,$dst,$tmp2\n\t" 5028 "pshufd $tmp,$tmp2,0xE\n\t" 5029 "vaddsd $dst,$dst,$tmp\t! add reduction4D" %} 5030 ins_encode %{ 5031 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5032 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5033 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5034 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5035 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5036 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5037 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5038 %} 5039 ins_pipe( pipe_slow ); 5040 %} 5041 5042 instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ 5043 predicate(UseAVX > 2); 5044 match(Set dst (AddReductionVD dst src2)); 5045 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5046 format %{ "vaddsd $dst,$dst,$src2\n\t" 5047 "pshufd $tmp,$src2,0xE\n\t" 5048 "vaddsd $dst,$dst,$tmp\n\t" 5049 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5050 "vaddsd $dst,$dst,$tmp2\n\t" 5051 "pshufd $tmp,$tmp2,0xE\n\t" 5052 "vaddsd $dst,$dst,$tmp\n\t" 5053 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5054 "vaddsd $dst,$dst,$tmp2\n\t" 5055 "pshufd $tmp,$tmp2,0xE\n\t" 5056 "vaddsd $dst,$dst,$tmp\n\t" 5057 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5058 "vaddsd $dst,$dst,$tmp2\n\t" 5059 "pshufd $tmp,$tmp2,0xE\n\t" 5060 "vaddsd $dst,$dst,$tmp\t! add reduction8D" %} 5061 ins_encode %{ 5062 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5063 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5064 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5065 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5066 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5067 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5068 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5069 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5070 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5071 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5072 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5073 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5074 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5075 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5076 __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5077 %} 5078 ins_pipe( pipe_slow ); 5079 %} 5080 5081 instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 5082 predicate(UseSSE > 3 && UseAVX == 0); 5083 match(Set dst (MulReductionVI src1 src2)); 5084 effect(TEMP tmp, TEMP tmp2); 5085 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5086 "pmulld $tmp2,$src2\n\t" 5087 "movd $tmp,$src1\n\t" 5088 "pmulld $tmp2,$tmp\n\t" 5089 "movd $dst,$tmp2\t! 
mul reduction2I" %} 5090 ins_encode %{ 5091 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5092 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5093 __ movdl($tmp$$XMMRegister, $src1$$Register); 5094 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5095 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5096 %} 5097 ins_pipe( pipe_slow ); 5098 %} 5099 5100 instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{ 5101 predicate(UseAVX > 0); 5102 match(Set dst (MulReductionVI src1 src2)); 5103 effect(TEMP tmp, TEMP tmp2); 5104 format %{ "pshufd $tmp2,$src2,0x1\n\t" 5105 "vpmulld $tmp,$src2,$tmp2\n\t" 5106 "movd $tmp2,$src1\n\t" 5107 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5108 "movd $dst,$tmp2\t! mul reduction2I" %} 5109 ins_encode %{ 5110 int vector_len = 0; 5111 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5112 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5113 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5114 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5115 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5116 %} 5117 ins_pipe( pipe_slow ); 5118 %} 5119 5120 instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5121 predicate(UseSSE > 3 && UseAVX == 0); 5122 match(Set dst (MulReductionVI src1 src2)); 5123 effect(TEMP tmp, TEMP tmp2); 5124 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5125 "pmulld $tmp2,$src2\n\t" 5126 "pshufd $tmp,$tmp2,0x1\n\t" 5127 "pmulld $tmp2,$tmp\n\t" 5128 "movd $tmp,$src1\n\t" 5129 "pmulld $tmp2,$tmp\n\t" 5130 "movd $dst,$tmp2\t! mul reduction4I" %} 5131 ins_encode %{ 5132 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5133 __ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister); 5134 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1); 5135 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5136 __ movdl($tmp$$XMMRegister, $src1$$Register); 5137 __ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister); 5138 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5139 %} 5140 ins_pipe( pipe_slow ); 5141 %} 5142 5143 instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{ 5144 predicate(UseAVX > 0); 5145 match(Set dst (MulReductionVI src1 src2)); 5146 effect(TEMP tmp, TEMP tmp2); 5147 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5148 "vpmulld $tmp,$src2,$tmp2\n\t" 5149 "pshufd $tmp2,$tmp,0x1\n\t" 5150 "vpmulld $tmp,$tmp,$tmp2\n\t" 5151 "movd $tmp2,$src1\n\t" 5152 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5153 "movd $dst,$tmp2\t! 
mul reduction4I" %} 5154 ins_encode %{ 5155 int vector_len = 0; 5156 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5157 __ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5158 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5159 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5160 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5161 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5162 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5163 %} 5164 ins_pipe( pipe_slow ); 5165 %} 5166 5167 instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{ 5168 predicate(UseAVX > 0); 5169 match(Set dst (MulReductionVI src1 src2)); 5170 effect(TEMP tmp, TEMP tmp2); 5171 format %{ "vextracti128_high $tmp,$src2\n\t" 5172 "vpmulld $tmp,$tmp,$src2\n\t" 5173 "pshufd $tmp2,$tmp,0xE\n\t" 5174 "vpmulld $tmp,$tmp,$tmp2\n\t" 5175 "pshufd $tmp2,$tmp,0x1\n\t" 5176 "vpmulld $tmp,$tmp,$tmp2\n\t" 5177 "movd $tmp2,$src1\n\t" 5178 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5179 "movd $dst,$tmp2\t! mul reduction8I" %} 5180 ins_encode %{ 5181 int vector_len = 0; 5182 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5183 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len); 5184 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5185 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5186 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5187 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5188 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5189 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len); 5190 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5191 %} 5192 ins_pipe( pipe_slow ); 5193 %} 5194 5195 instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{ 5196 predicate(UseAVX > 2); 5197 match(Set dst (MulReductionVI src1 src2)); 5198 effect(TEMP tmp, TEMP tmp2, TEMP tmp3); 5199 format %{ "vextracti64x4_high $tmp3,$src2\n\t" 5200 "vpmulld $tmp3,$tmp3,$src2\n\t" 5201 "vextracti128_high $tmp,$tmp3\n\t" 5202 "vpmulld $tmp,$tmp,$src2\n\t" 5203 "pshufd $tmp2,$tmp,0xE\n\t" 5204 "vpmulld $tmp,$tmp,$tmp2\n\t" 5205 "pshufd $tmp2,$tmp,0x1\n\t" 5206 "vpmulld $tmp,$tmp,$tmp2\n\t" 5207 "movd $tmp2,$src1\n\t" 5208 "vpmulld $tmp2,$tmp,$tmp2\n\t" 5209 "movd $dst,$tmp2\t! 
mul reduction16I" %} 5210 ins_encode %{ 5211 __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister); 5212 __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1); 5213 __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister); 5214 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0); 5215 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE); 5216 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5217 __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1); 5218 __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5219 __ movdl($tmp2$$XMMRegister, $src1$$Register); 5220 __ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5221 __ movdl($dst$$Register, $tmp2$$XMMRegister); 5222 %} 5223 ins_pipe( pipe_slow ); 5224 %} 5225 5226 #ifdef _LP64 5227 instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{ 5228 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5229 match(Set dst (MulReductionVL src1 src2)); 5230 effect(TEMP tmp, TEMP tmp2); 5231 format %{ "pshufd $tmp2,$src2,0xE\n\t" 5232 "vpmullq $tmp,$src2,$tmp2\n\t" 5233 "movdq $tmp2,$src1\n\t" 5234 "vpmullq $tmp2,$tmp,$tmp2\n\t" 5235 "movdq $dst,$tmp2\t! mul reduction2L" %} 5236 ins_encode %{ 5237 __ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE); 5238 __ vpmullq($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, 0); 5239 __ movdq($tmp2$$XMMRegister, $src1$$Register); 5240 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0); 5241 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5242 %} 5243 ins_pipe( pipe_slow ); 5244 %} 5245 5246 instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{ 5247 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5248 match(Set dst (MulReductionVL src1 src2)); 5249 effect(TEMP tmp, TEMP tmp2); 5250 format %{ "vextracti128_high $tmp,$src2\n\t" 5251 "vpmullq $tmp2,$tmp,$src2\n\t" 5252 "pshufd $tmp,$tmp2,0xE\n\t" 5253 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5254 "movdq $tmp,$src1\n\t" 5255 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5256 "movdq $dst,$tmp2\t! mul reduction4L" %} 5257 ins_encode %{ 5258 __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister); 5259 __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0); 5260 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5261 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5262 __ movdq($tmp$$XMMRegister, $src1$$Register); 5263 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5264 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5265 %} 5266 ins_pipe( pipe_slow ); 5267 %} 5268 5269 instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{ 5270 predicate(UseAVX > 2 && VM_Version::supports_avx512dq()); 5271 match(Set dst (MulReductionVL src1 src2)); 5272 effect(TEMP tmp, TEMP tmp2); 5273 format %{ "vextracti64x4_high $tmp2,$src2\n\t" 5274 "vpmullq $tmp2,$tmp2,$src2\n\t" 5275 "vextracti128_high $tmp,$tmp2\n\t" 5276 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5277 "pshufd $tmp,$tmp2,0xE\n\t" 5278 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5279 "movdq $tmp,$src1\n\t" 5280 "vpmullq $tmp2,$tmp2,$tmp\n\t" 5281 "movdq $dst,$tmp2\t! 
mul reduction8L" %} 5282 ins_encode %{ 5283 __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5284 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1); 5285 __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister); 5286 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5287 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5288 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5289 __ movdq($tmp$$XMMRegister, $src1$$Register); 5290 __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0); 5291 __ movdq($dst$$Register, $tmp2$$XMMRegister); 5292 %} 5293 ins_pipe( pipe_slow ); 5294 %} 5295 #endif 5296 5297 instruct rsmul2F_reduction(regF dst, vecD src2, regF tmp) %{ 5298 predicate(UseSSE >= 1 && UseAVX == 0); 5299 match(Set dst (MulReductionVF dst src2)); 5300 effect(TEMP dst, TEMP tmp); 5301 format %{ "mulss $dst,$src2\n\t" 5302 "pshufd $tmp,$src2,0x01\n\t" 5303 "mulss $dst,$tmp\t! mul reduction2F" %} 5304 ins_encode %{ 5305 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5306 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5307 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5308 %} 5309 ins_pipe( pipe_slow ); 5310 %} 5311 5312 instruct rvmul2F_reduction_reg(regF dst, vecD src2, regF tmp) %{ 5313 predicate(UseAVX > 0); 5314 match(Set dst (MulReductionVF dst src2)); 5315 effect(TEMP tmp, TEMP dst); 5316 format %{ "vmulss $dst,$dst,$src2\n\t" 5317 "pshufd $tmp,$src2,0x01\n\t" 5318 "vmulss $dst,$dst,$tmp\t! mul reduction2F" %} 5319 ins_encode %{ 5320 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5321 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5322 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5323 %} 5324 ins_pipe( pipe_slow ); 5325 %} 5326 5327 instruct rsmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5328 predicate(UseSSE >= 1 && UseAVX == 0); 5329 match(Set dst (MulReductionVF dst src2)); 5330 effect(TEMP dst, TEMP tmp); 5331 format %{ "mulss $dst,$src2\n\t" 5332 "pshufd $tmp,$src2,0x01\n\t" 5333 "mulss $dst,$tmp\n\t" 5334 "pshufd $tmp,$src2,0x02\n\t" 5335 "mulss $dst,$tmp\n\t" 5336 "pshufd $tmp,$src2,0x03\n\t" 5337 "mulss $dst,$tmp\t! mul reduction4F" %} 5338 ins_encode %{ 5339 __ mulss($dst$$XMMRegister, $src2$$XMMRegister); 5340 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5341 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5342 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5343 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5344 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5345 __ mulss($dst$$XMMRegister, $tmp$$XMMRegister); 5346 %} 5347 ins_pipe( pipe_slow ); 5348 %} 5349 5350 instruct rvmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{ 5351 predicate(UseAVX > 0); 5352 match(Set dst (MulReductionVF dst src2)); 5353 effect(TEMP tmp, TEMP dst); 5354 format %{ "vmulss $dst,$dst,$src2\n\t" 5355 "pshufd $tmp,$src2,0x01\n\t" 5356 "vmulss $dst,$dst,$tmp\n\t" 5357 "pshufd $tmp,$src2,0x02\n\t" 5358 "vmulss $dst,$dst,$tmp\n\t" 5359 "pshufd $tmp,$src2,0x03\n\t" 5360 "vmulss $dst,$dst,$tmp\t! 
mul reduction4F" %} 5361 ins_encode %{ 5362 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5363 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5364 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5365 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5366 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5367 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5368 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5369 %} 5370 ins_pipe( pipe_slow ); 5371 %} 5372 5373 instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{ 5374 predicate(UseAVX > 0); 5375 match(Set dst (MulReductionVF dst src2)); 5376 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5377 format %{ "vmulss $dst,$dst,$src2\n\t" 5378 "pshufd $tmp,$src2,0x01\n\t" 5379 "vmulss $dst,$dst,$tmp\n\t" 5380 "pshufd $tmp,$src2,0x02\n\t" 5381 "vmulss $dst,$dst,$tmp\n\t" 5382 "pshufd $tmp,$src2,0x03\n\t" 5383 "vmulss $dst,$dst,$tmp\n\t" 5384 "vextractf128_high $tmp2,$src2\n\t" 5385 "vmulss $dst,$dst,$tmp2\n\t" 5386 "pshufd $tmp,$tmp2,0x01\n\t" 5387 "vmulss $dst,$dst,$tmp\n\t" 5388 "pshufd $tmp,$tmp2,0x02\n\t" 5389 "vmulss $dst,$dst,$tmp\n\t" 5390 "pshufd $tmp,$tmp2,0x03\n\t" 5391 "vmulss $dst,$dst,$tmp\t! mul reduction8F" %} 5392 ins_encode %{ 5393 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5394 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01); 5395 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5396 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02); 5397 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5398 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03); 5399 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5400 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5401 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5402 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01); 5403 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5404 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02); 5405 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5406 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03); 5407 __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5408 %} 5409 ins_pipe( pipe_slow ); 5410 %} 5411 5412 instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{ 5413 predicate(UseAVX > 2); 5414 match(Set dst (MulReductionVF dst src2)); 5415 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5416 format %{ "vmulss $dst,$dst,$src2\n\t" 5417 "pshufd $tmp,$src2,0x01\n\t" 5418 "vmulss $dst,$dst,$tmp\n\t" 5419 "pshufd $tmp,$src2,0x02\n\t" 5420 "vmulss $dst,$dst,$tmp\n\t" 5421 "pshufd $tmp,$src2,0x03\n\t" 5422 "vmulss $dst,$dst,$tmp\n\t" 5423 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5424 "vmulss $dst,$dst,$tmp2\n\t" 5425 "pshufd $tmp,$tmp2,0x01\n\t" 5426 "vmulss $dst,$dst,$tmp\n\t" 5427 "pshufd $tmp,$tmp2,0x02\n\t" 5428 "vmulss $dst,$dst,$tmp\n\t" 5429 "pshufd $tmp,$tmp2,0x03\n\t" 5430 "vmulss $dst,$dst,$tmp\n\t" 5431 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5432 "vmulss $dst,$dst,$tmp2\n\t" 5433 "pshufd $tmp,$tmp2,0x01\n\t" 5434 "vmulss $dst,$dst,$tmp\n\t" 5435 "pshufd $tmp,$tmp2,0x02\n\t" 5436 "vmulss $dst,$dst,$tmp\n\t" 5437 "pshufd $tmp,$tmp2,0x03\n\t" 5438 "vmulss $dst,$dst,$tmp\n\t" 5439 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5440 "vmulss $dst,$dst,$tmp2\n\t" 5441 "pshufd $tmp,$tmp2,0x01\n\t" 5442 "vmulss $dst,$dst,$tmp\n\t" 5443 "pshufd 
instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
  predicate(UseAVX > 2);
  match(Set dst (MulReductionVF dst src2));
  effect(TEMP tmp, TEMP dst, TEMP tmp2);
  format %{ "vmulss $dst,$dst,$src2\n\t"
            "pshufd $tmp,$src2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$src2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x1\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x2\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "vextractf32x4 $tmp2,$src2,0x3\n\t"
            "vmulss $dst,$dst,$tmp2\n\t"
            "pshufd $tmp,$tmp2,0x01\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x02\n\t"
            "vmulss $dst,$dst,$tmp\n\t"
            "pshufd $tmp,$tmp2,0x03\n\t"
            "vmulss $dst,$dst,$tmp\t! mul reduction16F" %}
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x02);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
    __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rsmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
  predicate(UseSSE >= 1 && UseAVX == 0);
  match(Set dst (MulReductionVD dst src2));
  effect(TEMP dst, TEMP tmp);
  format %{ "mulsd $dst,$src2\n\t"
            "pshufd $tmp,$src2,0xE\n\t"
            "mulsd $dst,$tmp\t! mul reduction2D" %}
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src2$$XMMRegister);
    __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
    __ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

mul reduction2D" %} 5505 ins_encode %{ 5506 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5507 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5508 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5509 %} 5510 ins_pipe( pipe_slow ); 5511 %} 5512 5513 instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{ 5514 predicate(UseAVX > 0); 5515 match(Set dst (MulReductionVD dst src2)); 5516 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5517 format %{ "vmulsd $dst,$dst,$src2\n\t" 5518 "pshufd $tmp,$src2,0xE\n\t" 5519 "vmulsd $dst,$dst,$tmp\n\t" 5520 "vextractf128_high $tmp2,$src2\n\t" 5521 "vmulsd $dst,$dst,$tmp2\n\t" 5522 "pshufd $tmp,$tmp2,0xE\n\t" 5523 "vmulsd $dst,$dst,$tmp\t! mul reduction4D" %} 5524 ins_encode %{ 5525 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5526 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5527 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5528 __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister); 5529 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5530 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5531 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5532 %} 5533 ins_pipe( pipe_slow ); 5534 %} 5535 5536 instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{ 5537 predicate(UseAVX > 2); 5538 match(Set dst (MulReductionVD dst src2)); 5539 effect(TEMP tmp, TEMP dst, TEMP tmp2); 5540 format %{ "vmulsd $dst,$dst,$src2\n\t" 5541 "pshufd $tmp,$src2,0xE\n\t" 5542 "vmulsd $dst,$dst,$tmp\n\t" 5543 "vextractf32x4 $tmp2,$src2,0x1\n\t" 5544 "vmulsd $dst,$dst,$tmp2\n\t" 5545 "pshufd $tmp,$src2,0xE\n\t" 5546 "vmulsd $dst,$dst,$tmp\n\t" 5547 "vextractf32x4 $tmp2,$src2,0x2\n\t" 5548 "vmulsd $dst,$dst,$tmp2\n\t" 5549 "pshufd $tmp,$tmp2,0xE\n\t" 5550 "vmulsd $dst,$dst,$tmp\n\t" 5551 "vextractf32x4 $tmp2,$src2,0x3\n\t" 5552 "vmulsd $dst,$dst,$tmp2\n\t" 5553 "pshufd $tmp,$tmp2,0xE\n\t" 5554 "vmulsd $dst,$dst,$tmp\t! mul reduction8D" %} 5555 ins_encode %{ 5556 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister); 5557 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE); 5558 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5559 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1); 5560 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5561 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5562 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5563 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2); 5564 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5565 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5566 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5567 __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3); 5568 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister); 5569 __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE); 5570 __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister); 5571 %} 5572 ins_pipe( pipe_slow ); 5573 %} 5574 5575 // ====================VECTOR ARITHMETIC======================================= 5576 5577 // --------------------------------- ADD -------------------------------------- 5578 5579 // Bytes vector add 5580 instruct vadd4B(vecS dst, vecS src) %{ 5581 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 5582 match(Set dst (AddVB dst src)); 5583 format %{ "paddb $dst,$src\t! 
add packed4B" %} 5584 ins_encode %{ 5585 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5586 %} 5587 ins_pipe( pipe_slow ); 5588 %} 5589 5590 instruct vadd4B_reg_avx(vecS dst, vecS src1, vecS src2) %{ 5591 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 5592 match(Set dst (AddVB src1 src2)); 5593 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} 5594 ins_encode %{ 5595 int vector_len = 0; 5596 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5597 %} 5598 ins_pipe( pipe_slow ); 5599 %} 5600 5601 instruct vadd4B_reg_evex(vecS dst, vecS src1, vecS src2) %{ 5602 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 5603 match(Set dst (AddVB src1 src2)); 5604 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} 5605 ins_encode %{ 5606 int vector_len = 0; 5607 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5608 %} 5609 ins_pipe( pipe_slow ); 5610 %} 5611 5612 instruct vadd4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{ 5613 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 5614 match(Set dst (AddVB dst src2)); 5615 effect(TEMP src1); 5616 format %{ "vpaddb $dst,$dst,$src2\t! add packed4B" %} 5617 ins_encode %{ 5618 int vector_len = 0; 5619 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5620 %} 5621 ins_pipe( pipe_slow ); 5622 %} 5623 5624 instruct vadd4B_mem_avx(vecS dst, vecS src, memory mem) %{ 5625 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 5626 match(Set dst (AddVB src (LoadVector mem))); 5627 format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} 5628 ins_encode %{ 5629 int vector_len = 0; 5630 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5631 %} 5632 ins_pipe( pipe_slow ); 5633 %} 5634 5635 instruct vadd4B_mem_evex(vecS dst, vecS src, memory mem) %{ 5636 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 5637 match(Set dst (AddVB src (LoadVector mem))); 5638 format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} 5639 ins_encode %{ 5640 int vector_len = 0; 5641 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5642 %} 5643 ins_pipe( pipe_slow ); 5644 %} 5645 5646 instruct vadd4B_mem_evex_special(vecS dst, vecS src, memory mem) %{ 5647 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 5648 match(Set dst (AddVB dst (LoadVector mem))); 5649 effect(TEMP src); 5650 format %{ "vpaddb $dst,$src,$mem\t! add packed4B" %} 5651 ins_encode %{ 5652 int vector_len = 0; 5653 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5654 %} 5655 ins_pipe( pipe_slow ); 5656 %} 5657 5658 instruct vadd8B(vecD dst, vecD src) %{ 5659 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 5660 match(Set dst (AddVB dst src)); 5661 format %{ "paddb $dst,$src\t! add packed8B" %} 5662 ins_encode %{ 5663 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5664 %} 5665 ins_pipe( pipe_slow ); 5666 %} 5667 5668 instruct vadd8B_reg_avx(vecD dst, vecD src1, vecD src2) %{ 5669 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 5670 match(Set dst (AddVB src1 src2)); 5671 format %{ "vpaddb $dst,$src1,$src2\t! 
add packed8B" %} 5672 ins_encode %{ 5673 int vector_len = 0; 5674 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5675 %} 5676 ins_pipe( pipe_slow ); 5677 %} 5678 5679 instruct vadd8B_reg_evex(vecD dst, vecD src1, vecD src2) %{ 5680 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 5681 match(Set dst (AddVB src1 src2)); 5682 format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} 5683 ins_encode %{ 5684 int vector_len = 0; 5685 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5686 %} 5687 ins_pipe( pipe_slow ); 5688 %} 5689 5690 instruct vadd8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{ 5691 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 5692 match(Set dst (AddVB dst src2)); 5693 effect(TEMP src1); 5694 format %{ "vpaddb $dst,$dst,$src2\t! add packed8B" %} 5695 ins_encode %{ 5696 int vector_len = 0; 5697 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5698 %} 5699 ins_pipe( pipe_slow ); 5700 %} 5701 5702 instruct vadd8B_mem_avx(vecD dst, vecD src, memory mem) %{ 5703 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 5704 match(Set dst (AddVB src (LoadVector mem))); 5705 format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %} 5706 ins_encode %{ 5707 int vector_len = 0; 5708 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5709 %} 5710 ins_pipe( pipe_slow ); 5711 %} 5712 5713 instruct vadd8B_mem_evex(vecD dst, vecD src, memory mem) %{ 5714 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 5715 match(Set dst (AddVB src (LoadVector mem))); 5716 format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %} 5717 ins_encode %{ 5718 int vector_len = 0; 5719 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5720 %} 5721 ins_pipe( pipe_slow ); 5722 %} 5723 5724 instruct vadd8B_mem_evex_special(vecD dst, vecD src, memory mem) %{ 5725 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 5726 match(Set dst (AddVB dst (LoadVector mem))); 5727 effect(TEMP src); 5728 format %{ "vpaddb $dst,$src,$mem\t! add packed8B" %} 5729 ins_encode %{ 5730 int vector_len = 0; 5731 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 5732 %} 5733 ins_pipe( pipe_slow ); 5734 %} 5735 5736 instruct vadd16B(vecX dst, vecX src) %{ 5737 predicate(UseAVX == 0 && n->as_Vector()->length() == 16); 5738 match(Set dst (AddVB dst src)); 5739 format %{ "paddb $dst,$src\t! add packed16B" %} 5740 ins_encode %{ 5741 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5742 %} 5743 ins_pipe( pipe_slow ); 5744 %} 5745 5746 instruct vadd16B_reg_avx(vecX dst, vecX src1, vecX src2) %{ 5747 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16); 5748 match(Set dst (AddVB src1 src2)); 5749 format %{ "vpaddb $dst,$src1,$src2\t! add packed16B" %} 5750 ins_encode %{ 5751 int vector_len = 0; 5752 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 5753 %} 5754 ins_pipe( pipe_slow ); 5755 %} 5756 5757 instruct vadd16B_reg_evex(vecX dst, vecX src1, vecX src2) %{ 5758 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 5759 match(Set dst (AddVB src1 src2)); 5760 format %{ "vpaddb $dst,$src1,$src2\t! 

instruct vadd16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB dst src2));
  effect(TEMP src1);
  format %{ "vpaddb $dst,$dst,$src2\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
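
// For 32-byte (vecY) operands vector_len is 1, selecting the 256-bit form;
// the emitted instruction is of the shape "vpaddb ymm0,ymm1,ymm2" (register
// numbers illustrative). supports_avx256only() and supports_avx512bw()
// split the 256-bit rules the same way as the 128-bit ones above.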

instruct vadd32B_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddb $dst,$src,$mem\t! add packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector add
instruct vadd2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed2S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
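
// AddVS serves both Java shorts and chars: two's-complement addition
// produces the same bit pattern for signed and unsigned 16-bit lanes, so a
// single paddw/vpaddw rule set covers both element types.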

instruct vadd2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed4S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packed8S" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS dst src2));
  effect(TEMP src1);
  format %{ "vpaddw $dst,$dst,$src2\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (AddVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
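
// The 512-bit (vecZ) byte/word rules exist only under supports_avx512bw();
// there is no *_special fallback at this width because without AVX512BW the
// hardware provides no 512-bit byte/word add at all.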

instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector add
instruct vadd2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
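
// Integer (dword) adds need no BW split: paddd is SSE2, the 256-bit vpaddd
// form requires AVX2 (UseAVX > 1), and the 512-bit form is covered by
// AVX512F (UseAVX > 2), hence the simpler predicates in this group.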

instruct vadd16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector add
instruct vadd2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packed2L" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
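
// Qword adds follow the same ladder: paddq in SSE2, 256-bit vpaddq with
// AVX2, and 512-bit vpaddq with AVX512F.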

// Floats vector add
instruct vadd2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
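
// Floating-point adds differ from the integer groups: AVX1 already provides
// 256-bit FP arithmetic, so the 8F rules need only UseAVX > 0, while the
// 512-bit 16F rules require UseAVX > 2 (AVX512F).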

instruct vadd16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector add
instruct vadd2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
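
// The SUB section below mirrors the ADD rules one-for-one, substituting
// psubb/psubw/psubd/psubq and subps/subpd (and their v-prefixed forms) for
// the corresponding adds; the predicates and operand shapes are unchanged.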

// --------------------------------- SUB --------------------------------------

// Bytes vector sub
instruct vsub4B(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed4B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed4B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed8B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB dst src2));
  effect(TEMP src1);
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVB dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed64B" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector sub
instruct vsub2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed2S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
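
// As on the add side, the 64-byte subs above have no *_special fallback,
// since 512-bit byte operations exist only with AVX512BW.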

instruct vsub2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed4S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed8S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS dst src2));
  effect(TEMP src1);
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (SubVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector sub
instruct vsub2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed2I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed4I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
// Integers vector sub
instruct vsub2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed2I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed4I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector sub
instruct vsub2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packed2L" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
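// Note: unlike the 16-bit forms above, packed int/long subtracts need no
// BW-specific variants: 512-bit vpsubd/vpsubq are part of base AVX512F,
// so a plain UseAVX > 2 predicate covers the vecZ forms.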
// Floats vector sub
instruct vsub2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed2F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed4F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector sub
instruct vsub2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! sub packed2D" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- MUL --------------------------------------
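// Note: pmullw/vpmullw keep only the low 16 bits of each 32-bit product,
// which matches Java's truncating short/char multiply; e.g. in one lane
// 0x4000 * 4 = 0x10000 is stored as 0x0000.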
// Shorts/Chars vector mul
instruct vmul2S(vecS dst, vecS src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed2S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_evex_special(vecS dst, vecS src1, vecS src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_mem_avx(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_mem_evex(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S(vecD dst, vecD src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed4S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_mem_avx(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_mem_evex(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S(vecX dst, vecX src) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed8S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem_avx(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem_evex(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS dst src2));
  effect(TEMP src1);
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem_avx(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem_evex(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
  match(Set dst (MulVS dst (LoadVector mem)));
  effect(TEMP src);
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul32S_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed32S" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
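// Note: pmulld (SSE4.1) is the first packed 32x32->32 low multiply, hence
// the UseSSE > 3 predicate below; earlier SSE levels only offer widening
// multiplies such as pmuludq.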
// Integers vector mul (sse4_1)
instruct vmul2I(vecD dst, vecD src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed2I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I(vecX dst, vecX src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed4I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
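// Note: there is no packed 64-bit low multiply below AVX-512; vpmullq is
// an AVX512DQ instruction, which is why every MulVL pattern below also
// checks VM_Version::supports_avx512dq().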
instruct vmul2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 2 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed2L" %}
  ins_encode %{
    int vector_len = 0;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 4 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed4L" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8L_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8L_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8 && VM_Version::supports_avx512dq());
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packed8L" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16I_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector mul
instruct vmul2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed2F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed4F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
// Doubles vector mul
instruct vmul2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packed2D" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst k0,$src1,$src2\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst k0,$src,$mem\t! mul packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
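// Vector conditional moves are lowered to a compare plus a variable blend:
// cmpps/cmppd write an all-ones or all-zeros mask into each lane of $dst,
// then blendvps/blendvpd pick the $src2 lane where the mask is set and the
// $src1 lane otherwise (hence effect(TEMP dst)). The UseAVX < 3 guard
// presumably reflects that AVX-512 would express this with opmask
// registers instead of an XMM mask.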
instruct vcmov8F_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 8);
  match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t"
            "blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t"
  %}
  ins_encode %{
    int vector_len = 1;
    int cond = (Assembler::Condition)($copnd$$cmpcode);
    __ cmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
    __ blendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmov4D_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 4);
  match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t"
            "blendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t"
  %}
  ins_encode %{
    int vector_len = 1;
    int cond = (Assembler::Condition)($copnd$$cmpcode);
    __ cmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
    __ blendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- DIV --------------------------------------
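// Note: x86 SSE/AVX provides no packed integer divide, so only float and
// double vector division is implemented in this section.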
// Floats vector div
instruct vdiv2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed2F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed4F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// 512-bit (vecZ) division needs AVX-512; the UseAVX > 0 predicate the two
// patterns below previously carried was inconsistent with every other vecZ
// form in this file, so it is corrected to UseAVX > 2 here.
instruct vdiv16F_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv16F_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
// Doubles vector div
instruct vdiv2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packed2D" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8D_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ Shift ---------------------------------------

// Left and right shift count vectors are the same on x86
// (only lowest bits of xmm reg are used for count).
instruct vshiftcnt(vecS dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movd $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}
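// Note: a count loaded through vshiftcnt shifts every lane by the same
// scalar amount, taken from the low bits of the XMM register; constant
// counts bypass this and use the immediate (imm8) shift encodings further
// below.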
// --------------------------------- Sqrt --------------------------------------

// Floating point vector sqrt
instruct vsqrt2D_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2D_mem(vecX dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4D_mem(vecY dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8D_mem(vecZ dst, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2F_reg(vecD dst, vecD src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt2F_mem(vecD dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed2F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4F_reg(vecX dst, vecX src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt4F_mem(vecX dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed4F" %}
  ins_encode %{
    int vector_len = 0;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8F_reg(vecY dst, vecY src) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt8F_mem(vecY dst, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed8F" %}
  ins_encode %{
    int vector_len = 1;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt16F_reg(vecZ dst, vecZ src) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt16F_mem(vecZ dst, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length() == 16);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packed16F" %}
  ins_encode %{
    int vector_len = 2;
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
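// Note: vector sqrt is implemented only for AVX targets here: every pattern
// above requires UseAVX > 0 (UseAVX > 2 for the 512-bit vecZ forms), and
// the _mem variants fold the vector load into the sqrt instruction itself.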
// ------------------------------ LeftShift -----------------------------------

// Shorts/Chars vector left shift
instruct vsll2S(vecS dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_imm(vecS dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed2S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_avx(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_evex(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
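// Note: each vector size pairs a register-count form (count in an XMM
// register via the vecS shift operand) with an immediate form (immI8
// shift); the matcher picks the imm form when the shift count is a
// compile-time constant and encodes it directly in the instruction.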
instruct vsll4S(vecD dst, vecS shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_imm(vecD dst, immI8 shift) %{
  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  format %{ "psllw $dst,$shift\t! left shift packed4S" %}
  ins_encode %{
    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_avx(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_evex(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsll4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{
  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS dst shift));
  effect(TEMP src);
  format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %}
  ins_encode %{
    int vector_len = 0;
    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
left shift packed8S" %} 8897 ins_encode %{ 8898 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 8899 %} 8900 ins_pipe( pipe_slow ); 8901 %} 8902 8903 instruct vsll8S_reg_avx(vecX dst, vecX src, vecS shift) %{ 8904 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 8905 match(Set dst (LShiftVS src shift)); 8906 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 8907 ins_encode %{ 8908 int vector_len = 0; 8909 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8910 %} 8911 ins_pipe( pipe_slow ); 8912 %} 8913 8914 instruct vsll8S_reg_evex(vecX dst, vecX src, vecS shift) %{ 8915 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 8916 match(Set dst (LShiftVS src shift)); 8917 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 8918 ins_encode %{ 8919 int vector_len = 0; 8920 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8921 %} 8922 ins_pipe( pipe_slow ); 8923 %} 8924 8925 instruct vsll8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ 8926 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 8927 match(Set dst (LShiftVS dst shift)); 8928 effect(TEMP src); 8929 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 8930 ins_encode %{ 8931 int vector_len = 0; 8932 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8933 %} 8934 ins_pipe( pipe_slow ); 8935 %} 8936 8937 instruct vsll8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{ 8938 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 8939 match(Set dst (LShiftVS src shift)); 8940 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 8941 ins_encode %{ 8942 int vector_len = 0; 8943 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8944 %} 8945 ins_pipe( pipe_slow ); 8946 %} 8947 8948 instruct vsll8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{ 8949 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 8950 match(Set dst (LShiftVS src shift)); 8951 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 8952 ins_encode %{ 8953 int vector_len = 0; 8954 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8955 %} 8956 ins_pipe( pipe_slow ); 8957 %} 8958 8959 instruct vsll8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{ 8960 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 8961 match(Set dst (LShiftVS dst shift)); 8962 effect(TEMP src); 8963 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 8964 ins_encode %{ 8965 int vector_len = 0; 8966 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 8967 %} 8968 ins_pipe( pipe_slow ); 8969 %} 8970 8971 instruct vsll16S_reg_avx(vecY dst, vecY src, vecS shift) %{ 8972 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 8973 match(Set dst (LShiftVS src shift)); 8974 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 8975 ins_encode %{ 8976 int vector_len = 1; 8977 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8978 %} 8979 ins_pipe( pipe_slow ); 8980 %} 8981 8982 instruct vsll16S_reg_evex(vecY dst, vecY src, vecS shift) %{ 8983 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 8984 match(Set dst (LShiftVS src shift)); 8985 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed16S" %} 8986 ins_encode %{ 8987 int vector_len = 1; 8988 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8989 %} 8990 ins_pipe( pipe_slow ); 8991 %} 8992 8993 instruct vsll16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{ 8994 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 8995 match(Set dst (LShiftVS dst shift)); 8996 effect(TEMP src); 8997 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 8998 ins_encode %{ 8999 int vector_len = 1; 9000 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9001 %} 9002 ins_pipe( pipe_slow ); 9003 %} 9004 9005 instruct vsll16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{ 9006 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 9007 match(Set dst (LShiftVS src shift)); 9008 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 9009 ins_encode %{ 9010 int vector_len = 1; 9011 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9012 %} 9013 ins_pipe( pipe_slow ); 9014 %} 9015 9016 instruct vsll16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{ 9017 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 9018 match(Set dst (LShiftVS src shift)); 9019 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 9020 ins_encode %{ 9021 int vector_len = 1; 9022 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9023 %} 9024 ins_pipe( pipe_slow ); 9025 %} 9026 9027 instruct vsll16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{ 9028 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 9029 match(Set dst (LShiftVS dst shift)); 9030 effect(TEMP src); 9031 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 9032 ins_encode %{ 9033 int vector_len = 1; 9034 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9035 %} 9036 ins_pipe( pipe_slow ); 9037 %} 9038 9039 instruct vsll32S_reg(vecZ dst, vecZ src, vecS shift) %{ 9040 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 9041 match(Set dst (LShiftVS src shift)); 9042 format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} 9043 ins_encode %{ 9044 int vector_len = 2; 9045 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9046 %} 9047 ins_pipe( pipe_slow ); 9048 %} 9049 9050 instruct vsll32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9051 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 9052 match(Set dst (LShiftVS src shift)); 9053 format %{ "vpsllw $dst,$src,$shift\t! left shift packed32S" %} 9054 ins_encode %{ 9055 int vector_len = 2; 9056 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9057 %} 9058 ins_pipe( pipe_slow ); 9059 %} 9060 9061 // Integers vector left shift 9062 instruct vsll2I(vecD dst, vecS shift) %{ 9063 predicate(n->as_Vector()->length() == 2); 9064 match(Set dst (LShiftVI dst shift)); 9065 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 9066 ins_encode %{ 9067 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 9068 %} 9069 ins_pipe( pipe_slow ); 9070 %} 9071 9072 instruct vsll2I_imm(vecD dst, immI8 shift) %{ 9073 predicate(n->as_Vector()->length() == 2); 9074 match(Set dst (LShiftVI dst shift)); 9075 format %{ "pslld $dst,$shift\t! 
left shift packed2I" %} 9076 ins_encode %{ 9077 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 9078 %} 9079 ins_pipe( pipe_slow ); 9080 %} 9081 9082 instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{ 9083 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9084 match(Set dst (LShiftVI src shift)); 9085 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} 9086 ins_encode %{ 9087 int vector_len = 0; 9088 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9089 %} 9090 ins_pipe( pipe_slow ); 9091 %} 9092 9093 instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 9094 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9095 match(Set dst (LShiftVI src shift)); 9096 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} 9097 ins_encode %{ 9098 int vector_len = 0; 9099 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9100 %} 9101 ins_pipe( pipe_slow ); 9102 %} 9103 9104 instruct vsll4I(vecX dst, vecS shift) %{ 9105 predicate(n->as_Vector()->length() == 4); 9106 match(Set dst (LShiftVI dst shift)); 9107 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 9108 ins_encode %{ 9109 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 9110 %} 9111 ins_pipe( pipe_slow ); 9112 %} 9113 9114 instruct vsll4I_imm(vecX dst, immI8 shift) %{ 9115 predicate(n->as_Vector()->length() == 4); 9116 match(Set dst (LShiftVI dst shift)); 9117 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 9118 ins_encode %{ 9119 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 9120 %} 9121 ins_pipe( pipe_slow ); 9122 %} 9123 9124 instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{ 9125 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9126 match(Set dst (LShiftVI src shift)); 9127 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} 9128 ins_encode %{ 9129 int vector_len = 0; 9130 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9131 %} 9132 ins_pipe( pipe_slow ); 9133 %} 9134 9135 instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 9136 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9137 match(Set dst (LShiftVI src shift)); 9138 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} 9139 ins_encode %{ 9140 int vector_len = 0; 9141 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9142 %} 9143 ins_pipe( pipe_slow ); 9144 %} 9145 9146 instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{ 9147 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 9148 match(Set dst (LShiftVI src shift)); 9149 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 9150 ins_encode %{ 9151 int vector_len = 1; 9152 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9153 %} 9154 ins_pipe( pipe_slow ); 9155 %} 9156 9157 instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 9158 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 9159 match(Set dst (LShiftVI src shift)); 9160 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 9161 ins_encode %{ 9162 int vector_len = 1; 9163 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9164 %} 9165 ins_pipe( pipe_slow ); 9166 %} 9167 9168 instruct vsll16I_reg(vecZ dst, vecZ src, vecS shift) %{ 9169 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9170 match(Set dst (LShiftVI src shift)); 9171 format %{ "vpslld $dst,$src,$shift\t! 
left shift packed16I" %} 9172 ins_encode %{ 9173 int vector_len = 2; 9174 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9175 %} 9176 ins_pipe( pipe_slow ); 9177 %} 9178 9179 instruct vsll16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9180 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9181 match(Set dst (LShiftVI src shift)); 9182 format %{ "vpslld $dst,$src,$shift\t! left shift packed16I" %} 9183 ins_encode %{ 9184 int vector_len = 2; 9185 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9186 %} 9187 ins_pipe( pipe_slow ); 9188 %} 9189 9190 // Longs vector left shift 9191 instruct vsll2L(vecX dst, vecS shift) %{ 9192 predicate(n->as_Vector()->length() == 2); 9193 match(Set dst (LShiftVL dst shift)); 9194 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 9195 ins_encode %{ 9196 __ psllq($dst$$XMMRegister, $shift$$XMMRegister); 9197 %} 9198 ins_pipe( pipe_slow ); 9199 %} 9200 9201 instruct vsll2L_imm(vecX dst, immI8 shift) %{ 9202 predicate(n->as_Vector()->length() == 2); 9203 match(Set dst (LShiftVL dst shift)); 9204 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 9205 ins_encode %{ 9206 __ psllq($dst$$XMMRegister, (int)$shift$$constant); 9207 %} 9208 ins_pipe( pipe_slow ); 9209 %} 9210 9211 instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{ 9212 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9213 match(Set dst (LShiftVL src shift)); 9214 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} 9215 ins_encode %{ 9216 int vector_len = 0; 9217 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9218 %} 9219 ins_pipe( pipe_slow ); 9220 %} 9221 9222 instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 9223 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9224 match(Set dst (LShiftVL src shift)); 9225 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} 9226 ins_encode %{ 9227 int vector_len = 0; 9228 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9229 %} 9230 ins_pipe( pipe_slow ); 9231 %} 9232 9233 instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{ 9234 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 9235 match(Set dst (LShiftVL src shift)); 9236 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} 9237 ins_encode %{ 9238 int vector_len = 1; 9239 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9240 %} 9241 ins_pipe( pipe_slow ); 9242 %} 9243 9244 instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ 9245 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 9246 match(Set dst (LShiftVL src shift)); 9247 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} 9248 ins_encode %{ 9249 int vector_len = 1; 9250 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9251 %} 9252 ins_pipe( pipe_slow ); 9253 %} 9254 9255 instruct vsll8L_reg(vecZ dst, vecZ src, vecS shift) %{ 9256 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 9257 match(Set dst (LShiftVL src shift)); 9258 format %{ "vpsllq $dst,$src,$shift\t! 
left shift packed8L" %} 9259 ins_encode %{ 9260 int vector_len = 2; 9261 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9262 %} 9263 ins_pipe( pipe_slow ); 9264 %} 9265 9266 instruct vsll8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9267 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 9268 match(Set dst (LShiftVL src shift)); 9269 format %{ "vpsllq $dst,$src,$shift\t! left shift packed8L" %} 9270 ins_encode %{ 9271 int vector_len = 2; 9272 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9273 %} 9274 ins_pipe( pipe_slow ); 9275 %} 9276 9277 // ----------------------- LogicalRightShift ----------------------------------- 9278 9279 // Shorts vector logical right shift produces an incorrect Java result 9280 // for negative data because Java code converts a short value into an int with 9281 // sign extension before the shift. But char vectors are fine since chars are 9282 // unsigned values.
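// For example, a minimal Java-level sketch of why short>>> cannot map to
// psrlw while char>>> can (illustrative only; names are invented):
//
//   short s = (short)0x8000;   // -32768
//   int   r = s >>> 1;         // s is sign-extended to 0xFFFF8000 first,
//                              // so r == 0x7FFFC000; a 16-bit psrlw lane
//                              // would produce 0x4000 instead.
//   char  c = 0x8000;          // chars are unsigned 16-bit values
//   int   rc = c >>> 1;        // c zero-extends to 0x00008000,
//                              // so rc == 0x4000, matching psrlw.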
9283 9284 instruct vsrl2S(vecS dst, vecS shift) %{ 9285 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 9286 match(Set dst (URShiftVS dst shift)); 9287 format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} 9288 ins_encode %{ 9289 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 9290 %} 9291 ins_pipe( pipe_slow ); 9292 %} 9293 9294 instruct vsrl2S_imm(vecS dst, immI8 shift) %{ 9295 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 9296 match(Set dst (URShiftVS dst shift)); 9297 format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} 9298 ins_encode %{ 9299 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 9300 %} 9301 ins_pipe( pipe_slow ); 9302 %} 9303 9304 instruct vsrl2S_reg_avx(vecS dst, vecS src, vecS shift) %{ 9305 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 9306 match(Set dst (URShiftVS src shift)); 9307 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 9308 ins_encode %{ 9309 int vector_len = 0; 9310 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9311 %} 9312 ins_pipe( pipe_slow ); 9313 %} 9314 9315 instruct vsrl2S_reg_evex(vecS dst, vecS src, vecS shift) %{ 9316 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 9317 match(Set dst (URShiftVS src shift)); 9318 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 9319 ins_encode %{ 9320 int vector_len = 0; 9321 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9322 %} 9323 ins_pipe( pipe_slow ); 9324 %} 9325 9326 instruct vsrl2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{ 9327 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 9328 match(Set dst (URShiftVS dst shift)); 9329 effect(TEMP src); 9330 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 9331 ins_encode %{ 9332 int vector_len = 0; 9333 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9334 %} 9335 ins_pipe( pipe_slow ); 9336 %} 9337 9338 instruct vsrl2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{ 9339 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 9340 match(Set dst (URShiftVS src shift)); 9341 format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed2S" %} 9342 ins_encode %{ 9343 int vector_len = 0; 9344 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9345 %} 9346 ins_pipe( pipe_slow ); 9347 %} 9348 9349 instruct vsrl2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{ 9350 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 9351 match(Set dst (URShiftVS src shift)); 9352 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 9353 ins_encode %{ 9354 int vector_len = 0; 9355 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9356 %} 9357 ins_pipe( pipe_slow ); 9358 %} 9359 9360 instruct vsrl2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{ 9361 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 9362 match(Set dst (URShiftVS dst shift)); 9363 effect(TEMP src); 9364 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 9365 ins_encode %{ 9366 int vector_len = 0; 9367 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9368 %} 9369 ins_pipe( pipe_slow ); 9370 %} 9371 9372 instruct vsrl4S(vecD dst, vecS shift) %{ 9373 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 9374 match(Set dst (URShiftVS dst shift)); 9375 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} 9376 ins_encode %{ 9377 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 9378 %} 9379 ins_pipe( pipe_slow ); 9380 %} 9381 9382 instruct vsrl4S_imm(vecD dst, immI8 shift) %{ 9383 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 9384 match(Set dst (URShiftVS dst shift)); 9385 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} 9386 ins_encode %{ 9387 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 9388 %} 9389 ins_pipe( pipe_slow ); 9390 %} 9391 9392 instruct vsrl4S_reg_avx(vecD dst, vecD src, vecS shift) %{ 9393 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 9394 match(Set dst (URShiftVS src shift)); 9395 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 9396 ins_encode %{ 9397 int vector_len = 0; 9398 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9399 %} 9400 ins_pipe( pipe_slow ); 9401 %} 9402 9403 instruct vsrl4S_reg_evex(vecD dst, vecD src, vecS shift) %{ 9404 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 9405 match(Set dst (URShiftVS src shift)); 9406 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 9407 ins_encode %{ 9408 int vector_len = 0; 9409 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9410 %} 9411 ins_pipe( pipe_slow ); 9412 %} 9413 9414 instruct vsrl4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{ 9415 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 9416 match(Set dst (URShiftVS dst shift)); 9417 effect(TEMP src); 9418 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 9419 ins_encode %{ 9420 int vector_len = 0; 9421 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9422 %} 9423 ins_pipe( pipe_slow ); 9424 %} 9425 9426 instruct vsrl4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{ 9427 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 9428 match(Set dst (URShiftVS src shift)); 9429 format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed4S" %} 9430 ins_encode %{ 9431 int vector_len = 0; 9432 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9433 %} 9434 ins_pipe( pipe_slow ); 9435 %} 9436 9437 instruct vsrl4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{ 9438 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 9439 match(Set dst (URShiftVS src shift)); 9440 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 9441 ins_encode %{ 9442 int vector_len = 0; 9443 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9444 %} 9445 ins_pipe( pipe_slow ); 9446 %} 9447 9448 instruct vsrl4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ 9449 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 9450 match(Set dst (URShiftVS dst shift)); 9451 effect(TEMP src); 9452 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 9453 ins_encode %{ 9454 int vector_len = 0; 9455 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9456 %} 9457 ins_pipe( pipe_slow ); 9458 %} 9459 9460 instruct vsrl8S(vecX dst, vecS shift) %{ 9461 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 9462 match(Set dst (URShiftVS dst shift)); 9463 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 9464 ins_encode %{ 9465 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 9466 %} 9467 ins_pipe( pipe_slow ); 9468 %} 9469 9470 instruct vsrl8S_imm(vecX dst, immI8 shift) %{ 9471 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 9472 match(Set dst (URShiftVS dst shift)); 9473 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 9474 ins_encode %{ 9475 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 9476 %} 9477 ins_pipe( pipe_slow ); 9478 %} 9479 9480 instruct vsrl8S_reg_avx(vecX dst, vecX src, vecS shift) %{ 9481 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 9482 match(Set dst (URShiftVS src shift)); 9483 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9484 ins_encode %{ 9485 int vector_len = 0; 9486 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9487 %} 9488 ins_pipe( pipe_slow ); 9489 %} 9490 9491 instruct vsrl8S_reg_evex(vecX dst, vecX src, vecS shift) %{ 9492 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 9493 match(Set dst (URShiftVS src shift)); 9494 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9495 ins_encode %{ 9496 int vector_len = 0; 9497 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9498 %} 9499 ins_pipe( pipe_slow ); 9500 %} 9501 9502 instruct vsrl8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ 9503 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 9504 match(Set dst (URShiftVS dst shift)); 9505 effect(TEMP src); 9506 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9507 ins_encode %{ 9508 int vector_len = 0; 9509 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9510 %} 9511 ins_pipe( pipe_slow ); 9512 %} 9513 9514 instruct vsrl8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{ 9515 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 9516 match(Set dst (URShiftVS src shift)); 9517 format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed8S" %} 9518 ins_encode %{ 9519 int vector_len = 0; 9520 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9521 %} 9522 ins_pipe( pipe_slow ); 9523 %} 9524 9525 instruct vsrl8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{ 9526 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 9527 match(Set dst (URShiftVS src shift)); 9528 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9529 ins_encode %{ 9530 int vector_len = 0; 9531 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9532 %} 9533 ins_pipe( pipe_slow ); 9534 %} 9535 9536 instruct vsrl8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{ 9537 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 9538 match(Set dst (URShiftVS dst shift)); 9539 effect(TEMP src); 9540 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 9541 ins_encode %{ 9542 int vector_len = 0; 9543 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9544 %} 9545 ins_pipe( pipe_slow ); 9546 %} 9547 9548 instruct vsrl16S_reg_avx(vecY dst, vecY src, vecS shift) %{ 9549 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 9550 match(Set dst (URShiftVS src shift)); 9551 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9552 ins_encode %{ 9553 int vector_len = 1; 9554 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9555 %} 9556 ins_pipe( pipe_slow ); 9557 %} 9558 9559 instruct vsrl16S_reg_evex(vecY dst, vecY src, vecS shift) %{ 9560 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 9561 match(Set dst (URShiftVS src shift)); 9562 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9563 ins_encode %{ 9564 int vector_len = 1; 9565 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9566 %} 9567 ins_pipe( pipe_slow ); 9568 %} 9569 9570 instruct vsrl16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{ 9571 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 9572 match(Set dst (URShiftVS dst shift)); 9573 effect(TEMP src); 9574 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9575 ins_encode %{ 9576 int vector_len = 1; 9577 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9578 %} 9579 ins_pipe( pipe_slow ); 9580 %} 9581 9582 instruct vsrl16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{ 9583 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 9584 match(Set dst (URShiftVS src shift)); 9585 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9586 ins_encode %{ 9587 int vector_len = 1; 9588 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9589 %} 9590 ins_pipe( pipe_slow ); 9591 %} 9592 9593 instruct vsrl16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{ 9594 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 9595 match(Set dst (URShiftVS src shift)); 9596 format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed16S" %} 9597 ins_encode %{ 9598 int vector_len = 1; 9599 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9600 %} 9601 ins_pipe( pipe_slow ); 9602 %} 9603 9604 instruct vsrl16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{ 9605 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 9606 match(Set dst (URShiftVS dst shift)); 9607 effect(TEMP src); 9608 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 9609 ins_encode %{ 9610 int vector_len = 1; 9611 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9612 %} 9613 ins_pipe( pipe_slow ); 9614 %} 9615 9616 instruct vsrl32S_reg(vecZ dst, vecZ src, vecS shift) %{ 9617 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 9618 match(Set dst (URShiftVS src shift)); 9619 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} 9620 ins_encode %{ 9621 int vector_len = 2; 9622 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9623 %} 9624 ins_pipe( pipe_slow ); 9625 %} 9626 9627 instruct vsrl32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9628 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 9629 match(Set dst (URShiftVS src shift)); 9630 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed32S" %} 9631 ins_encode %{ 9632 int vector_len = 2; 9633 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9634 %} 9635 ins_pipe( pipe_slow ); 9636 %} 9637 9638 // Integers vector logical right shift 9639 instruct vsrl2I(vecD dst, vecS shift) %{ 9640 predicate(n->as_Vector()->length() == 2); 9641 match(Set dst (URShiftVI dst shift)); 9642 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 9643 ins_encode %{ 9644 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 9645 %} 9646 ins_pipe( pipe_slow ); 9647 %} 9648 9649 instruct vsrl2I_imm(vecD dst, immI8 shift) %{ 9650 predicate(n->as_Vector()->length() == 2); 9651 match(Set dst (URShiftVI dst shift)); 9652 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 9653 ins_encode %{ 9654 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 9655 %} 9656 ins_pipe( pipe_slow ); 9657 %} 9658 9659 instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{ 9660 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9661 match(Set dst (URShiftVI src shift)); 9662 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 9663 ins_encode %{ 9664 int vector_len = 0; 9665 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9666 %} 9667 ins_pipe( pipe_slow ); 9668 %} 9669 9670 instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 9671 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9672 match(Set dst (URShiftVI src shift)); 9673 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 9674 ins_encode %{ 9675 int vector_len = 0; 9676 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9677 %} 9678 ins_pipe( pipe_slow ); 9679 %} 9680 9681 instruct vsrl4I(vecX dst, vecS shift) %{ 9682 predicate(n->as_Vector()->length() == 4); 9683 match(Set dst (URShiftVI dst shift)); 9684 format %{ "psrld $dst,$shift\t! 
logical right shift packed4I" %} 9685 ins_encode %{ 9686 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 9687 %} 9688 ins_pipe( pipe_slow ); 9689 %} 9690 9691 instruct vsrl4I_imm(vecX dst, immI8 shift) %{ 9692 predicate(n->as_Vector()->length() == 4); 9693 match(Set dst (URShiftVI dst shift)); 9694 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} 9695 ins_encode %{ 9696 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 9697 %} 9698 ins_pipe( pipe_slow ); 9699 %} 9700 9701 instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{ 9702 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9703 match(Set dst (URShiftVI src shift)); 9704 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} 9705 ins_encode %{ 9706 int vector_len = 0; 9707 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9708 %} 9709 ins_pipe( pipe_slow ); 9710 %} 9711 9712 instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 9713 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 9714 match(Set dst (URShiftVI src shift)); 9715 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} 9716 ins_encode %{ 9717 int vector_len = 0; 9718 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9719 %} 9720 ins_pipe( pipe_slow ); 9721 %} 9722 9723 instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{ 9724 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 9725 match(Set dst (URShiftVI src shift)); 9726 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} 9727 ins_encode %{ 9728 int vector_len = 1; 9729 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9730 %} 9731 ins_pipe( pipe_slow ); 9732 %} 9733 9734 instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 9735 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 9736 match(Set dst (URShiftVI src shift)); 9737 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} 9738 ins_encode %{ 9739 int vector_len = 1; 9740 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9741 %} 9742 ins_pipe( pipe_slow ); 9743 %} 9744 9745 instruct vsrl16I_reg(vecZ dst, vecZ src, vecS shift) %{ 9746 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9747 match(Set dst (URShiftVI src shift)); 9748 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %} 9749 ins_encode %{ 9750 int vector_len = 2; 9751 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9752 %} 9753 ins_pipe( pipe_slow ); 9754 %} 9755 9756 instruct vsrl16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9757 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 9758 match(Set dst (URShiftVI src shift)); 9759 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed16I" %} 9760 ins_encode %{ 9761 int vector_len = 2; 9762 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9763 %} 9764 ins_pipe( pipe_slow ); 9765 %} 9766 9767 // Longs vector logical right shift 9768 instruct vsrl2L(vecX dst, vecS shift) %{ 9769 predicate(n->as_Vector()->length() == 2); 9770 match(Set dst (URShiftVL dst shift)); 9771 format %{ "psrlq $dst,$shift\t! 
logical right shift packed2L" %} 9772 ins_encode %{ 9773 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 9774 %} 9775 ins_pipe( pipe_slow ); 9776 %} 9777 9778 instruct vsrl2L_imm(vecX dst, immI8 shift) %{ 9779 predicate(n->as_Vector()->length() == 2); 9780 match(Set dst (URShiftVL dst shift)); 9781 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} 9782 ins_encode %{ 9783 __ psrlq($dst$$XMMRegister, (int)$shift$$constant); 9784 %} 9785 ins_pipe( pipe_slow ); 9786 %} 9787 9788 instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{ 9789 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9790 match(Set dst (URShiftVL src shift)); 9791 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} 9792 ins_encode %{ 9793 int vector_len = 0; 9794 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9795 %} 9796 ins_pipe( pipe_slow ); 9797 %} 9798 9799 instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 9800 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 9801 match(Set dst (URShiftVL src shift)); 9802 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} 9803 ins_encode %{ 9804 int vector_len = 0; 9805 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9806 %} 9807 ins_pipe( pipe_slow ); 9808 %} 9809 9810 instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{ 9811 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 9812 match(Set dst (URShiftVL src shift)); 9813 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} 9814 ins_encode %{ 9815 int vector_len = 1; 9816 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9817 %} 9818 ins_pipe( pipe_slow ); 9819 %} 9820 9821 instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ 9822 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 9823 match(Set dst (URShiftVL src shift)); 9824 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} 9825 ins_encode %{ 9826 int vector_len = 1; 9827 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9828 %} 9829 ins_pipe( pipe_slow ); 9830 %} 9831 9832 instruct vsrl8L_reg(vecZ dst, vecZ src, vecS shift) %{ 9833 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 9834 match(Set dst (URShiftVL src shift)); 9835 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %} 9836 ins_encode %{ 9837 int vector_len = 2; 9838 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9839 %} 9840 ins_pipe( pipe_slow ); 9841 %} 9842 9843 instruct vsrl8L_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 9844 predicate(UseAVX > 2 && n->as_Vector()->length() == 8); 9845 match(Set dst (URShiftVL src shift)); 9846 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed8L" %} 9847 ins_encode %{ 9848 int vector_len = 2; 9849 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9850 %} 9851 ins_pipe( pipe_slow ); 9852 %} 9853 9854 // ------------------- ArithmeticRightShift ----------------------------------- 9855 9856 // Shorts/Chars vector arithmetic right shift 9857 instruct vsra2S(vecS dst, vecS shift) %{ 9858 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 9859 match(Set dst (RShiftVS dst shift)); 9860 format %{ "psraw $dst,$shift\t! 
arithmetic right shift packed2S" %} 9861 ins_encode %{ 9862 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 9863 %} 9864 ins_pipe( pipe_slow ); 9865 %} 9866 9867 instruct vsra2S_imm(vecS dst, immI8 shift) %{ 9868 predicate(UseAVX == 0 && n->as_Vector()->length() == 2); 9869 match(Set dst (RShiftVS dst shift)); 9870 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} 9871 ins_encode %{ 9872 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 9873 %} 9874 ins_pipe( pipe_slow ); 9875 %} 9876 9877 instruct vsra2S_reg_avx(vecS dst, vecS src, vecS shift) %{ 9878 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 9879 match(Set dst (RShiftVS src shift)); 9880 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 9881 ins_encode %{ 9882 int vector_len = 0; 9883 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9884 %} 9885 ins_pipe( pipe_slow ); 9886 %} 9887 9888 instruct vsra2S_reg_evex(vecS dst, vecS src, vecS shift) %{ 9889 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 9890 match(Set dst (RShiftVS src shift)); 9891 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 9892 ins_encode %{ 9893 int vector_len = 0; 9894 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9895 %} 9896 ins_pipe( pipe_slow ); 9897 %} 9898 9899 instruct vsra2S_reg_evex_special(vecS dst, vecS src, vecS shift) %{ 9900 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 9901 match(Set dst (RShiftVS dst shift)); 9902 effect(TEMP src); 9903 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 9904 ins_encode %{ 9905 int vector_len = 0; 9906 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9907 %} 9908 ins_pipe( pipe_slow ); 9909 %} 9910 9911 instruct vsra2S_reg_imm_avx(vecS dst, vecS src, immI8 shift) %{ 9912 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2); 9913 match(Set dst (RShiftVS src shift)); 9914 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 9915 ins_encode %{ 9916 int vector_len = 0; 9917 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9918 %} 9919 ins_pipe( pipe_slow ); 9920 %} 9921 9922 instruct vsra2S_reg_imm_evex(vecS dst, vecS src, immI8 shift) %{ 9923 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2); 9924 match(Set dst (RShiftVS src shift)); 9925 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 9926 ins_encode %{ 9927 int vector_len = 0; 9928 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9929 %} 9930 ins_pipe( pipe_slow ); 9931 %} 9932 9933 instruct vsra2S_reg_imm_evex_special(vecS dst, vecS src, immI8 shift) %{ 9934 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2); 9935 match(Set dst (RShiftVS dst shift)); 9936 effect(TEMP src); 9937 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 9938 ins_encode %{ 9939 int vector_len = 0; 9940 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 9941 %} 9942 ins_pipe( pipe_slow ); 9943 %} 9944 9945 instruct vsra4S(vecD dst, vecS shift) %{ 9946 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 9947 match(Set dst (RShiftVS dst shift)); 9948 format %{ "psraw $dst,$shift\t! 
arithmetic right shift packed4S" %} 9949 ins_encode %{ 9950 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 9951 %} 9952 ins_pipe( pipe_slow ); 9953 %} 9954 9955 instruct vsra4S_imm(vecD dst, immI8 shift) %{ 9956 predicate(UseAVX == 0 && n->as_Vector()->length() == 4); 9957 match(Set dst (RShiftVS dst shift)); 9958 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} 9959 ins_encode %{ 9960 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 9961 %} 9962 ins_pipe( pipe_slow ); 9963 %} 9964 9965 instruct vsra4S_reg_avx(vecD dst, vecD src, vecS shift) %{ 9966 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 9967 match(Set dst (RShiftVS src shift)); 9968 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 9969 ins_encode %{ 9970 int vector_len = 0; 9971 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9972 %} 9973 ins_pipe( pipe_slow ); 9974 %} 9975 9976 instruct vsra4S_reg_evex(vecD dst, vecD src, vecS shift) %{ 9977 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 9978 match(Set dst (RShiftVS src shift)); 9979 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 9980 ins_encode %{ 9981 int vector_len = 0; 9982 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9983 %} 9984 ins_pipe( pipe_slow ); 9985 %} 9986 9987 instruct vsra4S_reg_evex_special(vecD dst, vecD src, vecS shift) %{ 9988 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 9989 match(Set dst (RShiftVS dst shift)); 9990 effect(TEMP src); 9991 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 9992 ins_encode %{ 9993 int vector_len = 0; 9994 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9995 %} 9996 ins_pipe( pipe_slow ); 9997 %} 9998 9999 instruct vsra4S_reg_imm_avx(vecD dst, vecD src, immI8 shift) %{ 10000 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4); 10001 match(Set dst (RShiftVS src shift)); 10002 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 10003 ins_encode %{ 10004 int vector_len = 0; 10005 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10006 %} 10007 ins_pipe( pipe_slow ); 10008 %} 10009 10010 instruct vsra4S_reg_imm_evex(vecD dst, vecD src, immI8 shift) %{ 10011 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4); 10012 match(Set dst (RShiftVS src shift)); 10013 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 10014 ins_encode %{ 10015 int vector_len = 0; 10016 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10017 %} 10018 ins_pipe( pipe_slow ); 10019 %} 10020 10021 instruct vsra4S_reg_imm_evex_special(vecD dst, vecD src, immI8 shift) %{ 10022 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4); 10023 match(Set dst (RShiftVS dst shift)); 10024 effect(TEMP src); 10025 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 10026 ins_encode %{ 10027 int vector_len = 0; 10028 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10029 %} 10030 ins_pipe( pipe_slow ); 10031 %} 10032 10033 instruct vsra8S(vecX dst, vecS shift) %{ 10034 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 10035 match(Set dst (RShiftVS dst shift)); 10036 format %{ "psraw $dst,$shift\t! 
arithmetic right shift packed8S" %} 10037 ins_encode %{ 10038 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 10039 %} 10040 ins_pipe( pipe_slow ); 10041 %} 10042 10043 instruct vsra8S_imm(vecX dst, immI8 shift) %{ 10044 predicate(UseAVX == 0 && n->as_Vector()->length() == 8); 10045 match(Set dst (RShiftVS dst shift)); 10046 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} 10047 ins_encode %{ 10048 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 10049 %} 10050 ins_pipe( pipe_slow ); 10051 %} 10052 10053 instruct vsra8S_reg_avx(vecX dst, vecX src, vecS shift) %{ 10054 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 10055 match(Set dst (RShiftVS src shift)); 10056 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 10057 ins_encode %{ 10058 int vector_len = 0; 10059 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10060 %} 10061 ins_pipe( pipe_slow ); 10062 %} 10063 10064 instruct vsra8S_reg_evex(vecX dst, vecX src, vecS shift) %{ 10065 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 10066 match(Set dst (RShiftVS src shift)); 10067 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 10068 ins_encode %{ 10069 int vector_len = 0; 10070 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10071 %} 10072 ins_pipe( pipe_slow ); 10073 %} 10074 10075 instruct vsra8S_reg_evex_special(vecX dst, vecX src, vecS shift) %{ 10076 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 10077 match(Set dst (RShiftVS dst shift)); 10078 effect(TEMP src); 10079 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 10080 ins_encode %{ 10081 int vector_len = 0; 10082 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10083 %} 10084 ins_pipe( pipe_slow ); 10085 %} 10086 10087 instruct vsra8S_reg_imm_avx(vecX dst, vecX src, immI8 shift) %{ 10088 predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8); 10089 match(Set dst (RShiftVS src shift)); 10090 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 10091 ins_encode %{ 10092 int vector_len = 0; 10093 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10094 %} 10095 ins_pipe( pipe_slow ); 10096 %} 10097 10098 instruct vsra8S_reg_imm_evex(vecX dst, vecX src, immI8 shift) %{ 10099 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8); 10100 match(Set dst (RShiftVS src shift)); 10101 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 10102 ins_encode %{ 10103 int vector_len = 0; 10104 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10105 %} 10106 ins_pipe( pipe_slow ); 10107 %} 10108 10109 instruct vsra8S_reg_imm_evex_special(vecX dst, vecX src, immI8 shift) %{ 10110 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8); 10111 match(Set dst (RShiftVS dst shift)); 10112 effect(TEMP src); 10113 format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed8S" %} 10114 ins_encode %{ 10115 int vector_len = 0; 10116 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10117 %} 10118 ins_pipe( pipe_slow ); 10119 %} 10120 10121 instruct vsra16S_reg_avx(vecY dst, vecY src, vecS shift) %{ 10122 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 10123 match(Set dst (RShiftVS src shift)); 10124 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 10125 ins_encode %{ 10126 int vector_len = 1; 10127 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10128 %} 10129 ins_pipe( pipe_slow ); 10130 %} 10131 10132 instruct vsra16S_reg_evex(vecY dst, vecY src, vecS shift) %{ 10133 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 10134 match(Set dst (RShiftVS src shift)); 10135 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 10136 ins_encode %{ 10137 int vector_len = 1; 10138 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10139 %} 10140 ins_pipe( pipe_slow ); 10141 %} 10142 10143 instruct vsra16S_reg_evex_special(vecY dst, vecY src, vecS shift) %{ 10144 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 10145 match(Set dst (RShiftVS dst shift)); 10146 effect(TEMP src); 10147 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 10148 ins_encode %{ 10149 int vector_len = 1; 10150 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10151 %} 10152 ins_pipe( pipe_slow ); 10153 %} 10154 10155 instruct vsra16S_reg_imm_avx(vecY dst, vecY src, immI8 shift) %{ 10156 predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16); 10157 match(Set dst (RShiftVS src shift)); 10158 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 10159 ins_encode %{ 10160 int vector_len = 1; 10161 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10162 %} 10163 ins_pipe( pipe_slow ); 10164 %} 10165 10166 instruct vsra16S_reg_imm_evex(vecY dst, vecY src, immI8 shift) %{ 10167 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16); 10168 match(Set dst (RShiftVS src shift)); 10169 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 10170 ins_encode %{ 10171 int vector_len = 1; 10172 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10173 %} 10174 ins_pipe( pipe_slow ); 10175 %} 10176 10177 instruct vsra16S_reg_imm_evex_special(vecY dst, vecY src, immI8 shift) %{ 10178 predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16); 10179 match(Set dst (RShiftVS dst shift)); 10180 effect(TEMP src); 10181 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 10182 ins_encode %{ 10183 int vector_len = 1; 10184 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10185 %} 10186 ins_pipe( pipe_slow ); 10187 %} 10188 10189 instruct vsra32S_reg(vecZ dst, vecZ src, vecS shift) %{ 10190 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 10191 match(Set dst (RShiftVS src shift)); 10192 format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed32S" %} 10193 ins_encode %{ 10194 int vector_len = 2; 10195 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10196 %} 10197 ins_pipe( pipe_slow ); 10198 %} 10199 10200 instruct vsra32S_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 10201 predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32); 10202 match(Set dst (RShiftVS src shift)); 10203 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed32S" %} 10204 ins_encode %{ 10205 int vector_len = 2; 10206 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10207 %} 10208 ins_pipe( pipe_slow ); 10209 %} 10210 10211 // Integers vector arithmetic right shift 10212 instruct vsra2I(vecD dst, vecS shift) %{ 10213 predicate(n->as_Vector()->length() == 2); 10214 match(Set dst (RShiftVI dst shift)); 10215 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} 10216 ins_encode %{ 10217 __ psrad($dst$$XMMRegister, $shift$$XMMRegister); 10218 %} 10219 ins_pipe( pipe_slow ); 10220 %} 10221 10222 instruct vsra2I_imm(vecD dst, immI8 shift) %{ 10223 predicate(n->as_Vector()->length() == 2); 10224 match(Set dst (RShiftVI dst shift)); 10225 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} 10226 ins_encode %{ 10227 __ psrad($dst$$XMMRegister, (int)$shift$$constant); 10228 %} 10229 ins_pipe( pipe_slow ); 10230 %} 10231 10232 instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{ 10233 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 10234 match(Set dst (RShiftVI src shift)); 10235 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} 10236 ins_encode %{ 10237 int vector_len = 0; 10238 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10239 %} 10240 ins_pipe( pipe_slow ); 10241 %} 10242 10243 instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 10244 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 10245 match(Set dst (RShiftVI src shift)); 10246 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} 10247 ins_encode %{ 10248 int vector_len = 0; 10249 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10250 %} 10251 ins_pipe( pipe_slow ); 10252 %} 10253 10254 instruct vsra4I(vecX dst, vecS shift) %{ 10255 predicate(n->as_Vector()->length() == 4); 10256 match(Set dst (RShiftVI dst shift)); 10257 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %} 10258 ins_encode %{ 10259 __ psrad($dst$$XMMRegister, $shift$$XMMRegister); 10260 %} 10261 ins_pipe( pipe_slow ); 10262 %} 10263 10264 instruct vsra4I_imm(vecX dst, immI8 shift) %{ 10265 predicate(n->as_Vector()->length() == 4); 10266 match(Set dst (RShiftVI dst shift)); 10267 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %} 10268 ins_encode %{ 10269 __ psrad($dst$$XMMRegister, (int)$shift$$constant); 10270 %} 10271 ins_pipe( pipe_slow ); 10272 %} 10273 10274 instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{ 10275 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 10276 match(Set dst (RShiftVI src shift)); 10277 format %{ "vpsrad $dst,$src,$shift\t! 
arithmetic right shift packed4I" %} 10278 ins_encode %{ 10279 int vector_len = 0; 10280 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10281 %} 10282 ins_pipe( pipe_slow ); 10283 %} 10284 10285 instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 10286 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 10287 match(Set dst (RShiftVI src shift)); 10288 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} 10289 ins_encode %{ 10290 int vector_len = 0; 10291 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10292 %} 10293 ins_pipe( pipe_slow ); 10294 %} 10295 10296 instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{ 10297 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 10298 match(Set dst (RShiftVI src shift)); 10299 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} 10300 ins_encode %{ 10301 int vector_len = 1; 10302 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10303 %} 10304 ins_pipe( pipe_slow ); 10305 %} 10306 10307 instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 10308 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 10309 match(Set dst (RShiftVI src shift)); 10310 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} 10311 ins_encode %{ 10312 int vector_len = 1; 10313 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10314 %} 10315 ins_pipe( pipe_slow ); 10316 %} 10317 10318 instruct vsra16I_reg(vecZ dst, vecZ src, vecS shift) %{ 10319 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 10320 match(Set dst (RShiftVI src shift)); 10321 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %} 10322 ins_encode %{ 10323 int vector_len = 2; 10324 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 10325 %} 10326 ins_pipe( pipe_slow ); 10327 %} 10328 10329 instruct vsra16I_reg_imm(vecZ dst, vecZ src, immI8 shift) %{ 10330 predicate(UseAVX > 2 && n->as_Vector()->length() == 16); 10331 match(Set dst (RShiftVI src shift)); 10332 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed16I" %} 10333 ins_encode %{ 10334 int vector_len = 2; 10335 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector_len); 10336 %} 10337 ins_pipe( pipe_slow ); 10338 %} 10339 10340 // There are no vector arithmetic right shift instructions for longs.
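// Note: SSE2/AVX2 provide psraw/psrad but no 64-bit form; a packed 64-bit
// arithmetic right shift (vpsraq) only appears with AVX-512. As a hedged,
// Java-level sketch (invented name, valid for 0 < n < 64) of how it could
// be emulated from logical shifts if it were ever needed:
//
//   long sraEmulated(long x, int n) {
//     // The logical shift supplies the low bits; the second term copies
//     // the sign bit into the top n positions when x is negative.
//     return (x >>> n) | (-(x >>> 63) << (64 - n));
//   }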
10341 10342 10343 // --------------------------------- AND -------------------------------------- 10344 10345 instruct vand4B(vecS dst, vecS src) %{ 10346 predicate(n->as_Vector()->length_in_bytes() == 4); 10347 match(Set dst (AndV dst src)); 10348 format %{ "pand $dst,$src\t! and vectors (4 bytes)" %} 10349 ins_encode %{ 10350 __ pand($dst$$XMMRegister, $src$$XMMRegister); 10351 %} 10352 ins_pipe( pipe_slow ); 10353 %} 10354 10355 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{ 10356 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 10357 match(Set dst (AndV src1 src2)); 10358 format %{ "vpand $dst,$src1,$src2\t! 
and vectors (4 bytes)" %} 10359 ins_encode %{ 10360 int vector_len = 0; 10361 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10362 %} 10363 ins_pipe( pipe_slow ); 10364 %} 10365 10366 instruct vand4B_mem(vecS dst, vecS src, memory mem) %{ 10367 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 10368 match(Set dst (AndV src (LoadVector mem))); 10369 format %{ "vpand $dst,$src,$mem\t! and vectors (4 bytes)" %} 10370 ins_encode %{ 10371 int vector_len = 0; 10372 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10373 %} 10374 ins_pipe( pipe_slow ); 10375 %} 10376 10377 instruct vand8B(vecD dst, vecD src) %{ 10378 predicate(n->as_Vector()->length_in_bytes() == 8); 10379 match(Set dst (AndV dst src)); 10380 format %{ "pand $dst,$src\t! and vectors (8 bytes)" %} 10381 ins_encode %{ 10382 __ pand($dst$$XMMRegister, $src$$XMMRegister); 10383 %} 10384 ins_pipe( pipe_slow ); 10385 %} 10386 10387 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{ 10388 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 10389 match(Set dst (AndV src1 src2)); 10390 format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %} 10391 ins_encode %{ 10392 int vector_len = 0; 10393 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10394 %} 10395 ins_pipe( pipe_slow ); 10396 %} 10397 10398 instruct vand8B_mem(vecD dst, vecD src, memory mem) %{ 10399 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 10400 match(Set dst (AndV src (LoadVector mem))); 10401 format %{ "vpand $dst,$src,$mem\t! and vectors (8 bytes)" %} 10402 ins_encode %{ 10403 int vector_len = 0; 10404 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10405 %} 10406 ins_pipe( pipe_slow ); 10407 %} 10408 10409 instruct vand16B(vecX dst, vecX src) %{ 10410 predicate(n->as_Vector()->length_in_bytes() == 16); 10411 match(Set dst (AndV dst src)); 10412 format %{ "pand $dst,$src\t! and vectors (16 bytes)" %} 10413 ins_encode %{ 10414 __ pand($dst$$XMMRegister, $src$$XMMRegister); 10415 %} 10416 ins_pipe( pipe_slow ); 10417 %} 10418 10419 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{ 10420 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 10421 match(Set dst (AndV src1 src2)); 10422 format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %} 10423 ins_encode %{ 10424 int vector_len = 0; 10425 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len); 10426 %} 10427 ins_pipe( pipe_slow ); 10428 %} 10429 10430 instruct vand16B_mem(vecX dst, vecX src, memory mem) %{ 10431 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 10432 match(Set dst (AndV src (LoadVector mem))); 10433 format %{ "vpand $dst,$src,$mem\t! and vectors (16 bytes)" %} 10434 ins_encode %{ 10435 int vector_len = 0; 10436 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len); 10437 %} 10438 ins_pipe( pipe_slow ); 10439 %} 10440 10441 instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{ 10442 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 10443 match(Set dst (AndV src1 src2)); 10444 format %{ "vpand $dst,$src1,$src2\t! 
instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
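// The _mem rules above fold the LoadVector into the logical op itself, so
// (AndV src (LoadVector mem)) becomes a single vpand with a memory operand.
// A scalar sketch of the semantics being vectorized, in plain C++ for
// illustration (the function name and types are assumptions, not HotSpot
// code):
//
//   #include <stdint.h>
//   void and_bytes(uint8_t* dst, const uint8_t* src, const uint8_t* mem, int n) {
//     for (int i = 0; i < n; i++) {
//       dst[i] = src[i] & mem[i];   // lane-wise AND; the lane size is irrelevant
//     }
//   }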

// --------------------------------- OR ---------------------------------------

instruct vor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (4 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (8 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors (16 bytes)" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
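// Unlike the shift rules above, the AND/OR/XOR rules key their predicates on
// length_in_bytes() rather than on an element count: bitwise operations are
// lane-size agnostic, so one pand/por/pxor encoding serves byte, short, int
// and long vectors of the same total width.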

// --------------------------------- XOR --------------------------------------

instruct vxor4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor4B_mem(vecS dst, vecS src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (4 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor8B_mem(vecD dst, vecD src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (8 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (16 bytes)" %}
  ins_encode %{
    int vector_len = 0;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %}
  ins_encode %{
    int vector_len = 1;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor64B_mem(vecZ dst, vecZ src, memory mem) %{
  predicate(UseAVX > 2 && n->as_Vector()->length_in_bytes() == 64);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors (64 bytes)" %}
  ins_encode %{
    int vector_len = 2;
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
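// Aside: pxor/vpxor of a register with itself is also the common idiom for
// zeroing an XMM register, but the rules above only match explicit XorV
// nodes; register zeroing is emitted directly where needed rather than
// through these rules.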

// --------------------------------- FMA --------------------------------------

// a * b + c
instruct vfma2D_reg(vecX a, vecX b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma2D_mem(vecX a, memory b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed2D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4D_reg(vecY a, vecY b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4D_mem(vecY a, memory b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed4D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8D_reg(vecZ a, vecZ b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8D_mem(vecZ a, memory b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packed8D" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma4F_reg(vecX a, vecX b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
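// In the FMA rules, match(Set c (FmaVD c (Binary a b))) makes $c both the
// addend and the result, i.e. c = a * b + c in place. The encoding passes
// $c$$XMMRegister twice so the macro assembler can select a fused form whose
// destination doubles as the addend (e.g. a 231-style vfmadd).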
// a * b + c
instruct vfma4F_mem(vecX a, memory b, vecX c) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed4F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 0;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8F_reg(vecY a, vecY b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma8F_mem(vecY a, memory b, vecY c) %{
  predicate(UseFMA && n->as_Vector()->length() == 8);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed8F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 1;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma16F_reg(vecZ a, vecZ b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct vfma16F_mem(vecZ a, memory b, vecZ c) %{
  predicate(UseFMA && n->as_Vector()->length() == 16);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packed16F" %}
  ins_cost(150);
  ins_encode %{
    int vector_len = 2;
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
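// A fused multiply-add rounds once, not twice, so it is not merely shorthand
// for a multiply followed by an add. A minimal scalar sketch of what each
// lane above computes, in plain C++ for illustration:
//
//   #include <cmath>
//   double fused(double a, double b, double c)   { return std::fma(a, b, c); } // one rounding
//   double unfused(double a, double b, double c) { return a * b + c; }         // two roundings
//
// The UseFMA predicate gates these rules on processors that provide the FMA
// extensions.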

// --------------------------------- PopCount --------------------------------------

instruct vpopcount2I(vecD dst, vecD src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 2);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed2I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount4I(vecX dst, vecX src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 4);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed4I" %}
  ins_encode %{
    int vector_len = 0;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount8I(vecY dst, vecY src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 8);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed8I" %}
  ins_encode %{
    int vector_len = 1;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount16I(vecZ dst, vecZ src) %{
  predicate(VM_Version::supports_vpopcntdq() && UsePopCountInstruction && n->as_Vector()->length() == 16);
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packed16I" %}
  ins_encode %{
    int vector_len = 2;
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
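// Per-lane semantics of vpopcntd, sketched as scalar C++ for illustration
// (the helper below is an assumption, not HotSpot code; the rules above rely
// on the AVX512_VPOPCNTDQ instruction checked by
// VM_Version::supports_vpopcntdq()):
//
//   #include <stdint.h>
//   void popcount_lanes(uint32_t* dst, const uint32_t* src, int n) {
//     for (int i = 0; i < n; i++) {
//       uint32_t x = src[i];
//       x = x - ((x >> 1) & 0x55555555u);                  // 2-bit field counts
//       x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);  // 4-bit field counts
//       dst[i] = (((x + (x >> 4)) & 0x0F0F0F0Fu) * 0x01010101u) >> 24; // sum the bytes
//     }
//   }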