//
// Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers.  256-bit registers or 8 words each, labeled (a)-h.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//              XMM0-XMM3 might hold parameters

// XMM0-XMM5 are volatile (caller-saved) on every supported ABI, hence
// SOC for both the allocator and the C convention.
reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));

#ifdef _WIN64

// Windows x64 ABI: XMM6-XMM15 are callee-saved (see ABI note above),
// so the C convention save type is SOE here.
reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next(7));

reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next(7));

reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next(7));

reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next(7));

reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next(7));

reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next(7));

reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next(7));

reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next(7));

reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next(7));

reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next(7));

#else // _WIN64

// Non-Windows ABIs (see note above): no XMM register survives a call,
// so everything is Save-On-Call.
reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));

#ifdef _LP64

// XMM8-XMM15 only exist in 64-bit mode (they require a REX/VEX prefix).
reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));

#endif // _LP64

#endif // _WIN64

// Condition-code register; encoding 16 follows XMM0-15 in 64-bit mode,
// encoding 8 follows XMM0-7 in 32-bit mode.
#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                  ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                   );

// flags allocation class should be last.
alloc_class chunk2(RFLAGS);

// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for all float registers
reg_class float_reg(XMM0,
                    XMM1,
                    XMM2,
                    XMM3,
                    XMM4,
                    XMM5,
                    XMM6,
                    XMM7
#ifdef _LP64
                   ,XMM8,
                    XMM9,
                    XMM10,
                    XMM11,
                    XMM12,
                    XMM13,
                    XMM14,
                    XMM15
#endif
                    );

// Class for all double registers
reg_class double_reg(XMM0,  XMM0b,
                     XMM1,  XMM1b,
                     XMM2,  XMM2b,
                     XMM3,  XMM3b,
                     XMM4,  XMM4b,
                     XMM5,  XMM5b,
                     XMM6,  XMM6b,
                     XMM7,  XMM7b
#ifdef _LP64
                    ,XMM8,  XMM8b,
                     XMM9,  XMM9b,
                     XMM10, XMM10b,
                     XMM11, XMM11b,
                     XMM12, XMM12b,
                     XMM13, XMM13b,
                     XMM14, XMM14b,
                     XMM15, XMM15b
#endif
                     );

// Class for all 32bit vector registers
reg_class vectors_reg(XMM0,
                      XMM1,
                      XMM2,
                      XMM3,
                      XMM4,
                      XMM5,
                      XMM6,
                      XMM7
#ifdef _LP64
                     ,XMM8,
                      XMM9,
                      XMM10,
                      XMM11,
                      XMM12,
                      XMM13,
                      XMM14,
                      XMM15
#endif
                      );

// Class for all 64bit vector registers
reg_class vectord_reg(XMM0,  XMM0b,
                      XMM1,  XMM1b,
                      XMM2,  XMM2b,
                      XMM3,  XMM3b,
                      XMM4,  XMM4b,
                      XMM5,  XMM5b,
                      XMM6,  XMM6b,
                      XMM7,  XMM7b
#ifdef _LP64
                     ,XMM8,  XMM8b,
                      XMM9,  XMM9b,
                      XMM10, XMM10b,
                      XMM11, XMM11b,
                      XMM12, XMM12b,
                      XMM13, XMM13b,
                      XMM14, XMM14b,
                      XMM15, XMM15b
#endif
                      );

// Class for all 128bit vector registers
reg_class vectorx_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,
                      XMM1,  XMM1b,  XMM1c,  XMM1d,
                      XMM2,  XMM2b,  XMM2c,  XMM2d,
                      XMM3,  XMM3b,  XMM3c,  XMM3d,
                      XMM4,  XMM4b,  XMM4c,  XMM4d,
                      XMM5,  XMM5b,  XMM5c,  XMM5d,
                      XMM6,  XMM6b,  XMM6c,  XMM6d,
                      XMM7,  XMM7b,  XMM7c,  XMM7d
#ifdef _LP64
                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,
                      XMM9,  XMM9b,  XMM9c,  XMM9d,
                      XMM10, XMM10b, XMM10c, XMM10d,
                      XMM11, XMM11b, XMM11c, XMM11d,
                      XMM12, XMM12b, XMM12c, XMM12d,
                      XMM13, XMM13b, XMM13c, XMM13d,
                      XMM14, XMM14b, XMM14c, XMM14d,
                      XMM15, XMM15b, XMM15c, XMM15d
#endif
                      );

// Class for all 256bit vector registers
reg_class vectory_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
                      XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
                      XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
                      XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
                      XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
                      XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
                      XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
                      XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
#ifdef _LP64
                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
                      XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                      );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description

source_hpp %{
// Header information of the source block.
// Method declarations/definitions which are used outside
// the ad-scope can conveniently be defined here.
//
// To keep related declarations/definitions/uses close together,
// we switch between source %{ }% and source_hpp %{ }% freely as needed.

class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

// Platform-specific emission and sizing of the exception and deopt
// handler stubs appended to each compiled method.
class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // exception handler starts out as jump and can be patched to
    // a call by deoptimization.  (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
  }

#ifdef _LP64
  static uint size_deopt_handler() {
    // three 5 byte instructions
    return 15;
  }
#else
  static uint size_deopt_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // exception handler starts out as jump and can be patched to
    // a call by deoptimization.  (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return 5 + NativeJump::instruction_size; // pushl(); jmp;
  }
#endif
};

%} // end source_hpp

source %{

// Emit exception handler code.
// Stuff framesize into a register and call a VM stub routine.
// Returns the offset of the handler within the stub section,
// or 0 if the stub section could not be expanded.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL)  return 0;  // CodeBuffer::expand failed
  int offset = __ offset();
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}

// Emit deopt handler code.
// Pushes the current pc and jumps to the deopt blob's unpack entry.
// Returns the offset of the handler within the stub section,
// or 0 if the stub section could not be expanded.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL)  return 0;  // CodeBuffer::expand failed
  int offset = __ offset();

#ifdef _LP64
  // NOTE(review): 'the_pc' is documentation only here; the fixup below
  // works purely from assembler offsets.
  address the_pc = (address) __ pc();
  Label next;
  // push a "the_pc" on the stack without destroying any registers
  // as they all may be live.

  // push address of "next"
  __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
  __ bind(next);
  // adjust it so it matches "the_pc"
  __ subptr(Address(rsp, 0), __ offset() - offset);
#else
  // 32-bit: the handler address itself can be pushed as an immediate.
  InternalAddress here(__ pc());
  __ pushptr(here.addr());
#endif

  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}


//=============================================================================

// Float masks come from different places depending on platform.
597 #ifdef _LP64 598 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 599 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 600 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 601 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 602 #else 603 static address float_signmask() { return (address)float_signmask_pool; } 604 static address float_signflip() { return (address)float_signflip_pool; } 605 static address double_signmask() { return (address)double_signmask_pool; } 606 static address double_signflip() { return (address)double_signflip_pool; } 607 #endif 608 609 610 const bool Matcher::match_rule_supported(int opcode) { 611 if (!has_match_rule(opcode)) 612 return false; 613 614 switch (opcode) { 615 case Op_PopCountI: 616 case Op_PopCountL: 617 if (!UsePopCountInstruction) 618 return false; 619 break; 620 case Op_MulVI: 621 if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX 622 return false; 623 break; 624 case Op_CompareAndSwapL: 625 #ifdef _LP64 626 case Op_CompareAndSwapP: 627 #endif 628 if (!VM_Version::supports_cx8()) 629 return false; 630 break; 631 } 632 633 return true; // Per default match rules are supported. 634 } 635 636 // Max vector size in bytes. 0 if not supported. 637 const int Matcher::vector_width_in_bytes(BasicType bt) { 638 assert(is_java_primitive(bt), "only primitive type vectors"); 639 if (UseSSE < 2) return 0; 640 // SSE2 supports 128bit vectors for all types. 641 // AVX2 supports 256bit vectors for all types. 642 int size = (UseAVX > 1) ? 32 : 16; 643 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 644 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 645 size = 32; 646 // Use flag to limit vector size. 647 size = MIN2(size,(int)MaxVectorSize); 648 // Minimum 2 values in vector (or 4 for bytes). 
649 switch (bt) { 650 case T_DOUBLE: 651 case T_LONG: 652 if (size < 16) return 0; 653 case T_FLOAT: 654 case T_INT: 655 if (size < 8) return 0; 656 case T_BOOLEAN: 657 case T_BYTE: 658 case T_CHAR: 659 case T_SHORT: 660 if (size < 4) return 0; 661 break; 662 default: 663 ShouldNotReachHere(); 664 } 665 return size; 666 } 667 668 // Limits on vector size (number of elements) loaded into vector. 669 const int Matcher::max_vector_size(const BasicType bt) { 670 return vector_width_in_bytes(bt)/type2aelembytes(bt); 671 } 672 const int Matcher::min_vector_size(const BasicType bt) { 673 int max_size = max_vector_size(bt); 674 // Min size which can be loaded into vector is 4 bytes. 675 int size = (type2aelembytes(bt) == 1) ? 4 : 2; 676 return MIN2(size,max_size); 677 } 678 679 // Vector ideal reg corresponding to specidied size in bytes 680 const int Matcher::vector_ideal_reg(int size) { 681 assert(MaxVectorSize >= size, ""); 682 switch(size) { 683 case 4: return Op_VecS; 684 case 8: return Op_VecD; 685 case 16: return Op_VecX; 686 case 32: return Op_VecY; 687 } 688 ShouldNotReachHere(); 689 return 0; 690 } 691 692 // Only lowest bits of xmm reg are used for vector shift count. 693 const int Matcher::vector_shift_count_ideal_reg(int size) { 694 return Op_VecS; 695 } 696 697 // x86 supports misaligned vectors store/load. 698 const bool Matcher::misaligned_vectors_ok() { 699 return !AlignVector; // can be changed by flag 700 } 701 702 // x86 AES instructions are compatible with SunJCE expanded 703 // keys, hence we do not need to pass the original key to stubs 704 const bool Matcher::pass_original_key_for_aes() { 705 return false; 706 } 707 708 // Helper methods for MachSpillCopyNode::implementation(). 709 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 710 int src_hi, int dst_hi, uint ireg, outputStream* st) { 711 // In 64-bit VM size calculation is very complex. 
Emitting instructions 712 // into scratch buffer is used to get size in 64-bit VM. 713 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 714 assert(ireg == Op_VecS || // 32bit vector 715 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && 716 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, 717 "no non-adjacent vector moves" ); 718 if (cbuf) { 719 MacroAssembler _masm(cbuf); 720 int offset = __ offset(); 721 switch (ireg) { 722 case Op_VecS: // copy whole register 723 case Op_VecD: 724 case Op_VecX: 725 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 726 break; 727 case Op_VecY: 728 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 729 break; 730 default: 731 ShouldNotReachHere(); 732 } 733 int size = __ offset() - offset; 734 #ifdef ASSERT 735 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 736 assert(!do_size || size == 4, "incorrect size calculattion"); 737 #endif 738 return size; 739 #ifndef PRODUCT 740 } else if (!do_size) { 741 switch (ireg) { 742 case Op_VecS: 743 case Op_VecD: 744 case Op_VecX: 745 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 746 break; 747 case Op_VecY: 748 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 749 break; 750 default: 751 ShouldNotReachHere(); 752 } 753 #endif 754 } 755 // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. 756 return 4; 757 } 758 759 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 760 int stack_offset, int reg, uint ireg, outputStream* st) { 761 // In 64-bit VM size calculation is very complex. Emitting instructions 762 // into scratch buffer is used to get size in 64-bit VM. 
763 LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) 764 if (cbuf) { 765 MacroAssembler _masm(cbuf); 766 int offset = __ offset(); 767 if (is_load) { 768 switch (ireg) { 769 case Op_VecS: 770 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 771 break; 772 case Op_VecD: 773 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 774 break; 775 case Op_VecX: 776 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 777 break; 778 case Op_VecY: 779 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 780 break; 781 default: 782 ShouldNotReachHere(); 783 } 784 } else { // store 785 switch (ireg) { 786 case Op_VecS: 787 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 788 break; 789 case Op_VecD: 790 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 791 break; 792 case Op_VecX: 793 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 794 break; 795 case Op_VecY: 796 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 797 break; 798 default: 799 ShouldNotReachHere(); 800 } 801 } 802 int size = __ offset() - offset; 803 #ifdef ASSERT 804 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4); 805 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 
806 assert(!do_size || size == (5+offset_size), "incorrect size calculattion"); 807 #endif 808 return size; 809 #ifndef PRODUCT 810 } else if (!do_size) { 811 if (is_load) { 812 switch (ireg) { 813 case Op_VecS: 814 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 815 break; 816 case Op_VecD: 817 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 818 break; 819 case Op_VecX: 820 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 821 break; 822 case Op_VecY: 823 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 824 break; 825 default: 826 ShouldNotReachHere(); 827 } 828 } else { // store 829 switch (ireg) { 830 case Op_VecS: 831 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 832 break; 833 case Op_VecD: 834 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 835 break; 836 case Op_VecX: 837 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 838 break; 839 case Op_VecY: 840 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 841 break; 842 default: 843 ShouldNotReachHere(); 844 } 845 } 846 #endif 847 } 848 int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4); 849 // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 850 return 5+offset_size; 851 } 852 853 static inline jfloat replicate4_imm(int con, int width) { 854 // Load a constant of "width" (in bytes) and replicate it to fill 32bit. 
855 assert(width == 1 || width == 2, "only byte or short types here"); 856 int bit_width = width * 8; 857 jint val = con; 858 val &= (1 << bit_width) - 1; // mask off sign bits 859 while(bit_width < 32) { 860 val |= (val << bit_width); 861 bit_width <<= 1; 862 } 863 jfloat fval = *((jfloat*) &val); // coerce to float type 864 return fval; 865 } 866 867 static inline jdouble replicate8_imm(int con, int width) { 868 // Load a constant of "width" (in bytes) and replicate it to fill 64bit. 869 assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); 870 int bit_width = width * 8; 871 jlong val = con; 872 val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits 873 while(bit_width < 64) { 874 val |= (val << bit_width); 875 bit_width <<= 1; 876 } 877 jdouble dval = *((jdouble*) &val); // coerce to double type 878 return dval; 879 } 880 881 #ifndef PRODUCT 882 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 883 st->print("nop \t# %d bytes pad for loops and calls", _count); 884 } 885 #endif 886 887 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 888 MacroAssembler _masm(&cbuf); 889 __ nop(_count); 890 } 891 892 uint MachNopNode::size(PhaseRegAlloc*) const { 893 return _count; 894 } 895 896 #ifndef PRODUCT 897 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 898 st->print("# breakpoint"); 899 } 900 #endif 901 902 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 903 MacroAssembler _masm(&cbuf); 904 __ int3(); 905 } 906 907 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 908 return MachNode::size(ra_); 909 } 910 911 %} 912 913 encode %{ 914 915 enc_class call_epilog %{ 916 if (VerifyStackAtCalls) { 917 // Check that stack depth is unchanged: find majik cookie on stack 918 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 919 MacroAssembler _masm(&cbuf); 920 Label L; 921 __ 
cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}

%}


//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

// Vectors
// 4-byte vector operand (VecS).
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

// 8-byte vector operand (VecD).
operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

// 16-byte vector operand (VecX).
operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

// 32-byte vector operand (VecY).
operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}


// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)

// ============================================================================

// Halt node: emit an int3 trap.
instruct ShouldNotReachHere() %{
  match(Halt);
  format %{ "int3\t# ShouldNotReachHere" %}
  ins_encode %{
    __ int3();
  %}
  ins_pipe(pipe_slow);
%}

// ============================================================================

// Scalar float add, non-AVX SSE form: dst = dst + src.
instruct addF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst src));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Scalar float add with memory operand, non-AVX SSE form.
instruct addF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst (LoadF src)));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
__ addss($dst$$XMMRegister, $src$$Address); 1006 %} 1007 ins_pipe(pipe_slow); 1008 %} 1009 1010 instruct addF_imm(regF dst, immF con) %{ 1011 predicate((UseSSE>=1) && (UseAVX == 0)); 1012 match(Set dst (AddF dst con)); 1013 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1014 ins_cost(150); 1015 ins_encode %{ 1016 __ addss($dst$$XMMRegister, $constantaddress($con)); 1017 %} 1018 ins_pipe(pipe_slow); 1019 %} 1020 1021 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 1022 predicate(UseAVX > 0); 1023 match(Set dst (AddF src1 src2)); 1024 1025 format %{ "vaddss $dst, $src1, $src2" %} 1026 ins_cost(150); 1027 ins_encode %{ 1028 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1029 %} 1030 ins_pipe(pipe_slow); 1031 %} 1032 1033 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 1034 predicate(UseAVX > 0); 1035 match(Set dst (AddF src1 (LoadF src2))); 1036 1037 format %{ "vaddss $dst, $src1, $src2" %} 1038 ins_cost(150); 1039 ins_encode %{ 1040 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1041 %} 1042 ins_pipe(pipe_slow); 1043 %} 1044 1045 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 1046 predicate(UseAVX > 0); 1047 match(Set dst (AddF src con)); 1048 1049 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1050 ins_cost(150); 1051 ins_encode %{ 1052 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1053 %} 1054 ins_pipe(pipe_slow); 1055 %} 1056 1057 instruct addD_reg(regD dst, regD src) %{ 1058 predicate((UseSSE>=2) && (UseAVX == 0)); 1059 match(Set dst (AddD dst src)); 1060 1061 format %{ "addsd $dst, $src" %} 1062 ins_cost(150); 1063 ins_encode %{ 1064 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 1065 %} 1066 ins_pipe(pipe_slow); 1067 %} 1068 1069 instruct addD_mem(regD dst, memory src) %{ 1070 predicate((UseSSE>=2) && (UseAVX == 0)); 1071 match(Set dst (AddD dst (LoadD src))); 1072 1073 format %{ 
"addsd $dst, $src" %} 1074 ins_cost(150); 1075 ins_encode %{ 1076 __ addsd($dst$$XMMRegister, $src$$Address); 1077 %} 1078 ins_pipe(pipe_slow); 1079 %} 1080 1081 instruct addD_imm(regD dst, immD con) %{ 1082 predicate((UseSSE>=2) && (UseAVX == 0)); 1083 match(Set dst (AddD dst con)); 1084 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1085 ins_cost(150); 1086 ins_encode %{ 1087 __ addsd($dst$$XMMRegister, $constantaddress($con)); 1088 %} 1089 ins_pipe(pipe_slow); 1090 %} 1091 1092 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 1093 predicate(UseAVX > 0); 1094 match(Set dst (AddD src1 src2)); 1095 1096 format %{ "vaddsd $dst, $src1, $src2" %} 1097 ins_cost(150); 1098 ins_encode %{ 1099 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1100 %} 1101 ins_pipe(pipe_slow); 1102 %} 1103 1104 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 1105 predicate(UseAVX > 0); 1106 match(Set dst (AddD src1 (LoadD src2))); 1107 1108 format %{ "vaddsd $dst, $src1, $src2" %} 1109 ins_cost(150); 1110 ins_encode %{ 1111 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1112 %} 1113 ins_pipe(pipe_slow); 1114 %} 1115 1116 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 1117 predicate(UseAVX > 0); 1118 match(Set dst (AddD src con)); 1119 1120 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 1121 ins_cost(150); 1122 ins_encode %{ 1123 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1124 %} 1125 ins_pipe(pipe_slow); 1126 %} 1127 1128 instruct subF_reg(regF dst, regF src) %{ 1129 predicate((UseSSE>=1) && (UseAVX == 0)); 1130 match(Set dst (SubF dst src)); 1131 1132 format %{ "subss $dst, $src" %} 1133 ins_cost(150); 1134 ins_encode %{ 1135 __ subss($dst$$XMMRegister, $src$$XMMRegister); 1136 %} 1137 ins_pipe(pipe_slow); 1138 %} 1139 1140 instruct subF_mem(regF dst, memory src) %{ 1141 predicate((UseSSE>=1) && (UseAVX == 
0)); 1142 match(Set dst (SubF dst (LoadF src))); 1143 1144 format %{ "subss $dst, $src" %} 1145 ins_cost(150); 1146 ins_encode %{ 1147 __ subss($dst$$XMMRegister, $src$$Address); 1148 %} 1149 ins_pipe(pipe_slow); 1150 %} 1151 1152 instruct subF_imm(regF dst, immF con) %{ 1153 predicate((UseSSE>=1) && (UseAVX == 0)); 1154 match(Set dst (SubF dst con)); 1155 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1156 ins_cost(150); 1157 ins_encode %{ 1158 __ subss($dst$$XMMRegister, $constantaddress($con)); 1159 %} 1160 ins_pipe(pipe_slow); 1161 %} 1162 1163 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 1164 predicate(UseAVX > 0); 1165 match(Set dst (SubF src1 src2)); 1166 1167 format %{ "vsubss $dst, $src1, $src2" %} 1168 ins_cost(150); 1169 ins_encode %{ 1170 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1171 %} 1172 ins_pipe(pipe_slow); 1173 %} 1174 1175 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 1176 predicate(UseAVX > 0); 1177 match(Set dst (SubF src1 (LoadF src2))); 1178 1179 format %{ "vsubss $dst, $src1, $src2" %} 1180 ins_cost(150); 1181 ins_encode %{ 1182 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1183 %} 1184 ins_pipe(pipe_slow); 1185 %} 1186 1187 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 1188 predicate(UseAVX > 0); 1189 match(Set dst (SubF src con)); 1190 1191 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1192 ins_cost(150); 1193 ins_encode %{ 1194 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1195 %} 1196 ins_pipe(pipe_slow); 1197 %} 1198 1199 instruct subD_reg(regD dst, regD src) %{ 1200 predicate((UseSSE>=2) && (UseAVX == 0)); 1201 match(Set dst (SubD dst src)); 1202 1203 format %{ "subsd $dst, $src" %} 1204 ins_cost(150); 1205 ins_encode %{ 1206 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 1207 %} 1208 ins_pipe(pipe_slow); 1209 %} 1210 1211 instruct 
subD_mem(regD dst, memory src) %{ 1212 predicate((UseSSE>=2) && (UseAVX == 0)); 1213 match(Set dst (SubD dst (LoadD src))); 1214 1215 format %{ "subsd $dst, $src" %} 1216 ins_cost(150); 1217 ins_encode %{ 1218 __ subsd($dst$$XMMRegister, $src$$Address); 1219 %} 1220 ins_pipe(pipe_slow); 1221 %} 1222 1223 instruct subD_imm(regD dst, immD con) %{ 1224 predicate((UseSSE>=2) && (UseAVX == 0)); 1225 match(Set dst (SubD dst con)); 1226 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1227 ins_cost(150); 1228 ins_encode %{ 1229 __ subsd($dst$$XMMRegister, $constantaddress($con)); 1230 %} 1231 ins_pipe(pipe_slow); 1232 %} 1233 1234 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 1235 predicate(UseAVX > 0); 1236 match(Set dst (SubD src1 src2)); 1237 1238 format %{ "vsubsd $dst, $src1, $src2" %} 1239 ins_cost(150); 1240 ins_encode %{ 1241 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1242 %} 1243 ins_pipe(pipe_slow); 1244 %} 1245 1246 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 1247 predicate(UseAVX > 0); 1248 match(Set dst (SubD src1 (LoadD src2))); 1249 1250 format %{ "vsubsd $dst, $src1, $src2" %} 1251 ins_cost(150); 1252 ins_encode %{ 1253 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1254 %} 1255 ins_pipe(pipe_slow); 1256 %} 1257 1258 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 1259 predicate(UseAVX > 0); 1260 match(Set dst (SubD src con)); 1261 1262 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 1263 ins_cost(150); 1264 ins_encode %{ 1265 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1266 %} 1267 ins_pipe(pipe_slow); 1268 %} 1269 1270 instruct mulF_reg(regF dst, regF src) %{ 1271 predicate((UseSSE>=1) && (UseAVX == 0)); 1272 match(Set dst (MulF dst src)); 1273 1274 format %{ "mulss $dst, $src" %} 1275 ins_cost(150); 1276 ins_encode %{ 1277 __ mulss($dst$$XMMRegister, 
$src$$XMMRegister); 1278 %} 1279 ins_pipe(pipe_slow); 1280 %} 1281 1282 instruct mulF_mem(regF dst, memory src) %{ 1283 predicate((UseSSE>=1) && (UseAVX == 0)); 1284 match(Set dst (MulF dst (LoadF src))); 1285 1286 format %{ "mulss $dst, $src" %} 1287 ins_cost(150); 1288 ins_encode %{ 1289 __ mulss($dst$$XMMRegister, $src$$Address); 1290 %} 1291 ins_pipe(pipe_slow); 1292 %} 1293 1294 instruct mulF_imm(regF dst, immF con) %{ 1295 predicate((UseSSE>=1) && (UseAVX == 0)); 1296 match(Set dst (MulF dst con)); 1297 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1298 ins_cost(150); 1299 ins_encode %{ 1300 __ mulss($dst$$XMMRegister, $constantaddress($con)); 1301 %} 1302 ins_pipe(pipe_slow); 1303 %} 1304 1305 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 1306 predicate(UseAVX > 0); 1307 match(Set dst (MulF src1 src2)); 1308 1309 format %{ "vmulss $dst, $src1, $src2" %} 1310 ins_cost(150); 1311 ins_encode %{ 1312 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1313 %} 1314 ins_pipe(pipe_slow); 1315 %} 1316 1317 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 1318 predicate(UseAVX > 0); 1319 match(Set dst (MulF src1 (LoadF src2))); 1320 1321 format %{ "vmulss $dst, $src1, $src2" %} 1322 ins_cost(150); 1323 ins_encode %{ 1324 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1325 %} 1326 ins_pipe(pipe_slow); 1327 %} 1328 1329 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 1330 predicate(UseAVX > 0); 1331 match(Set dst (MulF src con)); 1332 1333 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1334 ins_cost(150); 1335 ins_encode %{ 1336 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1337 %} 1338 ins_pipe(pipe_slow); 1339 %} 1340 1341 instruct mulD_reg(regD dst, regD src) %{ 1342 predicate((UseSSE>=2) && (UseAVX == 0)); 1343 match(Set dst (MulD dst src)); 1344 1345 format %{ "mulsd $dst, $src" %} 1346 
ins_cost(150); 1347 ins_encode %{ 1348 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 1349 %} 1350 ins_pipe(pipe_slow); 1351 %} 1352 1353 instruct mulD_mem(regD dst, memory src) %{ 1354 predicate((UseSSE>=2) && (UseAVX == 0)); 1355 match(Set dst (MulD dst (LoadD src))); 1356 1357 format %{ "mulsd $dst, $src" %} 1358 ins_cost(150); 1359 ins_encode %{ 1360 __ mulsd($dst$$XMMRegister, $src$$Address); 1361 %} 1362 ins_pipe(pipe_slow); 1363 %} 1364 1365 instruct mulD_imm(regD dst, immD con) %{ 1366 predicate((UseSSE>=2) && (UseAVX == 0)); 1367 match(Set dst (MulD dst con)); 1368 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1369 ins_cost(150); 1370 ins_encode %{ 1371 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 1372 %} 1373 ins_pipe(pipe_slow); 1374 %} 1375 1376 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 1377 predicate(UseAVX > 0); 1378 match(Set dst (MulD src1 src2)); 1379 1380 format %{ "vmulsd $dst, $src1, $src2" %} 1381 ins_cost(150); 1382 ins_encode %{ 1383 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1384 %} 1385 ins_pipe(pipe_slow); 1386 %} 1387 1388 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 1389 predicate(UseAVX > 0); 1390 match(Set dst (MulD src1 (LoadD src2))); 1391 1392 format %{ "vmulsd $dst, $src1, $src2" %} 1393 ins_cost(150); 1394 ins_encode %{ 1395 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1396 %} 1397 ins_pipe(pipe_slow); 1398 %} 1399 1400 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 1401 predicate(UseAVX > 0); 1402 match(Set dst (MulD src con)); 1403 1404 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 1405 ins_cost(150); 1406 ins_encode %{ 1407 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1408 %} 1409 ins_pipe(pipe_slow); 1410 %} 1411 1412 instruct divF_reg(regF dst, regF src) %{ 1413 predicate((UseSSE>=1) && (UseAVX == 0)); 1414 match(Set 
dst (DivF dst src)); 1415 1416 format %{ "divss $dst, $src" %} 1417 ins_cost(150); 1418 ins_encode %{ 1419 __ divss($dst$$XMMRegister, $src$$XMMRegister); 1420 %} 1421 ins_pipe(pipe_slow); 1422 %} 1423 1424 instruct divF_mem(regF dst, memory src) %{ 1425 predicate((UseSSE>=1) && (UseAVX == 0)); 1426 match(Set dst (DivF dst (LoadF src))); 1427 1428 format %{ "divss $dst, $src" %} 1429 ins_cost(150); 1430 ins_encode %{ 1431 __ divss($dst$$XMMRegister, $src$$Address); 1432 %} 1433 ins_pipe(pipe_slow); 1434 %} 1435 1436 instruct divF_imm(regF dst, immF con) %{ 1437 predicate((UseSSE>=1) && (UseAVX == 0)); 1438 match(Set dst (DivF dst con)); 1439 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1440 ins_cost(150); 1441 ins_encode %{ 1442 __ divss($dst$$XMMRegister, $constantaddress($con)); 1443 %} 1444 ins_pipe(pipe_slow); 1445 %} 1446 1447 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 1448 predicate(UseAVX > 0); 1449 match(Set dst (DivF src1 src2)); 1450 1451 format %{ "vdivss $dst, $src1, $src2" %} 1452 ins_cost(150); 1453 ins_encode %{ 1454 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1455 %} 1456 ins_pipe(pipe_slow); 1457 %} 1458 1459 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 1460 predicate(UseAVX > 0); 1461 match(Set dst (DivF src1 (LoadF src2))); 1462 1463 format %{ "vdivss $dst, $src1, $src2" %} 1464 ins_cost(150); 1465 ins_encode %{ 1466 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1467 %} 1468 ins_pipe(pipe_slow); 1469 %} 1470 1471 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 1472 predicate(UseAVX > 0); 1473 match(Set dst (DivF src con)); 1474 1475 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 1476 ins_cost(150); 1477 ins_encode %{ 1478 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1479 %} 1480 ins_pipe(pipe_slow); 1481 %} 1482 1483 instruct divD_reg(regD dst, regD src) 
%{ 1484 predicate((UseSSE>=2) && (UseAVX == 0)); 1485 match(Set dst (DivD dst src)); 1486 1487 format %{ "divsd $dst, $src" %} 1488 ins_cost(150); 1489 ins_encode %{ 1490 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 1491 %} 1492 ins_pipe(pipe_slow); 1493 %} 1494 1495 instruct divD_mem(regD dst, memory src) %{ 1496 predicate((UseSSE>=2) && (UseAVX == 0)); 1497 match(Set dst (DivD dst (LoadD src))); 1498 1499 format %{ "divsd $dst, $src" %} 1500 ins_cost(150); 1501 ins_encode %{ 1502 __ divsd($dst$$XMMRegister, $src$$Address); 1503 %} 1504 ins_pipe(pipe_slow); 1505 %} 1506 1507 instruct divD_imm(regD dst, immD con) %{ 1508 predicate((UseSSE>=2) && (UseAVX == 0)); 1509 match(Set dst (DivD dst con)); 1510 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1511 ins_cost(150); 1512 ins_encode %{ 1513 __ divsd($dst$$XMMRegister, $constantaddress($con)); 1514 %} 1515 ins_pipe(pipe_slow); 1516 %} 1517 1518 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 1519 predicate(UseAVX > 0); 1520 match(Set dst (DivD src1 src2)); 1521 1522 format %{ "vdivsd $dst, $src1, $src2" %} 1523 ins_cost(150); 1524 ins_encode %{ 1525 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 1526 %} 1527 ins_pipe(pipe_slow); 1528 %} 1529 1530 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 1531 predicate(UseAVX > 0); 1532 match(Set dst (DivD src1 (LoadD src2))); 1533 1534 format %{ "vdivsd $dst, $src1, $src2" %} 1535 ins_cost(150); 1536 ins_encode %{ 1537 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 1538 %} 1539 ins_pipe(pipe_slow); 1540 %} 1541 1542 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 1543 predicate(UseAVX > 0); 1544 match(Set dst (DivD src con)); 1545 1546 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 1547 ins_cost(150); 1548 ins_encode %{ 1549 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 1550 %} 1551 
ins_pipe(pipe_slow); 1552 %} 1553 1554 instruct absF_reg(regF dst) %{ 1555 predicate((UseSSE>=1) && (UseAVX == 0)); 1556 match(Set dst (AbsF dst)); 1557 ins_cost(150); 1558 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 1559 ins_encode %{ 1560 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 1561 %} 1562 ins_pipe(pipe_slow); 1563 %} 1564 1565 instruct absF_reg_reg(regF dst, regF src) %{ 1566 predicate(UseAVX > 0); 1567 match(Set dst (AbsF src)); 1568 ins_cost(150); 1569 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 1570 ins_encode %{ 1571 bool vector256 = false; 1572 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 1573 ExternalAddress(float_signmask()), vector256); 1574 %} 1575 ins_pipe(pipe_slow); 1576 %} 1577 1578 instruct absD_reg(regD dst) %{ 1579 predicate((UseSSE>=2) && (UseAVX == 0)); 1580 match(Set dst (AbsD dst)); 1581 ins_cost(150); 1582 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 1583 "# abs double by sign masking" %} 1584 ins_encode %{ 1585 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 1586 %} 1587 ins_pipe(pipe_slow); 1588 %} 1589 1590 instruct absD_reg_reg(regD dst, regD src) %{ 1591 predicate(UseAVX > 0); 1592 match(Set dst (AbsD src)); 1593 ins_cost(150); 1594 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 1595 "# abs double by sign masking" %} 1596 ins_encode %{ 1597 bool vector256 = false; 1598 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 1599 ExternalAddress(double_signmask()), vector256); 1600 %} 1601 ins_pipe(pipe_slow); 1602 %} 1603 1604 instruct negF_reg(regF dst) %{ 1605 predicate((UseSSE>=1) && (UseAVX == 0)); 1606 match(Set dst (NegF dst)); 1607 ins_cost(150); 1608 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 1609 ins_encode %{ 1610 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 1611 %} 1612 ins_pipe(pipe_slow); 1613 %} 1614 1615 instruct negF_reg_reg(regF dst, regF src) %{ 1616 predicate(UseAVX > 0); 
1617 match(Set dst (NegF src)); 1618 ins_cost(150); 1619 format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 1620 ins_encode %{ 1621 bool vector256 = false; 1622 __ vxorps($dst$$XMMRegister, $src$$XMMRegister, 1623 ExternalAddress(float_signflip()), vector256); 1624 %} 1625 ins_pipe(pipe_slow); 1626 %} 1627 1628 instruct negD_reg(regD dst) %{ 1629 predicate((UseSSE>=2) && (UseAVX == 0)); 1630 match(Set dst (NegD dst)); 1631 ins_cost(150); 1632 format %{ "xorpd $dst, [0x8000000000000000]\t" 1633 "# neg double by sign flipping" %} 1634 ins_encode %{ 1635 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 1636 %} 1637 ins_pipe(pipe_slow); 1638 %} 1639 1640 instruct negD_reg_reg(regD dst, regD src) %{ 1641 predicate(UseAVX > 0); 1642 match(Set dst (NegD src)); 1643 ins_cost(150); 1644 format %{ "vxorpd $dst, $src, [0x8000000000000000]\t" 1645 "# neg double by sign flipping" %} 1646 ins_encode %{ 1647 bool vector256 = false; 1648 __ vxorpd($dst$$XMMRegister, $src$$XMMRegister, 1649 ExternalAddress(double_signflip()), vector256); 1650 %} 1651 ins_pipe(pipe_slow); 1652 %} 1653 1654 instruct sqrtF_reg(regF dst, regF src) %{ 1655 predicate(UseSSE>=1); 1656 match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); 1657 1658 format %{ "sqrtss $dst, $src" %} 1659 ins_cost(150); 1660 ins_encode %{ 1661 __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); 1662 %} 1663 ins_pipe(pipe_slow); 1664 %} 1665 1666 instruct sqrtF_mem(regF dst, memory src) %{ 1667 predicate(UseSSE>=1); 1668 match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src))))); 1669 1670 format %{ "sqrtss $dst, $src" %} 1671 ins_cost(150); 1672 ins_encode %{ 1673 __ sqrtss($dst$$XMMRegister, $src$$Address); 1674 %} 1675 ins_pipe(pipe_slow); 1676 %} 1677 1678 instruct sqrtF_imm(regF dst, immF con) %{ 1679 predicate(UseSSE>=1); 1680 match(Set dst (ConvD2F (SqrtD (ConvF2D con)))); 1681 format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 1682 ins_cost(150); 1683 
ins_encode %{ 1684 __ sqrtss($dst$$XMMRegister, $constantaddress($con)); 1685 %} 1686 ins_pipe(pipe_slow); 1687 %} 1688 1689 instruct sqrtD_reg(regD dst, regD src) %{ 1690 predicate(UseSSE>=2); 1691 match(Set dst (SqrtD src)); 1692 1693 format %{ "sqrtsd $dst, $src" %} 1694 ins_cost(150); 1695 ins_encode %{ 1696 __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister); 1697 %} 1698 ins_pipe(pipe_slow); 1699 %} 1700 1701 instruct sqrtD_mem(regD dst, memory src) %{ 1702 predicate(UseSSE>=2); 1703 match(Set dst (SqrtD (LoadD src))); 1704 1705 format %{ "sqrtsd $dst, $src" %} 1706 ins_cost(150); 1707 ins_encode %{ 1708 __ sqrtsd($dst$$XMMRegister, $src$$Address); 1709 %} 1710 ins_pipe(pipe_slow); 1711 %} 1712 1713 instruct sqrtD_imm(regD dst, immD con) %{ 1714 predicate(UseSSE>=2); 1715 match(Set dst (SqrtD con)); 1716 format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 1717 ins_cost(150); 1718 ins_encode %{ 1719 __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); 1720 %} 1721 ins_pipe(pipe_slow); 1722 %} 1723 1724 1725 // ====================VECTOR INSTRUCTIONS===================================== 1726 1727 // Load vectors (4 bytes long) 1728 instruct loadV4(vecS dst, memory mem) %{ 1729 predicate(n->as_LoadVector()->memory_size() == 4); 1730 match(Set dst (LoadVector mem)); 1731 ins_cost(125); 1732 format %{ "movd $dst,$mem\t! load vector (4 bytes)" %} 1733 ins_encode %{ 1734 __ movdl($dst$$XMMRegister, $mem$$Address); 1735 %} 1736 ins_pipe( pipe_slow ); 1737 %} 1738 1739 // Load vectors (8 bytes long) 1740 instruct loadV8(vecD dst, memory mem) %{ 1741 predicate(n->as_LoadVector()->memory_size() == 8); 1742 match(Set dst (LoadVector mem)); 1743 ins_cost(125); 1744 format %{ "movq $dst,$mem\t! 
load vector (8 bytes)" %} 1745 ins_encode %{ 1746 __ movq($dst$$XMMRegister, $mem$$Address); 1747 %} 1748 ins_pipe( pipe_slow ); 1749 %} 1750 1751 // Load vectors (16 bytes long) 1752 instruct loadV16(vecX dst, memory mem) %{ 1753 predicate(n->as_LoadVector()->memory_size() == 16); 1754 match(Set dst (LoadVector mem)); 1755 ins_cost(125); 1756 format %{ "movdqu $dst,$mem\t! load vector (16 bytes)" %} 1757 ins_encode %{ 1758 __ movdqu($dst$$XMMRegister, $mem$$Address); 1759 %} 1760 ins_pipe( pipe_slow ); 1761 %} 1762 1763 // Load vectors (32 bytes long) 1764 instruct loadV32(vecY dst, memory mem) %{ 1765 predicate(n->as_LoadVector()->memory_size() == 32); 1766 match(Set dst (LoadVector mem)); 1767 ins_cost(125); 1768 format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %} 1769 ins_encode %{ 1770 __ vmovdqu($dst$$XMMRegister, $mem$$Address); 1771 %} 1772 ins_pipe( pipe_slow ); 1773 %} 1774 1775 // Store vectors 1776 instruct storeV4(memory mem, vecS src) %{ 1777 predicate(n->as_StoreVector()->memory_size() == 4); 1778 match(Set mem (StoreVector mem src)); 1779 ins_cost(145); 1780 format %{ "movd $mem,$src\t! store vector (4 bytes)" %} 1781 ins_encode %{ 1782 __ movdl($mem$$Address, $src$$XMMRegister); 1783 %} 1784 ins_pipe( pipe_slow ); 1785 %} 1786 1787 instruct storeV8(memory mem, vecD src) %{ 1788 predicate(n->as_StoreVector()->memory_size() == 8); 1789 match(Set mem (StoreVector mem src)); 1790 ins_cost(145); 1791 format %{ "movq $mem,$src\t! store vector (8 bytes)" %} 1792 ins_encode %{ 1793 __ movq($mem$$Address, $src$$XMMRegister); 1794 %} 1795 ins_pipe( pipe_slow ); 1796 %} 1797 1798 instruct storeV16(memory mem, vecX src) %{ 1799 predicate(n->as_StoreVector()->memory_size() == 16); 1800 match(Set mem (StoreVector mem src)); 1801 ins_cost(145); 1802 format %{ "movdqu $mem,$src\t! 
store vector (16 bytes)" %} 1803 ins_encode %{ 1804 __ movdqu($mem$$Address, $src$$XMMRegister); 1805 %} 1806 ins_pipe( pipe_slow ); 1807 %} 1808 1809 instruct storeV32(memory mem, vecY src) %{ 1810 predicate(n->as_StoreVector()->memory_size() == 32); 1811 match(Set mem (StoreVector mem src)); 1812 ins_cost(145); 1813 format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %} 1814 ins_encode %{ 1815 __ vmovdqu($mem$$Address, $src$$XMMRegister); 1816 %} 1817 ins_pipe( pipe_slow ); 1818 %} 1819 1820 // Replicate byte scalar to be vector 1821 instruct Repl4B(vecS dst, rRegI src) %{ 1822 predicate(n->as_Vector()->length() == 4); 1823 match(Set dst (ReplicateB src)); 1824 format %{ "movd $dst,$src\n\t" 1825 "punpcklbw $dst,$dst\n\t" 1826 "pshuflw $dst,$dst,0x00\t! replicate4B" %} 1827 ins_encode %{ 1828 __ movdl($dst$$XMMRegister, $src$$Register); 1829 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1830 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1831 %} 1832 ins_pipe( pipe_slow ); 1833 %} 1834 1835 instruct Repl8B(vecD dst, rRegI src) %{ 1836 predicate(n->as_Vector()->length() == 8); 1837 match(Set dst (ReplicateB src)); 1838 format %{ "movd $dst,$src\n\t" 1839 "punpcklbw $dst,$dst\n\t" 1840 "pshuflw $dst,$dst,0x00\t! replicate8B" %} 1841 ins_encode %{ 1842 __ movdl($dst$$XMMRegister, $src$$Register); 1843 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1844 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1845 %} 1846 ins_pipe( pipe_slow ); 1847 %} 1848 1849 instruct Repl16B(vecX dst, rRegI src) %{ 1850 predicate(n->as_Vector()->length() == 16); 1851 match(Set dst (ReplicateB src)); 1852 format %{ "movd $dst,$src\n\t" 1853 "punpcklbw $dst,$dst\n\t" 1854 "pshuflw $dst,$dst,0x00\n\t" 1855 "punpcklqdq $dst,$dst\t! 
replicate16B" %} 1856 ins_encode %{ 1857 __ movdl($dst$$XMMRegister, $src$$Register); 1858 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1859 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1860 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1861 %} 1862 ins_pipe( pipe_slow ); 1863 %} 1864 1865 instruct Repl32B(vecY dst, rRegI src) %{ 1866 predicate(n->as_Vector()->length() == 32); 1867 match(Set dst (ReplicateB src)); 1868 format %{ "movd $dst,$src\n\t" 1869 "punpcklbw $dst,$dst\n\t" 1870 "pshuflw $dst,$dst,0x00\n\t" 1871 "punpcklqdq $dst,$dst\n\t" 1872 "vinserti128h $dst,$dst,$dst\t! replicate32B" %} 1873 ins_encode %{ 1874 __ movdl($dst$$XMMRegister, $src$$Register); 1875 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 1876 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1877 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 1878 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 1879 %} 1880 ins_pipe( pipe_slow ); 1881 %} 1882 1883 // Replicate byte scalar immediate to be vector by loading from const table. 1884 instruct Repl4B_imm(vecS dst, immI con) %{ 1885 predicate(n->as_Vector()->length() == 4); 1886 match(Set dst (ReplicateB con)); 1887 format %{ "movdl $dst,[$constantaddress]\t! replicate4B($con)" %} 1888 ins_encode %{ 1889 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1))); 1890 %} 1891 ins_pipe( pipe_slow ); 1892 %} 1893 1894 instruct Repl8B_imm(vecD dst, immI con) %{ 1895 predicate(n->as_Vector()->length() == 8); 1896 match(Set dst (ReplicateB con)); 1897 format %{ "movq $dst,[$constantaddress]\t! 
// Replicate a byte immediate into a 32-byte (256-bit) vector: load the
// 8-byte replicated pattern from the constant table, widen it to 128 bits
// with punpcklqdq, then copy the low 128-bit lane into the high lane.
// Fix: the format string read "lreplicate32B($con)" — a typo; every
// sibling rule uses the bare "replicateNB($con)" spelling.
instruct Repl32B_imm(vecY dst, immI con) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB con));
  format %{ "movq $dst,[$constantaddress]\n\t"
            "punpcklqdq $dst,$dst\n\t"
            "vinserti128h $dst,$dst,$dst\t! replicate32B($con)" %}
  ins_encode %{
    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
// Replicate byte zero into a 32-byte (256-bit) vector by xoring the
// destination with itself.
instruct Repl32B_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (ReplicateB zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate32B zero" %}
  ins_encode %{
    // 256-bit vpxor is an AVX2 instruction (per the sibling comments in
    // this file, "AVX2 will have it"). NOTE(review): the predicate only
    // checks vector length, not UseAVX — presumably 32-byte vectors are
    // never formed unless AVX2 is available; confirm against the
    // Matcher's max-vector-size logic.
    bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}
replicate4S" %} 1991 ins_encode %{ 1992 __ movdl($dst$$XMMRegister, $src$$Register); 1993 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 1994 %} 1995 ins_pipe( fpu_reg_reg ); 1996 %} 1997 1998 instruct Repl8S(vecX dst, rRegI src) %{ 1999 predicate(n->as_Vector()->length() == 8); 2000 match(Set dst (ReplicateS src)); 2001 format %{ "movd $dst,$src\n\t" 2002 "pshuflw $dst,$dst,0x00\n\t" 2003 "punpcklqdq $dst,$dst\t! replicate8S" %} 2004 ins_encode %{ 2005 __ movdl($dst$$XMMRegister, $src$$Register); 2006 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2007 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2008 %} 2009 ins_pipe( pipe_slow ); 2010 %} 2011 2012 instruct Repl16S(vecY dst, rRegI src) %{ 2013 predicate(n->as_Vector()->length() == 16); 2014 match(Set dst (ReplicateS src)); 2015 format %{ "movd $dst,$src\n\t" 2016 "pshuflw $dst,$dst,0x00\n\t" 2017 "punpcklqdq $dst,$dst\n\t" 2018 "vinserti128h $dst,$dst,$dst\t! replicate16S" %} 2019 ins_encode %{ 2020 __ movdl($dst$$XMMRegister, $src$$Register); 2021 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2022 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2023 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2024 %} 2025 ins_pipe( pipe_slow ); 2026 %} 2027 2028 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table. 2029 instruct Repl2S_imm(vecS dst, immI con) %{ 2030 predicate(n->as_Vector()->length() == 2); 2031 match(Set dst (ReplicateS con)); 2032 format %{ "movdl $dst,[$constantaddress]\t! replicate2S($con)" %} 2033 ins_encode %{ 2034 __ movdl($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2))); 2035 %} 2036 ins_pipe( fpu_reg_reg ); 2037 %} 2038 2039 instruct Repl4S_imm(vecD dst, immI con) %{ 2040 predicate(n->as_Vector()->length() == 4); 2041 match(Set dst (ReplicateS con)); 2042 format %{ "movq $dst,[$constantaddress]\t! 
replicate4S($con)" %} 2043 ins_encode %{ 2044 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 2045 %} 2046 ins_pipe( fpu_reg_reg ); 2047 %} 2048 2049 instruct Repl8S_imm(vecX dst, immI con) %{ 2050 predicate(n->as_Vector()->length() == 8); 2051 match(Set dst (ReplicateS con)); 2052 format %{ "movq $dst,[$constantaddress]\n\t" 2053 "punpcklqdq $dst,$dst\t! replicate8S($con)" %} 2054 ins_encode %{ 2055 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 2056 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2057 %} 2058 ins_pipe( pipe_slow ); 2059 %} 2060 2061 instruct Repl16S_imm(vecY dst, immI con) %{ 2062 predicate(n->as_Vector()->length() == 16); 2063 match(Set dst (ReplicateS con)); 2064 format %{ "movq $dst,[$constantaddress]\n\t" 2065 "punpcklqdq $dst,$dst\n\t" 2066 "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %} 2067 ins_encode %{ 2068 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2))); 2069 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2070 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2071 %} 2072 ins_pipe( pipe_slow ); 2073 %} 2074 2075 // Replicate char/short (2 byte) scalar zero to be vector 2076 instruct Repl2S_zero(vecS dst, immI0 zero) %{ 2077 predicate(n->as_Vector()->length() == 2); 2078 match(Set dst (ReplicateS zero)); 2079 format %{ "pxor $dst,$dst\t! replicate2S zero" %} 2080 ins_encode %{ 2081 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 2082 %} 2083 ins_pipe( fpu_reg_reg ); 2084 %} 2085 2086 instruct Repl4S_zero(vecD dst, immI0 zero) %{ 2087 predicate(n->as_Vector()->length() == 4); 2088 match(Set dst (ReplicateS zero)); 2089 format %{ "pxor $dst,$dst\t! 
// Replicate short zero into a 16-element (256-bit) vector by xoring the
// destination with itself.
instruct Repl16S_zero(vecY dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateS zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate16S zero" %}
  ins_encode %{
    // 256-bit vpxor is an AVX2 instruction (per the sibling comments in
    // this file, "AVX2 will have it"). NOTE(review): the predicate only
    // checks vector length, not UseAVX — presumably 16-short vectors are
    // never formed unless AVX2 is available; confirm against the
    // Matcher's max-vector-size logic.
    bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}
replicate8I" %} 2149 ins_encode %{ 2150 __ movdl($dst$$XMMRegister, $src$$Register); 2151 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 2152 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2153 %} 2154 ins_pipe( pipe_slow ); 2155 %} 2156 2157 // Replicate integer (4 byte) scalar immediate to be vector by loading from const table. 2158 instruct Repl2I_imm(vecD dst, immI con) %{ 2159 predicate(n->as_Vector()->length() == 2); 2160 match(Set dst (ReplicateI con)); 2161 format %{ "movq $dst,[$constantaddress]\t! replicate2I($con)" %} 2162 ins_encode %{ 2163 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 2164 %} 2165 ins_pipe( fpu_reg_reg ); 2166 %} 2167 2168 instruct Repl4I_imm(vecX dst, immI con) %{ 2169 predicate(n->as_Vector()->length() == 4); 2170 match(Set dst (ReplicateI con)); 2171 format %{ "movq $dst,[$constantaddress]\t! replicate4I($con)\n\t" 2172 "punpcklqdq $dst,$dst" %} 2173 ins_encode %{ 2174 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 2175 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2176 %} 2177 ins_pipe( pipe_slow ); 2178 %} 2179 2180 instruct Repl8I_imm(vecY dst, immI con) %{ 2181 predicate(n->as_Vector()->length() == 8); 2182 match(Set dst (ReplicateI con)); 2183 format %{ "movq $dst,[$constantaddress]\t! replicate8I($con)\n\t" 2184 "punpcklqdq $dst,$dst\n\t" 2185 "vinserti128h $dst,$dst,$dst" %} 2186 ins_encode %{ 2187 __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4))); 2188 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2189 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2190 %} 2191 ins_pipe( pipe_slow ); 2192 %} 2193 2194 // Integer could be loaded into xmm register directly from memory. 
// Replicate integer zero into a 4-element (128-bit) vector by xoring the
// destination with itself.
// Fix: the format string read "replicate4I zero)" with a stray closing
// parenthesis; siblings Repl2I_zero and Repl8I_zero use "replicateNI zero".
instruct Repl4I_zero(vecX dst, immI0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI zero));
  format %{ "pxor $dst,$dst\t! replicate4I zero" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( fpu_reg_reg );
%}
replicate2L"%} 2302 ins_encode %{ 2303 __ movdl($dst$$XMMRegister, $src$$Register); 2304 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 2305 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 2306 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2307 %} 2308 ins_pipe( pipe_slow ); 2309 %} 2310 2311 instruct Repl4L(vecY dst, eRegL src, regD tmp) %{ 2312 predicate(n->as_Vector()->length() == 4); 2313 match(Set dst (ReplicateL src)); 2314 effect(TEMP dst, USE src, TEMP tmp); 2315 format %{ "movdl $dst,$src.lo\n\t" 2316 "movdl $tmp,$src.hi\n\t" 2317 "punpckldq $dst,$tmp\n\t" 2318 "punpcklqdq $dst,$dst\n\t" 2319 "vinserti128h $dst,$dst,$dst\t! replicate4L" %} 2320 ins_encode %{ 2321 __ movdl($dst$$XMMRegister, $src$$Register); 2322 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 2323 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 2324 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2325 __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2326 %} 2327 ins_pipe( pipe_slow ); 2328 %} 2329 #endif // _LP64 2330 2331 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 2332 instruct Repl2L_imm(vecX dst, immL con) %{ 2333 predicate(n->as_Vector()->length() == 2); 2334 match(Set dst (ReplicateL con)); 2335 format %{ "movq $dst,[$constantaddress]\n\t" 2336 "punpcklqdq $dst,$dst\t! replicate2L($con)" %} 2337 ins_encode %{ 2338 __ movq($dst$$XMMRegister, $constantaddress($con)); 2339 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 2340 %} 2341 ins_pipe( pipe_slow ); 2342 %} 2343 2344 instruct Repl4L_imm(vecY dst, immL con) %{ 2345 predicate(n->as_Vector()->length() == 4); 2346 match(Set dst (ReplicateL con)); 2347 format %{ "movq $dst,[$constantaddress]\n\t" 2348 "punpcklqdq $dst,$dst\n\t" 2349 "vinserti128h $dst,$dst,$dst\t! 
// Replicate long zero into a 4-element (256-bit) vector by xoring the
// destination with itself.
instruct Repl4L_zero(vecY dst, immL0 zero) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateL zero));
  format %{ "vpxor $dst,$dst,$dst\t! replicate4L zero" %}
  ins_encode %{
    // 256-bit vpxor is an AVX2 instruction (per the sibling comments in
    // this file, "AVX2 will have it"). NOTE(review): the predicate only
    // checks vector length, not UseAVX — presumably 4-long vectors are
    // never formed unless AVX2 is available; confirm against the
    // Matcher's max-vector-size logic.
    bool vector256 = true;
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
  %}
  ins_pipe( fpu_reg_reg );
%}
replicate4F zero" %} 2456 ins_encode %{ 2457 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 2458 %} 2459 ins_pipe( fpu_reg_reg ); 2460 %} 2461 2462 instruct Repl8F_zero(vecY dst, immF0 zero) %{ 2463 predicate(n->as_Vector()->length() == 8); 2464 match(Set dst (ReplicateF zero)); 2465 format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %} 2466 ins_encode %{ 2467 bool vector256 = true; 2468 __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2469 %} 2470 ins_pipe( fpu_reg_reg ); 2471 %} 2472 2473 // Replicate double (8 bytes) scalar to be vector 2474 instruct Repl2D(vecX dst, regD src) %{ 2475 predicate(n->as_Vector()->length() == 2); 2476 match(Set dst (ReplicateD src)); 2477 format %{ "pshufd $dst,$src,0x44\t! replicate2D" %} 2478 ins_encode %{ 2479 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 2480 %} 2481 ins_pipe( pipe_slow ); 2482 %} 2483 2484 instruct Repl4D(vecY dst, regD src) %{ 2485 predicate(n->as_Vector()->length() == 4); 2486 match(Set dst (ReplicateD src)); 2487 format %{ "pshufd $dst,$src,0x44\n\t" 2488 "vinsertf128h $dst,$dst,$dst\t! replicate4D" %} 2489 ins_encode %{ 2490 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 2491 __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister); 2492 %} 2493 ins_pipe( pipe_slow ); 2494 %} 2495 2496 // Replicate double (8 byte) scalar zero to be vector 2497 instruct Repl2D_zero(vecX dst, immD0 zero) %{ 2498 predicate(n->as_Vector()->length() == 2); 2499 match(Set dst (ReplicateD zero)); 2500 format %{ "xorpd $dst,$dst\t! replicate2D zero" %} 2501 ins_encode %{ 2502 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 2503 %} 2504 ins_pipe( fpu_reg_reg ); 2505 %} 2506 2507 instruct Repl4D_zero(vecY dst, immD0 zero) %{ 2508 predicate(n->as_Vector()->length() == 4); 2509 match(Set dst (ReplicateD zero)); 2510 format %{ "vxorpd $dst,$dst,$dst,vect256\t! 
replicate4D zero" %} 2511 ins_encode %{ 2512 bool vector256 = true; 2513 __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256); 2514 %} 2515 ins_pipe( fpu_reg_reg ); 2516 %} 2517 2518 // ====================VECTOR ARITHMETIC======================================= 2519 2520 // --------------------------------- ADD -------------------------------------- 2521 2522 // Bytes vector add 2523 instruct vadd4B(vecS dst, vecS src) %{ 2524 predicate(n->as_Vector()->length() == 4); 2525 match(Set dst (AddVB dst src)); 2526 format %{ "paddb $dst,$src\t! add packed4B" %} 2527 ins_encode %{ 2528 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 2529 %} 2530 ins_pipe( pipe_slow ); 2531 %} 2532 2533 instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{ 2534 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 2535 match(Set dst (AddVB src1 src2)); 2536 format %{ "vpaddb $dst,$src1,$src2\t! add packed4B" %} 2537 ins_encode %{ 2538 bool vector256 = false; 2539 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2540 %} 2541 ins_pipe( pipe_slow ); 2542 %} 2543 2544 instruct vadd8B(vecD dst, vecD src) %{ 2545 predicate(n->as_Vector()->length() == 8); 2546 match(Set dst (AddVB dst src)); 2547 format %{ "paddb $dst,$src\t! add packed8B" %} 2548 ins_encode %{ 2549 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 2550 %} 2551 ins_pipe( pipe_slow ); 2552 %} 2553 2554 instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{ 2555 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 2556 match(Set dst (AddVB src1 src2)); 2557 format %{ "vpaddb $dst,$src1,$src2\t! add packed8B" %} 2558 ins_encode %{ 2559 bool vector256 = false; 2560 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2561 %} 2562 ins_pipe( pipe_slow ); 2563 %} 2564 2565 instruct vadd16B(vecX dst, vecX src) %{ 2566 predicate(n->as_Vector()->length() == 16); 2567 match(Set dst (AddVB dst src)); 2568 format %{ "paddb $dst,$src\t! 
// 256-bit packed byte add (three-operand AVX form).
instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
  // UseAVX > 1 (not > 0) because 256-bit integer vpaddb needs AVX2;
  // the 128-bit forms above only require AVX (UseAVX > 0). This matches
  // the "AVX2 will have it" notes elsewhere in this file.
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packed32B" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}
add packed2S" %} 2624 ins_encode %{ 2625 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 2626 %} 2627 ins_pipe( pipe_slow ); 2628 %} 2629 2630 instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{ 2631 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 2632 match(Set dst (AddVS src1 src2)); 2633 format %{ "vpaddw $dst,$src1,$src2\t! add packed2S" %} 2634 ins_encode %{ 2635 bool vector256 = false; 2636 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2637 %} 2638 ins_pipe( pipe_slow ); 2639 %} 2640 2641 instruct vadd4S(vecD dst, vecD src) %{ 2642 predicate(n->as_Vector()->length() == 4); 2643 match(Set dst (AddVS dst src)); 2644 format %{ "paddw $dst,$src\t! add packed4S" %} 2645 ins_encode %{ 2646 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 2647 %} 2648 ins_pipe( pipe_slow ); 2649 %} 2650 2651 instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{ 2652 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 2653 match(Set dst (AddVS src1 src2)); 2654 format %{ "vpaddw $dst,$src1,$src2\t! add packed4S" %} 2655 ins_encode %{ 2656 bool vector256 = false; 2657 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 2658 %} 2659 ins_pipe( pipe_slow ); 2660 %} 2661 2662 instruct vadd8S(vecX dst, vecX src) %{ 2663 predicate(n->as_Vector()->length() == 8); 2664 match(Set dst (AddVS dst src)); 2665 format %{ "paddw $dst,$src\t! add packed8S" %} 2666 ins_encode %{ 2667 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 2668 %} 2669 ins_pipe( pipe_slow ); 2670 %} 2671 2672 instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{ 2673 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 2674 match(Set dst (AddVS src1 src2)); 2675 format %{ "vpaddw $dst,$src1,$src2\t! 
add packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector add
instruct vadd2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed2I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packed4I" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector add
instruct vadd2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packed2L" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! 
add packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector add
instruct vadd2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed2F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed2F" %}
  ins_encode %{
    bool vector256 = false;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packed4F" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector add
instruct vadd2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packed2D" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- SUB --------------------------------------

// Bytes vector sub
instruct vsub4B(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed4B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed4B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! 
sub packed8B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed8B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packed16B" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed16B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed16B" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packed32B" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packed32B" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector sub
instruct vsub2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed2S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed4S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packed8S" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector sub
instruct vsub2I(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed2I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packed4I" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! 
sub packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector sub
instruct vsub2L(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packed2L" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed2L" %}
  ins_encode %{
    bool vector256 = false;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packed4L" %}
  ins_encode %{
    bool vector256 = true;
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector sub
instruct vsub2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed2F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed2F" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packed4F" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector sub
instruct vsub2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! sub packed2D" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! 
sub packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- MUL --------------------------------------

// Shorts/Chars vector mul
instruct vmul2S(vecS dst, vecS src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed2S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed4S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packed8S" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector mul (sse4_1)
instruct vmul2I(vecD dst, vecD src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed2I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I(vecX dst, vecX src) %{
  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packed4I" %}
  ins_encode %{
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
  ins_encode %{
    bool vector256 = false;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
  ins_encode %{
    bool vector256 = true;
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector mul
instruct vmul2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed2F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed2F" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packed4F" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! 
mul packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packed8F" %}
  ins_encode %{
    bool vector256 = true;
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector mul
instruct vmul2D(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packed2D" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed2D" %}
  ins_encode %{
    bool vector256 = false;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packed4D" %}
  ins_encode %{
    bool vector256 = true;
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- DIV --------------------------------------

// Floats vector div
instruct vdiv2F(vecD dst, vecD src) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed2F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed2F" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F(vecX dst, vecX src) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packed4F" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packed4F" %}
  ins_encode %{
    bool vector256 = false;
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! 
div packed8F" %} 3802 ins_encode %{ 3803 bool vector256 = true; 3804 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3805 %} 3806 ins_pipe( pipe_slow ); 3807 %} 3808 3809 instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{ 3810 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 3811 match(Set dst (DivVF src (LoadVector mem))); 3812 format %{ "vdivps $dst,$src,$mem\t! div packed8F" %} 3813 ins_encode %{ 3814 bool vector256 = true; 3815 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3816 %} 3817 ins_pipe( pipe_slow ); 3818 %} 3819 3820 // Doubles vector div 3821 instruct vdiv2D(vecX dst, vecX src) %{ 3822 predicate(n->as_Vector()->length() == 2); 3823 match(Set dst (DivVD dst src)); 3824 format %{ "divpd $dst,$src\t! div packed2D" %} 3825 ins_encode %{ 3826 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 3827 %} 3828 ins_pipe( pipe_slow ); 3829 %} 3830 3831 instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{ 3832 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3833 match(Set dst (DivVD src1 src2)); 3834 format %{ "vdivpd $dst,$src1,$src2\t! div packed2D" %} 3835 ins_encode %{ 3836 bool vector256 = false; 3837 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3838 %} 3839 ins_pipe( pipe_slow ); 3840 %} 3841 3842 instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{ 3843 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3844 match(Set dst (DivVD src (LoadVector mem))); 3845 format %{ "vdivpd $dst,$src,$mem\t! div packed2D" %} 3846 ins_encode %{ 3847 bool vector256 = false; 3848 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3849 %} 3850 ins_pipe( pipe_slow ); 3851 %} 3852 3853 instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{ 3854 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3855 match(Set dst (DivVD src1 src2)); 3856 format %{ "vdivpd $dst,$src1,$src2\t! 
div packed4D" %} 3857 ins_encode %{ 3858 bool vector256 = true; 3859 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 3860 %} 3861 ins_pipe( pipe_slow ); 3862 %} 3863 3864 instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{ 3865 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3866 match(Set dst (DivVD src (LoadVector mem))); 3867 format %{ "vdivpd $dst,$src,$mem\t! div packed4D" %} 3868 ins_encode %{ 3869 bool vector256 = true; 3870 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 3871 %} 3872 ins_pipe( pipe_slow ); 3873 %} 3874 3875 // ------------------------------ Shift --------------------------------------- 3876 3877 // Left and right shift count vectors are the same on x86 3878 // (only lowest bits of xmm reg are used for count). 3879 instruct vshiftcnt(vecS dst, rRegI cnt) %{ 3880 match(Set dst (LShiftCntV cnt)); 3881 match(Set dst (RShiftCntV cnt)); 3882 format %{ "movd $dst,$cnt\t! load shift count" %} 3883 ins_encode %{ 3884 __ movdl($dst$$XMMRegister, $cnt$$Register); 3885 %} 3886 ins_pipe( pipe_slow ); 3887 %} 3888 3889 // ------------------------------ LeftShift ----------------------------------- 3890 3891 // Shorts/Chars vector left shift 3892 instruct vsll2S(vecS dst, vecS shift) %{ 3893 predicate(n->as_Vector()->length() == 2); 3894 match(Set dst (LShiftVS dst shift)); 3895 format %{ "psllw $dst,$shift\t! left shift packed2S" %} 3896 ins_encode %{ 3897 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 3898 %} 3899 ins_pipe( pipe_slow ); 3900 %} 3901 3902 instruct vsll2S_imm(vecS dst, immI8 shift) %{ 3903 predicate(n->as_Vector()->length() == 2); 3904 match(Set dst (LShiftVS dst shift)); 3905 format %{ "psllw $dst,$shift\t! 
left shift packed2S" %} 3906 ins_encode %{ 3907 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 3908 %} 3909 ins_pipe( pipe_slow ); 3910 %} 3911 3912 instruct vsll2S_reg(vecS dst, vecS src, vecS shift) %{ 3913 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3914 match(Set dst (LShiftVS src shift)); 3915 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 3916 ins_encode %{ 3917 bool vector256 = false; 3918 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 3919 %} 3920 ins_pipe( pipe_slow ); 3921 %} 3922 3923 instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ 3924 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 3925 match(Set dst (LShiftVS src shift)); 3926 format %{ "vpsllw $dst,$src,$shift\t! left shift packed2S" %} 3927 ins_encode %{ 3928 bool vector256 = false; 3929 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 3930 %} 3931 ins_pipe( pipe_slow ); 3932 %} 3933 3934 instruct vsll4S(vecD dst, vecS shift) %{ 3935 predicate(n->as_Vector()->length() == 4); 3936 match(Set dst (LShiftVS dst shift)); 3937 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 3938 ins_encode %{ 3939 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 3940 %} 3941 ins_pipe( pipe_slow ); 3942 %} 3943 3944 instruct vsll4S_imm(vecD dst, immI8 shift) %{ 3945 predicate(n->as_Vector()->length() == 4); 3946 match(Set dst (LShiftVS dst shift)); 3947 format %{ "psllw $dst,$shift\t! left shift packed4S" %} 3948 ins_encode %{ 3949 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 3950 %} 3951 ins_pipe( pipe_slow ); 3952 %} 3953 3954 instruct vsll4S_reg(vecD dst, vecD src, vecS shift) %{ 3955 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3956 match(Set dst (LShiftVS src shift)); 3957 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed4S" %} 3958 ins_encode %{ 3959 bool vector256 = false; 3960 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 3961 %} 3962 ins_pipe( pipe_slow ); 3963 %} 3964 3965 instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ 3966 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 3967 match(Set dst (LShiftVS src shift)); 3968 format %{ "vpsllw $dst,$src,$shift\t! left shift packed4S" %} 3969 ins_encode %{ 3970 bool vector256 = false; 3971 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 3972 %} 3973 ins_pipe( pipe_slow ); 3974 %} 3975 3976 instruct vsll8S(vecX dst, vecS shift) %{ 3977 predicate(n->as_Vector()->length() == 8); 3978 match(Set dst (LShiftVS dst shift)); 3979 format %{ "psllw $dst,$shift\t! left shift packed8S" %} 3980 ins_encode %{ 3981 __ psllw($dst$$XMMRegister, $shift$$XMMRegister); 3982 %} 3983 ins_pipe( pipe_slow ); 3984 %} 3985 3986 instruct vsll8S_imm(vecX dst, immI8 shift) %{ 3987 predicate(n->as_Vector()->length() == 8); 3988 match(Set dst (LShiftVS dst shift)); 3989 format %{ "psllw $dst,$shift\t! left shift packed8S" %} 3990 ins_encode %{ 3991 __ psllw($dst$$XMMRegister, (int)$shift$$constant); 3992 %} 3993 ins_pipe( pipe_slow ); 3994 %} 3995 3996 instruct vsll8S_reg(vecX dst, vecX src, vecS shift) %{ 3997 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 3998 match(Set dst (LShiftVS src shift)); 3999 format %{ "vpsllw $dst,$src,$shift\t! left shift packed8S" %} 4000 ins_encode %{ 4001 bool vector256 = false; 4002 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4003 %} 4004 ins_pipe( pipe_slow ); 4005 %} 4006 4007 instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ 4008 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4009 match(Set dst (LShiftVS src shift)); 4010 format %{ "vpsllw $dst,$src,$shift\t! 
left shift packed8S" %} 4011 ins_encode %{ 4012 bool vector256 = false; 4013 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4014 %} 4015 ins_pipe( pipe_slow ); 4016 %} 4017 4018 instruct vsll16S_reg(vecY dst, vecY src, vecS shift) %{ 4019 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 4020 match(Set dst (LShiftVS src shift)); 4021 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 4022 ins_encode %{ 4023 bool vector256 = true; 4024 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4025 %} 4026 ins_pipe( pipe_slow ); 4027 %} 4028 4029 instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ 4030 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 4031 match(Set dst (LShiftVS src shift)); 4032 format %{ "vpsllw $dst,$src,$shift\t! left shift packed16S" %} 4033 ins_encode %{ 4034 bool vector256 = true; 4035 __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4036 %} 4037 ins_pipe( pipe_slow ); 4038 %} 4039 4040 // Integers vector left shift 4041 instruct vsll2I(vecD dst, vecS shift) %{ 4042 predicate(n->as_Vector()->length() == 2); 4043 match(Set dst (LShiftVI dst shift)); 4044 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 4045 ins_encode %{ 4046 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 4047 %} 4048 ins_pipe( pipe_slow ); 4049 %} 4050 4051 instruct vsll2I_imm(vecD dst, immI8 shift) %{ 4052 predicate(n->as_Vector()->length() == 2); 4053 match(Set dst (LShiftVI dst shift)); 4054 format %{ "pslld $dst,$shift\t! left shift packed2I" %} 4055 ins_encode %{ 4056 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 4057 %} 4058 ins_pipe( pipe_slow ); 4059 %} 4060 4061 instruct vsll2I_reg(vecD dst, vecD src, vecS shift) %{ 4062 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4063 match(Set dst (LShiftVI src shift)); 4064 format %{ "vpslld $dst,$src,$shift\t! 
left shift packed2I" %} 4065 ins_encode %{ 4066 bool vector256 = false; 4067 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4068 %} 4069 ins_pipe( pipe_slow ); 4070 %} 4071 4072 instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 4073 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4074 match(Set dst (LShiftVI src shift)); 4075 format %{ "vpslld $dst,$src,$shift\t! left shift packed2I" %} 4076 ins_encode %{ 4077 bool vector256 = false; 4078 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4079 %} 4080 ins_pipe( pipe_slow ); 4081 %} 4082 4083 instruct vsll4I(vecX dst, vecS shift) %{ 4084 predicate(n->as_Vector()->length() == 4); 4085 match(Set dst (LShiftVI dst shift)); 4086 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 4087 ins_encode %{ 4088 __ pslld($dst$$XMMRegister, $shift$$XMMRegister); 4089 %} 4090 ins_pipe( pipe_slow ); 4091 %} 4092 4093 instruct vsll4I_imm(vecX dst, immI8 shift) %{ 4094 predicate(n->as_Vector()->length() == 4); 4095 match(Set dst (LShiftVI dst shift)); 4096 format %{ "pslld $dst,$shift\t! left shift packed4I" %} 4097 ins_encode %{ 4098 __ pslld($dst$$XMMRegister, (int)$shift$$constant); 4099 %} 4100 ins_pipe( pipe_slow ); 4101 %} 4102 4103 instruct vsll4I_reg(vecX dst, vecX src, vecS shift) %{ 4104 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4105 match(Set dst (LShiftVI src shift)); 4106 format %{ "vpslld $dst,$src,$shift\t! left shift packed4I" %} 4107 ins_encode %{ 4108 bool vector256 = false; 4109 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4110 %} 4111 ins_pipe( pipe_slow ); 4112 %} 4113 4114 instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 4115 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4116 match(Set dst (LShiftVI src shift)); 4117 format %{ "vpslld $dst,$src,$shift\t! 
left shift packed4I" %} 4118 ins_encode %{ 4119 bool vector256 = false; 4120 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4121 %} 4122 ins_pipe( pipe_slow ); 4123 %} 4124 4125 instruct vsll8I_reg(vecY dst, vecY src, vecS shift) %{ 4126 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 4127 match(Set dst (LShiftVI src shift)); 4128 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 4129 ins_encode %{ 4130 bool vector256 = true; 4131 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4132 %} 4133 ins_pipe( pipe_slow ); 4134 %} 4135 4136 instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 4137 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 4138 match(Set dst (LShiftVI src shift)); 4139 format %{ "vpslld $dst,$src,$shift\t! left shift packed8I" %} 4140 ins_encode %{ 4141 bool vector256 = true; 4142 __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4143 %} 4144 ins_pipe( pipe_slow ); 4145 %} 4146 4147 // Longs vector left shift 4148 instruct vsll2L(vecX dst, vecS shift) %{ 4149 predicate(n->as_Vector()->length() == 2); 4150 match(Set dst (LShiftVL dst shift)); 4151 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 4152 ins_encode %{ 4153 __ psllq($dst$$XMMRegister, $shift$$XMMRegister); 4154 %} 4155 ins_pipe( pipe_slow ); 4156 %} 4157 4158 instruct vsll2L_imm(vecX dst, immI8 shift) %{ 4159 predicate(n->as_Vector()->length() == 2); 4160 match(Set dst (LShiftVL dst shift)); 4161 format %{ "psllq $dst,$shift\t! left shift packed2L" %} 4162 ins_encode %{ 4163 __ psllq($dst$$XMMRegister, (int)$shift$$constant); 4164 %} 4165 ins_pipe( pipe_slow ); 4166 %} 4167 4168 instruct vsll2L_reg(vecX dst, vecX src, vecS shift) %{ 4169 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4170 match(Set dst (LShiftVL src shift)); 4171 format %{ "vpsllq $dst,$src,$shift\t! 
left shift packed2L" %} 4172 ins_encode %{ 4173 bool vector256 = false; 4174 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4175 %} 4176 ins_pipe( pipe_slow ); 4177 %} 4178 4179 instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 4180 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4181 match(Set dst (LShiftVL src shift)); 4182 format %{ "vpsllq $dst,$src,$shift\t! left shift packed2L" %} 4183 ins_encode %{ 4184 bool vector256 = false; 4185 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4186 %} 4187 ins_pipe( pipe_slow ); 4188 %} 4189 4190 instruct vsll4L_reg(vecY dst, vecY src, vecS shift) %{ 4191 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 4192 match(Set dst (LShiftVL src shift)); 4193 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} 4194 ins_encode %{ 4195 bool vector256 = true; 4196 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4197 %} 4198 ins_pipe( pipe_slow ); 4199 %} 4200 4201 instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ 4202 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 4203 match(Set dst (LShiftVL src shift)); 4204 format %{ "vpsllq $dst,$src,$shift\t! left shift packed4L" %} 4205 ins_encode %{ 4206 bool vector256 = true; 4207 __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4208 %} 4209 ins_pipe( pipe_slow ); 4210 %} 4211 4212 // ----------------------- LogicalRightShift ----------------------------------- 4213 4214 // Shorts vector logical right shift produces incorrect Java result 4215 // for negative data because java code convert short value into int with 4216 // sign extension before a shift. But char vectors are fine since chars are 4217 // unsigned values. 4218 4219 instruct vsrl2S(vecS dst, vecS shift) %{ 4220 predicate(n->as_Vector()->length() == 2); 4221 match(Set dst (URShiftVS dst shift)); 4222 format %{ "psrlw $dst,$shift\t! 
logical right shift packed2S" %} 4223 ins_encode %{ 4224 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 4225 %} 4226 ins_pipe( pipe_slow ); 4227 %} 4228 4229 instruct vsrl2S_imm(vecS dst, immI8 shift) %{ 4230 predicate(n->as_Vector()->length() == 2); 4231 match(Set dst (URShiftVS dst shift)); 4232 format %{ "psrlw $dst,$shift\t! logical right shift packed2S" %} 4233 ins_encode %{ 4234 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 4235 %} 4236 ins_pipe( pipe_slow ); 4237 %} 4238 4239 instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{ 4240 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4241 match(Set dst (URShiftVS src shift)); 4242 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 4243 ins_encode %{ 4244 bool vector256 = false; 4245 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4246 %} 4247 ins_pipe( pipe_slow ); 4248 %} 4249 4250 instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ 4251 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4252 match(Set dst (URShiftVS src shift)); 4253 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed2S" %} 4254 ins_encode %{ 4255 bool vector256 = false; 4256 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4257 %} 4258 ins_pipe( pipe_slow ); 4259 %} 4260 4261 instruct vsrl4S(vecD dst, vecS shift) %{ 4262 predicate(n->as_Vector()->length() == 4); 4263 match(Set dst (URShiftVS dst shift)); 4264 format %{ "psrlw $dst,$shift\t! logical right shift packed4S" %} 4265 ins_encode %{ 4266 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 4267 %} 4268 ins_pipe( pipe_slow ); 4269 %} 4270 4271 instruct vsrl4S_imm(vecD dst, immI8 shift) %{ 4272 predicate(n->as_Vector()->length() == 4); 4273 match(Set dst (URShiftVS dst shift)); 4274 format %{ "psrlw $dst,$shift\t! 
logical right shift packed4S" %} 4275 ins_encode %{ 4276 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 4277 %} 4278 ins_pipe( pipe_slow ); 4279 %} 4280 4281 instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{ 4282 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4283 match(Set dst (URShiftVS src shift)); 4284 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 4285 ins_encode %{ 4286 bool vector256 = false; 4287 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4288 %} 4289 ins_pipe( pipe_slow ); 4290 %} 4291 4292 instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ 4293 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4294 match(Set dst (URShiftVS src shift)); 4295 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed4S" %} 4296 ins_encode %{ 4297 bool vector256 = false; 4298 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4299 %} 4300 ins_pipe( pipe_slow ); 4301 %} 4302 4303 instruct vsrl8S(vecX dst, vecS shift) %{ 4304 predicate(n->as_Vector()->length() == 8); 4305 match(Set dst (URShiftVS dst shift)); 4306 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 4307 ins_encode %{ 4308 __ psrlw($dst$$XMMRegister, $shift$$XMMRegister); 4309 %} 4310 ins_pipe( pipe_slow ); 4311 %} 4312 4313 instruct vsrl8S_imm(vecX dst, immI8 shift) %{ 4314 predicate(n->as_Vector()->length() == 8); 4315 match(Set dst (URShiftVS dst shift)); 4316 format %{ "psrlw $dst,$shift\t! logical right shift packed8S" %} 4317 ins_encode %{ 4318 __ psrlw($dst$$XMMRegister, (int)$shift$$constant); 4319 %} 4320 ins_pipe( pipe_slow ); 4321 %} 4322 4323 instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{ 4324 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4325 match(Set dst (URShiftVS src shift)); 4326 format %{ "vpsrlw $dst,$src,$shift\t! 
logical right shift packed8S" %} 4327 ins_encode %{ 4328 bool vector256 = false; 4329 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4330 %} 4331 ins_pipe( pipe_slow ); 4332 %} 4333 4334 instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ 4335 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4336 match(Set dst (URShiftVS src shift)); 4337 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed8S" %} 4338 ins_encode %{ 4339 bool vector256 = false; 4340 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4341 %} 4342 ins_pipe( pipe_slow ); 4343 %} 4344 4345 instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{ 4346 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 4347 match(Set dst (URShiftVS src shift)); 4348 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 4349 ins_encode %{ 4350 bool vector256 = true; 4351 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4352 %} 4353 ins_pipe( pipe_slow ); 4354 %} 4355 4356 instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ 4357 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 4358 match(Set dst (URShiftVS src shift)); 4359 format %{ "vpsrlw $dst,$src,$shift\t! logical right shift packed16S" %} 4360 ins_encode %{ 4361 bool vector256 = true; 4362 __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4363 %} 4364 ins_pipe( pipe_slow ); 4365 %} 4366 4367 // Integers vector logical right shift 4368 instruct vsrl2I(vecD dst, vecS shift) %{ 4369 predicate(n->as_Vector()->length() == 2); 4370 match(Set dst (URShiftVI dst shift)); 4371 format %{ "psrld $dst,$shift\t! 
logical right shift packed2I" %} 4372 ins_encode %{ 4373 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 4374 %} 4375 ins_pipe( pipe_slow ); 4376 %} 4377 4378 instruct vsrl2I_imm(vecD dst, immI8 shift) %{ 4379 predicate(n->as_Vector()->length() == 2); 4380 match(Set dst (URShiftVI dst shift)); 4381 format %{ "psrld $dst,$shift\t! logical right shift packed2I" %} 4382 ins_encode %{ 4383 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 4384 %} 4385 ins_pipe( pipe_slow ); 4386 %} 4387 4388 instruct vsrl2I_reg(vecD dst, vecD src, vecS shift) %{ 4389 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4390 match(Set dst (URShiftVI src shift)); 4391 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 4392 ins_encode %{ 4393 bool vector256 = false; 4394 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4395 %} 4396 ins_pipe( pipe_slow ); 4397 %} 4398 4399 instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 4400 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4401 match(Set dst (URShiftVI src shift)); 4402 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed2I" %} 4403 ins_encode %{ 4404 bool vector256 = false; 4405 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4406 %} 4407 ins_pipe( pipe_slow ); 4408 %} 4409 4410 instruct vsrl4I(vecX dst, vecS shift) %{ 4411 predicate(n->as_Vector()->length() == 4); 4412 match(Set dst (URShiftVI dst shift)); 4413 format %{ "psrld $dst,$shift\t! logical right shift packed4I" %} 4414 ins_encode %{ 4415 __ psrld($dst$$XMMRegister, $shift$$XMMRegister); 4416 %} 4417 ins_pipe( pipe_slow ); 4418 %} 4419 4420 instruct vsrl4I_imm(vecX dst, immI8 shift) %{ 4421 predicate(n->as_Vector()->length() == 4); 4422 match(Set dst (URShiftVI dst shift)); 4423 format %{ "psrld $dst,$shift\t! 
logical right shift packed4I" %} 4424 ins_encode %{ 4425 __ psrld($dst$$XMMRegister, (int)$shift$$constant); 4426 %} 4427 ins_pipe( pipe_slow ); 4428 %} 4429 4430 instruct vsrl4I_reg(vecX dst, vecX src, vecS shift) %{ 4431 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4432 match(Set dst (URShiftVI src shift)); 4433 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} 4434 ins_encode %{ 4435 bool vector256 = false; 4436 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4437 %} 4438 ins_pipe( pipe_slow ); 4439 %} 4440 4441 instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 4442 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4443 match(Set dst (URShiftVI src shift)); 4444 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed4I" %} 4445 ins_encode %{ 4446 bool vector256 = false; 4447 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4448 %} 4449 ins_pipe( pipe_slow ); 4450 %} 4451 4452 instruct vsrl8I_reg(vecY dst, vecY src, vecS shift) %{ 4453 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 4454 match(Set dst (URShiftVI src shift)); 4455 format %{ "vpsrld $dst,$src,$shift\t! logical right shift packed8I" %} 4456 ins_encode %{ 4457 bool vector256 = true; 4458 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4459 %} 4460 ins_pipe( pipe_slow ); 4461 %} 4462 4463 instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 4464 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 4465 match(Set dst (URShiftVI src shift)); 4466 format %{ "vpsrld $dst,$src,$shift\t! 
logical right shift packed8I" %} 4467 ins_encode %{ 4468 bool vector256 = true; 4469 __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4470 %} 4471 ins_pipe( pipe_slow ); 4472 %} 4473 4474 // Longs vector logical right shift 4475 instruct vsrl2L(vecX dst, vecS shift) %{ 4476 predicate(n->as_Vector()->length() == 2); 4477 match(Set dst (URShiftVL dst shift)); 4478 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} 4479 ins_encode %{ 4480 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 4481 %} 4482 ins_pipe( pipe_slow ); 4483 %} 4484 4485 instruct vsrl2L_imm(vecX dst, immI8 shift) %{ 4486 predicate(n->as_Vector()->length() == 2); 4487 match(Set dst (URShiftVL dst shift)); 4488 format %{ "psrlq $dst,$shift\t! logical right shift packed2L" %} 4489 ins_encode %{ 4490 __ psrlq($dst$$XMMRegister, (int)$shift$$constant); 4491 %} 4492 ins_pipe( pipe_slow ); 4493 %} 4494 4495 instruct vsrl2L_reg(vecX dst, vecX src, vecS shift) %{ 4496 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4497 match(Set dst (URShiftVL src shift)); 4498 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} 4499 ins_encode %{ 4500 bool vector256 = false; 4501 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4502 %} 4503 ins_pipe( pipe_slow ); 4504 %} 4505 4506 instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{ 4507 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4508 match(Set dst (URShiftVL src shift)); 4509 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed2L" %} 4510 ins_encode %{ 4511 bool vector256 = false; 4512 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4513 %} 4514 ins_pipe( pipe_slow ); 4515 %} 4516 4517 instruct vsrl4L_reg(vecY dst, vecY src, vecS shift) %{ 4518 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 4519 match(Set dst (URShiftVL src shift)); 4520 format %{ "vpsrlq $dst,$src,$shift\t! 
logical right shift packed4L" %} 4521 ins_encode %{ 4522 bool vector256 = true; 4523 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4524 %} 4525 ins_pipe( pipe_slow ); 4526 %} 4527 4528 instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{ 4529 predicate(UseAVX > 1 && n->as_Vector()->length() == 4); 4530 match(Set dst (URShiftVL src shift)); 4531 format %{ "vpsrlq $dst,$src,$shift\t! logical right shift packed4L" %} 4532 ins_encode %{ 4533 bool vector256 = true; 4534 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4535 %} 4536 ins_pipe( pipe_slow ); 4537 %} 4538 4539 // ------------------- ArithmeticRightShift ----------------------------------- 4540 4541 // Shorts/Chars vector arithmetic right shift 4542 instruct vsra2S(vecS dst, vecS shift) %{ 4543 predicate(n->as_Vector()->length() == 2); 4544 match(Set dst (RShiftVS dst shift)); 4545 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} 4546 ins_encode %{ 4547 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 4548 %} 4549 ins_pipe( pipe_slow ); 4550 %} 4551 4552 instruct vsra2S_imm(vecS dst, immI8 shift) %{ 4553 predicate(n->as_Vector()->length() == 2); 4554 match(Set dst (RShiftVS dst shift)); 4555 format %{ "psraw $dst,$shift\t! arithmetic right shift packed2S" %} 4556 ins_encode %{ 4557 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 4558 %} 4559 ins_pipe( pipe_slow ); 4560 %} 4561 4562 instruct vsra2S_reg(vecS dst, vecS src, vecS shift) %{ 4563 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4564 match(Set dst (RShiftVS src shift)); 4565 format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed2S" %} 4566 ins_encode %{ 4567 bool vector256 = false; 4568 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4569 %} 4570 ins_pipe( pipe_slow ); 4571 %} 4572 4573 instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{ 4574 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4575 match(Set dst (RShiftVS src shift)); 4576 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed2S" %} 4577 ins_encode %{ 4578 bool vector256 = false; 4579 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4580 %} 4581 ins_pipe( pipe_slow ); 4582 %} 4583 4584 instruct vsra4S(vecD dst, vecS shift) %{ 4585 predicate(n->as_Vector()->length() == 4); 4586 match(Set dst (RShiftVS dst shift)); 4587 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} 4588 ins_encode %{ 4589 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 4590 %} 4591 ins_pipe( pipe_slow ); 4592 %} 4593 4594 instruct vsra4S_imm(vecD dst, immI8 shift) %{ 4595 predicate(n->as_Vector()->length() == 4); 4596 match(Set dst (RShiftVS dst shift)); 4597 format %{ "psraw $dst,$shift\t! arithmetic right shift packed4S" %} 4598 ins_encode %{ 4599 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 4600 %} 4601 ins_pipe( pipe_slow ); 4602 %} 4603 4604 instruct vsra4S_reg(vecD dst, vecD src, vecS shift) %{ 4605 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4606 match(Set dst (RShiftVS src shift)); 4607 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed4S" %} 4608 ins_encode %{ 4609 bool vector256 = false; 4610 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4611 %} 4612 ins_pipe( pipe_slow ); 4613 %} 4614 4615 instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{ 4616 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4617 match(Set dst (RShiftVS src shift)); 4618 format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed4S" %} 4619 ins_encode %{ 4620 bool vector256 = false; 4621 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4622 %} 4623 ins_pipe( pipe_slow ); 4624 %} 4625 4626 instruct vsra8S(vecX dst, vecS shift) %{ 4627 predicate(n->as_Vector()->length() == 8); 4628 match(Set dst (RShiftVS dst shift)); 4629 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} 4630 ins_encode %{ 4631 __ psraw($dst$$XMMRegister, $shift$$XMMRegister); 4632 %} 4633 ins_pipe( pipe_slow ); 4634 %} 4635 4636 instruct vsra8S_imm(vecX dst, immI8 shift) %{ 4637 predicate(n->as_Vector()->length() == 8); 4638 match(Set dst (RShiftVS dst shift)); 4639 format %{ "psraw $dst,$shift\t! arithmetic right shift packed8S" %} 4640 ins_encode %{ 4641 __ psraw($dst$$XMMRegister, (int)$shift$$constant); 4642 %} 4643 ins_pipe( pipe_slow ); 4644 %} 4645 4646 instruct vsra8S_reg(vecX dst, vecX src, vecS shift) %{ 4647 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4648 match(Set dst (RShiftVS src shift)); 4649 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 4650 ins_encode %{ 4651 bool vector256 = false; 4652 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4653 %} 4654 ins_pipe( pipe_slow ); 4655 %} 4656 4657 instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{ 4658 predicate(UseAVX > 0 && n->as_Vector()->length() == 8); 4659 match(Set dst (RShiftVS src shift)); 4660 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed8S" %} 4661 ins_encode %{ 4662 bool vector256 = false; 4663 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4664 %} 4665 ins_pipe( pipe_slow ); 4666 %} 4667 4668 instruct vsra16S_reg(vecY dst, vecY src, vecS shift) %{ 4669 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 4670 match(Set dst (RShiftVS src shift)); 4671 format %{ "vpsraw $dst,$src,$shift\t! 
arithmetic right shift packed16S" %} 4672 ins_encode %{ 4673 bool vector256 = true; 4674 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4675 %} 4676 ins_pipe( pipe_slow ); 4677 %} 4678 4679 instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{ 4680 predicate(UseAVX > 1 && n->as_Vector()->length() == 16); 4681 match(Set dst (RShiftVS src shift)); 4682 format %{ "vpsraw $dst,$src,$shift\t! arithmetic right shift packed16S" %} 4683 ins_encode %{ 4684 bool vector256 = true; 4685 __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4686 %} 4687 ins_pipe( pipe_slow ); 4688 %} 4689 4690 // Integers vector arithmetic right shift 4691 instruct vsra2I(vecD dst, vecS shift) %{ 4692 predicate(n->as_Vector()->length() == 2); 4693 match(Set dst (RShiftVI dst shift)); 4694 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} 4695 ins_encode %{ 4696 __ psrad($dst$$XMMRegister, $shift$$XMMRegister); 4697 %} 4698 ins_pipe( pipe_slow ); 4699 %} 4700 4701 instruct vsra2I_imm(vecD dst, immI8 shift) %{ 4702 predicate(n->as_Vector()->length() == 2); 4703 match(Set dst (RShiftVI dst shift)); 4704 format %{ "psrad $dst,$shift\t! arithmetic right shift packed2I" %} 4705 ins_encode %{ 4706 __ psrad($dst$$XMMRegister, (int)$shift$$constant); 4707 %} 4708 ins_pipe( pipe_slow ); 4709 %} 4710 4711 instruct vsra2I_reg(vecD dst, vecD src, vecS shift) %{ 4712 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4713 match(Set dst (RShiftVI src shift)); 4714 format %{ "vpsrad $dst,$src,$shift\t! 
arithmetic right shift packed2I" %} 4715 ins_encode %{ 4716 bool vector256 = false; 4717 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4718 %} 4719 ins_pipe( pipe_slow ); 4720 %} 4721 4722 instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{ 4723 predicate(UseAVX > 0 && n->as_Vector()->length() == 2); 4724 match(Set dst (RShiftVI src shift)); 4725 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed2I" %} 4726 ins_encode %{ 4727 bool vector256 = false; 4728 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4729 %} 4730 ins_pipe( pipe_slow ); 4731 %} 4732 4733 instruct vsra4I(vecX dst, vecS shift) %{ 4734 predicate(n->as_Vector()->length() == 4); 4735 match(Set dst (RShiftVI dst shift)); 4736 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %} 4737 ins_encode %{ 4738 __ psrad($dst$$XMMRegister, $shift$$XMMRegister); 4739 %} 4740 ins_pipe( pipe_slow ); 4741 %} 4742 4743 instruct vsra4I_imm(vecX dst, immI8 shift) %{ 4744 predicate(n->as_Vector()->length() == 4); 4745 match(Set dst (RShiftVI dst shift)); 4746 format %{ "psrad $dst,$shift\t! arithmetic right shift packed4I" %} 4747 ins_encode %{ 4748 __ psrad($dst$$XMMRegister, (int)$shift$$constant); 4749 %} 4750 ins_pipe( pipe_slow ); 4751 %} 4752 4753 instruct vsra4I_reg(vecX dst, vecX src, vecS shift) %{ 4754 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4755 match(Set dst (RShiftVI src shift)); 4756 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed4I" %} 4757 ins_encode %{ 4758 bool vector256 = false; 4759 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4760 %} 4761 ins_pipe( pipe_slow ); 4762 %} 4763 4764 instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{ 4765 predicate(UseAVX > 0 && n->as_Vector()->length() == 4); 4766 match(Set dst (RShiftVI src shift)); 4767 format %{ "vpsrad $dst,$src,$shift\t! 
arithmetic right shift packed4I" %} 4768 ins_encode %{ 4769 bool vector256 = false; 4770 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4771 %} 4772 ins_pipe( pipe_slow ); 4773 %} 4774 4775 instruct vsra8I_reg(vecY dst, vecY src, vecS shift) %{ 4776 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 4777 match(Set dst (RShiftVI src shift)); 4778 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} 4779 ins_encode %{ 4780 bool vector256 = true; 4781 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256); 4782 %} 4783 ins_pipe( pipe_slow ); 4784 %} 4785 4786 instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{ 4787 predicate(UseAVX > 1 && n->as_Vector()->length() == 8); 4788 match(Set dst (RShiftVI src shift)); 4789 format %{ "vpsrad $dst,$src,$shift\t! arithmetic right shift packed8I" %} 4790 ins_encode %{ 4791 bool vector256 = true; 4792 __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256); 4793 %} 4794 ins_pipe( pipe_slow ); 4795 %} 4796 4797 // There are no longs vector arithmetic right shift instructions. 4798 4799 4800 // --------------------------------- AND -------------------------------------- 4801 4802 instruct vand4B(vecS dst, vecS src) %{ 4803 predicate(n->as_Vector()->length_in_bytes() == 4); 4804 match(Set dst (AndV dst src)); 4805 format %{ "pand $dst,$src\t! and vectors (4 bytes)" %} 4806 ins_encode %{ 4807 __ pand($dst$$XMMRegister, $src$$XMMRegister); 4808 %} 4809 ins_pipe( pipe_slow ); 4810 %} 4811 4812 instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{ 4813 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 4814 match(Set dst (AndV src1 src2)); 4815 format %{ "vpand $dst,$src1,$src2\t! 
and vectors (4 bytes)" %} 4816 ins_encode %{ 4817 bool vector256 = false; 4818 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4819 %} 4820 ins_pipe( pipe_slow ); 4821 %} 4822 4823 instruct vand8B(vecD dst, vecD src) %{ 4824 predicate(n->as_Vector()->length_in_bytes() == 8); 4825 match(Set dst (AndV dst src)); 4826 format %{ "pand $dst,$src\t! and vectors (8 bytes)" %} 4827 ins_encode %{ 4828 __ pand($dst$$XMMRegister, $src$$XMMRegister); 4829 %} 4830 ins_pipe( pipe_slow ); 4831 %} 4832 4833 instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{ 4834 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 4835 match(Set dst (AndV src1 src2)); 4836 format %{ "vpand $dst,$src1,$src2\t! and vectors (8 bytes)" %} 4837 ins_encode %{ 4838 bool vector256 = false; 4839 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4840 %} 4841 ins_pipe( pipe_slow ); 4842 %} 4843 4844 instruct vand16B(vecX dst, vecX src) %{ 4845 predicate(n->as_Vector()->length_in_bytes() == 16); 4846 match(Set dst (AndV dst src)); 4847 format %{ "pand $dst,$src\t! and vectors (16 bytes)" %} 4848 ins_encode %{ 4849 __ pand($dst$$XMMRegister, $src$$XMMRegister); 4850 %} 4851 ins_pipe( pipe_slow ); 4852 %} 4853 4854 instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{ 4855 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 4856 match(Set dst (AndV src1 src2)); 4857 format %{ "vpand $dst,$src1,$src2\t! and vectors (16 bytes)" %} 4858 ins_encode %{ 4859 bool vector256 = false; 4860 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4861 %} 4862 ins_pipe( pipe_slow ); 4863 %} 4864 4865 instruct vand16B_mem(vecX dst, vecX src, memory mem) %{ 4866 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 4867 match(Set dst (AndV src (LoadVector mem))); 4868 format %{ "vpand $dst,$src,$mem\t! 
and vectors (16 bytes)" %} 4869 ins_encode %{ 4870 bool vector256 = false; 4871 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 4872 %} 4873 ins_pipe( pipe_slow ); 4874 %} 4875 4876 instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{ 4877 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 4878 match(Set dst (AndV src1 src2)); 4879 format %{ "vpand $dst,$src1,$src2\t! and vectors (32 bytes)" %} 4880 ins_encode %{ 4881 bool vector256 = true; 4882 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4883 %} 4884 ins_pipe( pipe_slow ); 4885 %} 4886 4887 instruct vand32B_mem(vecY dst, vecY src, memory mem) %{ 4888 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 4889 match(Set dst (AndV src (LoadVector mem))); 4890 format %{ "vpand $dst,$src,$mem\t! and vectors (32 bytes)" %} 4891 ins_encode %{ 4892 bool vector256 = true; 4893 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 4894 %} 4895 ins_pipe( pipe_slow ); 4896 %} 4897 4898 // --------------------------------- OR --------------------------------------- 4899 4900 instruct vor4B(vecS dst, vecS src) %{ 4901 predicate(n->as_Vector()->length_in_bytes() == 4); 4902 match(Set dst (OrV dst src)); 4903 format %{ "por $dst,$src\t! or vectors (4 bytes)" %} 4904 ins_encode %{ 4905 __ por($dst$$XMMRegister, $src$$XMMRegister); 4906 %} 4907 ins_pipe( pipe_slow ); 4908 %} 4909 4910 instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{ 4911 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 4912 match(Set dst (OrV src1 src2)); 4913 format %{ "vpor $dst,$src1,$src2\t! 
or vectors (4 bytes)" %} 4914 ins_encode %{ 4915 bool vector256 = false; 4916 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4917 %} 4918 ins_pipe( pipe_slow ); 4919 %} 4920 4921 instruct vor8B(vecD dst, vecD src) %{ 4922 predicate(n->as_Vector()->length_in_bytes() == 8); 4923 match(Set dst (OrV dst src)); 4924 format %{ "por $dst,$src\t! or vectors (8 bytes)" %} 4925 ins_encode %{ 4926 __ por($dst$$XMMRegister, $src$$XMMRegister); 4927 %} 4928 ins_pipe( pipe_slow ); 4929 %} 4930 4931 instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{ 4932 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 4933 match(Set dst (OrV src1 src2)); 4934 format %{ "vpor $dst,$src1,$src2\t! or vectors (8 bytes)" %} 4935 ins_encode %{ 4936 bool vector256 = false; 4937 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4938 %} 4939 ins_pipe( pipe_slow ); 4940 %} 4941 4942 instruct vor16B(vecX dst, vecX src) %{ 4943 predicate(n->as_Vector()->length_in_bytes() == 16); 4944 match(Set dst (OrV dst src)); 4945 format %{ "por $dst,$src\t! or vectors (16 bytes)" %} 4946 ins_encode %{ 4947 __ por($dst$$XMMRegister, $src$$XMMRegister); 4948 %} 4949 ins_pipe( pipe_slow ); 4950 %} 4951 4952 instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{ 4953 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 4954 match(Set dst (OrV src1 src2)); 4955 format %{ "vpor $dst,$src1,$src2\t! or vectors (16 bytes)" %} 4956 ins_encode %{ 4957 bool vector256 = false; 4958 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4959 %} 4960 ins_pipe( pipe_slow ); 4961 %} 4962 4963 instruct vor16B_mem(vecX dst, vecX src, memory mem) %{ 4964 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 4965 match(Set dst (OrV src (LoadVector mem))); 4966 format %{ "vpor $dst,$src,$mem\t! 
or vectors (16 bytes)" %} 4967 ins_encode %{ 4968 bool vector256 = false; 4969 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 4970 %} 4971 ins_pipe( pipe_slow ); 4972 %} 4973 4974 instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{ 4975 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 4976 match(Set dst (OrV src1 src2)); 4977 format %{ "vpor $dst,$src1,$src2\t! or vectors (32 bytes)" %} 4978 ins_encode %{ 4979 bool vector256 = true; 4980 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 4981 %} 4982 ins_pipe( pipe_slow ); 4983 %} 4984 4985 instruct vor32B_mem(vecY dst, vecY src, memory mem) %{ 4986 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 4987 match(Set dst (OrV src (LoadVector mem))); 4988 format %{ "vpor $dst,$src,$mem\t! or vectors (32 bytes)" %} 4989 ins_encode %{ 4990 bool vector256 = true; 4991 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 4992 %} 4993 ins_pipe( pipe_slow ); 4994 %} 4995 4996 // --------------------------------- XOR -------------------------------------- 4997 4998 instruct vxor4B(vecS dst, vecS src) %{ 4999 predicate(n->as_Vector()->length_in_bytes() == 4); 5000 match(Set dst (XorV dst src)); 5001 format %{ "pxor $dst,$src\t! xor vectors (4 bytes)" %} 5002 ins_encode %{ 5003 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 5004 %} 5005 ins_pipe( pipe_slow ); 5006 %} 5007 5008 instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{ 5009 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4); 5010 match(Set dst (XorV src1 src2)); 5011 format %{ "vpxor $dst,$src1,$src2\t! 
xor vectors (4 bytes)" %} 5012 ins_encode %{ 5013 bool vector256 = false; 5014 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 5015 %} 5016 ins_pipe( pipe_slow ); 5017 %} 5018 5019 instruct vxor8B(vecD dst, vecD src) %{ 5020 predicate(n->as_Vector()->length_in_bytes() == 8); 5021 match(Set dst (XorV dst src)); 5022 format %{ "pxor $dst,$src\t! xor vectors (8 bytes)" %} 5023 ins_encode %{ 5024 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 5025 %} 5026 ins_pipe( pipe_slow ); 5027 %} 5028 5029 instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{ 5030 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8); 5031 match(Set dst (XorV src1 src2)); 5032 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (8 bytes)" %} 5033 ins_encode %{ 5034 bool vector256 = false; 5035 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 5036 %} 5037 ins_pipe( pipe_slow ); 5038 %} 5039 5040 instruct vxor16B(vecX dst, vecX src) %{ 5041 predicate(n->as_Vector()->length_in_bytes() == 16); 5042 match(Set dst (XorV dst src)); 5043 format %{ "pxor $dst,$src\t! xor vectors (16 bytes)" %} 5044 ins_encode %{ 5045 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 5046 %} 5047 ins_pipe( pipe_slow ); 5048 %} 5049 5050 instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{ 5051 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 5052 match(Set dst (XorV src1 src2)); 5053 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (16 bytes)" %} 5054 ins_encode %{ 5055 bool vector256 = false; 5056 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 5057 %} 5058 ins_pipe( pipe_slow ); 5059 %} 5060 5061 instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{ 5062 predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16); 5063 match(Set dst (XorV src (LoadVector mem))); 5064 format %{ "vpxor $dst,$src,$mem\t! 
xor vectors (16 bytes)" %} 5065 ins_encode %{ 5066 bool vector256 = false; 5067 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 5068 %} 5069 ins_pipe( pipe_slow ); 5070 %} 5071 5072 instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{ 5073 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 5074 match(Set dst (XorV src1 src2)); 5075 format %{ "vpxor $dst,$src1,$src2\t! xor vectors (32 bytes)" %} 5076 ins_encode %{ 5077 bool vector256 = true; 5078 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256); 5079 %} 5080 ins_pipe( pipe_slow ); 5081 %} 5082 5083 instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{ 5084 predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32); 5085 match(Set dst (XorV src (LoadVector mem))); 5086 format %{ "vpxor $dst,$src,$mem\t! xor vectors (32 bytes)" %} 5087 ins_encode %{ 5088 bool vector256 = true; 5089 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256); 5090 %} 5091 ins_pipe( pipe_slow ); 5092 %} 5093