< prev index next >

hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp

Print this page
rev 7347 : 8078113: 8011102 changes may cause incorrect results
Summary: replace the vzeroupper instruction in stubs with zeroing of only the ymm registers that were used.
Reviewed-by: kvn
Contributed-by: sandhya.viswanathan@intel.com


1311       }
1312       __ BIND(L_copy_bytes);
1313       __ addptr(qword_count, 8);
1314       __ jcc(Assembler::lessEqual, L_loop);
1315       __ subptr(qword_count, 4);  // sub(8) and add(4)
1316       __ jccb(Assembler::greater, L_end);
1317       // Copy trailing 32 bytes
1318       if (UseAVX >= 2) {
1319         __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
1320         __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
1321       } else {
1322         __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
1323         __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
1324         __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8));
1325         __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1);
1326       }
1327       __ addptr(qword_count, 4);
1328       __ BIND(L_end);
1329       if (UseAVX >= 2) {
1330         // clean upper bits of YMM registers
1331         __ vzeroupper();

1332       }
1333     } else {
1334       // Copy 32-bytes per iteration
1335       __ BIND(L_loop);
1336       __ movq(to, Address(end_from, qword_count, Address::times_8, -24));
1337       __ movq(Address(end_to, qword_count, Address::times_8, -24), to);
1338       __ movq(to, Address(end_from, qword_count, Address::times_8, -16));
1339       __ movq(Address(end_to, qword_count, Address::times_8, -16), to);
1340       __ movq(to, Address(end_from, qword_count, Address::times_8, - 8));
1341       __ movq(Address(end_to, qword_count, Address::times_8, - 8), to);
1342       __ movq(to, Address(end_from, qword_count, Address::times_8, - 0));
1343       __ movq(Address(end_to, qword_count, Address::times_8, - 0), to);
1344 
1345       __ BIND(L_copy_bytes);
1346       __ addptr(qword_count, 4);
1347       __ jcc(Assembler::lessEqual, L_loop);
1348     }
1349     __ subptr(qword_count, 4);
1350     __ jcc(Assembler::less, L_copy_8_bytes); // Copy trailing qwords
1351   }


1388       __ BIND(L_copy_bytes);
1389       __ subptr(qword_count, 8);
1390       __ jcc(Assembler::greaterEqual, L_loop);
1391 
1392       __ addptr(qword_count, 4);  // add(8) and sub(4)
1393       __ jccb(Assembler::less, L_end);
1394       // Copy trailing 32 bytes
1395       if (UseAVX >= 2) {
1396         __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 0));
1397         __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm0);
1398       } else {
1399         __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16));
1400         __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0);
1401         __ movdqu(xmm1, Address(from, qword_count, Address::times_8,  0));
1402         __ movdqu(Address(dest, qword_count, Address::times_8,  0), xmm1);
1403       }
1404       __ subptr(qword_count, 4);
1405       __ BIND(L_end);
1406       if (UseAVX >= 2) {
1407         // clean upper bits of YMM registers
1408         __ vzeroupper();

1409       }
1410     } else {
1411       // Copy 32-bytes per iteration
1412       __ BIND(L_loop);
1413       __ movq(to, Address(from, qword_count, Address::times_8, 24));
1414       __ movq(Address(dest, qword_count, Address::times_8, 24), to);
1415       __ movq(to, Address(from, qword_count, Address::times_8, 16));
1416       __ movq(Address(dest, qword_count, Address::times_8, 16), to);
1417       __ movq(to, Address(from, qword_count, Address::times_8,  8));
1418       __ movq(Address(dest, qword_count, Address::times_8,  8), to);
1419       __ movq(to, Address(from, qword_count, Address::times_8,  0));
1420       __ movq(Address(dest, qword_count, Address::times_8,  0), to);
1421 
1422       __ BIND(L_copy_bytes);
1423       __ subptr(qword_count, 4);
1424       __ jcc(Assembler::greaterEqual, L_loop);
1425     }
1426     __ addptr(qword_count, 4);
1427     __ jcc(Assembler::greater, L_copy_8_bytes); // Copy trailing qwords
1428   }




1311       }
1312       __ BIND(L_copy_bytes);
1313       __ addptr(qword_count, 8);
1314       __ jcc(Assembler::lessEqual, L_loop);
1315       __ subptr(qword_count, 4);  // sub(8) and add(4)
1316       __ jccb(Assembler::greater, L_end);
1317       // Copy trailing 32 bytes
1318       if (UseAVX >= 2) {
1319         __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
1320         __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
1321       } else {
1322         __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
1323         __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
1324         __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8));
1325         __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1);
1326       }
1327       __ addptr(qword_count, 4);
1328       __ BIND(L_end);
1329       if (UseAVX >= 2) {
1330         // clean upper bits of YMM registers
1331         __ vpxor(xmm0, xmm0);
1332         __ vpxor(xmm1, xmm1);
1333       }
1334     } else {
1335       // Copy 32-bytes per iteration
1336       __ BIND(L_loop);
1337       __ movq(to, Address(end_from, qword_count, Address::times_8, -24));
1338       __ movq(Address(end_to, qword_count, Address::times_8, -24), to);
1339       __ movq(to, Address(end_from, qword_count, Address::times_8, -16));
1340       __ movq(Address(end_to, qword_count, Address::times_8, -16), to);
1341       __ movq(to, Address(end_from, qword_count, Address::times_8, - 8));
1342       __ movq(Address(end_to, qword_count, Address::times_8, - 8), to);
1343       __ movq(to, Address(end_from, qword_count, Address::times_8, - 0));
1344       __ movq(Address(end_to, qword_count, Address::times_8, - 0), to);
1345 
1346       __ BIND(L_copy_bytes);
1347       __ addptr(qword_count, 4);
1348       __ jcc(Assembler::lessEqual, L_loop);
1349     }
1350     __ subptr(qword_count, 4);
1351     __ jcc(Assembler::less, L_copy_8_bytes); // Copy trailing qwords
1352   }


1389       __ BIND(L_copy_bytes);
1390       __ subptr(qword_count, 8);
1391       __ jcc(Assembler::greaterEqual, L_loop);
1392 
1393       __ addptr(qword_count, 4);  // add(8) and sub(4)
1394       __ jccb(Assembler::less, L_end);
1395       // Copy trailing 32 bytes
1396       if (UseAVX >= 2) {
1397         __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 0));
1398         __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm0);
1399       } else {
1400         __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16));
1401         __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0);
1402         __ movdqu(xmm1, Address(from, qword_count, Address::times_8,  0));
1403         __ movdqu(Address(dest, qword_count, Address::times_8,  0), xmm1);
1404       }
1405       __ subptr(qword_count, 4);
1406       __ BIND(L_end);
1407       if (UseAVX >= 2) {
1408         // clean upper bits of YMM registers
1409         __ vpxor(xmm0, xmm0);
1410         __ vpxor(xmm1, xmm1);
1411       }
1412     } else {
1413       // Copy 32-bytes per iteration
1414       __ BIND(L_loop);
1415       __ movq(to, Address(from, qword_count, Address::times_8, 24));
1416       __ movq(Address(dest, qword_count, Address::times_8, 24), to);
1417       __ movq(to, Address(from, qword_count, Address::times_8, 16));
1418       __ movq(Address(dest, qword_count, Address::times_8, 16), to);
1419       __ movq(to, Address(from, qword_count, Address::times_8,  8));
1420       __ movq(Address(dest, qword_count, Address::times_8,  8), to);
1421       __ movq(to, Address(from, qword_count, Address::times_8,  0));
1422       __ movq(Address(dest, qword_count, Address::times_8,  0), to);
1423 
1424       __ BIND(L_copy_bytes);
1425       __ subptr(qword_count, 4);
1426       __ jcc(Assembler::greaterEqual, L_loop);
1427     }
1428     __ addptr(qword_count, 4);
1429     __ jcc(Assembler::greater, L_copy_8_bytes); // Copy trailing qwords
1430   }


< prev index next >