< prev index next >
hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp
Print this page
rev 7347 : 8078113: 8011102 changes may cause incorrect results
Summary: replace the vzeroupper instruction in stubs with explicit zeroing of only the YMM registers actually used.
Reviewed-by: kvn
Contributed-by: sandhya.viswanathan@intel.com
*** 6688,6698 ****
jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
addptr(result, stride2);
subl(cnt2, stride2);
jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP);
// clean upper bits of YMM registers
! vzeroupper();
// compare wide vectors tail
bind(COMPARE_WIDE_TAIL);
testptr(result, result);
jccb(Assembler::zero, LENGTH_DIFF_LABEL);
--- 6688,6698 ----
jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
addptr(result, stride2);
subl(cnt2, stride2);
jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP);
// clean upper bits of YMM registers
! vpxor(vec1, vec1);
// compare wide vectors tail
bind(COMPARE_WIDE_TAIL);
testptr(result, result);
jccb(Assembler::zero, LENGTH_DIFF_LABEL);
*** 6703,6713 ****
jmpb(COMPARE_WIDE_VECTORS_LOOP);
// Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors.
bind(VECTOR_NOT_EQUAL);
// clean upper bits of YMM registers
! vzeroupper();
lea(str1, Address(str1, result, scale));
lea(str2, Address(str2, result, scale));
jmp(COMPARE_16_CHARS);
// Compare tail chars, length between 1 to 15 chars
--- 6703,6713 ----
jmpb(COMPARE_WIDE_VECTORS_LOOP);
// Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors.
bind(VECTOR_NOT_EQUAL);
// clean upper bits of YMM registers
! vpxor(vec1, vec1);
lea(str1, Address(str1, result, scale));
lea(str2, Address(str2, result, scale));
jmp(COMPARE_16_CHARS);
// Compare tail chars, length between 1 to 15 chars
*** 6962,6972 ****
// That's it
bind(DONE);
if (UseAVX >= 2) {
// clean upper bits of YMM registers
! vzeroupper();
}
}
void MacroAssembler::generate_fill(BasicType t, bool aligned,
Register to, Register value, Register count,
--- 6962,6973 ----
// That's it
bind(DONE);
if (UseAVX >= 2) {
// clean upper bits of YMM registers
! vpxor(vec1, vec1);
! vpxor(vec2, vec2);
}
}
void MacroAssembler::generate_fill(BasicType t, bool aligned,
Register to, Register value, Register count,
*** 7096,7106 ****
addptr(to, 32);
subl(count, 8 << shift);
BIND(L_check_fill_8_bytes);
// clean upper bits of YMM registers
! vzeroupper();
} else {
// Fill 32-byte chunks
pshufd(xtmp, xtmp, 0);
subl(count, 8 << shift);
--- 7097,7108 ----
addptr(to, 32);
subl(count, 8 << shift);
BIND(L_check_fill_8_bytes);
// clean upper bits of YMM registers
! movdl(xtmp, value);
! pshufd(xtmp, xtmp, 0);
} else {
// Fill 32-byte chunks
pshufd(xtmp, xtmp, 0);
subl(count, 8 << shift);
*** 7259,7269 ****
jccb(Assembler::lessEqual, L_copy_16_chars);
bind(L_copy_16_chars_exit);
if (UseAVX >= 2) {
// clean upper bits of YMM registers
! vzeroupper();
}
subptr(len, 8);
jccb(Assembler::greater, L_copy_8_chars_exit);
bind(L_copy_8_chars);
--- 7261,7275 ----
jccb(Assembler::lessEqual, L_copy_16_chars);
bind(L_copy_16_chars_exit);
if (UseAVX >= 2) {
// clean upper bits of YMM registers
! vpxor(tmp2Reg, tmp2Reg);
! vpxor(tmp3Reg, tmp3Reg);
! vpxor(tmp4Reg, tmp4Reg);
! movdl(tmp1Reg, tmp5);
! pshufd(tmp1Reg, tmp1Reg, 0);
}
subptr(len, 8);
jccb(Assembler::greater, L_copy_8_chars_exit);
bind(L_copy_8_chars);
< prev index next >