820 __ movq(Address(from, to_from, Address::times_1, 16), xmm2);
821 __ movq(xmm3, Address(from, 24));
822 __ movq(Address(from, to_from, Address::times_1, 24), xmm3);
823 __ movq(xmm4, Address(from, 32));
824 __ movq(Address(from, to_from, Address::times_1, 32), xmm4);
825 __ movq(xmm5, Address(from, 40));
826 __ movq(Address(from, to_from, Address::times_1, 40), xmm5);
827 __ movq(xmm6, Address(from, 48));
828 __ movq(Address(from, to_from, Address::times_1, 48), xmm6);
829 __ movq(xmm7, Address(from, 56));
830 __ movq(Address(from, to_from, Address::times_1, 56), xmm7);
831 }
832
833 __ addl(from, 64);
834 __ BIND(L_copy_64_bytes);
835 __ subl(qword_count, 8);
836 __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop);
837
838 if (UseUnalignedLoadStores && (UseAVX >= 2)) {
839 // clean upper bits of YMM registers
840 __ vzeroupper();
841 }
842 __ addl(qword_count, 8);
843 __ jccb(Assembler::zero, L_exit);
844 //
845 // length is too short, just copy qwords
846 //
847 __ BIND(L_copy_8_bytes);
848 __ movq(xmm0, Address(from, 0));
849 __ movq(Address(from, to_from, Address::times_1), xmm0);
850 __ addl(from, 8);
851 __ decrement(qword_count);
852 __ jcc(Assembler::greater, L_copy_8_bytes);
853 __ BIND(L_exit);
854 }
855
856 // Copy 64-byte chunks
857 //
858 // Inputs:
859 // from - source array address
860 // to_from - destination array address - from
|
820 __ movq(Address(from, to_from, Address::times_1, 16), xmm2);
821 __ movq(xmm3, Address(from, 24));
822 __ movq(Address(from, to_from, Address::times_1, 24), xmm3);
823 __ movq(xmm4, Address(from, 32));
824 __ movq(Address(from, to_from, Address::times_1, 32), xmm4);
825 __ movq(xmm5, Address(from, 40));
826 __ movq(Address(from, to_from, Address::times_1, 40), xmm5);
827 __ movq(xmm6, Address(from, 48));
828 __ movq(Address(from, to_from, Address::times_1, 48), xmm6);
829 __ movq(xmm7, Address(from, 56));
830 __ movq(Address(from, to_from, Address::times_1, 56), xmm7);
831 }
832
833 __ addl(from, 64);
834 __ BIND(L_copy_64_bytes);
835 __ subl(qword_count, 8);
836 __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop);
837
838 if (UseUnalignedLoadStores && (UseAVX >= 2)) {
839 // clean upper bits of YMM registers
840 __ vpxor(xmm0, xmm0);
841 __ vpxor(xmm1, xmm1);
842 }
843 __ addl(qword_count, 8);
844 __ jccb(Assembler::zero, L_exit);
845 //
846 // length is too short, just copy qwords
847 //
848 __ BIND(L_copy_8_bytes);
849 __ movq(xmm0, Address(from, 0));
850 __ movq(Address(from, to_from, Address::times_1), xmm0);
851 __ addl(from, 8);
852 __ decrement(qword_count);
853 __ jcc(Assembler::greater, L_copy_8_bytes);
854 __ BIND(L_exit);
855 }
856
857 // Copy 64-byte chunks
858 //
859 // Inputs:
860 // from - source array address
861 // to_from - destination array address - from
|