1311 }
1312 __ BIND(L_copy_bytes);
1313 __ addptr(qword_count, 8);
1314 __ jcc(Assembler::lessEqual, L_loop);
1315 __ subptr(qword_count, 4); // sub(8) and add(4)
1316 __ jccb(Assembler::greater, L_end);
1317 // Copy trailing 32 bytes
1318 if (UseAVX >= 2) {
1319 __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
1320 __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
1321 } else {
1322 __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
1323 __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
1324 __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8));
1325 __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1);
1326 }
1327 __ addptr(qword_count, 4);
1328 __ BIND(L_end);
1329 if (UseAVX >= 2) {
1330 // clean upper bits of YMM registers
1331 __ vzeroupper();
1332 }
1333 } else {
1334 // Copy 32-bytes per iteration
1335 __ BIND(L_loop);
1336 __ movq(to, Address(end_from, qword_count, Address::times_8, -24));
1337 __ movq(Address(end_to, qword_count, Address::times_8, -24), to);
1338 __ movq(to, Address(end_from, qword_count, Address::times_8, -16));
1339 __ movq(Address(end_to, qword_count, Address::times_8, -16), to);
1340 __ movq(to, Address(end_from, qword_count, Address::times_8, - 8));
1341 __ movq(Address(end_to, qword_count, Address::times_8, - 8), to);
1342 __ movq(to, Address(end_from, qword_count, Address::times_8, - 0));
1343 __ movq(Address(end_to, qword_count, Address::times_8, - 0), to);
1344
1345 __ BIND(L_copy_bytes);
1346 __ addptr(qword_count, 4);
1347 __ jcc(Assembler::lessEqual, L_loop);
1348 }
1349 __ subptr(qword_count, 4);
1350 __ jcc(Assembler::less, L_copy_8_bytes); // Copy trailing qwords
1351 }
1388 __ BIND(L_copy_bytes);
1389 __ subptr(qword_count, 8);
1390 __ jcc(Assembler::greaterEqual, L_loop);
1391
1392 __ addptr(qword_count, 4); // add(8) and sub(4)
1393 __ jccb(Assembler::less, L_end);
1394 // Copy trailing 32 bytes
1395 if (UseAVX >= 2) {
1396 __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 0));
1397 __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm0);
1398 } else {
1399 __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16));
1400 __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0);
1401 __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 0));
1402 __ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm1);
1403 }
1404 __ subptr(qword_count, 4);
1405 __ BIND(L_end);
1406 if (UseAVX >= 2) {
1407 // clean upper bits of YMM registers
1408 __ vzeroupper();
1409 }
1410 } else {
1411 // Copy 32-bytes per iteration
1412 __ BIND(L_loop);
1413 __ movq(to, Address(from, qword_count, Address::times_8, 24));
1414 __ movq(Address(dest, qword_count, Address::times_8, 24), to);
1415 __ movq(to, Address(from, qword_count, Address::times_8, 16));
1416 __ movq(Address(dest, qword_count, Address::times_8, 16), to);
1417 __ movq(to, Address(from, qword_count, Address::times_8, 8));
1418 __ movq(Address(dest, qword_count, Address::times_8, 8), to);
1419 __ movq(to, Address(from, qword_count, Address::times_8, 0));
1420 __ movq(Address(dest, qword_count, Address::times_8, 0), to);
1421
1422 __ BIND(L_copy_bytes);
1423 __ subptr(qword_count, 4);
1424 __ jcc(Assembler::greaterEqual, L_loop);
1425 }
1426 __ addptr(qword_count, 4);
1427 __ jcc(Assembler::greater, L_copy_8_bytes); // Copy trailing qwords
1428 }
|
1311 }
1312 __ BIND(L_copy_bytes);
1313 __ addptr(qword_count, 8);
1314 __ jcc(Assembler::lessEqual, L_loop);
1315 __ subptr(qword_count, 4); // sub(8) and add(4)
1316 __ jccb(Assembler::greater, L_end);
1317 // Copy trailing 32 bytes
1318 if (UseAVX >= 2) {
1319 __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
1320 __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
1321 } else {
1322 __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
1323 __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
1324 __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8));
1325 __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1);
1326 }
1327 __ addptr(qword_count, 4);
1328 __ BIND(L_end);
1329 if (UseAVX >= 2) {
1330 // clean upper bits of YMM registers
1331 __ vpxor(xmm0, xmm0);
1332 __ vpxor(xmm1, xmm1);
1333 }
1334 } else {
1335 // Copy 32-bytes per iteration
1336 __ BIND(L_loop);
1337 __ movq(to, Address(end_from, qword_count, Address::times_8, -24));
1338 __ movq(Address(end_to, qword_count, Address::times_8, -24), to);
1339 __ movq(to, Address(end_from, qword_count, Address::times_8, -16));
1340 __ movq(Address(end_to, qword_count, Address::times_8, -16), to);
1341 __ movq(to, Address(end_from, qword_count, Address::times_8, - 8));
1342 __ movq(Address(end_to, qword_count, Address::times_8, - 8), to);
1343 __ movq(to, Address(end_from, qword_count, Address::times_8, - 0));
1344 __ movq(Address(end_to, qword_count, Address::times_8, - 0), to);
1345
1346 __ BIND(L_copy_bytes);
1347 __ addptr(qword_count, 4);
1348 __ jcc(Assembler::lessEqual, L_loop);
1349 }
1350 __ subptr(qword_count, 4);
1351 __ jcc(Assembler::less, L_copy_8_bytes); // Copy trailing qwords
1352 }
1389 __ BIND(L_copy_bytes);
1390 __ subptr(qword_count, 8);
1391 __ jcc(Assembler::greaterEqual, L_loop);
1392
1393 __ addptr(qword_count, 4); // add(8) and sub(4)
1394 __ jccb(Assembler::less, L_end);
1395 // Copy trailing 32 bytes
1396 if (UseAVX >= 2) {
1397 __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 0));
1398 __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm0);
1399 } else {
1400 __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16));
1401 __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0);
1402 __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 0));
1403 __ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm1);
1404 }
1405 __ subptr(qword_count, 4);
1406 __ BIND(L_end);
1407 if (UseAVX >= 2) {
1408 // clean upper bits of YMM registers
1409 __ vpxor(xmm0, xmm0);
1410 __ vpxor(xmm1, xmm1);
1411 }
1412 } else {
1413 // Copy 32-bytes per iteration
1414 __ BIND(L_loop);
1415 __ movq(to, Address(from, qword_count, Address::times_8, 24));
1416 __ movq(Address(dest, qword_count, Address::times_8, 24), to);
1417 __ movq(to, Address(from, qword_count, Address::times_8, 16));
1418 __ movq(Address(dest, qword_count, Address::times_8, 16), to);
1419 __ movq(to, Address(from, qword_count, Address::times_8, 8));
1420 __ movq(Address(dest, qword_count, Address::times_8, 8), to);
1421 __ movq(to, Address(from, qword_count, Address::times_8, 0));
1422 __ movq(Address(dest, qword_count, Address::times_8, 0), to);
1423
1424 __ BIND(L_copy_bytes);
1425 __ subptr(qword_count, 4);
1426 __ jcc(Assembler::greaterEqual, L_loop);
1427 }
1428 __ addptr(qword_count, 4);
1429 __ jcc(Assembler::greater, L_copy_8_bytes); // Copy trailing qwords
1430 }
|