rev 51258 : [mq]: spin
#
# Copyright (c) 2004, 2017, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation.
#
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
#
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#


        # NOTE WELL! The _Copy functions are called directly
        # from server-compiler-generated code via CallLeafNoFP,
        # which means that they *must* either not use floating
        # point or use it in the same manner as does the server
        # compiler.
# Symbols exported by this file (GNU as, AT&T syntax, 32-bit x86).

        .globl  _Copy_conjoint_bytes
        .globl  _Copy_arrayof_conjoint_bytes
        .globl  _Copy_conjoint_jshorts_atomic
        .globl  _Copy_arrayof_conjoint_jshorts
        .globl  _Copy_conjoint_jints_atomic
        .globl  _Copy_arrayof_conjoint_jints
        .globl  _Copy_conjoint_jlongs_atomic
        .globl  _mmx_Copy_arrayof_conjoint_jshorts

        .globl  _Atomic_cmpxchg_long
        .globl  _Atomic_move_long

        .text

# SpinPause
#
# Executes one "rep; nop" (the encoding of the PAUSE spin-wait hint)
# and returns 1 in %eax.  Takes no arguments and touches no other
# register and no memory.
        .globl  SpinPause
        .type   SpinPause,@function
        .p2align 4,,15
SpinPause:
        rep
        nop
        movl    $1, %eax
        ret

# Support for void Copy::conjoint_bytes(void* from,
#                                       void* to,
#                                       size_t count)
#
# All three arguments are read from the stack.  %esi and %edi are
# saved on entry and restored on exit; %eax, %ecx, %edx and the flags
# are used as scratch.
#
# Direction selection (unsigned compares):
#   to <= from                  -> ascending copy  (cb_CopyRight)
#   from < to <= from+count-1   -> descending copy (cb_CopyLeft)
#   otherwise                   -> ascending copy  (cb_CopyRight)
#
# The ascending path first copies single bytes until the source
# address is dword aligned, then dwords, then the 0..3 byte suffix.
# The descending path sets DF with std and clears it again with cld
# before returning.
        .p2align 4,,15
        .type   _Copy_conjoint_bytes,@function
_Copy_conjoint_bytes:
        pushl   %esi
        movl    4+12(%esp),%ecx         # count
        pushl   %edi
        movl    8+ 4(%esp),%esi         # from
        movl    8+ 8(%esp),%edi         # to
        cmpl    %esi,%edi
        leal    -1(%esi,%ecx),%eax      # from + count - 1 (leal keeps the flags of cmpl)
        jbe     cb_CopyRight
        cmpl    %eax,%edi
        jbe     cb_CopyLeft
        # copy from low to high
cb_CopyRight:
        cmpl    $3,%ecx
        jbe     5f                      # <= 3 bytes
        # align source address at dword address boundary
        movl    %ecx,%eax               # original count
        movl    $4,%ecx
        subl    %esi,%ecx
        andl    $3,%ecx                 # prefix byte count
        jz      1f                      # no prefix
        subl    %ecx,%eax               # byte count less prefix
        # copy prefix
        subl    %esi,%edi               # %edi = to - from, so (%edi,%esi,1) addresses "to"
0:      movb    (%esi),%dl
        movb    %dl,(%edi,%esi,1)
        addl    $1,%esi
        subl    $1,%ecx
        jnz     0b
        addl    %esi,%edi               # turn %edi back into an absolute address
1:      movl    %eax,%ecx               # byte count less prefix
        shrl    $2,%ecx                 # dword count
        jz      4f                      # no dwords to move
        cmpl    $32,%ecx
        jbe     2f                      # <= 32 dwords
        # copy aligned dwords
        rep;    smovl
        jmp     4f
        # copy aligned dwords
2:      subl    %esi,%edi
        .p2align 4,,15
3:      movl    (%esi),%edx
        movl    %edx,(%edi,%esi,1)
        addl    $4,%esi
        subl    $1,%ecx
        jnz     3b
        addl    %esi,%edi
4:      movl    %eax,%ecx               # byte count less prefix
5:      andl    $3,%ecx                 # suffix byte count
        jz      7f                      # no suffix
        # copy suffix
        xorl    %eax,%eax
6:      movb    (%esi,%eax,1),%dl
        movb    %dl,(%edi,%eax,1)
        addl    $1,%eax
        subl    $1,%ecx
        jnz     6b
7:      popl    %edi
        popl    %esi
        ret
        # copy from high to low
cb_CopyLeft:
        std
        leal    -4(%edi,%ecx),%edi      # to + count - 4
        movl    %eax,%esi               # from + count - 1
        movl    %ecx,%eax
        subl    $3,%esi                 # from + count - 4
        cmpl    $3,%ecx
        jbe     5f                      # <= 3 bytes
1:      shrl    $2,%ecx                 # dword count
        jz      4f                      # no dwords to move
        cmpl    $32,%ecx
        ja      3f                      # > 32 dwords
        # copy dwords, aligned or not
        subl    %esi,%edi
        .p2align 4,,15
2:      movl    (%esi),%edx
        movl    %edx,(%edi,%esi,1)
        subl    $4,%esi
        subl    $1,%ecx
        jnz     2b
        addl    %esi,%edi
        jmp     4f
        # copy dwords, aligned or not
3:      rep;    smovl
4:      movl    %eax,%ecx               # byte count
5:      andl    $3,%ecx                 # suffix byte count
        jz      7f                      # no suffix
        # copy suffix
        subl    %esi,%edi
        addl    $3,%esi                 # last not-yet-copied source byte
6:      movb    (%esi),%dl
        movb    %dl,(%edi,%esi,1)
        subl    $1,%esi
        subl    $1,%ecx
        jnz     6b
7:      cld
        popl    %edi
        popl    %esi
        ret

# Support for void Copy::arrayof_conjoint_bytes(void* from,
#                                               void* to,
#                                               size_t count)
#
# Same as _Copy_conjoint_bytes, except no source alignment check.
# _Copy_arrayof_conjoint_bytes(from, to, count): arguments are read from
# the stack; %esi/%edi are saved and restored; %eax, %ecx, %edx and the
# flags are scratch.  Direction is chosen with unsigned compares: the
# descending path (acb_CopyLeft) is taken only when
# from < to <= from + count - 1; it runs with DF set (std) and executes
# cld before returning.  No source-alignment prefix is copied.
        .p2align 4,,15
        .type   _Copy_arrayof_conjoint_bytes,@function
_Copy_arrayof_conjoint_bytes:
        pushl   %esi
        movl    4+12(%esp),%ecx         # count
        pushl   %edi
        movl    8+ 4(%esp),%esi         # from
        movl    8+ 8(%esp),%edi         # to
        cmpl    %esi,%edi
        leal    -1(%esi,%ecx),%eax      # from + count - 1
        jbe     acb_CopyRight
        cmpl    %eax,%edi
        jbe     acb_CopyLeft
        # copy from low to high
acb_CopyRight:
        cmpl    $3,%ecx
        jbe     5f
1:      movl    %ecx,%eax
        shrl    $2,%ecx
        jz      4f
        cmpl    $32,%ecx
        ja      3f
        # copy aligned dwords
        subl    %esi,%edi               # %edi = to - from, so (%edi,%esi,1) addresses "to"
        .p2align 4,,15
2:      movl    (%esi),%edx
        movl    %edx,(%edi,%esi,1)
        addl    $4,%esi
        subl    $1,%ecx
        jnz     2b
        addl    %esi,%edi
        jmp     4f
        # copy aligned dwords
3:      rep;    smovl
4:      movl    %eax,%ecx
5:      andl    $3,%ecx
        jz      7f
        # copy suffix
        xorl    %eax,%eax
6:      movb    (%esi,%eax,1),%dl
        movb    %dl,(%edi,%eax,1)
        addl    $1,%eax
        subl    $1,%ecx
        jnz     6b
7:      popl    %edi
        popl    %esi
        ret
acb_CopyLeft:
        std
        leal    -4(%edi,%ecx),%edi      # to + count - 4
        movl    %eax,%esi               # from + count - 1
        movl    %ecx,%eax
        subl    $3,%esi                 # from + count - 4
        cmpl    $3,%ecx
        jbe     5f
1:      shrl    $2,%ecx
        jz      4f
        cmpl    $32,%ecx
        jbe     2f                      # <= 32 dwords
        rep;    smovl
        jmp     4f
        .space 8
2:      subl    %esi,%edi
        .p2align 4,,15
3:      movl    (%esi),%edx
        movl    %edx,(%edi,%esi,1)
        subl    $4,%esi
        subl    $1,%ecx
        jnz     3b
        addl    %esi,%edi
4:      movl    %eax,%ecx
5:      andl    $3,%ecx
        jz      7f
        subl    %esi,%edi
        addl    $3,%esi
6:      movb    (%esi),%dl
        movb    %dl,(%edi,%esi,1)
        subl    $1,%esi
        subl    $1,%ecx
        jnz     6b
7:      cld
        popl    %edi
        popl    %esi
        ret

# Support for void Copy::conjoint_jshorts_atomic(void* from,
#                                                void* to,
#                                                size_t count)
#
# count is in 16-bit elements.  Arguments come from the stack;
# %esi/%edi are preserved; %eax, %ecx, %edx and the flags are scratch.
# The ascending path copies one leading word when (from & 3) != 0 so
# that the dword loop reads from a dword-aligned source.
        .p2align 4,,15
        .type   _Copy_conjoint_jshorts_atomic,@function
_Copy_conjoint_jshorts_atomic:
        pushl   %esi
        movl    4+12(%esp),%ecx         # count
        pushl   %edi
        movl    8+ 4(%esp),%esi         # from
        movl    8+ 8(%esp),%edi         # to
        cmpl    %esi,%edi
        leal    -2(%esi,%ecx,2),%eax    # from + count*2 - 2
        jbe     cs_CopyRight
        cmpl    %eax,%edi
        jbe     cs_CopyLeft
        # copy from low to high
cs_CopyRight:
        # align source address at dword address boundary
        movl    %esi,%eax               # original from
        andl    $3,%eax                 # either 0 or 2
        jz      1f                      # no prefix
        # copy prefix
        subl    $1,%ecx
        jl      5f                      # zero count
        movw    (%esi),%dx
        movw    %dx,(%edi)
        addl    %eax,%esi               # %eax == 2
        addl    %eax,%edi
1:      movl    %ecx,%eax               # word count less prefix
        sarl    %ecx                    # dword count
        jz      4f                      # no dwords to move
        cmpl    $32,%ecx
        jbe     2f                      # <= 32 dwords
        # copy aligned dwords
        rep;    smovl
        jmp     4f
        # copy aligned dwords
2:      subl    %esi,%edi
        .p2align 4,,15
3:      movl    (%esi),%edx
        movl    %edx,(%edi,%esi,1)
        addl    $4,%esi
        subl    $1,%ecx
        jnz     3b
        addl    %esi,%edi
4:      andl    $1,%eax                 # suffix count
        jz      5f                      # no suffix
        # copy suffix
        movw    (%esi),%dx
        movw    %dx,(%edi)
5:      popl    %edi
        popl    %esi
        ret
        # copy from high to low
cs_CopyLeft:
        std
        leal    -4(%edi,%ecx,2),%edi    # to + count*2 - 4
        movl    %eax,%esi               # from + count*2 - 2
        movl    %ecx,%eax
        subl    $2,%esi                 # from + count*2 - 4
1:      sarl    %ecx                    # dword count
        jz      4f                      # no dwords to move
        cmpl    $32,%ecx
        ja      3f                      # > 32 dwords
        subl    %esi,%edi
        .p2align 4,,15
2:      movl    (%esi),%edx
        movl    %edx,(%edi,%esi,1)
        subl    $4,%esi
        subl    $1,%ecx
        jnz     2b
        addl    %esi,%edi
        jmp     4f
3:      rep;    smovl
4:      andl    $1,%eax                 # suffix count
        jz      5f                      # no suffix
        # copy suffix
        addl    $2,%esi
        addl    $2,%edi
        movw    (%esi),%dx
        movw    %dx,(%edi)
5:      cld
        popl    %edi
        popl    %esi
        ret

# Support for void Copy::arrayof_conjoint_jshorts(void* from,
#                                                 void* to,
#                                                 size_t count)
#
# Same structure as _Copy_conjoint_jshorts_atomic but without the
# leading-word alignment step.
        .p2align 4,,15
        .type   _Copy_arrayof_conjoint_jshorts,@function
_Copy_arrayof_conjoint_jshorts:
        pushl   %esi
        movl    4+12(%esp),%ecx         # count
        pushl   %edi
        movl    8+ 4(%esp),%esi         # from
        movl    8+ 8(%esp),%edi         # to
        cmpl    %esi,%edi
        leal    -2(%esi,%ecx,2),%eax    # from + count*2 - 2
        jbe     acs_CopyRight
        cmpl    %eax,%edi
        jbe     acs_CopyLeft
acs_CopyRight:
        movl    %ecx,%eax               # word count
        sarl    %ecx                    # dword count
        jz      4f                      # no dwords to move
        cmpl    $32,%ecx
        jbe     2f                      # <= 32 dwords
        # copy aligned dwords
        rep;    smovl
        jmp     4f
        # copy aligned dwords
        .space 5
2:      subl    %esi,%edi
        .p2align 4,,15
3:      movl    (%esi),%edx
        movl    %edx,(%edi,%esi,1)
        addl    $4,%esi
        subl    $1,%ecx
        jnz     3b
        addl    %esi,%edi
4:      andl    $1,%eax                 # suffix count
        jz      5f                      # no suffix
        # copy suffix
        movw    (%esi),%dx
        movw    %dx,(%edi)
5:      popl    %edi
        popl    %esi
        ret
acs_CopyLeft:
        std
        leal    -4(%edi,%ecx,2),%edi    # to + count*2 - 4
        movl    %eax,%esi               # from + count*2 - 2
        movl    %ecx,%eax
        subl    $2,%esi                 # from + count*2 - 4
        sarl    %ecx                    # dword count
        jz      4f                      # no dwords to move
        cmpl    $32,%ecx
        ja      3f                      # > 32 dwords
        subl    %esi,%edi
        .p2align 4,,15
2:      movl    (%esi),%edx
        movl    %edx,(%edi,%esi,1)
        subl    $4,%esi
        subl    $1,%ecx
        jnz     2b
        addl    %esi,%edi
        jmp     4f
3:      rep;    smovl
4:      andl    $1,%eax                 # suffix count
        jz      5f                      # no suffix
        # copy suffix
        addl    $2,%esi
        addl    $2,%edi
        movw    (%esi),%dx
        movw    %dx,(%edi)
5:      cld
        popl    %edi
        popl    %esi
        ret

# Support for void Copy::conjoint_jints_atomic(void* from,
#                                              void* to,
#                                              size_t count)
# Equivalent to
#   arrayof_conjoint_jints
#
# Both symbols label the same code.  count is in 32-bit elements.
# The short loops use "subl $1 / jge", i.e. they treat the count as
# signed.
        .p2align 4,,15
        .type   _Copy_conjoint_jints_atomic,@function
        .type   _Copy_arrayof_conjoint_jints,@function
_Copy_conjoint_jints_atomic:
_Copy_arrayof_conjoint_jints:
        pushl   %esi
        movl    4+12(%esp),%ecx         # count
        pushl   %edi
        movl    8+ 4(%esp),%esi         # from
        movl    8+ 8(%esp),%edi         # to
        cmpl    %esi,%edi
        leal    -4(%esi,%ecx,4),%eax    # from + count*4 - 4
        jbe     ci_CopyRight
        cmpl    %eax,%edi
        jbe     ci_CopyLeft
ci_CopyRight:
        cmpl    $32,%ecx
        jbe     2f                      # <= 32 dwords
        rep;    smovl
        popl    %edi
        popl    %esi
        ret
        .space 10
2:      subl    %esi,%edi
        jmp     4f
        .p2align 4,,15
3:      movl    (%esi),%edx
        movl    %edx,(%edi,%esi,1)
        addl    $4,%esi
4:      subl    $1,%ecx
        jge     3b
        popl    %edi
        popl    %esi
        ret
ci_CopyLeft:
        std
        leal    -4(%edi,%ecx,4),%edi    # to + count*4 - 4
        cmpl    $32,%ecx
        ja      4f                      # > 32 dwords
        subl    %eax,%edi               # eax == from + count*4 - 4
        jmp     3f
        .p2align 4,,15
2:      movl    (%eax),%edx
        movl    %edx,(%edi,%eax,1)
        subl    $4,%eax
3:      subl    $1,%ecx
        jge     2b
        cld
        popl    %edi
        popl    %esi
        ret
4:      movl    %eax,%esi               # from + count*4 - 4
        rep;    smovl
        cld
        popl    %edi
        popl    %esi
        ret

# Support for void Copy::conjoint_jlongs_atomic(jlong* from,
#                                               jlong* to,
#                                               size_t count)
#
# (32-bit only; count is treated as signed)
#
# if (from > to) {
#   while (--count >= 0) {
#     *to++ = *from++;
#   }
# } else {
#   while (--count >= 0) {
#     to[count] = from[count];
#   }
# }
#
# Each element is moved with one fildll/fistpll pair, i.e. a single
# 64-bit load and a single 64-bit store through the x87 stack.  Only
# %eax, %ecx, %edx, the flags and the x87 stack top are used.
        .p2align 4,,15
        .type   _Copy_conjoint_jlongs_atomic,@function
_Copy_conjoint_jlongs_atomic:
        movl    4+8(%esp),%ecx          # count
        movl    4+0(%esp),%eax          # from
        movl    4+4(%esp),%edx          # to
        cmpl    %eax,%edx
        jae     cla_CopyLeft
cla_CopyRight:
        subl    %eax,%edx               # %edx = to - from
        jmp     2f
        .p2align 4,,15
1:      fildll  (%eax)
        fistpll (%edx,%eax,1)
        addl    $8,%eax
2:      subl    $1,%ecx
        jge     1b
        ret
        .p2align 4,,15
3:      fildll  (%eax,%ecx,8)
        fistpll (%edx,%ecx,8)
cla_CopyLeft:
        subl    $1,%ecx
        jge     3b
        ret

# MMX variant of Copy::arrayof_conjoint_jshorts(void* from,
#                                               void* to,
#                                               size_t count)
#
# Same argument layout and direction selection as
# _Copy_arrayof_conjoint_jshorts.  The ascending path switches to a
# 64-bytes-per-iteration movq loop when the dword count is >= 33 and
# executes emms before leaving that loop; the descending path is the
# plain dword / rep smovl copy.
        .p2align 4,,15
        .type   _mmx_Copy_arrayof_conjoint_jshorts,@function
_mmx_Copy_arrayof_conjoint_jshorts:
        pushl   %esi
        movl    4+12(%esp),%ecx         # count
        pushl   %edi
        movl    8+ 4(%esp),%esi         # from
        movl    8+ 8(%esp),%edi         # to
        cmpl    %esi,%edi
        leal    -2(%esi,%ecx,2),%eax    # from + count*2 - 2
        jbe     mmx_acs_CopyRight
        cmpl    %eax,%edi
        jbe     mmx_acs_CopyLeft
mmx_acs_CopyRight:
        movl    %ecx,%eax               # word count
        sarl    %ecx                    # dword count
        je      5f
        cmpl    $33,%ecx
        jae     3f
1:      subl    %esi,%edi
        .p2align 4,,15
2:      movl    (%esi),%edx
        movl    %edx,(%edi,%esi,1)
        addl    $4,%esi
        subl    $1,%ecx
        jnz     2b
        addl    %esi,%edi
        jmp     5f
3:      smovl   # align to 8 bytes, we know we are 4 byte aligned to start
        subl    $1,%ecx
        # Alignment goes before the label so that the back edge lands on
        # the aligned loop head instead of re-running the padding.
        .p2align 4,,15
4:      movq    0(%esi),%mm0
        addl    $64,%edi
        movq    8(%esi),%mm1
        subl    $16,%ecx                # 16 dwords consumed per iteration
        movq    16(%esi),%mm2
        movq    %mm0,-64(%edi)
        movq    24(%esi),%mm0
        movq    %mm1,-56(%edi)
        movq    32(%esi),%mm1
        movq    %mm2,-48(%edi)
        movq    40(%esi),%mm2
        movq    %mm0,-40(%edi)
        movq    48(%esi),%mm0
        movq    %mm1,-32(%edi)
        movq    56(%esi),%mm1
        movq    %mm2,-24(%edi)
        movq    %mm0,-16(%edi)
        addl    $64,%esi
        movq    %mm1,-8(%edi)
        cmpl    $16,%ecx
        jge     4b
        emms
        testl   %ecx,%ecx
        ja      1b                      # leftover dwords go through the plain loop
5:      andl    $1,%eax                 # suffix count
        je      7f
6:      movw    (%esi),%dx
        movw    %dx,(%edi)
7:      popl    %edi
        popl    %esi
        ret
mmx_acs_CopyLeft:
        std
        leal    -4(%edi,%ecx,2),%edi    # to + count*2 - 4
        movl    %eax,%esi               # from + count*2 - 2
        movl    %ecx,%eax
        subl    $2,%esi                 # from + count*2 - 4
        sarl    %ecx                    # dword count
        je      4f
        cmpl    $32,%ecx
        ja      3f
        subl    %esi,%edi
        .p2align 4,,15
2:      movl    (%esi),%edx
        movl    %edx,(%edi,%esi,1)
        subl    $4,%esi
        subl    $1,%ecx
        jnz     2b
        addl    %esi,%edi
        jmp     4f
3:      rep;    smovl
4:      andl    $1,%eax                 # suffix count
        je      6f
        addl    $2,%esi
        addl    $2,%edi
5:      movw    (%esi),%dx
        movw    %dx,(%edi)
6:      cld
        popl    %edi
        popl    %esi
        ret


# Support for jlong Atomic::cmpxchg(jlong exchange_value,
#                                   volatile jlong* dest,
#                                   jlong compare_value)
#
# Loads exchange_value into %ecx:%ebx and compare_value into
# %edx:%eax, then executes lock cmpxchg8b on *dest.  The value left in
# %edx:%eax by cmpxchg8b is the return value.  %ebx and %edi are saved
# and restored.
        .p2align 4,,15
        .type   _Atomic_cmpxchg_long,@function
_Atomic_cmpxchg_long:
                                        # after both pushes:
                                        #   8(%esp) : return PC
        pushl   %ebx                    #   4(%esp) : old %ebx
        pushl   %edi                    #   0(%esp) : old %edi
        movl    12(%esp), %ebx          #  12(%esp) : exchange_value (low)
        movl    16(%esp), %ecx          #  16(%esp) : exchange_value (high)
        movl    24(%esp), %eax          #  24(%esp) : compare_value (low)
        movl    28(%esp), %edx          #  28(%esp) : compare_value (high)
        movl    20(%esp), %edi          #  20(%esp) : dest
        lock cmpxchg8b (%edi)
        popl    %edi
        popl    %ebx
        ret


# Support for jlong Atomic::load and Atomic::store.
# void _Atomic_move_long(const volatile jlong* src, volatile jlong* dst)
#
# Moves the 64-bit value with one fildll and one fistpll; only %eax and
# the x87 stack top are used.
        .p2align 4,,15
        .type   _Atomic_move_long,@function
_Atomic_move_long:
        movl    4(%esp), %eax           # src
        fildll  (%eax)
        movl    8(%esp), %eax           # dest
        fistpll (%eax)
        ret

# Declare that nothing in this object needs an executable stack, so
# the linker does not have to assume one for hand-written assembly.
        .section .note.GNU-stack,"",@progbits