#
# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation.
#
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
#
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#


        # NOTE WELL!  The _Copy functions are called directly
        # from server-compiler-generated code via CallLeafNoFP,
        # which means that they *must* either not use floating
        # point or use it in the same manner as does the server
        # compiler.
# Copy entry points exported from this file.
        .globl  _Copy_arrayof_conjoint_bytes
        .globl  _Copy_arrayof_conjoint_jshorts
        .globl  _Copy_conjoint_jshorts_atomic
        .globl  _Copy_arrayof_conjoint_jints
        .globl  _Copy_conjoint_jints_atomic
        .globl  _Copy_arrayof_conjoint_jlongs
        .globl  _Copy_conjoint_jlongs_atomic

        .text

# SpinPause
#
# Issues a single spin-wait hint and returns with the constant 1 in
# %rax.  Reads no argument registers and writes no register other
# than %rax.
        .globl  SpinPause
        .p2align 4
        .type   SpinPause,@function
SpinPause:
        pause                           # encodes as "rep; nop"
        movq    $1,%rax
        ret

# Support for void Copy::arrayof_conjoint_bytes(void* from,
#                                               void* to,
#                                               size_t count)
#
# Copies 'count' bytes from 'from' to 'to'.  The ranges may overlap.
#
# In:
#   rdi - from
#   rsi - to
#   rdx - count, treated as ssize_t
#
# Overwritten: rax, rcx, rdx, rsi, r8, flags.  rdi is never written.
#
# Direction (both compares are unsigned):
#   to <= from, or to > from + count - 1
#       acb_CopyRight: lowest address first.
#   from < to <= from + count - 1
#       acb_CopyLeft: highest address first, so every source byte is
#       read before the copy stores over it.
#
# Either way the bulk is moved as qwords - four per iteration while at
# least four remain, then one at a time - and the remaining count & 7
# bytes are moved with at most one dword, one word and one byte move.
#
        .p2align 4,,15
        .type   _Copy_arrayof_conjoint_bytes,@function
_Copy_arrayof_conjoint_bytes:
        movq    %rdx,%r8                # r8  = byte count
        shrq    $3,%rdx                 # rdx = qword count
        cmpq    %rdi,%rsi               # compare to with from; the leaq
                                        #  below leaves the flags alone
        leaq    -1(%rdi,%r8,1),%rax     # rax = from + bcount*1 - 1
        jbe     acb_CopyRight           # to <= from
        cmpq    %rax,%rsi
        jbe     acb_CopyLeft            # to starts inside the source range
acb_CopyRight:
        leaq    -8(%rdi,%rdx,8),%rax    # rax = from + qcount*8 - 8
        leaq    -8(%rsi,%rdx,8),%rcx    # rcx = to + qcount*8 - 8
        negq    %rdx                    # rdx = -qcount, counts up to zero
        jmp     .Lacb_up_enter
        .p2align 4,,15
.Lacb_up_qword:                         # one qword per pass (1-3 left)
        movq    8(%rax,%rdx,8),%rsi
        movq    %rsi,8(%rcx,%rdx,8)
        addq    $1,%rdx
        jnz     .Lacb_up_qword
.Lacb_up_tail:                          # rdx == 0 here
        testq   $4,%r8                  # is there a trailing dword?
        jz      .Lacb_up_tail_word
        movl    8(%rax),%esi            # move the trailing dword
        movl    %esi,8(%rcx)
        addq    $4,%rax
        addq    $4,%rcx                 # rsi has served as the data
                                        #  register, so 'to' is tracked
                                        #  in rcx instead
.Lacb_up_tail_word:
        testq   $2,%r8                  # is there a trailing word?
        jz      .Lacb_up_tail_byte
        movw    8(%rax),%si             # move the trailing word
        movw    %si,8(%rcx)
        addq    $2,%rcx
.Lacb_up_tail_byte:
        testq   $1,%r8                  # is there a trailing byte?
        jz      .Lacb_up_done
        movb    -1(%rdi,%r8,1),%al      # last source byte, addressed
                                        #  through the untouched rdi
        movb    %al,8(%rcx)
.Lacb_up_done:
        ret
        .p2align 4,,15
.Lacb_up_unrolled:                      # four qwords per pass
        movq    -24(%rax,%rdx,8),%rsi
        movq    %rsi,-24(%rcx,%rdx,8)
        movq    -16(%rax,%rdx,8),%rsi
        movq    %rsi,-16(%rcx,%rdx,8)
        movq    -8(%rax,%rdx,8),%rsi
        movq    %rsi,-8(%rcx,%rdx,8)
        movq    (%rax,%rdx,8),%rsi
        movq    %rsi,(%rcx,%rdx,8)
.Lacb_up_enter:
        addq    $4,%rdx
        jle     .Lacb_up_unrolled       # four or more qwords remain
        subq    $4,%rdx                 # take the look-ahead back
        jl      .Lacb_up_qword          # one to three qwords remain
        jmp     .Lacb_up_tail
acb_CopyLeft:
        testq   $1,%r8                  # is there a trailing byte?
        jz      .Lacb_down_tail_word
        movb    -1(%rdi,%r8,1),%cl      # move the trailing byte
        movb    %cl,-1(%rsi,%r8,1)
        subq    $1,%r8                  # so a trailing word sits at -2
.Lacb_down_tail_word:
        testq   $2,%r8                  # is there a trailing word?
        jz      .Lacb_down_tail_dword
        movw    -2(%rdi,%r8,1),%cx      # move the trailing word
        movw    %cx,-2(%rsi,%r8,1)
.Lacb_down_tail_dword:
        testq   $4,%r8                  # is there a trailing dword?
        jz      .Lacb_down_enter
        movl    (%rdi,%rdx,8),%ecx      # move the trailing dword, which
        movl    %ecx,(%rsi,%rdx,8)      #  sits right after the qwords
        jmp     .Lacb_down_enter
        .p2align 4,,15
.Lacb_down_qword:                       # one qword per pass (1-3 left)
        movq    -8(%rdi,%rdx,8),%rcx
        movq    %rcx,-8(%rsi,%rdx,8)
        subq    $1,%rdx
        jnz     .Lacb_down_qword
        ret
        .p2align 4,,15
.Lacb_down_unrolled:                    # four qwords per pass
        movq    24(%rdi,%rdx,8),%rcx
        movq    %rcx,24(%rsi,%rdx,8)
        movq    16(%rdi,%rdx,8),%rcx
        movq    %rcx,16(%rsi,%rdx,8)
        movq    8(%rdi,%rdx,8),%rcx
        movq    %rcx,8(%rsi,%rdx,8)
        movq    (%rdi,%rdx,8),%rcx
        movq    %rcx,(%rsi,%rdx,8)
.Lacb_down_enter:
        subq    $4,%rdx
        jge     .Lacb_down_unrolled     # four or more qwords remain
        addq    $4,%rdx                 # take the look-ahead back
        jg      .Lacb_down_qword        # one to three qwords remain
        ret

# Support for void Copy::arrayof_conjoint_jshorts(void* from,
#                                                 void* to,
#                                                 size_t count)
# Equivalent to
#   conjoint_jshorts_atomic
#
# If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
# let the hardware handle it.  The two or four words within dwords
# or qwords that span cache line boundaries will still be loaded
# and stored atomically.
#
# rdi - from
# rsi - to
# rdx - count, treated as ssize_t
#
# 'count' is a number of 2-byte elements.  Overwritten: rax, rcx, rdx,
# rsi, r8, flags.  rdi is never written.
#
# Direction (both compares are unsigned):
#   to <= from, or to > from + count*2 - 2
#       acs_CopyRight: lowest address first.
#   from < to <= from + count*2 - 2
#       acs_CopyLeft: highest address first.
#
# count / 4 qwords are moved, four per iteration while at least four
# remain and then one at a time; the remaining count & 3 elements are
# moved with at most one dword and one word move.
#
        .p2align 4,,15
        .type   _Copy_arrayof_conjoint_jshorts,@function
        .type   _Copy_conjoint_jshorts_atomic,@function
_Copy_arrayof_conjoint_jshorts:
_Copy_conjoint_jshorts_atomic:
        movq    %rdx,%r8                # r8  = word count
        shrq    $2,%rdx                 # rdx = qword count
        cmpq    %rdi,%rsi               # compare to with from; the leaq
                                        #  below leaves the flags alone
        leaq    -2(%rdi,%r8,2),%rax     # rax = from + wcount*2 - 2
        jbe     acs_CopyRight           # to <= from
        cmpq    %rax,%rsi
        jbe     acs_CopyLeft            # to starts inside the source range
acs_CopyRight:
        leaq    -8(%rdi,%rdx,8),%rax    # rax = from + qcount*8 - 8
        leaq    -8(%rsi,%rdx,8),%rcx    # rcx = to + qcount*8 - 8
        negq    %rdx                    # rdx = -qcount, counts up to zero
        jmp     .Lacs_up_enter
.Lacs_up_qword:                         # one qword per pass (1-3 left)
        movq    8(%rax,%rdx,8),%rsi
        movq    %rsi,8(%rcx,%rdx,8)
        addq    $1,%rdx
        jnz     .Lacs_up_qword
.Lacs_up_tail:                          # rdx == 0 here
        testq   $2,%r8                  # is there a trailing dword?
        jz      .Lacs_up_tail_word
        movl    8(%rax),%esi            # move the trailing dword
        movl    %esi,8(%rcx)
        addq    $4,%rcx                 # rsi has served as the data
                                        #  register, so 'to' is tracked
                                        #  in rcx instead
.Lacs_up_tail_word:
        testq   $1,%r8                  # is there a trailing word?
        jz      .Lacs_up_done
        movw    -2(%rdi,%r8,2),%si      # last source word, addressed
                                        #  through the untouched rdi
        movw    %si,8(%rcx)
.Lacs_up_done:
        ret
        .p2align 4,,15
.Lacs_up_unrolled:                      # four qwords per pass
        movq    -24(%rax,%rdx,8),%rsi
        movq    %rsi,-24(%rcx,%rdx,8)
        movq    -16(%rax,%rdx,8),%rsi
        movq    %rsi,-16(%rcx,%rdx,8)
        movq    -8(%rax,%rdx,8),%rsi
        movq    %rsi,-8(%rcx,%rdx,8)
        movq    (%rax,%rdx,8),%rsi
        movq    %rsi,(%rcx,%rdx,8)
.Lacs_up_enter:
        addq    $4,%rdx
        jle     .Lacs_up_unrolled       # four or more qwords remain
        subq    $4,%rdx                 # take the look-ahead back
        jl      .Lacs_up_qword          # one to three qwords remain
        jmp     .Lacs_up_tail
acs_CopyLeft:
        testq   $1,%r8                  # is there a trailing word?
        jz      .Lacs_down_tail_dword
        movw    -2(%rdi,%r8,2),%cx      # move the trailing word
        movw    %cx,-2(%rsi,%r8,2)
.Lacs_down_tail_dword:
        testq   $2,%r8                  # is there a trailing dword?
        jz      .Lacs_down_enter
        movl    (%rdi,%rdx,8),%ecx      # move the trailing dword, which
        movl    %ecx,(%rsi,%rdx,8)      #  sits right after the qwords
        jmp     .Lacs_down_enter
.Lacs_down_qword:                       # one qword per pass (1-3 left)
        movq    -8(%rdi,%rdx,8),%rcx
        movq    %rcx,-8(%rsi,%rdx,8)
        subq    $1,%rdx
        jnz     .Lacs_down_qword
        ret
        .p2align 4,,15
.Lacs_down_unrolled:                    # four qwords per pass
        movq    24(%rdi,%rdx,8),%rcx
        movq    %rcx,24(%rsi,%rdx,8)
        movq    16(%rdi,%rdx,8),%rcx
        movq    %rcx,16(%rsi,%rdx,8)
        movq    8(%rdi,%rdx,8),%rcx
        movq    %rcx,8(%rsi,%rdx,8)
        movq    (%rdi,%rdx,8),%rcx
        movq    %rcx,(%rsi,%rdx,8)
.Lacs_down_enter:
        subq    $4,%rdx
        jge     .Lacs_down_unrolled     # four or more qwords remain
        addq    $4,%rdx                 # take the look-ahead back
        jg      .Lacs_down_qword        # one to three qwords remain
        ret

# Support for void Copy::arrayof_conjoint_jints(jint* from,
#                                               jint* to,
#                                               size_t count)
# Equivalent to
#   conjoint_jints_atomic
#
# If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
# the hardware handle it.  The two dwords within qwords that span
# cache line boundaries will still be loaded and stored atomically.
#
# rdi - from
# rsi - to
# rdx - count, treated as ssize_t
#
# 'count' is a number of 4-byte elements.  Overwritten: rax, rcx, rdx,
# rsi, r8, flags.  rdi is never written.
#
# aci_CopyRight (to <= from, or to > from + count*4 - 4) moves the
# lowest address first; aci_CopyLeft moves the highest address first.
# count / 2 qwords are moved, plus one dword when count is odd.
#
        .p2align 4,,15
        .type   _Copy_arrayof_conjoint_jints,@function
        .type   _Copy_conjoint_jints_atomic,@function
_Copy_arrayof_conjoint_jints:
_Copy_conjoint_jints_atomic:
        movq    %rdx,%r8                # r8  = dword count
        shrq    $1,%rdx                 # rdx = qword count
        cmpq    %rdi,%rsi               # compare to with from; the leaq
                                        #  below leaves the flags alone
        leaq    -4(%rdi,%r8,4),%rax     # rax = from + dcount*4 - 4
        jbe     aci_CopyRight           # to <= from
        cmpq    %rax,%rsi
        jbe     aci_CopyLeft            # to starts inside the source range
aci_CopyRight:
        leaq    -8(%rdi,%rdx,8),%rax    # rax = from + qcount*8 - 8
        leaq    -8(%rsi,%rdx,8),%rcx    # rcx = to + qcount*8 - 8
        negq    %rdx                    # rdx = -qcount, counts up to zero
        jmp     .Laci_up_enter
        .p2align 4,,15
.Laci_up_qword:                         # one qword per pass (1-3 left)
        movq    8(%rax,%rdx,8),%rsi
        movq    %rsi,8(%rcx,%rdx,8)
        addq    $1,%rdx
        jnz     .Laci_up_qword
.Laci_up_tail:                          # rdx == 0 here
        testq   $1,%r8                  # is there a trailing dword?
        jz      .Laci_up_done
        movl    8(%rax),%esi            # move the trailing dword
        movl    %esi,8(%rcx)
.Laci_up_done:
        ret
        .p2align 4,,15
.Laci_up_unrolled:                      # four qwords per pass
        movq    -24(%rax,%rdx,8),%rsi
        movq    %rsi,-24(%rcx,%rdx,8)
        movq    -16(%rax,%rdx,8),%rsi
        movq    %rsi,-16(%rcx,%rdx,8)
        movq    -8(%rax,%rdx,8),%rsi
        movq    %rsi,-8(%rcx,%rdx,8)
        movq    (%rax,%rdx,8),%rsi
        movq    %rsi,(%rcx,%rdx,8)
.Laci_up_enter:
        addq    $4,%rdx
        jle     .Laci_up_unrolled       # four or more qwords remain
        subq    $4,%rdx                 # take the look-ahead back
        jl      .Laci_up_qword          # one to three qwords remain
        jmp     .Laci_up_tail
aci_CopyLeft:
        testq   $1,%r8                  # is there a trailing dword?
        jz      .Laci_down_enter
        movl    -4(%rdi,%r8,4),%ecx     # move the trailing dword
        movl    %ecx,-4(%rsi,%r8,4)
        jmp     .Laci_down_enter
.Laci_down_qword:                       # one qword per pass (1-3 left)
        movq    -8(%rdi,%rdx,8),%rcx
        movq    %rcx,-8(%rsi,%rdx,8)
        subq    $1,%rdx
        jnz     .Laci_down_qword
        ret
        .p2align 4,,15
.Laci_down_unrolled:                    # four qwords per pass
        movq    24(%rdi,%rdx,8),%rcx
        movq    %rcx,24(%rsi,%rdx,8)
        movq    16(%rdi,%rdx,8),%rcx
        movq    %rcx,16(%rsi,%rdx,8)
        movq    8(%rdi,%rdx,8),%rcx
        movq    %rcx,8(%rsi,%rdx,8)
        movq    (%rdi,%rdx,8),%rcx
        movq    %rcx,(%rsi,%rdx,8)
.Laci_down_enter:
        subq    $4,%rdx
        jge     .Laci_down_unrolled     # four or more qwords remain
        addq    $4,%rdx                 # take the look-ahead back
        jg      .Laci_down_qword        # one to three qwords remain
        ret

# Support for void Copy::arrayof_conjoint_jlongs(jlong* from,
#                                                jlong* to,
#                                                size_t count)
# Equivalent to
#   conjoint_jlongs_atomic
#   arrayof_conjoint_oops
#   conjoint_oops_atomic
#
# rdi - from
# rsi - to
# rdx - count, treated as ssize_t
#
# 'count' is a number of 8-byte elements, so there is no tail to
# handle.  Overwritten: rax, rcx, rdx, rsi, flags.  rdi is never
# written.
#
# acl_CopyRight (to <= from, or to > from + count*8 - 8) moves the
# lowest address first; acl_CopyLeft moves the highest address first.
#
        .p2align 4,,15
        .type   _Copy_arrayof_conjoint_jlongs,@function
        .type   _Copy_conjoint_jlongs_atomic,@function
_Copy_arrayof_conjoint_jlongs:
_Copy_conjoint_jlongs_atomic:
        cmpq    %rdi,%rsi               # compare to with from; the leaq
                                        #  below leaves the flags alone
        leaq    -8(%rdi,%rdx,8),%rax    # rax = from + count*8 - 8
        jbe     acl_CopyRight           # to <= from
        cmpq    %rax,%rsi
        jbe     acl_CopyLeft            # to starts inside the source range
acl_CopyRight:
        leaq    -8(%rsi,%rdx,8),%rcx    # rcx = to + count*8 - 8; rax from
                                        #  the overlap test is the source
                                        #  base
        negq    %rdx                    # rdx = -count, counts up to zero
        jmp     .Lacl_up_enter
.Lacl_up_qword:                         # one qword per pass (1-3 left)
        movq    8(%rax,%rdx,8),%rsi
        movq    %rsi,8(%rcx,%rdx,8)
        addq    $1,%rdx
        jnz     .Lacl_up_qword
        ret
        .p2align 4,,15
.Lacl_up_unrolled:                      # four qwords per pass
        movq    -24(%rax,%rdx,8),%rsi
        movq    %rsi,-24(%rcx,%rdx,8)
        movq    -16(%rax,%rdx,8),%rsi
        movq    %rsi,-16(%rcx,%rdx,8)
        movq    -8(%rax,%rdx,8),%rsi
        movq    %rsi,-8(%rcx,%rdx,8)
        movq    (%rax,%rdx,8),%rsi
        movq    %rsi,(%rcx,%rdx,8)
.Lacl_up_enter:
        addq    $4,%rdx
        jle     .Lacl_up_unrolled       # four or more qwords remain
        subq    $4,%rdx                 # take the look-ahead back
        jl      .Lacl_up_qword          # one to three qwords remain
        ret
.Lacl_down_qword:                       # one qword per pass (1-3 left)
        movq    -8(%rdi,%rdx,8),%rcx
        movq    %rcx,-8(%rsi,%rdx,8)
        subq    $1,%rdx
        jnz     .Lacl_down_qword
        ret
        .p2align 4,,15
.Lacl_down_unrolled:                    # four qwords per pass
        movq    24(%rdi,%rdx,8),%rcx
        movq    %rcx,24(%rsi,%rdx,8)
        movq    16(%rdi,%rdx,8),%rcx
        movq    %rcx,16(%rsi,%rdx,8)
        movq    8(%rdi,%rdx,8),%rcx
        movq    %rcx,8(%rsi,%rdx,8)
        movq    (%rdi,%rdx,8),%rcx
        movq    %rcx,(%rsi,%rdx,8)
acl_CopyLeft:
        subq    $4,%rdx
        jge     .Lacl_down_unrolled     # four or more qwords remain
        addq    $4,%rdx                 # take the look-ahead back
        jg      .Lacl_down_qword        # one to three qwords remain
        ret