/
/ Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved.
/ DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
/
/ This code is free software; you can redistribute it and/or modify it
/ under the terms of the GNU General Public License version 2 only, as
/ published by the Free Software Foundation.
/
/ This code is distributed in the hope that it will be useful, but WITHOUT
/ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
/ FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
/ version 2 for more details (a copy is included in the LICENSE file that
/ accompanied this code).
/
/ You should have received a copy of the GNU General Public License version
/ 2 along with this work; if not, write to the Free Software Foundation,
/ Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
/
/ Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
/ or visit www.oracle.com if you need additional information or have any
/ questions.
/

/ Exported fast thread-local-storage accessors (defined below).
        .globl fs_load
        .globl fs_thread

// NOTE WELL!  The _Copy functions are called directly
// from server-compiler-generated code via CallLeafNoFP,
// which means that they *must* either not use floating
// point or use it in the same manner as does the server
// compiler.
/ Exported conjoint-copy stubs; the *_atomic entry points alias the
/ arrayof_* entry points of the same element size (see labels below).
        .globl _Copy_arrayof_conjoint_bytes
        .globl _Copy_conjoint_jshorts_atomic
        .globl _Copy_arrayof_conjoint_jshorts
        .globl _Copy_conjoint_jints_atomic
        .globl _Copy_arrayof_conjoint_jints
        .globl _Copy_conjoint_jlongs_atomic
        .globl _Copy_arrayof_conjoint_jlongs

        .section .text,"ax"

/ Fast thread accessors, used by threadLS_solaris_amd64.cpp

/ uintptr_t fs_load(uintptr_t offset)
/ rdi - offset into the %fs segment
/ rax - value loaded from %fs:offset
        .align   16
fs_load:
        movq     %fs:(%rdi),%rax
        ret

/ uintptr_t fs_thread()
/ rax - value at %fs:0 (the thread self-pointer slot)
        .align   16
fs_thread:
        movq     %fs:0x0,%rax
        ret

/ Support for void Copy::arrayof_conjoint_bytes(void* from,
/                                               void* to,
/                                               size_t count)
/ rdi - from
/ rsi - to
/ rdx - count, treated as ssize_t
/
/ Copies ascending ("CopyRight") when to <= from, or when the regions do
/ not overlap; copies descending ("CopyLeft") when to lies inside
/ [from, from+count), so overlapping moves never clobber unread source.
/ The bulk is moved as qwords in a 4x-unrolled loop indexed by a negative
/ count that runs up to zero; up to 7 trailing bytes are handled as an
/ optional dword, word, and byte.
        .align   16
_Copy_arrayof_conjoint_bytes:
        movq     %rdx,%r8             / byte count
        shrq     $3,%rdx              / qword count
        cmpq     %rdi,%rsi
        leaq     -1(%rdi,%r8,1),%rax  / from + bcount*1 - 1
        jbe      acb_CopyRight        / to <= from: safe to copy ascending
        cmpq     %rax,%rsi
        jbe      acb_CopyLeft         / to <= last source byte: overlap, copy descending
acb_CopyRight:
        leaq     -8(%rdi,%rdx,8),%rax / from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx / to + qcount*8 - 8
        negq     %rdx                 / index runs from -qcount up to 0
        jmp      7f
        .align   16
1:      movq     8(%rax,%rdx,8),%rsi  / residual qwords, one at a time
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      1b
2:      testq    $4,%r8               / check for trailing dword
        jz       3f
        movl     8(%rax),%esi         / copy trailing dword
        movl     %esi,8(%rcx)
        addq     $4,%rax
        addq     $4,%rcx              / original %rsi is trashed, so we
                                      /  can't use it as a base register
3:      testq    $2,%r8               / check for trailing word
        jz       4f
        movw     8(%rax),%si          / copy trailing word
        movw     %si,8(%rcx)
        addq     $2,%rcx
4:      testq    $1,%r8               / check for trailing byte
        jz       5f
        movb     -1(%rdi,%r8,1),%al   / copy trailing byte (addressed from %rdi;
        movb     %al,8(%rcx)          /  %rax may not have been advanced above)
5:      ret
        .align   16
6:      movq     -24(%rax,%rdx,8),%rsi / 4x-unrolled ascending qword copy
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
7:      addq     $4,%rdx              / >= 4 qwords left? take unrolled loop
        jle      6b
        subq     $4,%rdx              / restore index; 1-3 qwords left?
        jl       1b
        jmp      2b
acb_CopyLeft:
        / Descending copy: peel trailing byte/word/dword first, then move
        / the remaining qwords from high addresses down to low.
        testq    $1,%r8               / check for trailing byte
        jz       1f
        movb     -1(%rdi,%r8,1),%cl   / copy trailing byte
        movb     %cl,-1(%rsi,%r8,1)
        subq     $1,%r8               / adjust for possible trailing word
1:      testq    $2,%r8               / check for trailing word
        jz       2f
        movw     -2(%rdi,%r8,1),%cx   / copy trailing word
        movw     %cx,-2(%rsi,%r8,1)
2:      testq    $4,%r8               / check for trailing dword
        jz       5f
        movl     (%rdi,%rdx,8),%ecx   / copy trailing dword
        movl     %ecx,(%rsi,%rdx,8)
        jmp      5f
        .align   16
3:      movq     -8(%rdi,%rdx,8),%rcx / residual qwords, one at a time
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      3b
        ret
        .align   16
4:      movq     24(%rdi,%rdx,8),%rcx / 4x-unrolled descending qword copy
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
5:      subq     $4,%rdx              / >= 4 qwords left? take unrolled loop
        jge      4b
        addq     $4,%rdx              / restore count; 1-3 qwords left?
        jg       3b
        ret

/ Support for void Copy::arrayof_conjoint_jshorts(void* from,
/                                                 void* to,
/                                                 size_t count)
/ Equivalent to
/   conjoint_jshorts_atomic
/
/ If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
/ let the hardware handle it.  The two or four words within dwords
/ or qwords that span cache line boundaries will still be loaded
/ and stored atomically.
/
/ rdi - from
/ rsi - to
/ rdx - count, treated as ssize_t
/
/ Direction selection and loop structure mirror
/ _Copy_arrayof_conjoint_bytes: ascending copy when to <= from or the
/ regions are disjoint, descending when the destination overlaps the
/ source tail.  Bulk is moved as qwords (4x unrolled, negative index
/ counting up to zero); the remainder is an optional dword and word.
        .align   16
_Copy_arrayof_conjoint_jshorts:
_Copy_conjoint_jshorts_atomic:
        movq     %rdx,%r8             / word count
        shrq     $2,%rdx              / qword count
        cmpq     %rdi,%rsi
        leaq     -2(%rdi,%r8,2),%rax  / from + wcount*2 - 2
        jbe      acs_CopyRight        / to <= from: safe to copy ascending
        cmpq     %rax,%rsi
        jbe      acs_CopyLeft         / overlap: copy descending
acs_CopyRight:
        leaq     -8(%rdi,%rdx,8),%rax / from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx / to + qcount*8 - 8
        negq     %rdx                 / index runs from -qcount up to 0
        jmp      6f
1:      movq     8(%rax,%rdx,8),%rsi  / residual qwords, one at a time
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      1b
2:      testq    $2,%r8               / check for trailing dword
        jz       3f
        movl     8(%rax),%esi         / copy trailing dword
        movl     %esi,8(%rcx)
        addq     $4,%rcx              / original %rsi is trashed, so we
                                      /  can't use it as a base register
3:      testq    $1,%r8               / check for trailing word
        jz       4f
        movw     -2(%rdi,%r8,2),%si   / copy trailing word (addressed from %rdi)
        movw     %si,8(%rcx)
4:      ret
        .align   16
5:      movq     -24(%rax,%rdx,8),%rsi / 4x-unrolled ascending qword copy
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
6:      addq     $4,%rdx              / >= 4 qwords left? take unrolled loop
        jle      5b
        subq     $4,%rdx              / restore index; 1-3 qwords left?
        jl       1b
        jmp      2b
acs_CopyLeft:
        / Descending copy: peel trailing word then dword, then move the
        / remaining qwords from high addresses down to low.
        testq    $1,%r8               / check for trailing word
        jz       1f
        movw     -2(%rdi,%r8,2),%cx   / copy trailing word
        movw     %cx,-2(%rsi,%r8,2)
1:      testq    $2,%r8               / check for trailing dword
        jz       4f
        movl     (%rdi,%rdx,8),%ecx   / copy trailing dword
        movl     %ecx,(%rsi,%rdx,8)
        jmp      4f
2:      movq     -8(%rdi,%rdx,8),%rcx / residual qwords, one at a time
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      2b
        ret
        .align   16
3:      movq     24(%rdi,%rdx,8),%rcx / 4x-unrolled descending qword copy
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
4:      subq     $4,%rdx              / >= 4 qwords left? take unrolled loop
        jge      3b
        addq     $4,%rdx              / restore count; 1-3 qwords left?
        jg       2b
        ret

/ Support
/ Support for void Copy::arrayof_conjoint_jints(jint* from,
/                                               jint* to,
/                                               size_t count)
/ Equivalent to
/   conjoint_jints_atomic
/
/ If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
/ the hardware handle it.  The two dwords within qwords that span
/ cache line boundaries will still be loaded and stored atomically.
/
/ rdi - from
/ rsi - to
/ rdx - count, treated as ssize_t
/
/ Same scheme as the byte/short stubs above: ascending copy when
/ to <= from or the regions are disjoint, descending when the
/ destination overlaps the source tail; qword bulk loop (4x unrolled,
/ negative index counting up to zero) plus one optional trailing dword.
        .align   16
_Copy_arrayof_conjoint_jints:
_Copy_conjoint_jints_atomic:
        movq     %rdx,%r8             / dword count
        shrq     %rdx                 / qword count (shift by 1)
        cmpq     %rdi,%rsi
        leaq     -4(%rdi,%r8,4),%rax  / from + dcount*4 - 4
        jbe      aci_CopyRight        / to <= from: safe to copy ascending
        cmpq     %rax,%rsi
        jbe      aci_CopyLeft         / overlap: copy descending
aci_CopyRight:
        leaq     -8(%rdi,%rdx,8),%rax / from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx / to + qcount*8 - 8
        negq     %rdx                 / index runs from -qcount up to 0
        jmp      5f
        .align   16
1:      movq     8(%rax,%rdx,8),%rsi  / residual qwords, one at a time
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      1b
2:      testq    $1,%r8               / check for trailing dword
        jz       3f
        movl     8(%rax),%esi         / copy trailing dword
        movl     %esi,8(%rcx)
3:      ret
        .align   16
4:      movq     -24(%rax,%rdx,8),%rsi / 4x-unrolled ascending qword copy
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
5:      addq     $4,%rdx              / >= 4 qwords left? take unrolled loop
        jle      4b
        subq     $4,%rdx              / restore index; 1-3 qwords left?
        jl       1b
        jmp      2b
aci_CopyLeft:
        / Descending copy: peel the odd trailing dword first, then move
        / the remaining qwords from high addresses down to low.
        testq    $1,%r8               / check for trailing dword
        jz       3f
        movl     -4(%rdi,%r8,4),%ecx  / copy trailing dword
        movl     %ecx,-4(%rsi,%r8,4)
        jmp      3f
1:      movq     -8(%rdi,%rdx,8),%rcx / residual qwords, one at a time
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      1b
        ret
        .align   16
2:      movq     24(%rdi,%rdx,8),%rcx / 4x-unrolled descending qword copy
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
3:      subq     $4,%rdx              / >= 4 qwords left? take unrolled loop
        jge      2b
        addq     $4,%rdx              / restore count; 1-3 qwords left?
        jg       1b
        ret
/ Support for void Copy::arrayof_conjoint_jlongs(jlong* from,
/                                                jlong* to,
/                                                size_t count)
/ Equivalent to
/   conjoint_jlongs_atomic
/   arrayof_conjoint_oops
/   conjoint_oops_atomic
/
/ rdi - from
/ rsi - to
/ rdx - count, treated as ssize_t
/
/ Elements are already qword-sized, so there is no trailing-element
/ handling: ascending copy when to <= from or the regions are disjoint,
/ descending when the destination overlaps the source tail; 4x-unrolled
/ qword loop with a negative index counting up to zero.
        .align   16
_Copy_arrayof_conjoint_jlongs:
_Copy_conjoint_jlongs_atomic:
        cmpq     %rdi,%rsi
        leaq     -8(%rdi,%rdx,8),%rax / from + count*8 - 8
        jbe      acl_CopyRight        / to <= from: safe to copy ascending
        cmpq     %rax,%rsi
        jbe      acl_CopyLeft         / overlap: copy descending
acl_CopyRight:
        leaq     -8(%rsi,%rdx,8),%rcx / to + count*8 - 8
        negq     %rdx                 / index runs from -count up to 0
        jmp      3f
1:      movq     8(%rax,%rdx,8),%rsi  / residual qwords, one at a time
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      1b
        ret
        .align   16
2:      movq     -24(%rax,%rdx,8),%rsi / 4x-unrolled ascending qword copy
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
3:      addq     $4,%rdx              / >= 4 qwords left? take unrolled loop
        jle      2b
        subq     $4,%rdx              / restore index; 1-3 qwords left?
        jl       1b
        ret
4:      movq     -8(%rdi,%rdx,8),%rcx / residual qwords, one at a time (descending)
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      4b
        ret
        .align   16
5:      movq     24(%rdi,%rdx,8),%rcx / 4x-unrolled descending qword copy
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
acl_CopyLeft:
        subq     $4,%rdx              / >= 4 qwords left? take unrolled loop
        jge      5b
        addq     $4,%rdx              / restore count; 1-3 qwords left?
        jg       4b
        ret