1 /
   2 / Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved.
   3 / DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 /
   5 / This code is free software; you can redistribute it and/or modify it
   6 / under the terms of the GNU General Public License version 2 only, as
   7 / published by the Free Software Foundation.
   8 /
   9 / This code is distributed in the hope that it will be useful, but WITHOUT
  10 / ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 / FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 / version 2 for more details (a copy is included in the LICENSE file that
  13 / accompanied this code).
  14 /
  15 / You should have received a copy of the GNU General Public License version
  16 / 2 along with this work; if not, write to the Free Software Foundation,
  17 / Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 /
  19 / Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 / or visit www.oracle.com if you need additional information or have any
  21 / questions.
  22 /
  23 
   24         .globl fs_load                / %fs-relative accessors defined below
   25         .globl fs_thread
   26 
   27         // NOTE WELL!  The _Copy functions are called directly
   28         // from server-compiler-generated code via CallLeafNoFP,
   29         // which means that they *must* either not use floating
   30         // point or use it in the same manner as does the server
   31         // compiler.
   32 
              / Exported copy entry points.  For jshorts, jints and jlongs the
              / arrayof_ name and the _atomic name label the same code below.
   33         .globl _Copy_arrayof_conjoint_bytes
   34         .globl _Copy_conjoint_jshorts_atomic
   35         .globl _Copy_arrayof_conjoint_jshorts
   36         .globl _Copy_conjoint_jints_atomic
   37         .globl _Copy_arrayof_conjoint_jints
   38         .globl _Copy_conjoint_jlongs_atomic
   39         .globl _Copy_arrayof_conjoint_jlongs
   40 
              / Everything that follows is emitted into .text
              / (section flags: a = allocatable, x = executable).
   41         .section .text,"ax"
  42 
   43         / Fast thread accessors, used by threadLS_solaris_amd64.cpp
              /
              / fs_load: return the qword stored at %fs:(%rdi).
              / rdi - offset of the qword, relative to the %fs segment base
              / rax - (out) the 64-bit value that was loaded
              / Leaf routine: one load, no stack use, no other register written.
              /
   44         .align   16
   45 fs_load:
   46         movq %fs:(%rdi),%rax
   47         ret
  48 
   49         .align   16
              / fs_thread: return the qword stored at offset 0 of the %fs segment.
              / Takes no arguments.
              / rax - (out) the 64-bit value that was loaded
              / NOTE(review): presumably that slot holds the current thread
              / pointer (cf. the routine name) - confirm against the caller.
   50 fs_thread:
   51         movq %fs:0x0,%rax
   52         ret
  53 
   54         / Support for void Copy::arrayof_conjoint_bytes(void* from,
   55         /                                               void* to,
   56         /                                               size_t count)
   57         / rdi - from
   58         / rsi - to
   59         / rdx - count, treated as ssize_t
   60         /
              / Copies 'count' bytes from 'from' to 'to'; the ranges may overlap.
              / The copy direction is chosen at entry:
              /   to <= from, or to above the last source byte:
              /       acb_CopyRight - ascending addresses
              /   from < to <= address of the last source byte:
              /       acb_CopyLeft  - descending addresses
              / Both paths move the count/8 full qwords with 8-byte loads and
              / stores (four per iteration while at least four remain, then one
              / at a time) and move the count%8 tail as at most one dword, one
              / word and one byte.
              /
              / Working registers:
              /   r8  - byte count
              /   rdx - qword count; negated and counted up to zero when
              /         ascending, counted down to zero when descending
              /   rax, rcx - ascending path: source / destination bases, biased
              /         by -8 so that 8(base,rdx,8) is the next qword
              /   rsi - scratch on the ascending path, 'to' on the descending path
              /   rcx - scratch on the descending path
              / May modify rax, rcx, rdx, rsi, r8 and the flags.  Uses no stack.
              /
   61         .align   16
   62 _Copy_arrayof_conjoint_bytes:
   63         movq     %rdx,%r8             / byte count
   64         shrq     $3,%rdx              / qword count
   65         cmpq     %rdi,%rsi            / flags = to - from; leaq below leaves them intact
   66         leaq     -1(%rdi,%r8,1),%rax  / from + bcount*1 - 1
   67         jbe      acb_CopyRight        / to <= from: ascending copy
   68         cmpq     %rax,%rsi            / to vs. address of last source byte
   69         jbe      acb_CopyLeft         / to lies inside the source range: descending copy
              / otherwise the destination starts above the source range: fall through
   70 acb_CopyRight:
   71         leaq     -8(%rdi,%rdx,8),%rax / from + qcount*8 - 8
   72         leaq     -8(%rsi,%rdx,8),%rcx / to + qcount*8 - 8
   73         negq     %rdx                 / rdx = -qcount, counts up towards zero
   74         jmp      7f                   / enter the qword loop at its test
   75         .align   16
              / 1-3 qwords left (rdx = -3..-1): copy one per iteration
   76 1:      movq     8(%rax,%rdx,8),%rsi
   77         movq     %rsi,8(%rcx,%rdx,8)
   78         addq     $1,%rdx
   79         jnz      1b
              / all qwords done; 8(%rax) / 8(%rcx) address the first tail byte
   80 2:      testq    $4,%r8               / check for trailing dword
   81         jz       3f
   82         movl     8(%rax),%esi         / copy trailing dword
   83         movl     %esi,8(%rcx)
   84         addq     $4,%rax
   85         addq     $4,%rcx              / original %rsi is trashed, so we
   86                                       /  can't use it as a base register
   87 3:      testq    $2,%r8               / check for trailing word
   88         jz       4f
   89         movw     8(%rax),%si          / copy trailing word
   90         movw     %si,8(%rcx)
   91         addq     $2,%rcx              / only the destination base is stepped: the
              /  byte below is fetched through the intact rdi and r8
   92 4:      testq    $1,%r8               / check for trailing byte
   93         jz       5f
   94         movb     -1(%rdi,%r8,1),%al   / copy trailing byte
   95         movb     %al,8(%rcx)
   96 5:      ret
   97         .align   16
              / four qwords per iteration; rdx was already advanced by 4 at
              / label 7, hence the offsets -24, -16, -8 and 0
   98 6:      movq     -24(%rax,%rdx,8),%rsi
   99         movq     %rsi,-24(%rcx,%rdx,8)
  100         movq     -16(%rax,%rdx,8),%rsi
  101         movq     %rsi,-16(%rcx,%rdx,8)
  102         movq     -8(%rax,%rdx,8),%rsi
  103         movq     %rsi,-8(%rcx,%rdx,8)
  104         movq     (%rax,%rdx,8),%rsi
  105         movq     %rsi,(%rcx,%rdx,8)
  106 7:      addq     $4,%rdx              / claim the next four qwords
  107         jle      6b                   / still <= 0: at least four remained
  108         subq     $4,%rdx              / fewer than four: undo the claim
  109         jl       1b                   / 1-3 qwords left
  110         jmp      2b                   / none left: handle the tail
  111 acb_CopyLeft:
              / descending copy: tail first (byte, word, dword), then the
              / qwords from the highest address down
  112         testq    $1,%r8               / check for trailing byte
  113         jz       1f
  114         movb     -1(%rdi,%r8,1),%cl   / copy trailing byte
  115         movb     %cl,-1(%rsi,%r8,1)
  116         subq     $1,%r8               / adjust for possible trailing word
  117 1:      testq    $2,%r8               / check for trailing word
  118         jz       2f
  119         movw     -2(%rdi,%r8,1),%cx   / copy trailing word
  120         movw     %cx,-2(%rsi,%r8,1)
  121 2:      testq    $4,%r8               / check for trailing dword
  122         jz       5f
  123         movl     (%rdi,%rdx,8),%ecx   / copy trailing dword
  124         movl     %ecx,(%rsi,%rdx,8)
  125         jmp      5f                   / enter the qword loop at its test
  126         .align   16
              / 1-3 qwords left: copy qword number rdx-1, count down to zero
  127 3:      movq     -8(%rdi,%rdx,8),%rcx
  128         movq     %rcx,-8(%rsi,%rdx,8)
  129         subq     $1,%rdx
  130         jnz      3b
  131         ret
  132         .align   16
              / four qwords per iteration, highest address first; rdx was
              / already reduced by 4 at label 5
  133 4:      movq     24(%rdi,%rdx,8),%rcx
  134         movq     %rcx,24(%rsi,%rdx,8)
  135         movq     16(%rdi,%rdx,8),%rcx
  136         movq     %rcx,16(%rsi,%rdx,8)
  137         movq     8(%rdi,%rdx,8),%rcx
  138         movq     %rcx,8(%rsi,%rdx,8)
  139         movq     (%rdi,%rdx,8),%rcx
  140         movq     %rcx,(%rsi,%rdx,8)
  141 5:      subq     $4,%rdx              / claim the next four qwords
  142         jge      4b                   / still >= 0: at least four remained
  143         addq     $4,%rdx              / fewer than four: undo the claim
  144         jg       3b                   / 1-3 qwords left
  145         ret
 146 
  147         / Support for void Copy::arrayof_conjoint_jshorts(void* from,
  148         /                                                 void* to,
  149         /                                                 size_t count)
  150         / Equivalent to
  151         /   conjoint_jshorts_atomic
  152         /
  153         / If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
  154         / let the hardware handle it.  The two or four words within dwords
  155         / or qwords that span cache line boundaries will still be loaded
  156         / and stored atomically.
  157         /
  158         / rdi - from
  159         / rsi - to
  160         / rdx - count, treated as ssize_t
  161         /
              / Copies 'count' 16-bit words; the ranges may overlap.  The copy
              / direction is chosen at entry:
              /   to <= from, or to above the last source word:
              /       acs_CopyRight - ascending addresses
              /   from < to <= address of the last source word:
              /       acs_CopyLeft  - descending addresses
              / The count/4 full qwords move with 8-byte loads and stores (four
              / per iteration while at least four remain, then one at a time);
              / the count%4 tail is at most one dword and one word move.  Every
              / access is at a multiple of 2 bytes from 'from' / 'to', so no
              / word is ever split across two moves.
              /
              / Working registers:
              /   r8  - word count
              /   rdx - qword count; negated and counted up to zero when
              /         ascending, counted down to zero when descending
              /   rax, rcx - ascending path: source / destination bases, biased
              /         by -8 so that 8(base,rdx,8) is the next qword
              /   rsi - scratch on the ascending path, 'to' on the descending path
              /   rcx - scratch on the descending path
              / May modify rax, rcx, rdx, rsi, r8 and the flags.  Uses no stack.
              /
  162         .align   16
  163 _Copy_arrayof_conjoint_jshorts:
  164 _Copy_conjoint_jshorts_atomic:
  165         movq     %rdx,%r8             / word count
  166         shrq     $2,%rdx              / qword count
  167         cmpq     %rdi,%rsi            / flags = to - from; leaq below leaves them intact
  168         leaq     -2(%rdi,%r8,2),%rax  / from + wcount*2 - 2
  169         jbe      acs_CopyRight        / to <= from: ascending copy
  170         cmpq     %rax,%rsi            / to vs. address of last source word
  171         jbe      acs_CopyLeft         / to lies inside the source range: descending copy
              / otherwise the destination starts above the source range: fall through
  172 acs_CopyRight:
  173         leaq     -8(%rdi,%rdx,8),%rax / from + qcount*8 - 8
  174         leaq     -8(%rsi,%rdx,8),%rcx / to + qcount*8 - 8
  175         negq     %rdx                 / rdx = -qcount, counts up towards zero
  176         jmp      6f                   / enter the qword loop at its test
              / 1-3 qwords left (rdx = -3..-1): copy one per iteration
  177 1:      movq     8(%rax,%rdx,8),%rsi
  178         movq     %rsi,8(%rcx,%rdx,8)
  179         addq     $1,%rdx
  180         jnz      1b
              / all qwords done; 8(%rax) / 8(%rcx) address the first tail word
  181 2:      testq    $2,%r8               / check for trailing dword
  182         jz       3f
  183         movl     8(%rax),%esi         / copy trailing dword
  184         movl     %esi,8(%rcx)
  185         addq     $4,%rcx              / original %rsi is trashed, so we
  186                                       /  can't use it as a base register
  187 3:      testq    $1,%r8               / check for trailing word
  188         jz       4f
  189         movw     -2(%rdi,%r8,2),%si   / copy trailing word
  190         movw     %si,8(%rcx)
  191 4:      ret
  192         .align   16
              / four qwords per iteration; rdx was already advanced by 4 at
              / label 6, hence the offsets -24, -16, -8 and 0
  193 5:      movq     -24(%rax,%rdx,8),%rsi
  194         movq     %rsi,-24(%rcx,%rdx,8)
  195         movq     -16(%rax,%rdx,8),%rsi
  196         movq     %rsi,-16(%rcx,%rdx,8)
  197         movq     -8(%rax,%rdx,8),%rsi
  198         movq     %rsi,-8(%rcx,%rdx,8)
  199         movq     (%rax,%rdx,8),%rsi
  200         movq     %rsi,(%rcx,%rdx,8)
  201 6:      addq     $4,%rdx              / claim the next four qwords
  202         jle      5b                   / still <= 0: at least four remained
  203         subq     $4,%rdx              / fewer than four: undo the claim
  204         jl       1b                   / 1-3 qwords left
  205         jmp      2b                   / none left: handle the tail
  206 acs_CopyLeft:
              / descending copy: tail first (word, dword), then the qwords
              / from the highest address down
  207         testq    $1,%r8               / check for trailing word
  208         jz       1f
  209         movw     -2(%rdi,%r8,2),%cx   / copy trailing word
  210         movw     %cx,-2(%rsi,%r8,2)
  211 1:      testq    $2,%r8               / check for trailing dword
  212         jz       4f
  213         movl     (%rdi,%rdx,8),%ecx   / copy trailing dword
  214         movl     %ecx,(%rsi,%rdx,8)
  215         jmp      4f                   / enter the qword loop at its test
              / 1-3 qwords left: copy qword number rdx-1, count down to zero
  216 2:      movq     -8(%rdi,%rdx,8),%rcx
  217         movq     %rcx,-8(%rsi,%rdx,8)
  218         subq     $1,%rdx
  219         jnz      2b
  220         ret
  221         .align   16
              / four qwords per iteration, highest address first; rdx was
              / already reduced by 4 at label 4
  222 3:      movq     24(%rdi,%rdx,8),%rcx
  223         movq     %rcx,24(%rsi,%rdx,8)
  224         movq     16(%rdi,%rdx,8),%rcx
  225         movq     %rcx,16(%rsi,%rdx,8)
  226         movq     8(%rdi,%rdx,8),%rcx
  227         movq     %rcx,8(%rsi,%rdx,8)
  228         movq     (%rdi,%rdx,8),%rcx
  229         movq     %rcx,(%rsi,%rdx,8)
  230 4:      subq     $4,%rdx              / claim the next four qwords
  231         jge      3b                   / still >= 0: at least four remained
  232         addq     $4,%rdx              / fewer than four: undo the claim
  233         jg       2b                   / 1-3 qwords left
  234         ret
 235 
  236         / Support for void Copy::arrayof_conjoint_jints(jint* from,
  237         /                                               jint* to,
  238         /                                               size_t count)
  239         / Equivalent to
  240         /   conjoint_jints_atomic
  241         /
  242         / If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
  243         / the hardware handle it.  The two dwords within qwords that span
  244         / cache line boundaries will still be loaded and stored atomically.
  245         /
  246         / rdi - from
  247         / rsi - to
  248         / rdx - count, treated as ssize_t
  249         /
              / Copies 'count' 32-bit dwords; the ranges may overlap.  The copy
              / direction is chosen at entry:
              /   to <= from, or to above the last source dword:
              /       aci_CopyRight - ascending addresses
              /   from < to <= address of the last source dword:
              /       aci_CopyLeft  - descending addresses
              / The count/2 full qwords move with 8-byte loads and stores (four
              / per iteration while at least four remain, then one at a time);
              / an odd count adds a single dword move.  Every access is at a
              / multiple of 4 bytes from 'from' / 'to', so no dword is ever
              / split across two moves.
              /
              / Working registers:
              /   r8  - dword count
              /   rdx - qword count; negated and counted up to zero when
              /         ascending, counted down to zero when descending
              /   rax, rcx - ascending path: source / destination bases, biased
              /         by -8 so that 8(base,rdx,8) is the next qword
              /   rsi - scratch on the ascending path, 'to' on the descending path
              /   rcx - scratch on the descending path
              / May modify rax, rcx, rdx, rsi, r8 and the flags.  Uses no stack.
              /
  250         .align   16
  251 _Copy_arrayof_conjoint_jints:
  252 _Copy_conjoint_jints_atomic:
  253         movq     %rdx,%r8             / dword count
  254         shrq     %rdx                 / qword count
              / (the one-operand shrq shifts by a single bit: dword count / 2)
  255         cmpq     %rdi,%rsi            / flags = to - from; leaq below leaves them intact
  256         leaq     -4(%rdi,%r8,4),%rax  / from + dcount*4 - 4
  257         jbe      aci_CopyRight        / to <= from: ascending copy
  258         cmpq     %rax,%rsi            / to vs. address of last source dword
  259         jbe      aci_CopyLeft         / to lies inside the source range: descending copy
              / otherwise the destination starts above the source range: fall through
  260 aci_CopyRight:
  261         leaq     -8(%rdi,%rdx,8),%rax / from + qcount*8 - 8
  262         leaq     -8(%rsi,%rdx,8),%rcx / to + qcount*8 - 8
  263         negq     %rdx                 / rdx = -qcount, counts up towards zero
  264         jmp      5f                   / enter the qword loop at its test
  265         .align   16
              / 1-3 qwords left (rdx = -3..-1): copy one per iteration
  266 1:      movq     8(%rax,%rdx,8),%rsi
  267         movq     %rsi,8(%rcx,%rdx,8)
  268         addq     $1,%rdx
  269         jnz       1b
              / all qwords done; 8(%rax) / 8(%rcx) address the odd dword, if any
  270 2:      testq    $1,%r8               / check for trailing dword
  271         jz       3f
  272         movl     8(%rax),%esi         / copy trailing dword
  273         movl     %esi,8(%rcx)
  274 3:      ret
  275         .align   16
              / four qwords per iteration; rdx was already advanced by 4 at
              / label 5, hence the offsets -24, -16, -8 and 0
  276 4:      movq     -24(%rax,%rdx,8),%rsi
  277         movq     %rsi,-24(%rcx,%rdx,8)
  278         movq     -16(%rax,%rdx,8),%rsi
  279         movq     %rsi,-16(%rcx,%rdx,8)
  280         movq     -8(%rax,%rdx,8),%rsi
  281         movq     %rsi,-8(%rcx,%rdx,8)
  282         movq     (%rax,%rdx,8),%rsi
  283         movq     %rsi,(%rcx,%rdx,8)
  284 5:      addq     $4,%rdx              / claim the next four qwords
  285         jle      4b                   / still <= 0: at least four remained
  286         subq     $4,%rdx              / fewer than four: undo the claim
  287         jl       1b                   / 1-3 qwords left
  288         jmp      2b                   / none left: handle the odd dword
  289 aci_CopyLeft:
              / descending copy: the odd dword at the top first, then the
              / qwords from the highest address down
  290         testq    $1,%r8               / check for trailing dword
  291         jz       3f
  292         movl     -4(%rdi,%r8,4),%ecx  / copy trailing dword
  293         movl     %ecx,-4(%rsi,%r8,4)
  294         jmp      3f                   / enter the qword loop at its test
              / 1-3 qwords left: copy qword number rdx-1, count down to zero
  295 1:      movq     -8(%rdi,%rdx,8),%rcx
  296         movq     %rcx,-8(%rsi,%rdx,8)
  297         subq     $1,%rdx
  298         jnz      1b
  299         ret
  300         .align   16
              / four qwords per iteration, highest address first; rdx was
              / already reduced by 4 at label 3
  301 2:      movq     24(%rdi,%rdx,8),%rcx
  302         movq     %rcx,24(%rsi,%rdx,8)
  303         movq     16(%rdi,%rdx,8),%rcx
  304         movq     %rcx,16(%rsi,%rdx,8)
  305         movq     8(%rdi,%rdx,8),%rcx
  306         movq     %rcx,8(%rsi,%rdx,8)
  307         movq     (%rdi,%rdx,8),%rcx
  308         movq     %rcx,(%rsi,%rdx,8)
  309 3:      subq     $4,%rdx              / claim the next four qwords
  310         jge      2b                   / still >= 0: at least four remained
  311         addq     $4,%rdx              / fewer than four: undo the claim
  312         jg       1b                   / 1-3 qwords left
  313         ret
 314 
  315         / Support for void Copy::arrayof_conjoint_jlongs(jlong* from,
  316         /                                                jlong* to,
  317         /                                                size_t count)
  318         / Equivalent to
  319         /   conjoint_jlongs_atomic
  320         /   arrayof_conjoint_oops
  321         /   conjoint_oops_atomic
  322         /
  323         / rdi - from
  324         / rsi - to
  325         / rdx - count, treated as ssize_t
  326         /
              / Copies 'count' 64-bit qwords; the ranges may overlap.  The copy
              / direction is chosen at entry:
              /   to <= from, or to above the last source qword:
              /       acl_CopyRight - ascending addresses
              /   from < to <= address of the last source qword:
              /       acl_CopyLeft  - descending addresses
              / Each qword is moved with one 8-byte load and one 8-byte store,
              / four per iteration while at least four remain, then one at a
              / time.  There is no sub-qword tail.
              /
              / Working registers:
              /   rdx - qword count; negated and counted up to zero when
              /         ascending, counted down to zero when descending
              /   rax, rcx - ascending path: source / destination bases, biased
              /         by -8 so that 8(base,rdx,8) is the next qword
              /   rsi - scratch on the ascending path, 'to' on the descending path
              /   rcx - scratch on the descending path
              / May modify rax, rcx, rdx, rsi and the flags.  Uses no stack.
              /
  327         .align   16
  328 _Copy_arrayof_conjoint_jlongs:
  329 _Copy_conjoint_jlongs_atomic:
  330         cmpq     %rdi,%rsi            / flags = to - from; leaq below leaves them intact
  331         leaq     -8(%rdi,%rdx,8),%rax / from + count*8 - 8
  332         jbe      acl_CopyRight        / to <= from: ascending copy
  333         cmpq     %rax,%rsi            / to vs. address of last source qword
  334         jbe      acl_CopyLeft         / to lies inside the source range: descending copy
              / otherwise the destination starts above the source range: fall through
  335 acl_CopyRight:
              / rax, computed above for the overlap test, doubles as the
              / biased source base of the ascending loops
  336         leaq     -8(%rsi,%rdx,8),%rcx / to + count*8 - 8
  337         negq     %rdx                 / rdx = -count, counts up towards zero
  338         jmp      3f                   / enter the qword loop at its test
              / 1-3 qwords left (rdx = -3..-1): copy one per iteration
  339 1:      movq     8(%rax,%rdx,8),%rsi
  340         movq     %rsi,8(%rcx,%rdx,8)
  341         addq     $1,%rdx
  342         jnz      1b
  343         ret
  344         .align   16
              / four qwords per iteration; rdx was already advanced by 4 at
              / label 3, hence the offsets -24, -16, -8 and 0
  345 2:      movq     -24(%rax,%rdx,8),%rsi
  346         movq     %rsi,-24(%rcx,%rdx,8)
  347         movq     -16(%rax,%rdx,8),%rsi
  348         movq     %rsi,-16(%rcx,%rdx,8)
  349         movq     -8(%rax,%rdx,8),%rsi
  350         movq     %rsi,-8(%rcx,%rdx,8)
  351         movq     (%rax,%rdx,8),%rsi
  352         movq     %rsi,(%rcx,%rdx,8)
  353 3:      addq     $4,%rdx              / claim the next four qwords
  354         jle      2b                   / still <= 0: at least four remained
  355         subq     $4,%rdx              / fewer than four: undo the claim
  356         jl       1b                   / 1-3 qwords left
  357         ret                           / none left
              / descending path, 1-3 qwords left: copy qword number rdx-1,
              / count down to zero
  358 4:      movq     -8(%rdi,%rdx,8),%rcx
  359         movq     %rcx,-8(%rsi,%rdx,8)
  360         subq     $1,%rdx
  361         jnz      4b
  362         ret
  363         .align   16
              / descending path, four qwords per iteration, highest address
              / first; rdx was already reduced by 4 at acl_CopyLeft
  364 5:      movq     24(%rdi,%rdx,8),%rcx
  365         movq     %rcx,24(%rsi,%rdx,8)
  366         movq     16(%rdi,%rdx,8),%rcx
  367         movq     %rcx,16(%rsi,%rdx,8)
  368         movq     8(%rdi,%rdx,8),%rcx
  369         movq     %rcx,8(%rsi,%rdx,8)
  370         movq     (%rdi,%rdx,8),%rcx
  371         movq     %rcx,(%rsi,%rdx,8)
              / entry point of the descending copy; it is also the loop test
              / that the unrolled block above falls into
  372 acl_CopyLeft:
  373         subq     $4,%rdx              / claim the next four qwords
  374         jge      5b                   / still >= 0: at least four remained
  375         addq     $4,%rdx              / fewer than four: undo the claim
  376         jg       4b                   / 1-3 qwords left
  377         ret