|
Java example source code file (solaris_x86_64.s)
The solaris_x86_64.s Java example source code

/
/ Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved.
/ DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
/
/ This code is free software; you can redistribute it and/or modify it
/ under the terms of the GNU General Public License version 2 only, as
/ published by the Free Software Foundation.
/
/ This code is distributed in the hope that it will be useful, but WITHOUT
/ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
/ FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
/ version 2 for more details (a copy is included in the LICENSE file that
/ accompanied this code).
/
/ You should have received a copy of the GNU General Public License version
/ 2 along with this work; if not, write to the Free Software Foundation,
/ Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
/
/ Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
/ or visit www.oracle.com if you need additional information or have any
/ questions.
/

/ Syntax: AT&T operand order (source, destination); '/' starts a comment.
/ All routines below are leaf routines: none of them issues a call or
/ touches %rsp other than through ret.

        .globl fs_load
        .globl fs_thread

        // NOTE WELL!  The _Copy functions are called directly
        // from server-compiler-generated code via CallLeafNoFP,
        // which means that they *must* either not use floating
        // point or use it in the same manner as does the server
        // compiler.

        .globl _Copy_arrayof_conjoint_bytes
        .globl _Copy_conjoint_jshorts_atomic
        .globl _Copy_arrayof_conjoint_jshorts
        .globl _Copy_conjoint_jints_atomic
        .globl _Copy_arrayof_conjoint_jints
        .globl _Copy_conjoint_jlongs_atomic
        .globl _Copy_arrayof_conjoint_jlongs

        .section .text,"ax"

        / Fast thread accessors, used by threadLS_solaris_amd64.cpp

        / fs_load
        /   In:  rdi - offset applied to the %fs segment
        /   Out: rax - the 64-bit value read from %fs:(%rdi)
        .align   16
fs_load:
        movq     %fs:(%rdi),%rax
        ret

        / fs_thread
        /   Out: rax - the 64-bit value read from %fs:0x0
        .align   16
fs_thread:
        movq     %fs:0x0,%rax
        ret

        / SpinPause
        /   "rep nop" is the encoding of the PAUSE spin-wait hint.
        /   Out: rax - always 1
        .globl SpinPause
        .align   16
SpinPause:
        rep
        nop
        movq     $1, %rax
        ret


        / Support for void Copy::arrayof_conjoint_bytes(void* from,
        /                                               void* to,
        /                                               size_t count)
        / rdi - from
        / rsi - to
        / rdx - count, treated as ssize_t
        /
        / Register roles after the first two instructions:
        /   r8  - byte count (its low three bits select the tail copies)
        /   rdx - qword count (byte count >> 3)
        / Scratch registers written: rax, rcx, rdx, rsi, r8, flags.
        /
        / Direction selection (unsigned compares):
        /   to <= from                  -> acb_CopyRight (ascending addresses)
        /   from < to <= from+count-1   -> acb_CopyLeft  (descending addresses)
        /   otherwise                   -> falls through to acb_CopyRight
        / leaq does not modify flags, so the first jbe still tests the
        / cmpq that precedes the leaq.
        /
        .align   16
_Copy_arrayof_conjoint_bytes:
        movq     %rdx,%r8             / byte count
        shrq     $3,%rdx              / qword count
        cmpq     %rdi,%rsi
        leaq     -1(%rdi,%r8,1),%rax  / from + bcount*1 - 1
        jbe      acb_CopyRight
        cmpq     %rax,%rsi
        jbe      acb_CopyLeft
acb_CopyRight:
        / rax/rcx point at the last whole qword of from/to and rdx is
        / negated, so (%rax,%rdx,8) walks upward as rdx counts toward 0.
        leaq     -8(%rdi,%rdx,8),%rax / from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx / to + qcount*8 - 8
        negq     %rdx
        jmp      7f
        .align   16
        / Remainder loop: one qword per iteration until rdx reaches 0.
1:      movq     8(%rax,%rdx,8),%rsi
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      1b
        / Tail: 8(%rax) / 8(%rcx) are the first bytes past the qwords.
2:      testq    $4,%r8               / check for trailing dword
        jz       3f
        movl     8(%rax),%esi         / copy trailing dword
        movl     %esi,8(%rcx)
        addq     $4,%rax
        addq     $4,%rcx              / original %rsi is trashed, so we
                                      /  can't use it as a base register
3:      testq    $2,%r8               / check for trailing word
        jz       4f
        movw     8(%rax),%si          / copy trailing word
        movw     %si,8(%rcx)
        addq     $2,%rcx
4:      testq    $1,%r8               / check for trailing byte
        jz       5f
        movb     -1(%rdi,%r8,1),%al   / copy trailing byte
        movb     %al,8(%rcx)
5:      ret
        .align   16
        / Unrolled loop: four qwords per iteration, lowest address first.
6:      movq     -24(%rax,%rdx,8),%rsi
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
        / Loop control: while at least four qwords remain take the
        / unrolled loop; then undo the bias and finish 1..3 leftover
        / qwords at 1b, or go straight to the tail at 2b if none remain.
7:      addq     $4,%rdx
        jle      6b
        subq     $4,%rdx
        jl       1b
        jmp      2b
acb_CopyLeft:
        / Descending copy: the tail (highest addresses) is copied first,
        / then the qwords from the top down.
        testq    $1,%r8               / check for trailing byte
        jz       1f
        movb     -1(%rdi,%r8,1),%cl   / copy trailing byte
        movb     %cl,-1(%rsi,%r8,1)
        subq     $1,%r8               / adjust for possible trailing word
1:      testq    $2,%r8               / check for trailing word
        jz       2f
        movw     -2(%rdi,%r8,1),%cx   / copy trailing word
        movw     %cx,-2(%rsi,%r8,1)
2:      testq    $4,%r8               / check for trailing dword
        jz       5f
        movl     (%rdi,%rdx,8),%ecx   / copy trailing dword
        movl     %ecx,(%rsi,%rdx,8)
        jmp      5f
        .align   16
        / Remainder loop: one qword per iteration, highest address first.
3:      movq     -8(%rdi,%rdx,8),%rcx
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      3b
        ret
        .align   16
        / Unrolled loop: four qwords per iteration, highest address first.
4:      movq     24(%rdi,%rdx,8),%rcx
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
        / Loop control: take the unrolled loop while four or more qwords
        / remain, then restore rdx and finish 1..3 leftovers at 3b.
5:      subq     $4,%rdx
        jge      4b
        addq     $4,%rdx
        jg       3b
        ret

        / Support for void Copy::arrayof_conjoint_jshorts(void* from,
        /                                                 void* to,
        /                                                 size_t count)
        / Equivalent to
        /   conjoint_jshorts_atomic
        /
        / If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
        / let the hardware handle it.  The two or four words within dwords
        / or qwords that span cache line boundaries will still be loaded
        / and stored atomically.
        /
        / rdi - from
        / rsi - to
        / rdx - count, treated as ssize_t
        /
        / Register roles after the first two instructions:
        /   r8  - word (16-bit element) count; low two bits select the tail
        /   rdx - qword count (word count >> 2)
        / Scratch registers written: rax, rcx, rdx, rsi, flags.
        / Direction selection is the same scheme as in
        / _Copy_arrayof_conjoint_bytes, with the last source element at
        / from + count*2 - 2.
        /
        .align   16
_Copy_arrayof_conjoint_jshorts:
_Copy_conjoint_jshorts_atomic:
        movq     %rdx,%r8             / word count
        shrq     $2,%rdx              / qword count
        cmpq     %rdi,%rsi
        leaq     -2(%rdi,%r8,2),%rax  / from + wcount*2 - 2
        jbe      acs_CopyRight
        cmpq     %rax,%rsi
        jbe      acs_CopyLeft
acs_CopyRight:
        leaq     -8(%rdi,%rdx,8),%rax / from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx / to + qcount*8 - 8
        negq     %rdx
        jmp      6f
        / Remainder loop: one qword per iteration until rdx reaches 0.
1:      movq     8(%rax,%rdx,8),%rsi
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      1b
2:      testq    $2,%r8               / check for trailing dword
        jz       3f
        movl     8(%rax),%esi         / copy trailing dword
        movl     %esi,8(%rcx)
        addq     $4,%rcx              / original %rsi is trashed, so we
                                      /  can't use it as a base register
3:      testq    $1,%r8               / check for trailing word
        jz       4f
        movw     -2(%rdi,%r8,2),%si   / copy trailing word
        movw     %si,8(%rcx)
4:      ret
        .align   16
        / Unrolled loop: four qwords per iteration, lowest address first.
5:      movq     -24(%rax,%rdx,8),%rsi
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
        / Loop control: unrolled loop while >= 4 qwords remain, then
        / leftovers at 1b, or straight to the tail at 2b.
6:      addq     $4,%rdx
        jle      5b
        subq     $4,%rdx
        jl       1b
        jmp      2b
acs_CopyLeft:
        / Descending copy: tail first, then qwords from the top down.
        testq    $1,%r8               / check for trailing word
        jz       1f
        movw     -2(%rdi,%r8,2),%cx   / copy trailing word
        movw     %cx,-2(%rsi,%r8,2)
1:      testq    $2,%r8               / check for trailing dword
        jz       4f
        movl     (%rdi,%rdx,8),%ecx   / copy trailing dword
        movl     %ecx,(%rsi,%rdx,8)
        jmp      4f
        / Remainder loop: one qword per iteration, highest address first.
2:      movq     -8(%rdi,%rdx,8),%rcx
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      2b
        ret
        .align   16
        / Unrolled loop: four qwords per iteration, highest address first.
3:      movq     24(%rdi,%rdx,8),%rcx
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
4:      subq     $4,%rdx
        jge      3b
        addq     $4,%rdx
        jg       2b
        ret

        / Support for void Copy::arrayof_conjoint_jints(jint* from,
        /                                               jint* to,
        /                                               size_t count)
        / Equivalent to
        /   conjoint_jints_atomic
        /
        / If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
        / the hardware handle it.  The two dwords within qwords that span
        / cache line boundaries will still be loaded and stored atomically.
        /
        / rdi - from
        / rsi - to
        / rdx - count, treated as ssize_t
        /
        / Register roles after the first two instructions:
        /   r8  - dword count; bit 0 selects the single trailing dword
        /   rdx - qword count (dword count >> 1; the one-operand shrq
        /         shifts by one bit)
        / Scratch registers written: rax, rcx, rdx, rsi, flags.
        / Direction selection is the same scheme as in
        / _Copy_arrayof_conjoint_bytes, with the last source element at
        / from + count*4 - 4.
        /
        .align   16
_Copy_arrayof_conjoint_jints:
_Copy_conjoint_jints_atomic:
        movq     %rdx,%r8             / dword count
        shrq     %rdx                 / qword count
        cmpq     %rdi,%rsi
        leaq     -4(%rdi,%r8,4),%rax  / from + dcount*4 - 4
        jbe      aci_CopyRight
        cmpq     %rax,%rsi
        jbe      aci_CopyLeft
aci_CopyRight:
        leaq     -8(%rdi,%rdx,8),%rax / from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx / to + qcount*8 - 8
        negq     %rdx
        jmp      5f
        .align   16
        / Remainder loop: one qword per iteration until rdx reaches 0.
1:      movq     8(%rax,%rdx,8),%rsi
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      1b
2:      testq    $1,%r8               / check for trailing dword
        jz       3f
        movl     8(%rax),%esi         / copy trailing dword
        movl     %esi,8(%rcx)
3:      ret
        .align   16
        / Unrolled loop: four qwords per iteration, lowest address first.
4:      movq     -24(%rax,%rdx,8),%rsi
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
5:      addq     $4,%rdx
        jle      4b
        subq     $4,%rdx
        jl       1b
        jmp      2b
aci_CopyLeft:
        / Descending copy: trailing dword first, then qwords top down.
        testq    $1,%r8               / check for trailing dword
        jz       3f
        movl     -4(%rdi,%r8,4),%ecx  / copy trailing dword
        movl     %ecx,-4(%rsi,%r8,4)
        jmp      3f
        / Remainder loop: one qword per iteration, highest address first.
1:      movq     -8(%rdi,%rdx,8),%rcx
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      1b
        ret
        .align   16
        / Unrolled loop: four qwords per iteration, highest address first.
2:      movq     24(%rdi,%rdx,8),%rcx
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
3:      subq     $4,%rdx
        jge      2b
        addq     $4,%rdx
        jg       1b
        ret

        / Support for void Copy::arrayof_conjoint_jlongs(jlong* from,
        /                                                jlong* to,
        /                                                size_t count)
        / Equivalent to
        /   conjoint_jlongs_atomic
        /   arrayof_conjoint_oops
        /   conjoint_oops_atomic
        /
        / rdi - from
        / rsi - to
        / rdx - count, treated as ssize_t
        /
        / The count is already in qwords, so there is no tail handling.
        / Scratch registers written: rax, rcx, rdx, rsi, flags.
        / Direction selection is the same scheme as in
        / _Copy_arrayof_conjoint_bytes, with the last source element at
        / from + count*8 - 8 (which also serves as the CopyRight base).
        /
        .align   16
_Copy_arrayof_conjoint_jlongs:
_Copy_conjoint_jlongs_atomic:
        cmpq     %rdi,%rsi
        leaq     -8(%rdi,%rdx,8),%rax / from + count*8 - 8
        jbe      acl_CopyRight
        cmpq     %rax,%rsi
        jbe      acl_CopyLeft
acl_CopyRight:
        leaq     -8(%rsi,%rdx,8),%rcx / to + count*8 - 8
        negq     %rdx
        jmp      3f
        / Remainder loop: one qword per iteration until rdx reaches 0.
1:      movq     8(%rax,%rdx,8),%rsi
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      1b
        ret
        .align   16
        / Unrolled loop: four qwords per iteration, lowest address first.
2:      movq     -24(%rax,%rdx,8),%rsi
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
3:      addq     $4,%rdx
        jle      2b
        subq     $4,%rdx
        jl       1b
        ret
        / Remainder loop for the descending copy: one qword per
        / iteration, highest address first.
4:      movq     -8(%rdi,%rdx,8),%rcx
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      4b
        ret
        .align   16
        / Unrolled loop for the descending copy; falls through into the
        / loop control at acl_CopyLeft.
5:      movq     24(%rdi,%rdx,8),%rcx
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
acl_CopyLeft:
        / Entry point of the descending copy and its loop control.
        subq     $4,%rdx
        jge      5b
        addq     $4,%rdx
        jg       4b
        ret

Other Java examples (source code examples)

Here is a short list of links related to this Java solaris_x86_64.s source code file: |
... this post is sponsored by my books ... | |
#1 New Release! |
FP Best Seller |
Copyright 1998-2024 Alvin Alexander, alvinalexander.com
All Rights Reserved.
A percentage of advertising revenue from
pages under the /java/jwarehouse
URI on this website is
paid back to open source projects.