-rw-r--r--  Makefile.am        |    1
-rw-r--r--  configure.host     |   12
-rw-r--r--  src/x86/darwin.S   |  444
-rw-r--r--  src/x86/darwin64.S |  416
-rw-r--r--  src/x86/ffi.c      |   29
-rw-r--r--  src/x86/sysv.S     |  884
-rw-r--r--  src/x86/unix64.S   |  390
7 files changed, 896 insertions(+), 1280 deletions(-)
diff --git a/Makefile.am b/Makefile.am
index dfdcea6..6fb3d47 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -144,7 +144,6 @@ EXTRA_libffi_la_SOURCES = \
src/x86/ffi.c src/x86/sysv.S \
src/x86/ffiw64.c src/x86/win64.S \
src/x86/ffi64.c src/x86/unix64.S \
- src/x86/darwin64.S src/x86/darwin.S \
src/xtensa/ffi.c src/xtensa/sysv.S
TARGET_OBJ = @TARGET_OBJ@
diff --git a/configure.host b/configure.host
index bc3e838..5ee632c 100644
--- a/configure.host
+++ b/configure.host
@@ -84,7 +84,12 @@ case "${host}" in
;;
i?86-*-darwin* | x86_64-*-darwin*)
- TARGET=X86_DARWIN; TARGETDIR=x86
+ TARGETDIR=x86
+ if test $ac_cv_sizeof_size_t = 4; then
+ TARGET=X86_DARWIN
+ else
+ TARGET=X86_64
+ fi
;;
i?86-*-* | x86_64-*-* | amd64-*)
@@ -237,7 +242,7 @@ case "${TARGET}" in
POWERPC_FREEBSD)
SOURCES="ffi.c ffi_sysv.c sysv.S ppc_closure.S"
;;
- X86 | X86_FREEBSD | X86_WIN32)
+ X86 | X86_DARWIN | X86_FREEBSD | X86_WIN32)
SOURCES="ffi.c sysv.S"
;;
X86_64)
@@ -246,9 +251,6 @@ case "${TARGET}" in
X86_WIN64)
SOURCES="ffiw64.c win64.S"
;;
- X86_DARWIN)
- SOURCES="ffi.c darwin.S ffi64.c darwin64.S"
- ;;
esac
# If we failed to configure SOURCES, we can't do anything.
diff --git a/src/x86/darwin.S b/src/x86/darwin.S
deleted file mode 100644
index 8f0f070..0000000
--- a/src/x86/darwin.S
+++ /dev/null
@@ -1,444 +0,0 @@
-/* -----------------------------------------------------------------------
- darwin.S - Copyright (c) 1996, 1998, 2001, 2002, 2003, 2005 Red Hat, Inc.
- Copyright (C) 2008 Free Software Foundation, Inc.
-
- X86 Foreign Function Interface
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- ``Software''), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be included
- in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- DEALINGS IN THE SOFTWARE.
- -----------------------------------------------------------------------
- */
-
-#ifndef __x86_64__
-
-#define LIBFFI_ASM
-#include <fficonfig.h>
-#include <ffi.h>
-
-.text
-
-.globl _ffi_prep_args
-
- .align 4
-.globl _ffi_call_SYSV
-
-_ffi_call_SYSV:
-.LFB1:
- pushl %ebp
-.LCFI0:
- movl %esp,%ebp
-.LCFI1:
- subl $8,%esp
- /* Make room for all of the new args. */
- movl 16(%ebp),%ecx
- subl %ecx,%esp
-
- movl %esp,%eax
-
- /* Place all of the ffi_prep_args in position */
- subl $8,%esp
- pushl 12(%ebp)
- pushl %eax
- call *8(%ebp)
-
- /* Return stack to previous state and call the function */
- addl $16,%esp
-
- call *28(%ebp)
-
- /* Load %ecx with the return type code */
- movl 20(%ebp),%ecx
-
- /* Protect %esi. We're going to pop it in the epilogue. */
- pushl %esi
-
- /* If the return value pointer is NULL, assume no return value. */
- cmpl $0,24(%ebp)
- jne 0f
-
- /* Even if there is no space for the return value, we are
- obliged to handle floating-point values. */
- cmpl $FFI_TYPE_FLOAT,%ecx
- jne noretval
- fstp %st(0)
-
- jmp epilogue
-0:
- .align 4
- call 1f
-.Lstore_table:
- .long noretval-.Lstore_table /* FFI_TYPE_VOID */
- .long retint-.Lstore_table /* FFI_TYPE_INT */
- .long retfloat-.Lstore_table /* FFI_TYPE_FLOAT */
- .long retdouble-.Lstore_table /* FFI_TYPE_DOUBLE */
- .long retlongdouble-.Lstore_table /* FFI_TYPE_LONGDOUBLE */
- .long retuint8-.Lstore_table /* FFI_TYPE_UINT8 */
- .long retsint8-.Lstore_table /* FFI_TYPE_SINT8 */
- .long retuint16-.Lstore_table /* FFI_TYPE_UINT16 */
- .long retsint16-.Lstore_table /* FFI_TYPE_SINT16 */
- .long retint-.Lstore_table /* FFI_TYPE_UINT32 */
- .long retint-.Lstore_table /* FFI_TYPE_SINT32 */
- .long retint64-.Lstore_table /* FFI_TYPE_UINT64 */
- .long retint64-.Lstore_table /* FFI_TYPE_SINT64 */
- .long retstruct-.Lstore_table /* FFI_TYPE_STRUCT */
- .long retint-.Lstore_table /* FFI_TYPE_POINTER */
- .long retstruct1b-.Lstore_table /* FFI_TYPE_SMALL_STRUCT_1B */
- .long retstruct2b-.Lstore_table /* FFI_TYPE_SMALL_STRUCT_2B */
-1:
- pop %esi
- add (%esi, %ecx, 4), %esi
- jmp *%esi
-
- /* Sign/zero extend as appropriate. */
-retsint8:
- movsbl %al, %eax
- jmp retint
-
-retsint16:
- movswl %ax, %eax
- jmp retint
-
-retuint8:
- movzbl %al, %eax
- jmp retint
-
-retuint16:
- movzwl %ax, %eax
- jmp retint
-
-retfloat:
- /* Load %ecx with the pointer to storage for the return value */
- movl 24(%ebp),%ecx
- fstps (%ecx)
- jmp epilogue
-
-retdouble:
- /* Load %ecx with the pointer to storage for the return value */
- movl 24(%ebp),%ecx
- fstpl (%ecx)
- jmp epilogue
-
-retlongdouble:
- /* Load %ecx with the pointer to storage for the return value */
- movl 24(%ebp),%ecx
- fstpt (%ecx)
- jmp epilogue
-
-retint64:
- /* Load %ecx with the pointer to storage for the return value */
- movl 24(%ebp),%ecx
- movl %eax,0(%ecx)
- movl %edx,4(%ecx)
- jmp epilogue
-
-retstruct1b:
- /* Load %ecx with the pointer to storage for the return value */
- movl 24(%ebp),%ecx
- movb %al,0(%ecx)
- jmp epilogue
-
-retstruct2b:
- /* Load %ecx with the pointer to storage for the return value */
- movl 24(%ebp),%ecx
- movw %ax,0(%ecx)
- jmp epilogue
-
-retint:
- /* Load %ecx with the pointer to storage for the return value */
- movl 24(%ebp),%ecx
- movl %eax,0(%ecx)
-
-retstruct:
- /* Nothing to do! */
-
-noretval:
-epilogue:
- popl %esi
- movl %ebp,%esp
- popl %ebp
- ret
-
-.LFE1:
-.ffi_call_SYSV_end:
-
- .align 4
-FFI_HIDDEN (ffi_closure_SYSV)
-.globl _ffi_closure_SYSV
-
-_ffi_closure_SYSV:
-.LFB2:
- pushl %ebp
-.LCFI2:
- movl %esp, %ebp
-.LCFI3:
- subl $40, %esp
- leal -24(%ebp), %edx
- movl %edx, -12(%ebp) /* resp */
- leal 8(%ebp), %edx
- movl %edx, 4(%esp) /* args = __builtin_dwarf_cfa () */
- leal -12(%ebp), %edx
- movl %edx, (%esp) /* &resp */
- movl %ebx, 8(%esp)
-.LCFI7:
- call L_ffi_closure_SYSV_inner$stub
- movl 8(%esp), %ebx
- movl -12(%ebp), %ecx
- cmpl $FFI_TYPE_INT, %eax
- je .Lcls_retint
-
- /* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
- FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */
- cmpl $FFI_TYPE_UINT64, %eax
- jge 0f
- cmpl $FFI_TYPE_UINT8, %eax
- jge .Lcls_retint
-
-0: cmpl $FFI_TYPE_FLOAT, %eax
- je .Lcls_retfloat
- cmpl $FFI_TYPE_DOUBLE, %eax
- je .Lcls_retdouble
- cmpl $FFI_TYPE_LONGDOUBLE, %eax
- je .Lcls_retldouble
- cmpl $FFI_TYPE_SINT64, %eax
- je .Lcls_retllong
- cmpl $FFI_TYPE_SMALL_STRUCT_1B, %eax
- je .Lcls_retstruct1b
- cmpl $FFI_TYPE_SMALL_STRUCT_2B, %eax
- je .Lcls_retstruct2b
- cmpl $FFI_TYPE_STRUCT, %eax
- je .Lcls_retstruct
-.Lcls_epilogue:
- movl %ebp, %esp
- popl %ebp
- ret
-.Lcls_retint:
- movl (%ecx), %eax
- jmp .Lcls_epilogue
-.Lcls_retfloat:
- flds (%ecx)
- jmp .Lcls_epilogue
-.Lcls_retdouble:
- fldl (%ecx)
- jmp .Lcls_epilogue
-.Lcls_retldouble:
- fldt (%ecx)
- jmp .Lcls_epilogue
-.Lcls_retllong:
- movl (%ecx), %eax
- movl 4(%ecx), %edx
- jmp .Lcls_epilogue
-.Lcls_retstruct1b:
- movsbl (%ecx), %eax
- jmp .Lcls_epilogue
-.Lcls_retstruct2b:
- movswl (%ecx), %eax
- jmp .Lcls_epilogue
-.Lcls_retstruct:
- lea -8(%ebp),%esp
- movl %ebp, %esp
- popl %ebp
- ret $4
-.LFE2:
-
-#if !FFI_NO_RAW_API
-
-#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3)
-#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
-#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
-#define CIF_FLAGS_OFFSET 20
-
- .align 4
-FFI_HIDDEN (ffi_closure_raw_SYSV)
-.globl _ffi_closure_raw_SYSV
-
-_ffi_closure_raw_SYSV:
-.LFB3:
- pushl %ebp
-.LCFI4:
- movl %esp, %ebp
-.LCFI5:
- pushl %esi
-.LCFI6:
- subl $36, %esp
- movl RAW_CLOSURE_CIF_OFFSET(%eax), %esi /* closure->cif */
- movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */
- movl %edx, 12(%esp) /* user_data */
- leal 8(%ebp), %edx /* __builtin_dwarf_cfa () */
- movl %edx, 8(%esp) /* raw_args */
- leal -24(%ebp), %edx
- movl %edx, 4(%esp) /* &res */
- movl %esi, (%esp) /* cif */
- call *RAW_CLOSURE_FUN_OFFSET(%eax) /* closure->fun */
- movl CIF_FLAGS_OFFSET(%esi), %eax /* rtype */
- cmpl $FFI_TYPE_INT, %eax
- je .Lrcls_retint
-
- /* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16,
- FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */
- cmpl $FFI_TYPE_UINT64, %eax
- jge 0f
- cmpl $FFI_TYPE_UINT8, %eax
- jge .Lrcls_retint
-0:
- cmpl $FFI_TYPE_FLOAT, %eax
- je .Lrcls_retfloat
- cmpl $FFI_TYPE_DOUBLE, %eax
- je .Lrcls_retdouble
- cmpl $FFI_TYPE_LONGDOUBLE, %eax
- je .Lrcls_retldouble
- cmpl $FFI_TYPE_SINT64, %eax
- je .Lrcls_retllong
-.Lrcls_epilogue:
- addl $36, %esp
- popl %esi
- popl %ebp
- ret
-.Lrcls_retint:
- movl -24(%ebp), %eax
- jmp .Lrcls_epilogue
-.Lrcls_retfloat:
- flds -24(%ebp)
- jmp .Lrcls_epilogue
-.Lrcls_retdouble:
- fldl -24(%ebp)
- jmp .Lrcls_epilogue
-.Lrcls_retldouble:
- fldt -24(%ebp)
- jmp .Lrcls_epilogue
-.Lrcls_retllong:
- movl -24(%ebp), %eax
- movl -20(%ebp), %edx
- jmp .Lrcls_epilogue
-.LFE3:
-#endif
-
-.section __IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5
-L_ffi_closure_SYSV_inner$stub:
- .indirect_symbol _ffi_closure_SYSV_inner
- hlt ; hlt ; hlt ; hlt ; hlt
-
-
-.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
-EH_frame1:
- .set L$set$0,LECIE1-LSCIE1
- .long L$set$0
-LSCIE1:
- .long 0x0
- .byte 0x1
- .ascii "zR\0"
- .byte 0x1
- .byte 0x7c
- .byte 0x8
- .byte 0x1
- .byte 0x10
- .byte 0xc
- .byte 0x5
- .byte 0x4
- .byte 0x88
- .byte 0x1
- .align 2
-LECIE1:
-.globl _ffi_call_SYSV.eh
-_ffi_call_SYSV.eh:
-LSFDE1:
- .set L$set$1,LEFDE1-LASFDE1
- .long L$set$1
-LASFDE1:
- .long LASFDE1-EH_frame1
- .long .LFB1-.
- .set L$set$2,.LFE1-.LFB1
- .long L$set$2
- .byte 0x0
- .byte 0x4
- .set L$set$3,.LCFI0-.LFB1
- .long L$set$3
- .byte 0xe
- .byte 0x8
- .byte 0x84
- .byte 0x2
- .byte 0x4
- .set L$set$4,.LCFI1-.LCFI0
- .long L$set$4
- .byte 0xd
- .byte 0x4
- .align 2
-LEFDE1:
-.globl _ffi_closure_SYSV.eh
-_ffi_closure_SYSV.eh:
-LSFDE2:
- .set L$set$5,LEFDE2-LASFDE2
- .long L$set$5
-LASFDE2:
- .long LASFDE2-EH_frame1
- .long .LFB2-.
- .set L$set$6,.LFE2-.LFB2
- .long L$set$6
- .byte 0x0
- .byte 0x4
- .set L$set$7,.LCFI2-.LFB2
- .long L$set$7
- .byte 0xe
- .byte 0x8
- .byte 0x84
- .byte 0x2
- .byte 0x4
- .set L$set$8,.LCFI3-.LCFI2
- .long L$set$8
- .byte 0xd
- .byte 0x4
- .align 2
-LEFDE2:
-
-#if !FFI_NO_RAW_API
-
-.globl _ffi_closure_raw_SYSV.eh
-_ffi_closure_raw_SYSV.eh:
-LSFDE3:
- .set L$set$10,LEFDE3-LASFDE3
- .long L$set$10
-LASFDE3:
- .long LASFDE3-EH_frame1
- .long .LFB3-.
- .set L$set$11,.LFE3-.LFB3
- .long L$set$11
- .byte 0x0
- .byte 0x4
- .set L$set$12,.LCFI4-.LFB3
- .long L$set$12
- .byte 0xe
- .byte 0x8
- .byte 0x84
- .byte 0x2
- .byte 0x4
- .set L$set$13,.LCFI5-.LCFI4
- .long L$set$13
- .byte 0xd
- .byte 0x4
- .byte 0x4
- .set L$set$14,.LCFI6-.LCFI5
- .long L$set$14
- .byte 0x85
- .byte 0x3
- .align 2
-LEFDE3:
-
-#endif
-
-#endif /* ifndef __x86_64__ */
diff --git a/src/x86/darwin64.S b/src/x86/darwin64.S
deleted file mode 100644
index 2f7394e..0000000
--- a/src/x86/darwin64.S
+++ /dev/null
@@ -1,416 +0,0 @@
-/* -----------------------------------------------------------------------
- darwin64.S - Copyright (c) 2006 Free Software Foundation, Inc.
- Copyright (c) 2008 Red Hat, Inc.
- derived from unix64.S
-
- x86-64 Foreign Function Interface for Darwin.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- ``Software''), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be included
- in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
- OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
- OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- OTHER DEALINGS IN THE SOFTWARE.
- ----------------------------------------------------------------------- */
-
-#ifdef __x86_64__
-#define LIBFFI_ASM
-#include <fficonfig.h>
-#include <ffi.h>
-
- .file "darwin64.S"
-.text
-
-/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
- void *raddr, void (*fnaddr)(void));
-
- Bit o trickiness here -- ARGS+BYTES is the base of the stack frame
- for this function. This has been allocated by ffi_call. We also
- deallocate some of the stack that has been alloca'd. */
-
- .align 3
- .globl _ffi_call_unix64
-
-_ffi_call_unix64:
-LUW0:
- movq (%rsp), %r10 /* Load return address. */
- leaq (%rdi, %rsi), %rax /* Find local stack base. */
- movq %rdx, (%rax) /* Save flags. */
- movq %rcx, 8(%rax) /* Save raddr. */
- movq %rbp, 16(%rax) /* Save old frame pointer. */
- movq %r10, 24(%rax) /* Relocate return address. */
- movq %rax, %rbp /* Finalize local stack frame. */
-LUW1:
- movq %rdi, %r10 /* Save a copy of the register area. */
- movq %r8, %r11 /* Save a copy of the target fn. */
- movl %r9d, %eax /* Set number of SSE registers. */
-
- /* Load up all argument registers. */
- movq (%r10), %rdi
- movq 8(%r10), %rsi
- movq 16(%r10), %rdx
- movq 24(%r10), %rcx
- movq 32(%r10), %r8
- movq 40(%r10), %r9
- testl %eax, %eax
- jnz Lload_sse
-Lret_from_load_sse:
-
- /* Deallocate the reg arg area. */
- leaq 176(%r10), %rsp
-
- /* Call the user function. */
- call *%r11
-
- /* Deallocate stack arg area; local stack frame in redzone. */
- leaq 24(%rbp), %rsp
-
- movq 0(%rbp), %rcx /* Reload flags. */
- movq 8(%rbp), %rdi /* Reload raddr. */
- movq 16(%rbp), %rbp /* Reload old frame pointer. */
-LUW2:
-
- /* The first byte of the flags contains the FFI_TYPE. */
- movzbl %cl, %r10d
- leaq Lstore_table(%rip), %r11
- movslq (%r11, %r10, 4), %r10
- addq %r11, %r10
- jmp *%r10
-
-Lstore_table:
- .long Lst_void-Lstore_table /* FFI_TYPE_VOID */
- .long Lst_sint32-Lstore_table /* FFI_TYPE_INT */
- .long Lst_float-Lstore_table /* FFI_TYPE_FLOAT */
- .long Lst_double-Lstore_table /* FFI_TYPE_DOUBLE */
- .long Lst_ldouble-Lstore_table /* FFI_TYPE_LONGDOUBLE */
- .long Lst_uint8-Lstore_table /* FFI_TYPE_UINT8 */
- .long Lst_sint8-Lstore_table /* FFI_TYPE_SINT8 */
- .long Lst_uint16-Lstore_table /* FFI_TYPE_UINT16 */
- .long Lst_sint16-Lstore_table /* FFI_TYPE_SINT16 */
- .long Lst_uint32-Lstore_table /* FFI_TYPE_UINT32 */
- .long Lst_sint32-Lstore_table /* FFI_TYPE_SINT32 */
- .long Lst_int64-Lstore_table /* FFI_TYPE_UINT64 */
- .long Lst_int64-Lstore_table /* FFI_TYPE_SINT64 */
- .long Lst_struct-Lstore_table /* FFI_TYPE_STRUCT */
- .long Lst_int64-Lstore_table /* FFI_TYPE_POINTER */
-
- .text
- .align 3
-Lst_void:
- ret
- .align 3
-Lst_uint8:
- movzbq %al, %rax
- movq %rax, (%rdi)
- ret
- .align 3
-Lst_sint8:
- movsbq %al, %rax
- movq %rax, (%rdi)
- ret
- .align 3
-Lst_uint16:
- movzwq %ax, %rax
- movq %rax, (%rdi)
- .align 3
-Lst_sint16:
- movswq %ax, %rax
- movq %rax, (%rdi)
- ret
- .align 3
-Lst_uint32:
- movl %eax, %eax
- movq %rax, (%rdi)
- .align 3
-Lst_sint32:
- cltq
- movq %rax, (%rdi)
- ret
- .align 3
-Lst_int64:
- movq %rax, (%rdi)
- ret
- .align 3
-Lst_float:
- movss %xmm0, (%rdi)
- ret
- .align 3
-Lst_double:
- movsd %xmm0, (%rdi)
- ret
-Lst_ldouble:
- fstpt (%rdi)
- ret
- .align 3
-Lst_struct:
- leaq -20(%rsp), %rsi /* Scratch area in redzone. */
-
- /* We have to locate the values now, and since we don't want to
- write too much data into the user's return value, we spill the
- value to a 16 byte scratch area first. Bits 8, 9, and 10
- control where the values are located. Only one of the three
- bits will be set; see ffi_prep_cif_machdep for the pattern. */
- movd %xmm0, %r10
- movd %xmm1, %r11
- testl $0x100, %ecx
- cmovnz %rax, %rdx
- cmovnz %r10, %rax
- testl $0x200, %ecx
- cmovnz %r10, %rdx
- testl $0x400, %ecx
- cmovnz %r10, %rax
- cmovnz %r11, %rdx
- movq %rax, (%rsi)
- movq %rdx, 8(%rsi)
-
- /* Bits 12-31 contain the true size of the structure. Copy from
- the scratch area to the true destination. */
- shrl $12, %ecx
- rep movsb
- ret
-
- /* Many times we can avoid loading any SSE registers at all.
- It's not worth an indirect jump to load the exact set of
- SSE registers needed; zero or all is a good compromise. */
- .align 3
-LUW3:
-Lload_sse:
- movdqa 48(%r10), %xmm0
- movdqa 64(%r10), %xmm1
- movdqa 80(%r10), %xmm2
- movdqa 96(%r10), %xmm3
- movdqa 112(%r10), %xmm4
- movdqa 128(%r10), %xmm5
- movdqa 144(%r10), %xmm6
- movdqa 160(%r10), %xmm7
- jmp Lret_from_load_sse
-
-LUW4:
- .align 3
- .globl _ffi_closure_unix64
-
-_ffi_closure_unix64:
-LUW5:
- /* The carry flag is set by the trampoline iff SSE registers
- are used. Don't clobber it before the branch instruction. */
- leaq -200(%rsp), %rsp
-LUW6:
- movq %rdi, (%rsp)
- movq %rsi, 8(%rsp)
- movq %rdx, 16(%rsp)
- movq %rcx, 24(%rsp)
- movq %r8, 32(%rsp)
- movq %r9, 40(%rsp)
- jc Lsave_sse
-Lret_from_save_sse:
-
- movq %r10, %rdi
- leaq 176(%rsp), %rsi
- movq %rsp, %rdx
- leaq 208(%rsp), %rcx
- call _ffi_closure_unix64_inner
-
- /* Deallocate stack frame early; return value is now in redzone. */
- addq $200, %rsp
-LUW7:
-
- /* The first byte of the return value contains the FFI_TYPE. */
- movzbl %al, %r10d
- leaq Lload_table(%rip), %r11
- movslq (%r11, %r10, 4), %r10
- addq %r11, %r10
- jmp *%r10
-
-Lload_table:
- .long Lld_void-Lload_table /* FFI_TYPE_VOID */
- .long Lld_int32-Lload_table /* FFI_TYPE_INT */
- .long Lld_float-Lload_table /* FFI_TYPE_FLOAT */
- .long Lld_double-Lload_table /* FFI_TYPE_DOUBLE */
- .long Lld_ldouble-Lload_table /* FFI_TYPE_LONGDOUBLE */
- .long Lld_int8-Lload_table /* FFI_TYPE_UINT8 */
- .long Lld_int8-Lload_table /* FFI_TYPE_SINT8 */
- .long Lld_int16-Lload_table /* FFI_TYPE_UINT16 */
- .long Lld_int16-Lload_table /* FFI_TYPE_SINT16 */
- .long Lld_int32-Lload_table /* FFI_TYPE_UINT32 */
- .long Lld_int32-Lload_table /* FFI_TYPE_SINT32 */
- .long Lld_int64-Lload_table /* FFI_TYPE_UINT64 */
- .long Lld_int64-Lload_table /* FFI_TYPE_SINT64 */
- .long Lld_struct-Lload_table /* FFI_TYPE_STRUCT */
- .long Lld_int64-Lload_table /* FFI_TYPE_POINTER */
-
- .text
- .align 3
-Lld_void:
- ret
- .align 3
-Lld_int8:
- movzbl -24(%rsp), %eax
- ret
- .align 3
-Lld_int16:
- movzwl -24(%rsp), %eax
- ret
- .align 3
-Lld_int32:
- movl -24(%rsp), %eax
- ret
- .align 3
-Lld_int64:
- movq -24(%rsp), %rax
- ret
- .align 3
-Lld_float:
- movss -24(%rsp), %xmm0
- ret
- .align 3
-Lld_double:
- movsd -24(%rsp), %xmm0
- ret
- .align 3
-Lld_ldouble:
- fldt -24(%rsp)
- ret
- .align 3
-Lld_struct:
- /* There are four possibilities here, %rax/%rdx, %xmm0/%rax,
- %rax/%xmm0, %xmm0/%xmm1. We collapse two by always loading
- both rdx and xmm1 with the second word. For the remaining,
- bit 8 set means xmm0 gets the second word, and bit 9 means
- that rax gets the second word. */
- movq -24(%rsp), %rcx
- movq -16(%rsp), %rdx
- movq -16(%rsp), %xmm1
- testl $0x100, %eax
- cmovnz %rdx, %rcx
- movd %rcx, %xmm0
- testl $0x200, %eax
- movq -24(%rsp), %rax
- cmovnz %rdx, %rax
- ret
-
- /* See the comment above Lload_sse; the same logic applies here. */
- .align 3
-LUW8:
-Lsave_sse:
- movdqa %xmm0, 48(%rsp)
- movdqa %xmm1, 64(%rsp)
- movdqa %xmm2, 80(%rsp)
- movdqa %xmm3, 96(%rsp)
- movdqa %xmm4, 112(%rsp)
- movdqa %xmm5, 128(%rsp)
- movdqa %xmm6, 144(%rsp)
- movdqa %xmm7, 160(%rsp)
- jmp Lret_from_save_sse
-
-LUW9:
-.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
-EH_frame1:
- .set L$set$0,LECIE1-LSCIE1 /* CIE Length */
- .long L$set$0
-LSCIE1:
- .long 0x0 /* CIE Identifier Tag */
- .byte 0x1 /* CIE Version */
- .ascii "zR\0" /* CIE Augmentation */
- .byte 0x1 /* uleb128 0x1; CIE Code Alignment Factor */
- .byte 0x78 /* sleb128 -8; CIE Data Alignment Factor */
- .byte 0x10 /* CIE RA Column */
- .byte 0x1 /* uleb128 0x1; Augmentation size */
- .byte 0x10 /* FDE Encoding (pcrel sdata4) */
- .byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */
- .byte 0x7 /* uleb128 0x7 */
- .byte 0x8 /* uleb128 0x8 */
- .byte 0x90 /* DW_CFA_offset, column 0x10 */
- .byte 0x1
- .align 3
-LECIE1:
- .globl _ffi_call_unix64.eh
-_ffi_call_unix64.eh:
-LSFDE1:
- .set L$set$1,LEFDE1-LASFDE1 /* FDE Length */
- .long L$set$1
-LASFDE1:
- .long LASFDE1-EH_frame1 /* FDE CIE offset */
- .quad LUW0-. /* FDE initial location */
- .set L$set$2,LUW4-LUW0 /* FDE address range */
- .quad L$set$2
- .byte 0x0 /* Augmentation size */
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .set L$set$3,LUW1-LUW0
- .long L$set$3
-
- /* New stack frame based off rbp. This is a itty bit of unwind
- trickery in that the CFA *has* changed. There is no easy way
- to describe it correctly on entry to the function. Fortunately,
- it doesn't matter too much since at all points we can correctly
- unwind back to ffi_call. Note that the location to which we
- moved the return address is (the new) CFA-8, so from the
- perspective of the unwind info, it hasn't moved. */
- .byte 0xc /* DW_CFA_def_cfa, %rbp offset 32 */
- .byte 0x6
- .byte 0x20
- .byte 0x80+6 /* DW_CFA_offset, %rbp offset 2*-8 */
- .byte 0x2
- .byte 0xa /* DW_CFA_remember_state */
-
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .set L$set$4,LUW2-LUW1
- .long L$set$4
- .byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */
- .byte 0x7
- .byte 0x8
- .byte 0xc0+6 /* DW_CFA_restore, %rbp */
-
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .set L$set$5,LUW3-LUW2
- .long L$set$5
- .byte 0xb /* DW_CFA_restore_state */
-
- .align 3
-LEFDE1:
- .globl _ffi_closure_unix64.eh
-_ffi_closure_unix64.eh:
-LSFDE3:
- .set L$set$6,LEFDE3-LASFDE3 /* FDE Length */
- .long L$set$6
-LASFDE3:
- .long LASFDE3-EH_frame1 /* FDE CIE offset */
- .quad LUW5-. /* FDE initial location */
- .set L$set$7,LUW9-LUW5 /* FDE address range */
- .quad L$set$7
- .byte 0x0 /* Augmentation size */
-
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .set L$set$8,LUW6-LUW5
- .long L$set$8
- .byte 0xe /* DW_CFA_def_cfa_offset */
- .byte 208,1 /* uleb128 208 */
- .byte 0xa /* DW_CFA_remember_state */
-
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .set L$set$9,LUW7-LUW6
- .long L$set$9
- .byte 0xe /* DW_CFA_def_cfa_offset */
- .byte 0x8
-
- .byte 0x4 /* DW_CFA_advance_loc4 */
- .set L$set$10,LUW8-LUW7
- .long L$set$10
- .byte 0xb /* DW_CFA_restore_state */
-
- .align 3
-LEFDE3:
- .subsections_via_symbols
-
-#endif /* __x86_64__ */
diff --git a/src/x86/ffi.c b/src/x86/ffi.c
index 4c96c6d..c4d740a 100644
--- a/src/x86/ffi.c
+++ b/src/x86/ffi.c
@@ -332,13 +332,28 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
else
{
size_t za = ALIGN (z, FFI_SIZEOF_ARG);
+ size_t align = FFI_SIZEOF_ARG;
+
+ /* Alignment rules for arguments are quite complex. Vectors and
+ structures with 16 byte alignment get it. Note that long double
+ on Darwin does have 16 byte alignment, and does not get this
+ alignment if passed directly; a structure with a long double
+ inside, however, would get 16 byte alignment. Since libffi does
+ not support vectors, we need not concern ourselves with other
+ cases. */
+ if (t == FFI_TYPE_STRUCT && ty->alignment >= 16)
+ align = 16;
+
if (dir < 0)
{
+ /* ??? These reverse argument ABIs are probably too old
+ to have cared about alignment. Someone should check. */
argp -= za;
memcpy (argp, valp, z);
}
else
{
+ argp = (char *)ALIGN (argp, align);
memcpy (argp, valp, z);
argp += za;
}
@@ -419,8 +434,9 @@ ffi_closure_inner (struct closure_frame *frame, char *stack)
arg_types = cif->arg_types;
for (i = 0; i < n; ++i)
{
- size_t z = arg_types[i]->size;
- int t = arg_types[i]->type;
+ ffi_type *ty = arg_types[i];
+ size_t z = ty->size;
+ int t = ty->type;
void *valp;
if (z <= FFI_SIZEOF_ARG && t != FFI_TYPE_STRUCT)
@@ -441,13 +457,22 @@ ffi_closure_inner (struct closure_frame *frame, char *stack)
else
{
size_t za = ALIGN (z, FFI_SIZEOF_ARG);
+ size_t align = FFI_SIZEOF_ARG;
+
+ /* See the comment in ffi_call_int. */
+ if (t == FFI_TYPE_STRUCT && ty->alignment >= 16)
+ align = 16;
+
if (dir < 0)
{
+ /* ??? These reverse argument ABIs are probably too old
+ to have cared about alignment. Someone should check. */
argp -= za;
valp = argp;
}
else
{
+ argp = (char *)ALIGN (argp, align);
valp = argp;
argp += za;
}
diff --git a/src/x86/sysv.S b/src/x86/sysv.S
index ebd1693..6043c67 100644
--- a/src/x86/sysv.S
+++ b/src/x86/sysv.S
@@ -30,7 +30,6 @@
#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>
-#include <ffi_cfi.h>
#include "internal.h"
#define C2(X, Y) X ## Y
@@ -41,6 +40,12 @@
# define C(X) X
#endif
+#ifdef X86_DARWIN
+# define L(X) C1(L, X)
+#else
+# define L(X) C1(.L, X)
+#endif
+
#ifdef __ELF__
# define ENDF(X) .type X,@function; .size X, . - X
#else
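
The C1/C2 pair above is the standard two-level token paste: the extra level of indirection forces both arguments to be macro-expanded (e.g. __USER_LABEL_PREFIX__) before ## glues them, so L(X) becomes LX on Darwin and .LX elsewhere. A minimal C sketch of the idiom, with PREFIX and mylabel as illustrative stand-ins (an actual .L prefix would not be a valid C identifier):

    #include <stdio.h>

    #define C2(X, Y) X ## Y
    #define C1(X, Y) C2(X, Y)   /* indirection so X and Y expand first */

    #define PREFIX my           /* stand-in for __USER_LABEL_PREFIX__ */
    #define SYM(X) C1(PREFIX, X)

    int SYM(label) = 42;        /* expands to: int mylabel = 42; */

    int
    main (void)
    {
      printf ("%d\n", SYM(label)); /* prints 42 */
      return 0;
    }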
@@ -60,14 +65,14 @@
actual table. The entry points into the table are all 8 bytes.
The use of ORG asserts that we're at the correct location. */
/* ??? The clang assembler doesn't handle .org with symbolic expressions. */
-#ifdef __clang__
-# define E(X) .align 8
+#if defined(__clang__) || defined(__APPLE__)
+# define E(BASE, X) .balign 8
#else
-# define E(X) .align 8; .org 0b + X * 8
+# define E(BASE, X) .balign 8; .org BASE + X * 8
#endif
.text
- .align 16
+ .balign 16
.globl ffi_call_i386
FFI_HIDDEN(ffi_call_i386)
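
The E() macro keeps every handler in a fixed 8-byte slot, so dispatch is simply a jump to table_base + type * 8; on GNU as, the .org directive turns an oversized handler into a build error, an assertion the clang/Apple branch has to forgo. A rough C analogue of that assertion, using offsetof where the assembler uses .org (the struct and field names here are illustrative):

    #include <stddef.h>

    #define SLOT 8

    struct handler { unsigned char code[SLOT]; }; /* one 8-byte entry */

    struct jump_table {
      struct handler ret_float;   /* slot 0 */
      struct handler ret_double;  /* slot 1 */
      struct handler ret_ldouble; /* slot 2 */
    };

    /* The moral equivalent of ".org BASE + X * 8": fail the build if
       an entry has drifted from its expected offset.  */
    _Static_assert (offsetof (struct jump_table, ret_double) == 1 * SLOT,
                    "slot 1 overflowed");
    _Static_assert (offsetof (struct jump_table, ret_ldouble) == 2 * SLOT,
                    "slot 2 overflowed");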
@@ -83,7 +88,8 @@
*/
ffi_call_i386:
- cfi_startproc
+L(UW0):
+ # cfi_startproc
movl (%esp), %eax /* move the return address */
movl %ebp, (%ecx) /* store %ebp into local frame */
movl %eax, 4(%ecx) /* store retaddr into local frame */
@@ -96,8 +102,9 @@ ffi_call_i386:
moved the return address is (the new) CFA-4, so from the
perspective of the unwind info, it hasn't moved. */
movl %ecx, %ebp
- cfi_def_cfa(%ebp, 8)
- cfi_rel_offset(%ebp, 0)
+L(UW1):
+ # cfi_def_cfa(%ebp, 8)
+ # cfi_rel_offset(%ebp, 0)
movl %edx, %esp /* set outgoing argument stack */
movl 20+R_EAX*4(%ebp), %eax /* set register arguments */
@@ -108,80 +115,86 @@ ffi_call_i386:
movl 12(%ebp), %ecx /* load return type code */
movl %ebx, 8(%ebp) /* preserve %ebx */
- cfi_rel_offset(%ebx, 8)
+L(UW2):
+ # cfi_rel_offset(%ebx, 8)
andl $X86_RET_TYPE_MASK, %ecx
#ifdef __PIC__
- call __x86.get_pc_thunk.bx
-1: leal 0f-1b(%ebx, %ecx, 8), %ebx
+ call C(__x86.get_pc_thunk.bx)
+L(pc1):
+ leal L(store_table)-L(pc1)(%ebx, %ecx, 8), %ebx
#else
- leal 0f(,%ecx, 8), %ebx
+ leal L(store_table)(,%ecx, 8), %ebx
#endif
movl 16(%ebp), %ecx /* load result address */
jmp *%ebx
- .align 8
-0:
-E(X86_RET_FLOAT)
+ .balign 8
+L(store_table):
+E(L(store_table), X86_RET_FLOAT)
fstps (%ecx)
- jmp 9f
-E(X86_RET_DOUBLE)
+ jmp L(e1)
+E(L(store_table), X86_RET_DOUBLE)
fstpl (%ecx)
- jmp 9f
-E(X86_RET_LDOUBLE)
+ jmp L(e1)
+E(L(store_table), X86_RET_LDOUBLE)
fstpt (%ecx)
- jmp 9f
-E(X86_RET_SINT8)
+ jmp L(e1)
+E(L(store_table), X86_RET_SINT8)
movsbl %al, %eax
mov %eax, (%ecx)
- jmp 9f
-E(X86_RET_SINT16)
+ jmp L(e1)
+E(L(store_table), X86_RET_SINT16)
movswl %ax, %eax
mov %eax, (%ecx)
- jmp 9f
-E(X86_RET_UINT8)
+ jmp L(e1)
+E(L(store_table), X86_RET_UINT8)
movzbl %al, %eax
mov %eax, (%ecx)
- jmp 9f
-E(X86_RET_UINT16)
+ jmp L(e1)
+E(L(store_table), X86_RET_UINT16)
movzwl %ax, %eax
mov %eax, (%ecx)
- jmp 9f
-E(X86_RET_INT64)
+ jmp L(e1)
+E(L(store_table), X86_RET_INT64)
movl %edx, 4(%ecx)
/* fallthru */
-E(X86_RET_INT32)
+E(L(store_table), X86_RET_INT32)
movl %eax, (%ecx)
/* fallthru */
-E(X86_RET_VOID)
-9: movl 8(%ebp), %ebx
+E(L(store_table), X86_RET_VOID)
+L(e1):
+ movl 8(%ebp), %ebx
movl %ebp, %esp
popl %ebp
- cfi_remember_state
- cfi_def_cfa(%esp, 4)
- cfi_restore(%ebx)
- cfi_restore(%ebp)
+L(UW3):
+ # cfi_remember_state
+ # cfi_def_cfa(%esp, 4)
+ # cfi_restore(%ebx)
+ # cfi_restore(%ebp)
ret
- cfi_restore_state
-
-E(X86_RET_STRUCTPOP)
- jmp 9b
-E(X86_RET_STRUCTARG)
- jmp 9b
-E(X86_RET_STRUCT_1B)
+L(UW4):
+ # cfi_restore_state
+
+E(L(store_table), X86_RET_STRUCTPOP)
+ jmp L(e1)
+E(L(store_table), X86_RET_STRUCTARG)
+ jmp L(e1)
+E(L(store_table), X86_RET_STRUCT_1B)
movb %al, (%ecx)
- jmp 9b
-E(X86_RET_STRUCT_2B)
+ jmp L(e1)
+E(L(store_table), X86_RET_STRUCT_2B)
movw %ax, (%ecx)
- jmp 9b
+ jmp L(e1)
/* Fill out the table so that bad values are predictable. */
-E(X86_RET_UNUSED14)
+E(L(store_table), X86_RET_UNUSED14)
ud2
-E(X86_RET_UNUSED15)
+E(L(store_table), X86_RET_UNUSED15)
ud2
- cfi_endproc
+L(UW5):
+ # cfi_endproc
ENDF(ffi_call_i386)
/* The inner helper is declared as
@@ -212,190 +225,242 @@ ENDF(ffi_call_i386)
movl %ecx, 32(%esp); \
movl %eax, 36(%esp)
+# define FFI_CLOSURE_CALL_INNER(UW) \
+ movl %esp, %ecx; /* load closure_data */ \
+ leal closure_FS+4(%esp), %edx; /* load incoming stack */ \
+ call ffi_closure_inner
+#define FFI_CLOSURE_MASK_AND_JUMP(N, UW) \
+ andl $X86_RET_TYPE_MASK, %eax; \
+ leal L(C1(load_table,N))(, %eax, 8), %eax; \
+ jmp *%eax
#ifdef __PIC__
-/* We're going to always load the got register here, even if .hidden says
- we're going to avoid the PLT call. We'll use the got register in
- FFI_CLOSURE_MASK_AND_JUMP. */
-# if defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE
-# define PLT(X) X
+# if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE
+# undef FFI_CLOSURE_MASK_AND_JUMP
+# define FFI_CLOSURE_MASK_AND_JUMP(N, UW) \
+ andl $X86_RET_TYPE_MASK, %eax; \
+ call C(__x86.get_pc_thunk.dx); \
+L(C1(pc,N)): \
+ leal L(C1(load_table,N))-L(C1(pc,N))(%edx, %eax, 8), %eax; \
+ jmp *%eax
# else
-# define PLT(X) X@PLT
-# endif
-# define FFI_CLOSURE_CALL_INNER \
+# define FFI_CLOSURE_CALL_INNER_SAVE_EBX
+# undef FFI_CLOSURE_CALL_INNER
+# define FFI_CLOSURE_CALL_INNER(UWN) \
movl %esp, %ecx; /* load closure_data */ \
leal closure_FS+4(%esp), %edx; /* load incoming stack */ \
movl %ebx, 40(%esp); /* save ebx */ \
- cfi_rel_offset(%ebx, 40); \
- call __x86.get_pc_thunk.bx; /* load got register */ \
+L(C1(UW,UWN)): \
+ # cfi_rel_offset(%ebx, 40); \
+ call C(__x86.get_pc_thunk.bx); /* load got register */ \
addl $C(_GLOBAL_OFFSET_TABLE_), %ebx; \
- call PLT(ffi_closure_inner)
-#define FFI_CLOSURE_MASK_AND_JUMP \
+ call ffi_closure_inner@PLT
+# undef FFI_CLOSURE_MASK_AND_JUMP
+# define FFI_CLOSURE_MASK_AND_JUMP(N, UWN) \
andl $X86_RET_TYPE_MASK, %eax; \
- leal 0f@GOTOFF(%ebx, %eax, 8), %eax; \
+ leal L(C1(load_table,N))@GOTOFF(%ebx, %eax, 8), %eax; \
movl 40(%esp), %ebx; /* restore ebx */ \
- cfi_restore(%ebx); \
- jmp *%eax
-#else
-# define FFI_CLOSURE_CALL_INNER \
- movl %esp, %ecx; /* load closure_data */ \
- leal closure_FS+4(%esp), %edx; /* load incoming stack */ \
- call ffi_closure_inner
-#define FFI_CLOSURE_MASK_AND_JUMP \
- andl $X86_RET_TYPE_MASK, %eax; \
- leal 0f(, %eax, 8), %eax; \
+L(C1(UW,UWN)): \
+ # cfi_restore(%ebx); \
jmp *%eax
+# endif /* DARWIN || HIDDEN */
#endif /* __PIC__ */
-#define FFI_GO_CLOSURE(suffix, chain, t1, t2) \
- .align 16; \
- .globl C(C1(ffi_go_closure_,suffix)); \
- FFI_HIDDEN(C(C1(ffi_go_closure_,suffix))); \
-C(C1(ffi_go_closure_,suffix)): \
- cfi_startproc; \
- subl $closure_FS, %esp; \
- /* Note clang bug 21515: adjust_cfa_offset error across endproc. */ \
- cfi_def_cfa_offset(closure_FS + 4); \
- FFI_CLOSURE_SAVE_REGS; \
- movl 4(chain), t1; /* copy cif */ \
- movl 8(chain), t2; /* copy fun */ \
- movl t1, 28(%esp); \
- movl t2, 32(%esp); \
- movl chain, 36(%esp); /* closure is user_data */ \
- jmp 88f; \
- cfi_endproc; \
-ENDF(C(C1(ffi_go_closure_,suffix)))
-
-FFI_GO_CLOSURE(EAX, %eax, %edx, %ecx)
-FFI_GO_CLOSURE(ECX, %ecx, %edx, %eax)
+ .balign 16
+ .globl C(ffi_go_closure_EAX)
+ FFI_HIDDEN(C(ffi_go_closure_EAX))
+C(ffi_go_closure_EAX):
+L(UW6):
+ # cfi_startproc
+ subl $closure_FS, %esp
+L(UW7):
+ # cfi_def_cfa_offset(closure_FS + 4)
+ FFI_CLOSURE_SAVE_REGS
+ movl 4(%eax), %edx /* copy cif */
+ movl 8(%eax), %ecx /* copy fun */
+ movl %edx, 28(%esp)
+ movl %ecx, 32(%esp)
+ movl %eax, 36(%esp) /* closure is user_data */
+ jmp L(do_closure_i386)
+L(UW8):
+ # cfi_endproc
+ENDF(C(ffi_go_closure_EAX))
+
+ .balign 16
+ .globl C(ffi_go_closure_ECX)
+ FFI_HIDDEN(C(ffi_go_closure_ECX))
+C(ffi_go_closure_ECX):
+L(UW9):
+ # cfi_startproc
+ subl $closure_FS, %esp
+L(UW10):
+ # cfi_def_cfa_offset(closure_FS + 4)
+ FFI_CLOSURE_SAVE_REGS
+ movl 4(%ecx), %edx /* copy cif */
+ movl 8(%ecx), %eax /* copy fun */
+ movl %edx, 28(%esp)
+ movl %eax, 32(%esp)
+ movl %ecx, 36(%esp) /* closure is user_data */
+ jmp L(do_closure_i386)
+L(UW11):
+ # cfi_endproc
+ENDF(C(ffi_go_closure_ECX))
/* The closure entry points are reached from the ffi_closure trampoline.
On entry, %eax contains the address of the ffi_closure. */
- .align 16
+ .balign 16
.globl C(ffi_closure_i386)
FFI_HIDDEN(C(ffi_closure_i386))
C(ffi_closure_i386):
- cfi_startproc
+L(UW12):
+ # cfi_startproc
subl $closure_FS, %esp
- /* Note clang bug 21515: adjust_cfa_offset error across endproc. */
- cfi_def_cfa_offset(closure_FS + 4)
+L(UW13):
+ # cfi_def_cfa_offset(closure_FS + 4)
FFI_CLOSURE_SAVE_REGS
FFI_CLOSURE_COPY_TRAMP_DATA
-88: /* Entry point from preceeding Go closures. */
+ /* Entry point from preceding Go closures. */
+L(do_closure_i386):
- FFI_CLOSURE_CALL_INNER
- FFI_CLOSURE_MASK_AND_JUMP
+ FFI_CLOSURE_CALL_INNER(14)
+ FFI_CLOSURE_MASK_AND_JUMP(2, 15)
- .align 8
-0:
-E(X86_RET_FLOAT)
+ .balign 8
+L(load_table2):
+E(L(load_table2), X86_RET_FLOAT)
flds (%esp)
- jmp 9f
-E(X86_RET_DOUBLE)
+ jmp L(e2)
+E(L(load_table2), X86_RET_DOUBLE)
fldl (%esp)
- jmp 9f
-E(X86_RET_LDOUBLE)
+ jmp L(e2)
+E(L(load_table2), X86_RET_LDOUBLE)
fldt (%esp)
- jmp 9f
-E(X86_RET_SINT8)
+ jmp L(e2)
+E(L(load_table2), X86_RET_SINT8)
movsbl (%esp), %eax
- jmp 9f
-E(X86_RET_SINT16)
+ jmp L(e2)
+E(L(load_table2), X86_RET_SINT16)
movswl (%esp), %eax
- jmp 9f
-E(X86_RET_UINT8)
+ jmp L(e2)
+E(L(load_table2), X86_RET_UINT8)
movzbl (%esp), %eax
- jmp 9f
-E(X86_RET_UINT16)
+ jmp L(e2)
+E(L(load_table2), X86_RET_UINT16)
movzwl (%esp), %eax
- jmp 9f
-E(X86_RET_INT64)
+ jmp L(e2)
+E(L(load_table2), X86_RET_INT64)
movl 4(%esp), %edx
/* fallthru */
-E(X86_RET_INT32)
+E(L(load_table2), X86_RET_INT32)
movl (%esp), %eax
/* fallthru */
-E(X86_RET_VOID)
-9: addl $closure_FS, %esp
- cfi_adjust_cfa_offset(-closure_FS)
+E(L(load_table2), X86_RET_VOID)
+L(e2):
+ addl $closure_FS, %esp
+L(UW16):
+ # cfi_adjust_cfa_offset(-closure_FS)
ret
- cfi_adjust_cfa_offset(closure_FS)
-E(X86_RET_STRUCTPOP)
+L(UW17):
+ # cfi_adjust_cfa_offset(closure_FS)
+E(L(load_table2), X86_RET_STRUCTPOP)
addl $closure_FS, %esp
- cfi_adjust_cfa_offset(-closure_FS)
+L(UW18):
+ # cfi_adjust_cfa_offset(-closure_FS)
ret $4
- cfi_adjust_cfa_offset(closure_FS)
-E(X86_RET_STRUCTARG)
+L(UW19):
+ # cfi_adjust_cfa_offset(closure_FS)
+E(L(load_table2), X86_RET_STRUCTARG)
movl (%esp), %eax
- jmp 9b
-E(X86_RET_STRUCT_1B)
+ jmp L(e2)
+E(L(load_table2), X86_RET_STRUCT_1B)
movzbl (%esp), %eax
- jmp 9b
-E(X86_RET_STRUCT_2B)
+ jmp L(e2)
+E(L(load_table2), X86_RET_STRUCT_2B)
movzwl (%esp), %eax
- jmp 9b
+ jmp L(e2)
/* Fill out the table so that bad values are predictable. */
-E(X86_RET_UNUSED14)
+E(L(load_table2), X86_RET_UNUSED14)
ud2
-E(X86_RET_UNUSED15)
+E(L(load_table2), X86_RET_UNUSED15)
ud2
- cfi_endproc
+L(UW20):
+ # cfi_endproc
ENDF(C(ffi_closure_i386))
-FFI_GO_CLOSURE(STDCALL, %ecx, %edx, %eax)
+ .balign 16
+ .globl C(ffi_go_closure_STDCALL)
+ FFI_HIDDEN(C(ffi_go_closure_STDCALL))
+C(ffi_go_closure_STDCALL):
+L(UW21):
+ # cfi_startproc
+ subl $closure_FS, %esp
+L(UW22):
+ # cfi_def_cfa_offset(closure_FS + 4)
+ FFI_CLOSURE_SAVE_REGS
+ movl 4(%ecx), %edx /* copy cif */
+ movl 8(%ecx), %eax /* copy fun */
+ movl %edx, 28(%esp)
+ movl %eax, 32(%esp)
+ movl %ecx, 36(%esp) /* closure is user_data */
+ jmp L(do_closure_STDCALL)
+L(UW23):
+ # cfi_endproc
+ENDF(C(ffi_go_closure_STDCALL))
/* For REGISTER, we have no available parameter registers, and so we
enter here having pushed the closure onto the stack. */
- .align 16
+ .balign 16
.globl C(ffi_closure_REGISTER)
FFI_HIDDEN(C(ffi_closure_REGISTER))
C(ffi_closure_REGISTER):
- cfi_startproc
- cfi_def_cfa(%esp, 8)
- cfi_offset(%eip, -8)
+L(UW24):
+ # cfi_startproc
+ # cfi_def_cfa(%esp, 8)
+ # cfi_offset(%eip, -8)
subl $closure_FS-4, %esp
- /* Note clang bug 21515: adjust_cfa_offset error across endproc. */
- cfi_def_cfa_offset(closure_FS + 4)
-
+L(UW25):
+ # cfi_def_cfa_offset(closure_FS + 4)
FFI_CLOSURE_SAVE_REGS
-
movl closure_FS-4(%esp), %ecx /* load retaddr */
movl closure_FS(%esp), %eax /* load closure */
movl %ecx, closure_FS(%esp) /* move retaddr */
- jmp 0f
-
- cfi_endproc
+ jmp L(do_closure_REGISTER)
+L(UW26):
+ # cfi_endproc
ENDF(C(ffi_closure_REGISTER))
/* For STDCALL (and others), we need to pop N bytes of arguments off
the stack following the closure. The amount needing to be popped
is returned to us from ffi_closure_inner. */
- .align 16
+ .balign 16
.globl C(ffi_closure_STDCALL)
FFI_HIDDEN(C(ffi_closure_STDCALL))
C(ffi_closure_STDCALL):
- cfi_startproc
+L(UW27):
+ # cfi_startproc
subl $closure_FS, %esp
- /* Note clang bug 21515: adjust_cfa_offset error across endproc. */
- cfi_def_cfa_offset(closure_FS + 4)
+L(UW28):
+ # cfi_def_cfa_offset(closure_FS + 4)
FFI_CLOSURE_SAVE_REGS
-0: /* Entry point from ffi_closure_REGISTER. */
+ /* Entry point from ffi_closure_REGISTER. */
+L(do_closure_REGISTER):
FFI_CLOSURE_COPY_TRAMP_DATA
-88: /* Entry point from preceeding Go closure. */
+ /* Entry point from preceding Go closure. */
+L(do_closure_STDCALL):
- FFI_CLOSURE_CALL_INNER
+ FFI_CLOSURE_CALL_INNER(29)
movl %eax, %ecx
shrl $X86_RET_POP_SHIFT, %ecx /* isolate pop count */
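
ffi_closure_inner returns a word that packs the return-type code in the low bits and, for STDCALL-style closures, the number of caller bytes to pop above them; the andl/shrl pair seen here unpacks it. A small C sketch of that unpacking, with constants assumed to match src/x86/internal.h at the time of this change:

    /* Assumed layout: low 4 bits = X86_RET_* type code, remaining
       bits = bytes the callee must pop on return (STDCALL et al).  */
    #define X86_RET_TYPE_MASK  15
    #define X86_RET_POP_SHIFT  4

    static unsigned ret_type (unsigned flags)  { return flags & X86_RET_TYPE_MASK; }
    static unsigned pop_bytes (unsigned flags) { return flags >> X86_RET_POP_SHIFT; }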
@@ -403,100 +468,98 @@ C(ffi_closure_STDCALL):
movl closure_FS(%esp), %edx /* move return address */
movl %edx, (%ecx)
- /* New pseudo-stack frame based off ecx. This is unwind trickery
- in that the CFA *has* changed, to the proper popped stack address.
- Note that the location to which we moved the return address
- is the new CFA-4, so that's unchanged. */
- cfi_def_cfa(%ecx, 4)
- /* Normally esp is unwound to CFA + the caller's ARGS_SIZE.
- We've just set the CFA to that final value. Tell the unwinder
- to restore esp from CFA without the ARGS_SIZE:
- DW_CFA_val_expression %esp, DW_OP_call_frame_cfa. */
- cfi_escape(0x16, 4, 1, 0x9c)
-
- FFI_CLOSURE_MASK_AND_JUMP
-
- .align 8
-0:
-E(X86_RET_FLOAT)
+ /* From this point on, the value of %esp upon return is %ecx+4,
+ and we've copied the return address to %ecx to make return easy.
+ There's no point in representing this in the unwind info, as
+ there is always a window between the mov and the ret which
+ will be wrong from one point of view or another. */
+
+ FFI_CLOSURE_MASK_AND_JUMP(3, 30)
+
+ .balign 8
+L(load_table3):
+E(L(load_table3), X86_RET_FLOAT)
flds (%esp)
movl %ecx, %esp
ret
-E(X86_RET_DOUBLE)
+E(L(load_table3), X86_RET_DOUBLE)
fldl (%esp)
movl %ecx, %esp
ret
-E(X86_RET_LDOUBLE)
+E(L(load_table3), X86_RET_LDOUBLE)
fldt (%esp)
movl %ecx, %esp
ret
-E(X86_RET_SINT8)
+E(L(load_table3), X86_RET_SINT8)
movsbl (%esp), %eax
movl %ecx, %esp
ret
-E(X86_RET_SINT16)
+E(L(load_table3), X86_RET_SINT16)
movswl (%esp), %eax
movl %ecx, %esp
ret
-E(X86_RET_UINT8)
+E(L(load_table3), X86_RET_UINT8)
movzbl (%esp), %eax
movl %ecx, %esp
ret
-E(X86_RET_UINT16)
+E(L(load_table3), X86_RET_UINT16)
movzwl (%esp), %eax
movl %ecx, %esp
ret
-E(X86_RET_INT64)
+E(L(load_table3), X86_RET_INT64)
popl %eax
popl %edx
movl %ecx, %esp
ret
-E(X86_RET_INT32)
+E(L(load_table3), X86_RET_INT32)
movl (%esp), %eax
movl %ecx, %esp
ret
-E(X86_RET_VOID)
+E(L(load_table3), X86_RET_VOID)
movl %ecx, %esp
ret
-E(X86_RET_STRUCTPOP)
+E(L(load_table3), X86_RET_STRUCTPOP)
movl %ecx, %esp
ret
-E(X86_RET_STRUCTARG)
+E(L(load_table3), X86_RET_STRUCTARG)
movl (%esp), %eax
movl %ecx, %esp
ret
-E(X86_RET_STRUCT_1B)
+E(L(load_table3), X86_RET_STRUCT_1B)
movzbl (%esp), %eax
movl %ecx, %esp
ret
-E(X86_RET_STRUCT_2B)
+E(L(load_table3), X86_RET_STRUCT_2B)
movzwl (%esp), %eax
movl %ecx, %esp
ret
/* Fill out the table so that bad values are predictable. */
-E(X86_RET_UNUSED14)
+E(L(load_table3), X86_RET_UNUSED14)
ud2
-E(X86_RET_UNUSED15)
+E(L(load_table3), X86_RET_UNUSED15)
ud2
- cfi_endproc
+L(UW31):
+ # cfi_endproc
ENDF(C(ffi_closure_STDCALL))
#if !FFI_NO_RAW_API
#define raw_closure_S_FS (16+16+12)
- .align 16
+ .balign 16
.globl C(ffi_closure_raw_SYSV)
FFI_HIDDEN(C(ffi_closure_raw_SYSV))
C(ffi_closure_raw_SYSV):
- cfi_startproc
+L(UW32):
+ # cfi_startproc
subl $raw_closure_S_FS, %esp
- /* Note clang bug 21515: adjust_cfa_offset error across endproc. */
- cfi_def_cfa_offset(raw_closure_S_FS + 4)
+L(UW33):
+ # cfi_def_cfa_offset(raw_closure_S_FS + 4)
movl %ebx, raw_closure_S_FS-4(%esp)
- cfi_rel_offset(%ebx, raw_closure_S_FS-4)
+L(UW34):
+ # cfi_rel_offset(%ebx, raw_closure_S_FS-4)
movl FFI_TRAMPOLINE_SIZE+8(%eax), %edx /* load cl->user_data */
movl %edx, 12(%esp)
@@ -511,96 +574,108 @@ C(ffi_closure_raw_SYSV):
movl 20(%ebx), %eax /* load cif->flags */
andl $X86_RET_TYPE_MASK, %eax
#ifdef __PIC__
- call __x86.get_pc_thunk.bx
-1: leal 0f-1b(%ebx, %eax, 8), %eax
+ call C(__x86.get_pc_thunk.bx)
+L(pc4):
+ leal L(load_table4)-L(pc4)(%ebx, %eax, 8), %eax
#else
- leal 0f(,%eax, 8), %eax
+ leal L(load_table4)(,%eax, 8), %eax
#endif
movl raw_closure_S_FS-4(%esp), %ebx
- cfi_restore(%ebx)
+L(UW35):
+ # cfi_restore(%ebx)
jmp *%eax
- .align 8
-0:
-E(X86_RET_FLOAT)
+ .balign 8
+L(load_table4):
+E(L(load_table4), X86_RET_FLOAT)
flds 16(%esp)
- jmp 9f
-E(X86_RET_DOUBLE)
+ jmp L(e4)
+E(L(load_table4), X86_RET_DOUBLE)
fldl 16(%esp)
- jmp 9f
-E(X86_RET_LDOUBLE)
+ jmp L(e4)
+E(L(load_table4), X86_RET_LDOUBLE)
fldt 16(%esp)
- jmp 9f
-E(X86_RET_SINT8)
+ jmp L(e4)
+E(L(load_table4), X86_RET_SINT8)
movsbl 16(%esp), %eax
- jmp 9f
-E(X86_RET_SINT16)
+ jmp L(e4)
+E(L(load_table4), X86_RET_SINT16)
movswl 16(%esp), %eax
- jmp 9f
-E(X86_RET_UINT8)
+ jmp L(e4)
+E(L(load_table4), X86_RET_UINT8)
movzbl 16(%esp), %eax
- jmp 9f
-E(X86_RET_UINT16)
+ jmp L(e4)
+E(L(load_table4), X86_RET_UINT16)
movzwl 16(%esp), %eax
- jmp 9f
-E(X86_RET_INT64)
+ jmp L(e4)
+E(L(load_table4), X86_RET_INT64)
movl 16+4(%esp), %edx
/* fallthru */
-E(X86_RET_INT32)
+E(L(load_table4), X86_RET_INT32)
movl 16(%esp), %eax
/* fallthru */
-E(X86_RET_VOID)
-9: addl $raw_closure_S_FS, %esp
- cfi_adjust_cfa_offset(-raw_closure_S_FS)
+E(L(load_table4), X86_RET_VOID)
+L(e4):
+ addl $raw_closure_S_FS, %esp
+L(UW36):
+ # cfi_adjust_cfa_offset(-raw_closure_S_FS)
ret
- cfi_adjust_cfa_offset(raw_closure_S_FS)
-E(X86_RET_STRUCTPOP)
+L(UW37):
+ # cfi_adjust_cfa_offset(raw_closure_S_FS)
+E(L(load_table4), X86_RET_STRUCTPOP)
addl $raw_closure_S_FS, %esp
- cfi_adjust_cfa_offset(-raw_closure_S_FS)
+L(UW38):
+ # cfi_adjust_cfa_offset(-raw_closure_S_FS)
ret $4
- cfi_adjust_cfa_offset(raw_closure_S_FS)
-E(X86_RET_STRUCTARG)
+L(UW39):
+ # cfi_adjust_cfa_offset(raw_closure_S_FS)
+E(L(load_table4), X86_RET_STRUCTARG)
movl 16(%esp), %eax
- jmp 9b
-E(X86_RET_STRUCT_1B)
+ jmp L(e4)
+E(L(load_table4), X86_RET_STRUCT_1B)
movzbl 16(%esp), %eax
- jmp 9b
-E(X86_RET_STRUCT_2B)
+ jmp L(e4)
+E(L(load_table4), X86_RET_STRUCT_2B)
movzwl 16(%esp), %eax
- jmp 9b
+ jmp L(e4)
/* Fill out the table so that bad values are predictable. */
-E(X86_RET_UNUSED14)
+E(L(load_table4), X86_RET_UNUSED14)
ud2
-E(X86_RET_UNUSED15)
+E(L(load_table4), X86_RET_UNUSED15)
ud2
- cfi_endproc
+L(UW40):
+ # cfi_endproc
ENDF(C(ffi_closure_raw_SYSV))
-#undef raw_closure_S_FS
#define raw_closure_T_FS (16+16+8)
- .align 16
+ .balign 16
.globl C(ffi_closure_raw_THISCALL)
FFI_HIDDEN(C(ffi_closure_raw_THISCALL))
C(ffi_closure_raw_THISCALL):
- cfi_startproc
+L(UW41):
+ # cfi_startproc
/* Rearrange the stack such that %ecx is the first argument.
This means moving the return address. */
popl %edx
- /* Note clang bug 21515: adjust_cfa_offset error across endproc. */
- cfi_def_cfa_offset(0)
- cfi_register(%eip, %edx)
+L(UW42):
+ # cfi_def_cfa_offset(0)
+ # cfi_register(%eip, %edx)
pushl %ecx
- cfi_adjust_cfa_offset(4)
+L(UW43):
+ # cfi_adjust_cfa_offset(4)
pushl %edx
- cfi_adjust_cfa_offset(4)
- cfi_rel_offset(%eip, 0)
+L(UW44):
+ # cfi_adjust_cfa_offset(4)
+ # cfi_rel_offset(%eip, 0)
subl $raw_closure_T_FS, %esp
- cfi_adjust_cfa_offset(raw_closure_T_FS)
+L(UW45):
+ # cfi_adjust_cfa_offset(raw_closure_T_FS)
movl %ebx, raw_closure_T_FS-4(%esp)
- cfi_rel_offset(%ebx, raw_closure_T_FS-4)
+L(UW46):
+ # cfi_rel_offset(%ebx, raw_closure_T_FS-4)
movl FFI_TRAMPOLINE_SIZE+8(%eax), %edx /* load cl->user_data */
movl %edx, 12(%esp)
@@ -615,90 +690,329 @@ C(ffi_closure_raw_THISCALL):
movl 20(%ebx), %eax /* load cif->flags */
andl $X86_RET_TYPE_MASK, %eax
#ifdef __PIC__
- call __x86.get_pc_thunk.bx
-1: leal 0f-1b(%ebx, %eax, 8), %eax
+ call C(__x86.get_pc_thunk.bx)
+L(pc5):
+ leal L(load_table5)-L(pc5)(%ebx, %eax, 8), %eax
#else
- leal 0f(,%eax, 8), %eax
+ leal L(load_table5)(,%eax, 8), %eax
#endif
movl raw_closure_T_FS-4(%esp), %ebx
- cfi_restore(%ebx)
+L(UW47):
+ # cfi_restore(%ebx)
jmp *%eax
- .align 8
-0:
-E(X86_RET_FLOAT)
+ .balign 8
+L(load_table5):
+E(L(load_table5), X86_RET_FLOAT)
flds 16(%esp)
- jmp 9f
-E(X86_RET_DOUBLE)
+ jmp L(e5)
+E(L(load_table5), X86_RET_DOUBLE)
fldl 16(%esp)
- jmp 9f
-E(X86_RET_LDOUBLE)
+ jmp L(e5)
+E(L(load_table5), X86_RET_LDOUBLE)
fldt 16(%esp)
- jmp 9f
-E(X86_RET_SINT8)
+ jmp L(e5)
+E(L(load_table5), X86_RET_SINT8)
movsbl 16(%esp), %eax
- jmp 9f
-E(X86_RET_SINT16)
+ jmp L(e5)
+E(L(load_table5), X86_RET_SINT16)
movswl 16(%esp), %eax
- jmp 9f
-E(X86_RET_UINT8)
+ jmp L(e5)
+E(L(load_table5), X86_RET_UINT8)
movzbl 16(%esp), %eax
- jmp 9f
-E(X86_RET_UINT16)
+ jmp L(e5)
+E(L(load_table5), X86_RET_UINT16)
movzwl 16(%esp), %eax
- jmp 9f
-E(X86_RET_INT64)
+ jmp L(e5)
+E(L(load_table5), X86_RET_INT64)
movl 16+4(%esp), %edx
/* fallthru */
-E(X86_RET_INT32)
+E(L(load_table5), X86_RET_INT32)
movl 16(%esp), %eax
/* fallthru */
-E(X86_RET_VOID)
-9: addl $raw_closure_T_FS, %esp
- cfi_adjust_cfa_offset(-raw_closure_T_FS)
+E(L(load_table5), X86_RET_VOID)
+L(e5):
+ addl $raw_closure_T_FS, %esp
+L(UW48):
+ # cfi_adjust_cfa_offset(-raw_closure_T_FS)
/* Remove the extra %ecx argument we pushed. */
ret $4
- cfi_adjust_cfa_offset(raw_closure_T_FS)
-E(X86_RET_STRUCTPOP)
+L(UW49):
+ # cfi_adjust_cfa_offset(raw_closure_T_FS)
+E(L(load_table5), X86_RET_STRUCTPOP)
addl $raw_closure_T_FS, %esp
- cfi_adjust_cfa_offset(-raw_closure_T_FS)
+L(UW50):
+ # cfi_adjust_cfa_offset(-raw_closure_T_FS)
ret $8
- cfi_adjust_cfa_offset(raw_closure_T_FS)
-E(X86_RET_STRUCTARG)
+L(UW51):
+ # cfi_adjust_cfa_offset(raw_closure_T_FS)
+E(L(load_table5), X86_RET_STRUCTARG)
movl 16(%esp), %eax
- jmp 9b
-E(X86_RET_STRUCT_1B)
+ jmp L(e5)
+E(L(load_table5), X86_RET_STRUCT_1B)
movzbl 16(%esp), %eax
- jmp 9b
-E(X86_RET_STRUCT_2B)
+ jmp L(e5)
+E(L(load_table5), X86_RET_STRUCT_2B)
movzwl 16(%esp), %eax
- jmp 9b
+ jmp L(e5)
/* Fill out the table so that bad values are predictable. */
-E(X86_RET_UNUSED14)
+E(L(load_table5), X86_RET_UNUSED14)
ud2
-E(X86_RET_UNUSED15)
+E(L(load_table5), X86_RET_UNUSED15)
ud2
- cfi_endproc
+L(UW52):
+ # cfi_endproc
ENDF(C(ffi_closure_raw_THISCALL))
#endif /* !FFI_NO_RAW_API */
+#ifdef X86_DARWIN
+# define COMDAT(X) \
+ .section __TEXT,__textcoal_nt,coalesced,pure_instructions; \
+ .weak_definition X; \
+ .private_extern X
+#elif defined __ELF__
+# define COMDAT(X) \
+ .section .text.X,"axG",@progbits,X,comdat; \
+ .globl X; \
+ FFI_HIDDEN(X)
+#else
+# define COMDAT(X)
+#endif
+
#if defined(__PIC__)
- .section .text.__x86.get_pc_thunk.bx,"axG",@progbits,__x86.get_pc_thunk.bx,comdat
- .globl __x86.get_pc_thunk.bx
- .hidden __x86.get_pc_thunk.bx
- .type __x86.get_pc_thunk.bx,@function
-__x86.get_pc_thunk.bx:
- cfi_startproc
+ COMDAT(C(__x86.get_pc_thunk.bx))
+C(__x86.get_pc_thunk.bx):
movl (%esp), %ebx
ret
- cfi_endproc
- .size __x86.get_pc_thunk.bx, . - __x86.get_pc_thunk.bx
+ENDF(C(__x86.get_pc_thunk.bx))
+# if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE
+ COMDAT(C(__x86.get_pc_thunk.dx))
+C(__x86.get_pc_thunk.dx):
+ movl (%esp), %edx
+ ret
+ENDF(C(__x86.get_pc_thunk.dx))
+#endif /* DARWIN || HIDDEN */
#endif /* __PIC__ */
+/* Sadly, OSX cctools-as doesn't understand .cfi directives at all. */
+
+#ifdef __APPLE__
+.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
+EHFrame0:
+#elif defined(HAVE_AS_X86_64_UNWIND_SECTION_TYPE)
+.section .eh_frame,"a",@unwind
+#else
+.section .eh_frame,"a",@progbits
+#endif
+
+#ifdef HAVE_AS_X86_PCREL
+# define PCREL(X) X - .
+#else
+# define PCREL(X) X@rel
+#endif
+
+/* Simplify advancing between labels. Assume DW_CFA_advance_loc1 fits. */
+#define ADV(N, P) .byte 2, L(N)-L(P)
+
+ .balign 4
+L(CIE):
+ .set L(set0),L(ECIE)-L(SCIE)
+ .long L(set0) /* CIE Length */
+L(SCIE):
+ .long 0 /* CIE Identifier Tag */
+ .byte 1 /* CIE Version */
+ .ascii "zR\0" /* CIE Augmentation */
+ .byte 1 /* CIE Code Alignment Factor */
+ .byte 0x7c /* CIE Data Alignment Factor */
+ .byte 0x8 /* CIE RA Column */
+ .byte 1 /* Augmentation size */
+ .byte 0x1b /* FDE Encoding (pcrel sdata4) */
+ .byte 0xc, 4, 4 /* DW_CFA_def_cfa, %esp offset 4 */
+ .byte 0x80+8, 1 /* DW_CFA_offset, %eip offset 1*-4 */
+ .balign 4
+L(ECIE):
+
+ .set L(set1),L(EFDE1)-L(SFDE1)
+ .long L(set1) /* FDE Length */
+L(SFDE1):
+ .long L(SFDE1)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW0)) /* Initial location */
+ .long L(UW5)-L(UW0) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW1, UW0)
+ .byte 0xc, 5, 8 /* DW_CFA_def_cfa, %ebp 8 */
+ .byte 0x80+5, 2 /* DW_CFA_offset, %ebp 2*-4 */
+ ADV(UW2, UW1)
+ .byte 0x80+3, 0 /* DW_CFA_offset, %ebx 0*-4 */
+ ADV(UW3, UW2)
+ .byte 0xa /* DW_CFA_remember_state */
+ .byte 0xc, 4, 4 /* DW_CFA_def_cfa, %esp 4 */
+ .byte 0xc0+3 /* DW_CFA_restore, %ebx */
+ .byte 0xc0+5 /* DW_CFA_restore, %ebp */
+ ADV(UW4, UW3)
+ .byte 0xb /* DW_CFA_restore_state */
+ .balign 4
+L(EFDE1):
+
+ .set L(set2),L(EFDE2)-L(SFDE2)
+ .long L(set2) /* FDE Length */
+L(SFDE2):
+ .long L(SFDE2)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW6)) /* Initial location */
+ .long L(UW8)-L(UW6) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW7, UW6)
+ .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */
+ .balign 4
+L(EFDE2):
+
+ .set L(set3),L(EFDE3)-L(SFDE3)
+ .long L(set3) /* FDE Length */
+L(SFDE3):
+ .long L(SFDE3)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW9)) /* Initial location */
+ .long L(UW11)-L(UW9) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW10, UW9)
+ .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */
+ .balign 4
+L(EFDE3):
+
+ .set L(set4),L(EFDE4)-L(SFDE4)
+ .long L(set4) /* FDE Length */
+L(SFDE4):
+ .long L(SFDE4)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW12)) /* Initial location */
+ .long L(UW20)-L(UW12) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW13, UW12)
+ .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */
+#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX
+ ADV(UW14, UW13)
+ .byte 0x80+3, (40-(closure_FS+4))/-4 /* DW_CFA_offset %ebx */
+ ADV(UW15, UW14)
+ .byte 0xc0+3 /* DW_CFA_restore %ebx */
+ ADV(UW16, UW15)
+#else
+ ADV(UW16, UW13)
+#endif
+ .byte 0xe, 4 /* DW_CFA_def_cfa_offset */
+ ADV(UW17, UW16)
+ .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */
+ ADV(UW18, UW17)
+ .byte 0xe, 4 /* DW_CFA_def_cfa_offset */
+ ADV(UW19, UW18)
+ .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */
+ .balign 4
+L(EFDE4):
+
+ .set L(set5),L(EFDE5)-L(SFDE5)
+ .long L(set5) /* FDE Length */
+L(SFDE5):
+ .long L(SFDE5)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW21)) /* Initial location */
+ .long L(UW23)-L(UW21) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW22, UW21)
+ .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */
+ .balign 4
+L(EFDE5):
+
+ .set L(set6),L(EFDE6)-L(SFDE6)
+ .long L(set6) /* FDE Length */
+L(SFDE6):
+ .long L(SFDE6)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW24)) /* Initial location */
+ .long L(UW26)-L(UW24) /* Address range */
+ .byte 0 /* Augmentation size */
+ .byte 0xe, 8 /* DW_CFA_def_cfa_offset */
+ .byte 0x80+8, 2 /* DW_CFA_offset %eip, 2*-4 */
+ ADV(UW25, UW24)
+ .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */
+ .balign 4
+L(EFDE6):
+
+ .set L(set7),L(EFDE7)-L(SFDE7)
+ .long L(set7) /* FDE Length */
+L(SFDE7):
+ .long L(SFDE7)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW27)) /* Initial location */
+ .long L(UW31)-L(UW27) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW28, UW27)
+ .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */
+#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX
+ ADV(UW29, UW28)
+ .byte 0x80+3, (40-(closure_FS+4))/-4 /* DW_CFA_offset %ebx */
+ ADV(UW30, UW29)
+ .byte 0xc0+3 /* DW_CFA_restore %ebx */
+#endif
+ .balign 4
+L(EFDE7):
+
+#if !FFI_NO_RAW_API
+ .set L(set8),L(EFDE8)-L(SFDE8)
+ .long L(set8) /* FDE Length */
+L(SFDE8):
+ .long L(SFDE8)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW32)) /* Initial location */
+ .long L(UW40)-L(UW32) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW33, UW32)
+ .byte 0xe, raw_closure_S_FS+4 /* DW_CFA_def_cfa_offset */
+ ADV(UW34, UW33)
+ .byte 0x80+3, 2 /* DW_CFA_offset %ebx 2*-4 */
+ ADV(UW35, UW34)
+ .byte 0xc0+3 /* DW_CFA_restore %ebx */
+ ADV(UW36, UW35)
+ .byte 0xe, 4 /* DW_CFA_def_cfa_offset */
+ ADV(UW37, UW36)
+ .byte 0xe, raw_closure_S_FS+4 /* DW_CFA_def_cfa_offset */
+ ADV(UW38, UW37)
+ .byte 0xe, 4 /* DW_CFA_def_cfa_offset */
+ ADV(UW39, UW38)
+ .byte 0xe, raw_closure_S_FS+4 /* DW_CFA_def_cfa_offset */
+ .balign 4
+L(EFDE8):
+
+ .set L(set9),L(EFDE9)-L(SFDE9)
+ .long L(set9) /* FDE Length */
+L(SFDE9):
+ .long L(SFDE9)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW41)) /* Initial location */
+ .long L(UW52)-L(UW41) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW42, UW41)
+ .byte 0xe, 0 /* DW_CFA_def_cfa_offset */
+ .byte 0x9, 8, 2 /* DW_CFA_register %eip, %edx */
+ ADV(UW43, UW42)
+ .byte 0xe, 4 /* DW_CFA_def_cfa_offset */
+ ADV(UW44, UW43)
+ .byte 0xe, 8 /* DW_CFA_def_cfa_offset */
+ .byte 0x80+8, 2 /* DW_CFA_offset %eip 2*-4 */
+ ADV(UW45, UW44)
+ .byte 0xe, raw_closure_T_FS+8 /* DW_CFA_def_cfa_offset */
+ ADV(UW46, UW45)
+ .byte 0x80+3, 3 /* DW_CFA_offset %ebx 3*-4 */
+ ADV(UW47, UW46)
+ .byte 0xc0+3 /* DW_CFA_restore %ebx */
+ ADV(UW48, UW47)
+ .byte 0xe, 8 /* DW_CFA_def_cfa_offset */
+ ADV(UW49, UW48)
+ .byte 0xe, raw_closure_T_FS+8 /* DW_CFA_def_cfa_offset */
+ ADV(UW50, UW49)
+ .byte 0xe, 8 /* DW_CFA_def_cfa_offset */
+ ADV(UW51, UW50)
+ .byte 0xe, raw_closure_T_FS+8 /* DW_CFA_def_cfa_offset */
+ .balign 4
+L(EFDE9):
+#endif /* !FFI_NO_RAW_API */
+
#endif /* ifndef __x86_64__ */
+
#if defined __ELF__ && defined __linux__
.section .note.GNU-stack,"",@progbits
#endif
diff --git a/src/x86/unix64.S b/src/x86/unix64.S
index 58cb153..f9f9163 100644
--- a/src/x86/unix64.S
+++ b/src/x86/unix64.S
@@ -30,21 +30,41 @@
#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>
-#include <ffi_cfi.h>
#include "internal64.h"
.text
+#define C2(X, Y) X ## Y
+#define C1(X, Y) C2(X, Y)
+#ifdef __USER_LABEL_PREFIX__
+# define C(X) C1(__USER_LABEL_PREFIX__, X)
+#else
+# define C(X) X
+#endif
+
+#ifdef __APPLE__
+# define L(X) C1(L, X)
+#else
+# define L(X) C1(.L, X)
+#endif
+
+#ifdef __ELF__
+# define PLT(X) X@PLT
+# define ENDF(X) .type X,@function; .size X, . - X
+#else
+# define PLT(X) X
+# define ENDF(X)
+#endif
+
/* This macro allows the safe creation of jump tables without an
actual table. The entry points into the table are all 8 bytes.
The use of ORG asserts that we're at the correct location. */
/* ??? The clang assembler doesn't handle .org with symbolic expressions. */
-.macro E index
- .align 8
-#ifndef __clang__
- .org 0b + \index * 8, 0x90
+#if defined(__clang__) || defined(__APPLE__)
+# define E(BASE, X) .balign 8
+#else
+# define E(BASE, X) .balign 8; .org BASE + X * 8
#endif
-.endm
/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
void *raddr, void (*fnaddr)(void));
@@ -53,13 +73,12 @@
for this function. This has been allocated by ffi_call. We also
deallocate some of the stack that has been alloca'd. */
- .align 8
- .globl ffi_call_unix64
- .type ffi_call_unix64,@function
- FFI_HIDDEN(ffi_call_unix64)
+ .balign 8
+ .globl C(ffi_call_unix64)
+ FFI_HIDDEN(C(ffi_call_unix64))
-ffi_call_unix64:
- cfi_startproc
+C(ffi_call_unix64):
+L(UW0):
movq (%rsp), %r10 /* Load return address. */
leaq (%rdi, %rsi), %rax /* Find local stack base. */
movq %rdx, (%rax) /* Save flags. */
@@ -75,8 +94,9 @@ ffi_call_unix64:
unwind back to ffi_call. Note that the location to which we
moved the return address is (the new) CFA-8, so from the
perspective of the unwind info, it hasn't moved. */
- cfi_def_cfa(%rbp, 32)
- cfi_rel_offset(%rbp, 16)
+L(UW1):
+ /* cfi_def_cfa(%rbp, 32) */
+ /* cfi_rel_offset(%rbp, 16) */
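+	/* The commented-out cfi_* lines document what the hand-written
+	   FDE below emits at ADV(UW1, UW0): DW_CFA_def_cfa %rbp, 32 and
+	   DW_CFA_offset %rbp.  */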
movq %rdi, %r10 /* Save a copy of the register area. */
movq %r8, %r11 /* Save a copy of the target fn. */
@@ -91,8 +111,8 @@ ffi_call_unix64:
movq 0x28(%r10), %r9
movl 0xb0(%r10), %eax
testl %eax, %eax
- jnz .Lload_sse
-.Lret_from_load_sse:
+ jnz L(load_sse)
+L(ret_from_load_sse):
/* Deallocate the reg arg area, except for r10, then load via pop. */
leaq 0xb8(%r10), %rsp
@@ -107,94 +127,98 @@ ffi_call_unix64:
movq 0(%rbp), %rcx /* Reload flags. */
movq 8(%rbp), %rdi /* Reload raddr. */
movq 16(%rbp), %rbp /* Reload old frame pointer. */
- cfi_remember_state
- cfi_def_cfa(%rsp, 8)
- cfi_restore(%rbp)
+L(UW2):
+ /* cfi_remember_state */
+ /* cfi_def_cfa(%rsp, 8) */
+ /* cfi_restore(%rbp) */
/* The first byte of the flags contains the FFI_TYPE. */
cmpb $UNIX64_RET_LAST, %cl
movzbl %cl, %r10d
- leaq 0f(%rip), %r11
- ja 9f
+ leaq L(store_table)(%rip), %r11
+ ja L(sa)
leaq (%r11, %r10, 8), %r10
/* Prep for the structure cases: scratch area in redzone. */
leaq -20(%rsp), %rsi
jmp *%r10
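+	/* Each E() entry below is exactly 8 bytes wide, so the scaled
+	   index computed above lands on the matching handler.  */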
- .align 8
-0:
-E UNIX64_RET_VOID
+ .balign 8
+L(store_table):
+E(L(store_table), UNIX64_RET_VOID)
ret
-E UNIX64_RET_UINT8
+E(L(store_table), UNIX64_RET_UINT8)
movzbl %al, %eax
movq %rax, (%rdi)
ret
-E UNIX64_RET_UINT16
+E(L(store_table), UNIX64_RET_UINT16)
movzwl %ax, %eax
movq %rax, (%rdi)
ret
-E UNIX64_RET_UINT32
+E(L(store_table), UNIX64_RET_UINT32)
movl %eax, %eax
movq %rax, (%rdi)
ret
-E UNIX64_RET_SINT8
+E(L(store_table), UNIX64_RET_SINT8)
movsbq %al, %rax
movq %rax, (%rdi)
ret
-E UNIX64_RET_SINT16
+E(L(store_table), UNIX64_RET_SINT16)
movswq %ax, %rax
movq %rax, (%rdi)
ret
-E UNIX64_RET_SINT32
+E(L(store_table), UNIX64_RET_SINT32)
cltq
movq %rax, (%rdi)
ret
-E UNIX64_RET_INT64
+E(L(store_table), UNIX64_RET_INT64)
movq %rax, (%rdi)
ret
-E UNIX64_RET_XMM32
+E(L(store_table), UNIX64_RET_XMM32)
movd %xmm0, (%rdi)
ret
-E UNIX64_RET_XMM64
+E(L(store_table), UNIX64_RET_XMM64)
movq %xmm0, (%rdi)
ret
-E UNIX64_RET_X87
+E(L(store_table), UNIX64_RET_X87)
fstpt (%rdi)
ret
-E UNIX64_RET_X87_2
+E(L(store_table), UNIX64_RET_X87_2)
fstpt (%rdi)
fstpt 16(%rdi)
ret
-E UNIX64_RET_ST_XMM0_RAX
+E(L(store_table), UNIX64_RET_ST_XMM0_RAX)
movq %rax, 8(%rsi)
- jmp 3f
-E UNIX64_RET_ST_RAX_XMM0
+ jmp L(s3)
+E(L(store_table), UNIX64_RET_ST_RAX_XMM0)
movq %xmm0, 8(%rsi)
- jmp 2f
-E UNIX64_RET_ST_XMM0_XMM1
+ jmp L(s2)
+E(L(store_table), UNIX64_RET_ST_XMM0_XMM1)
movq %xmm1, 8(%rsi)
- jmp 3f
-E UNIX64_RET_ST_RAX_RDX
+ jmp L(s3)
+E(L(store_table), UNIX64_RET_ST_RAX_RDX)
movq %rdx, 8(%rsi)
-2: movq %rax, (%rsi)
+L(s2):
+ movq %rax, (%rsi)
shrl $UNIX64_SIZE_SHIFT, %ecx
rep movsb
ret
- .align 8
-3: movq %xmm0, (%rsi)
+ .balign 8
+L(s3):
+ movq %xmm0, (%rsi)
shrl $UNIX64_SIZE_SHIFT, %ecx
rep movsb
ret
-9: call abort@PLT
+L(sa): call PLT(C(abort))
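+	/* Reached only when the type byte exceeds UNIX64_RET_LAST,
+	   i.e. the flags word is corrupt.  */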
/* Many times we can avoid loading any SSE registers at all.
It's not worth an indirect jump to load the exact set of
SSE registers needed; zero or all is a good compromise. */
- .align 2
- cfi_restore_state
-.Lload_sse:
+ .balign 2
+L(UW3):
+ /* cfi_restore_state */
+L(load_sse):
movdqa 0x30(%r10), %xmm0
movdqa 0x40(%r10), %xmm1
movdqa 0x50(%r10), %xmm2
@@ -203,10 +227,10 @@ E UNIX64_RET_ST_RAX_RDX
movdqa 0x80(%r10), %xmm5
movdqa 0x90(%r10), %xmm6
movdqa 0xa0(%r10), %xmm7
- jmp .Lret_from_load_sse
+ jmp L(ret_from_load_sse)
- cfi_endproc
- .size ffi_call_unix64,.-ffi_call_unix64
+L(UW4):
+ENDF(C(ffi_call_unix64))
/* 6 general registers, 8 vector registers,
32 bytes of rvalue, 8 bytes of alignment. */
@@ -218,16 +242,15 @@ E UNIX64_RET_ST_RAX_RDX
/* The location of rvalue within the red zone after deallocating the frame. */
#define ffi_closure_RED_RVALUE (ffi_closure_OFS_RVALUE - ffi_closure_FS)
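+/* Presumably ffi_closure_FS therefore totals 6*8 + 8*16 + 32 + 8 = 216
+   bytes, within the 120..247 range the uleb128 encodings below assume.  */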
- .align 2
- .globl ffi_closure_unix64_sse
- .type ffi_closure_unix64_sse,@function
- FFI_HIDDEN(ffi_closure_unix64_sse)
+ .balign 2
+ .globl C(ffi_closure_unix64_sse)
+ FFI_HIDDEN(C(ffi_closure_unix64_sse))
-ffi_closure_unix64_sse:
- cfi_startproc
+C(ffi_closure_unix64_sse):
+L(UW5):
subq $ffi_closure_FS, %rsp
- /* Note clang bug 21515: adjust_cfa_offset error across endproc. */
- cfi_def_cfa_offset(ffi_closure_FS + 8)
+L(UW6):
+ /* cfi_adjust_cfa_offset(ffi_closure_FS) */
movdqa %xmm0, ffi_closure_OFS_V+0x00(%rsp)
movdqa %xmm1, ffi_closure_OFS_V+0x10(%rsp)
@@ -237,22 +260,21 @@ ffi_closure_unix64_sse:
movdqa %xmm5, ffi_closure_OFS_V+0x50(%rsp)
movdqa %xmm6, ffi_closure_OFS_V+0x60(%rsp)
movdqa %xmm7, ffi_closure_OFS_V+0x70(%rsp)
- jmp 0f
+ jmp L(sse_entry1)
- cfi_endproc
- .size ffi_closure_unix64_sse,.-ffi_closure_unix64_sse
+L(UW7):
+ENDF(C(ffi_closure_unix64_sse))
- .align 2
- .globl ffi_closure_unix64
- .type ffi_closure_unix64,@function
- FFI_HIDDEN(ffi_closure_unix64)
+ .balign 2
+ .globl C(ffi_closure_unix64)
+ FFI_HIDDEN(C(ffi_closure_unix64))
-ffi_closure_unix64:
- cfi_startproc
+C(ffi_closure_unix64):
+L(UW8):
subq $ffi_closure_FS, %rsp
- /* Note clang bug 21515: adjust_cfa_offset error across endproc. */
- cfi_def_cfa_offset(ffi_closure_FS + 8)
-0:
+L(UW9):
+ /* cfi_adjust_cfa_offset(ffi_closure_FS) */
+L(sse_entry1):
movq %rdi, ffi_closure_OFS_G+0x00(%rsp)
movq %rsi, ffi_closure_OFS_G+0x08(%rsp)
movq %rdx, ffi_closure_OFS_G+0x10(%rsp)
@@ -269,95 +291,97 @@ ffi_closure_unix64:
movq FFI_TRAMPOLINE_SIZE+8(%r10), %rsi /* Load fun */
movq FFI_TRAMPOLINE_SIZE+16(%r10), %rdx /* Load user_data */
#endif
-.Ldo_closure:
+L(do_closure):
leaq ffi_closure_OFS_RVALUE(%rsp), %rcx /* Load rvalue */
movq %rsp, %r8 /* Load reg_args */
leaq ffi_closure_FS+8(%rsp), %r9 /* Load argp */
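+	/* ffi_closure_FS+8(%rsp) skips our frame plus the return
+	   address, yielding the caller's on-stack argument area.  */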
- call ffi_closure_unix64_inner
+ call C(ffi_closure_unix64_inner)
/* Deallocate stack frame early; return value is now in redzone. */
addq $ffi_closure_FS, %rsp
- cfi_adjust_cfa_offset(-ffi_closure_FS)
+L(UW10):
+ /* cfi_adjust_cfa_offset(-ffi_closure_FS) */
/* The first byte of the return value contains the FFI_TYPE. */
cmpb $UNIX64_RET_LAST, %al
movzbl %al, %r10d
- leaq 0f(%rip), %r11
- ja 9f
+ leaq L(load_table)(%rip), %r11
+ ja L(la)
leaq (%r11, %r10, 8), %r10
leaq ffi_closure_RED_RVALUE(%rsp), %rsi
jmp *%r10
- .align 8
-0:
-E UNIX64_RET_VOID
+ .balign 8
+L(load_table):
+E(L(load_table), UNIX64_RET_VOID)
ret
-E UNIX64_RET_UINT8
+E(L(load_table), UNIX64_RET_UINT8)
movzbl (%rsi), %eax
ret
-E UNIX64_RET_UINT16
+E(L(load_table), UNIX64_RET_UINT16)
movzwl (%rsi), %eax
ret
-E UNIX64_RET_UINT32
+E(L(load_table), UNIX64_RET_UINT32)
movl (%rsi), %eax
ret
-E UNIX64_RET_SINT8
+E(L(load_table), UNIX64_RET_SINT8)
movsbl (%rsi), %eax
ret
-E UNIX64_RET_SINT16
+E(L(load_table), UNIX64_RET_SINT16)
movswl (%rsi), %eax
ret
-E UNIX64_RET_SINT32
+E(L(load_table), UNIX64_RET_SINT32)
movl (%rsi), %eax
ret
-E UNIX64_RET_INT64
+E(L(load_table), UNIX64_RET_INT64)
movq (%rsi), %rax
ret
-E UNIX64_RET_XMM32
+E(L(load_table), UNIX64_RET_XMM32)
movd (%rsi), %xmm0
ret
-E UNIX64_RET_XMM64
+E(L(load_table), UNIX64_RET_XMM64)
movq (%rsi), %xmm0
ret
-E UNIX64_RET_X87
+E(L(load_table), UNIX64_RET_X87)
fldt (%rsi)
ret
-E UNIX64_RET_X87_2
+E(L(load_table), UNIX64_RET_X87_2)
fldt 16(%rsi)
fldt (%rsi)
ret
-E UNIX64_RET_ST_XMM0_RAX
+E(L(load_table), UNIX64_RET_ST_XMM0_RAX)
movq 8(%rsi), %rax
- jmp 3f
-E UNIX64_RET_ST_RAX_XMM0
+ jmp L(l3)
+E(L(load_table), UNIX64_RET_ST_RAX_XMM0)
movq 8(%rsi), %xmm0
- jmp 2f
-E UNIX64_RET_ST_XMM0_XMM1
+ jmp L(l2)
+E(L(load_table), UNIX64_RET_ST_XMM0_XMM1)
movq 8(%rsi), %xmm1
- jmp 3f
-E UNIX64_RET_ST_RAX_RDX
+ jmp L(l3)
+E(L(load_table), UNIX64_RET_ST_RAX_RDX)
movq 8(%rsi), %rdx
-2: movq (%rsi), %rax
+L(l2):
+ movq (%rsi), %rax
ret
- .align 8
-3: movq (%rsi), %xmm0
+ .balign 8
+L(l3):
+ movq (%rsi), %xmm0
ret
-9: call abort@PLT
+L(la): call PLT(C(abort))
- cfi_endproc
- .size ffi_closure_unix64,.-ffi_closure_unix64
+L(UW11):
+ENDF(C(ffi_closure_unix64))
- .align 2
- .globl ffi_go_closure_unix64_sse
- .type ffi_go_closure_unix64_sse,@function
- FFI_HIDDEN(ffi_go_closure_unix64_sse)
+ .balign 2
+ .globl C(ffi_go_closure_unix64_sse)
+ FFI_HIDDEN(C(ffi_go_closure_unix64_sse))
-ffi_go_closure_unix64_sse:
- cfi_startproc
+C(ffi_go_closure_unix64_sse):
+L(UW12):
subq $ffi_closure_FS, %rsp
- /* Note clang bug 21515: adjust_cfa_offset error across endproc. */
- cfi_def_cfa_offset(ffi_closure_FS + 8)
+L(UW13):
+ /* cfi_adjust_cfa_offset(ffi_closure_FS) */
movdqa %xmm0, ffi_closure_OFS_V+0x00(%rsp)
movdqa %xmm1, ffi_closure_OFS_V+0x10(%rsp)
@@ -367,22 +391,21 @@ ffi_go_closure_unix64_sse:
movdqa %xmm5, ffi_closure_OFS_V+0x50(%rsp)
movdqa %xmm6, ffi_closure_OFS_V+0x60(%rsp)
movdqa %xmm7, ffi_closure_OFS_V+0x70(%rsp)
- jmp 0f
+ jmp L(sse_entry2)
- cfi_endproc
- .size ffi_go_closure_unix64_sse,.-ffi_go_closure_unix64_sse
+L(UW14):
+ENDF(C(ffi_go_closure_unix64_sse))
- .align 2
- .globl ffi_go_closure_unix64
- .type ffi_go_closure_unix64,@function
- FFI_HIDDEN(ffi_go_closure_unix64)
+ .balign 2
+ .globl C(ffi_go_closure_unix64)
+ FFI_HIDDEN(C(ffi_go_closure_unix64))
-ffi_go_closure_unix64:
- cfi_startproc
+C(ffi_go_closure_unix64):
+L(UW15):
subq $ffi_closure_FS, %rsp
- /* Note clang bug 21515: adjust_cfa_offset error across endproc. */
- cfi_def_cfa_offset(ffi_closure_FS + 8)
-0:
+L(UW16):
+ /* cfi_adjust_cfa_offset(ffi_closure_FS) */
+L(sse_entry2):
movq %rdi, ffi_closure_OFS_G+0x00(%rsp)
movq %rsi, ffi_closure_OFS_G+0x08(%rsp)
movq %rdx, ffi_closure_OFS_G+0x10(%rsp)
@@ -399,10 +422,123 @@ ffi_go_closure_unix64:
movq 16(%r10), %rsi /* Load fun */
movq %r10, %rdx /* Load closure (user_data) */
#endif
- jmp .Ldo_closure
+ jmp L(do_closure)
+
+L(UW17):
+ENDF(C(ffi_go_closure_unix64))
+
+/* Sadly, OSX cctools-as doesn't understand .cfi directives at all. */
+
+#ifdef __APPLE__
+.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
+EHFrame0:
+#elif defined(HAVE_AS_X86_64_UNWIND_SECTION_TYPE)
+.section .eh_frame,"a",@unwind
+#else
+.section .eh_frame,"a",@progbits
+#endif
- cfi_endproc
- .size ffi_go_closure_unix64,.-ffi_go_closure_unix64
+#ifdef HAVE_AS_X86_PCREL
+# define PCREL(X) X - .
+#else
+# define PCREL(X) X@rel
+#endif
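+
+/* PCREL(L(UW0)) expresses an FDE initial location as a PC-relative
+   offset, matching the pcrel sdata4 encoding (0x1b) declared in the
+   CIE below.  */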
+
+/* Simplify advancing between labels.  Assume the label delta fits in
+   DW_CFA_advance_loc1's single-byte operand.  */
+#define ADV(N, P) .byte 2, L(N)-L(P)
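+/* E.g. ADV(UW1, UW0) assembles to the two bytes {2, L(UW1)-L(UW0)}:
+   DW_CFA_advance_loc1 followed by the code delta between the labels.  */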
+
+ .balign 8
+L(CIE):
+ .set L(set0),L(ECIE)-L(SCIE)
+ .long L(set0) /* CIE Length */
+L(SCIE):
+ .long 0 /* CIE Identifier Tag */
+ .byte 1 /* CIE Version */
+ .ascii "zR\0" /* CIE Augmentation */
+ .byte 1 /* CIE Code Alignment Factor */
+	.byte 0x78			/* CIE Data Alignment Factor (sleb128 for -8) */
+ .byte 0x10 /* CIE RA Column */
+ .byte 1 /* Augmentation size */
+ .byte 0x1b /* FDE Encoding (pcrel sdata4) */
+ .byte 0xc, 7, 8 /* DW_CFA_def_cfa, %rsp offset 8 */
+ .byte 0x80+16, 1 /* DW_CFA_offset, %rip offset 1*-8 */
+ .balign 8
+L(ECIE):
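+
+/* The CIE establishes the entry state shared by every FDE below:
+   CFA = %rsp + 8, with the return %rip saved at CFA - 8.  */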
+
+ .set L(set1),L(EFDE1)-L(SFDE1)
+ .long L(set1) /* FDE Length */
+L(SFDE1):
+ .long L(SFDE1)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW0)) /* Initial location */
+ .long L(UW4)-L(UW0) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW1, UW0)
+ .byte 0xc, 6, 32 /* DW_CFA_def_cfa, %rbp 32 */
+ .byte 0x80+6, 2 /* DW_CFA_offset, %rbp 2*-8 */
+ ADV(UW2, UW1)
+ .byte 0xa /* DW_CFA_remember_state */
+ .byte 0xc, 7, 8 /* DW_CFA_def_cfa, %rsp 8 */
+ .byte 0xc0+6 /* DW_CFA_restore, %rbp */
+ ADV(UW3, UW2)
+ .byte 0xb /* DW_CFA_restore_state */
+ .balign 8
+L(EFDE1):
+
+ .set L(set2),L(EFDE2)-L(SFDE2)
+ .long L(set2) /* FDE Length */
+L(SFDE2):
+ .long L(SFDE2)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW5)) /* Initial location */
+ .long L(UW7)-L(UW5) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW6, UW5)
+ .byte 0xe /* DW_CFA_def_cfa_offset */
+	.byte ffi_closure_FS + 8, 1	/* uleb128, assuming 128 <= FS+8 < 256 */
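+					/* FS+8 >= 128 keeps bit 7 set, so the
+					   first byte doubles as the uleb128
+					   continuation marker before the
+					   closing byte 1.  */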
+ .balign 8
+L(EFDE2):
+
+ .set L(set3),L(EFDE3)-L(SFDE3)
+ .long L(set3) /* FDE Length */
+L(SFDE3):
+ .long L(SFDE3)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW8)) /* Initial location */
+ .long L(UW11)-L(UW8) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW9, UW8)
+ .byte 0xe /* DW_CFA_def_cfa_offset */
+	.byte ffi_closure_FS + 8, 1	/* uleb128, assuming 128 <= FS+8 < 256 */
+ ADV(UW10, UW9)
+	.byte 0xe, 8			/* DW_CFA_def_cfa_offset 8 */
+	.balign 8
+L(EFDE3):
+
+ .set L(set4),L(EFDE4)-L(SFDE4)
+ .long L(set4) /* FDE Length */
+L(SFDE4):
+ .long L(SFDE4)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW12)) /* Initial location */
+ .long L(UW14)-L(UW12) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW13, UW12)
+ .byte 0xe /* DW_CFA_def_cfa_offset */
+	.byte ffi_closure_FS + 8, 1	/* uleb128, assuming 128 <= FS+8 < 256 */
+ .balign 8
+L(EFDE4):
+
+ .set L(set5),L(EFDE5)-L(SFDE5)
+ .long L(set5) /* FDE Length */
+L(SFDE5):
+ .long L(SFDE5)-L(CIE) /* FDE CIE offset */
+ .long PCREL(L(UW15)) /* Initial location */
+ .long L(UW17)-L(UW15) /* Address range */
+ .byte 0 /* Augmentation size */
+ ADV(UW16, UW15)
+ .byte 0xe /* DW_CFA_def_cfa_offset */
+	.byte ffi_closure_FS + 8, 1	/* uleb128, assuming 128 <= FS+8 < 256 */
+ .balign 8
+L(EFDE5):
+#ifdef __APPLE__
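+	/* Allow the Mach-O linker to dead-strip at symbol granularity.  */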
+ .subsections_via_symbols
+#endif
#endif /* __x86_64__ */
#if defined __ELF__ && defined __linux__