author    | Anthony Green <green@moxielogic.com> | 2015-01-10 09:22:55 -0500
committer | Anthony Green <green@moxielogic.com> | 2015-01-10 09:22:55 -0500
commit    | dd0b59a5cf63e0f9602c76fc89a4cb62593ff6f1 (patch)
tree      | 15b11b2c88959ef8962649a0b65e76e7936b93bc /src/x86
parent    | 9131039c93b6ecd1c3946905a3b6dafb5dc4ee40 (diff)
parent    | b7f6d7aa9b0d7b19eec28a945251e09a4b65b275 (diff)
download  | libffi-dd0b59a5cf63e0f9602c76fc89a4cb62593ff6f1.tar.gz
Merge pull request #164 from rth7680/darwin
Fix build on darwin
Diffstat (limited to 'src/x86')
-rw-r--r-- | src/x86/darwin.S   | 444
-rw-r--r-- | src/x86/darwin64.S | 416
-rw-r--r-- | src/x86/ffi.c      |  29
-rw-r--r-- | src/x86/sysv.S     | 884
-rw-r--r-- | src/x86/unix64.S   | 390
5 files changed, 889 insertions, 1274 deletions
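The most self-contained piece of this merge is the src/x86/ffi.c hunk, which starts rounding struct arguments whose ffi_type reports 16-byte alignment up to a 16-byte boundary on the outgoing argument stack (per the diff comment, long double passed directly does not get this treatment on Darwin, and libffi does not support vectors). Below is a minimal C sketch of that placement logic; ALIGN_UP, WORD and push_arg are illustrative names of my own, not libffi's actual macros or entry points.

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    /* Illustrative stand-in for libffi's ALIGN macro: round v up to the
       power-of-two boundary a.  Not the real macro name. */
    #define ALIGN_UP(v, a)  (((uintptr_t)(v) + ((a) - 1)) & ~((uintptr_t)(a) - 1))

    #define WORD 4  /* FFI_SIZEOF_ARG on 32-bit x86 */

    /* Sketch of how the patched ffi_call_int / ffi_closure_inner place one
       argument: structs whose type alignment is >= 16 are bumped to a
       16-byte slot boundary before being copied onto the argument stack. */
    static char *push_arg(char *argp, const void *valp, size_t size,
                          int is_struct, size_t type_alignment)
    {
      size_t rounded = (size_t)ALIGN_UP(size, WORD); /* "za" in the patch */
      size_t align = WORD;                           /* default slot alignment */

      if (is_struct && type_alignment >= 16)
        align = 16;                                  /* 16-byte-aligned structs */

      argp = (char *)ALIGN_UP(argp, align);          /* align the slot */
      memcpy(argp, valp, size);                      /* copy the argument value */
      return argp + rounded;                         /* advance past the slot */
    }

In the real patch the reverse-direction (dir < 0) ABIs skip the extra alignment step, with a comment noting those ABIs probably predate any 16-byte alignment requirement.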
diff --git a/src/x86/darwin.S b/src/x86/darwin.S deleted file mode 100644 index 8f0f070..0000000 --- a/src/x86/darwin.S +++ /dev/null @@ -1,444 +0,0 @@ -/* ----------------------------------------------------------------------- - darwin.S - Copyright (c) 1996, 1998, 2001, 2002, 2003, 2005 Red Hat, Inc. - Copyright (C) 2008 Free Software Foundation, Inc. - - X86 Foreign Function Interface - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - ``Software''), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be included - in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - DEALINGS IN THE SOFTWARE. - ----------------------------------------------------------------------- - */ - -#ifndef __x86_64__ - -#define LIBFFI_ASM -#include <fficonfig.h> -#include <ffi.h> - -.text - -.globl _ffi_prep_args - - .align 4 -.globl _ffi_call_SYSV - -_ffi_call_SYSV: -.LFB1: - pushl %ebp -.LCFI0: - movl %esp,%ebp -.LCFI1: - subl $8,%esp - /* Make room for all of the new args. */ - movl 16(%ebp),%ecx - subl %ecx,%esp - - movl %esp,%eax - - /* Place all of the ffi_prep_args in position */ - subl $8,%esp - pushl 12(%ebp) - pushl %eax - call *8(%ebp) - - /* Return stack to previous state and call the function */ - addl $16,%esp - - call *28(%ebp) - - /* Load %ecx with the return type code */ - movl 20(%ebp),%ecx - - /* Protect %esi. We're going to pop it in the epilogue. */ - pushl %esi - - /* If the return value pointer is NULL, assume no return value. */ - cmpl $0,24(%ebp) - jne 0f - - /* Even if there is no space for the return value, we are - obliged to handle floating-point values. 
*/ - cmpl $FFI_TYPE_FLOAT,%ecx - jne noretval - fstp %st(0) - - jmp epilogue -0: - .align 4 - call 1f -.Lstore_table: - .long noretval-.Lstore_table /* FFI_TYPE_VOID */ - .long retint-.Lstore_table /* FFI_TYPE_INT */ - .long retfloat-.Lstore_table /* FFI_TYPE_FLOAT */ - .long retdouble-.Lstore_table /* FFI_TYPE_DOUBLE */ - .long retlongdouble-.Lstore_table /* FFI_TYPE_LONGDOUBLE */ - .long retuint8-.Lstore_table /* FFI_TYPE_UINT8 */ - .long retsint8-.Lstore_table /* FFI_TYPE_SINT8 */ - .long retuint16-.Lstore_table /* FFI_TYPE_UINT16 */ - .long retsint16-.Lstore_table /* FFI_TYPE_SINT16 */ - .long retint-.Lstore_table /* FFI_TYPE_UINT32 */ - .long retint-.Lstore_table /* FFI_TYPE_SINT32 */ - .long retint64-.Lstore_table /* FFI_TYPE_UINT64 */ - .long retint64-.Lstore_table /* FFI_TYPE_SINT64 */ - .long retstruct-.Lstore_table /* FFI_TYPE_STRUCT */ - .long retint-.Lstore_table /* FFI_TYPE_POINTER */ - .long retstruct1b-.Lstore_table /* FFI_TYPE_SMALL_STRUCT_1B */ - .long retstruct2b-.Lstore_table /* FFI_TYPE_SMALL_STRUCT_2B */ -1: - pop %esi - add (%esi, %ecx, 4), %esi - jmp *%esi - - /* Sign/zero extend as appropriate. */ -retsint8: - movsbl %al, %eax - jmp retint - -retsint16: - movswl %ax, %eax - jmp retint - -retuint8: - movzbl %al, %eax - jmp retint - -retuint16: - movzwl %ax, %eax - jmp retint - -retfloat: - /* Load %ecx with the pointer to storage for the return value */ - movl 24(%ebp),%ecx - fstps (%ecx) - jmp epilogue - -retdouble: - /* Load %ecx with the pointer to storage for the return value */ - movl 24(%ebp),%ecx - fstpl (%ecx) - jmp epilogue - -retlongdouble: - /* Load %ecx with the pointer to storage for the return value */ - movl 24(%ebp),%ecx - fstpt (%ecx) - jmp epilogue - -retint64: - /* Load %ecx with the pointer to storage for the return value */ - movl 24(%ebp),%ecx - movl %eax,0(%ecx) - movl %edx,4(%ecx) - jmp epilogue - -retstruct1b: - /* Load %ecx with the pointer to storage for the return value */ - movl 24(%ebp),%ecx - movb %al,0(%ecx) - jmp epilogue - -retstruct2b: - /* Load %ecx with the pointer to storage for the return value */ - movl 24(%ebp),%ecx - movw %ax,0(%ecx) - jmp epilogue - -retint: - /* Load %ecx with the pointer to storage for the return value */ - movl 24(%ebp),%ecx - movl %eax,0(%ecx) - -retstruct: - /* Nothing to do! */ - -noretval: -epilogue: - popl %esi - movl %ebp,%esp - popl %ebp - ret - -.LFE1: -.ffi_call_SYSV_end: - - .align 4 -FFI_HIDDEN (ffi_closure_SYSV) -.globl _ffi_closure_SYSV - -_ffi_closure_SYSV: -.LFB2: - pushl %ebp -.LCFI2: - movl %esp, %ebp -.LCFI3: - subl $40, %esp - leal -24(%ebp), %edx - movl %edx, -12(%ebp) /* resp */ - leal 8(%ebp), %edx - movl %edx, 4(%esp) /* args = __builtin_dwarf_cfa () */ - leal -12(%ebp), %edx - movl %edx, (%esp) /* &resp */ - movl %ebx, 8(%esp) -.LCFI7: - call L_ffi_closure_SYSV_inner$stub - movl 8(%esp), %ebx - movl -12(%ebp), %ecx - cmpl $FFI_TYPE_INT, %eax - je .Lcls_retint - - /* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16, - FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. 
*/ - cmpl $FFI_TYPE_UINT64, %eax - jge 0f - cmpl $FFI_TYPE_UINT8, %eax - jge .Lcls_retint - -0: cmpl $FFI_TYPE_FLOAT, %eax - je .Lcls_retfloat - cmpl $FFI_TYPE_DOUBLE, %eax - je .Lcls_retdouble - cmpl $FFI_TYPE_LONGDOUBLE, %eax - je .Lcls_retldouble - cmpl $FFI_TYPE_SINT64, %eax - je .Lcls_retllong - cmpl $FFI_TYPE_SMALL_STRUCT_1B, %eax - je .Lcls_retstruct1b - cmpl $FFI_TYPE_SMALL_STRUCT_2B, %eax - je .Lcls_retstruct2b - cmpl $FFI_TYPE_STRUCT, %eax - je .Lcls_retstruct -.Lcls_epilogue: - movl %ebp, %esp - popl %ebp - ret -.Lcls_retint: - movl (%ecx), %eax - jmp .Lcls_epilogue -.Lcls_retfloat: - flds (%ecx) - jmp .Lcls_epilogue -.Lcls_retdouble: - fldl (%ecx) - jmp .Lcls_epilogue -.Lcls_retldouble: - fldt (%ecx) - jmp .Lcls_epilogue -.Lcls_retllong: - movl (%ecx), %eax - movl 4(%ecx), %edx - jmp .Lcls_epilogue -.Lcls_retstruct1b: - movsbl (%ecx), %eax - jmp .Lcls_epilogue -.Lcls_retstruct2b: - movswl (%ecx), %eax - jmp .Lcls_epilogue -.Lcls_retstruct: - lea -8(%ebp),%esp - movl %ebp, %esp - popl %ebp - ret $4 -.LFE2: - -#if !FFI_NO_RAW_API - -#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3) -#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4) -#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4) -#define CIF_FLAGS_OFFSET 20 - - .align 4 -FFI_HIDDEN (ffi_closure_raw_SYSV) -.globl _ffi_closure_raw_SYSV - -_ffi_closure_raw_SYSV: -.LFB3: - pushl %ebp -.LCFI4: - movl %esp, %ebp -.LCFI5: - pushl %esi -.LCFI6: - subl $36, %esp - movl RAW_CLOSURE_CIF_OFFSET(%eax), %esi /* closure->cif */ - movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */ - movl %edx, 12(%esp) /* user_data */ - leal 8(%ebp), %edx /* __builtin_dwarf_cfa () */ - movl %edx, 8(%esp) /* raw_args */ - leal -24(%ebp), %edx - movl %edx, 4(%esp) /* &res */ - movl %esi, (%esp) /* cif */ - call *RAW_CLOSURE_FUN_OFFSET(%eax) /* closure->fun */ - movl CIF_FLAGS_OFFSET(%esi), %eax /* rtype */ - cmpl $FFI_TYPE_INT, %eax - je .Lrcls_retint - - /* Handle FFI_TYPE_UINT8, FFI_TYPE_SINT8, FFI_TYPE_UINT16, - FFI_TYPE_SINT16, FFI_TYPE_UINT32, FFI_TYPE_SINT32. */ - cmpl $FFI_TYPE_UINT64, %eax - jge 0f - cmpl $FFI_TYPE_UINT8, %eax - jge .Lrcls_retint -0: - cmpl $FFI_TYPE_FLOAT, %eax - je .Lrcls_retfloat - cmpl $FFI_TYPE_DOUBLE, %eax - je .Lrcls_retdouble - cmpl $FFI_TYPE_LONGDOUBLE, %eax - je .Lrcls_retldouble - cmpl $FFI_TYPE_SINT64, %eax - je .Lrcls_retllong -.Lrcls_epilogue: - addl $36, %esp - popl %esi - popl %ebp - ret -.Lrcls_retint: - movl -24(%ebp), %eax - jmp .Lrcls_epilogue -.Lrcls_retfloat: - flds -24(%ebp) - jmp .Lrcls_epilogue -.Lrcls_retdouble: - fldl -24(%ebp) - jmp .Lrcls_epilogue -.Lrcls_retldouble: - fldt -24(%ebp) - jmp .Lrcls_epilogue -.Lrcls_retllong: - movl -24(%ebp), %eax - movl -20(%ebp), %edx - jmp .Lrcls_epilogue -.LFE3: -#endif - -.section __IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5 -L_ffi_closure_SYSV_inner$stub: - .indirect_symbol _ffi_closure_SYSV_inner - hlt ; hlt ; hlt ; hlt ; hlt - - -.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support -EH_frame1: - .set L$set$0,LECIE1-LSCIE1 - .long L$set$0 -LSCIE1: - .long 0x0 - .byte 0x1 - .ascii "zR\0" - .byte 0x1 - .byte 0x7c - .byte 0x8 - .byte 0x1 - .byte 0x10 - .byte 0xc - .byte 0x5 - .byte 0x4 - .byte 0x88 - .byte 0x1 - .align 2 -LECIE1: -.globl _ffi_call_SYSV.eh -_ffi_call_SYSV.eh: -LSFDE1: - .set L$set$1,LEFDE1-LASFDE1 - .long L$set$1 -LASFDE1: - .long LASFDE1-EH_frame1 - .long .LFB1-. 
- .set L$set$2,.LFE1-.LFB1 - .long L$set$2 - .byte 0x0 - .byte 0x4 - .set L$set$3,.LCFI0-.LFB1 - .long L$set$3 - .byte 0xe - .byte 0x8 - .byte 0x84 - .byte 0x2 - .byte 0x4 - .set L$set$4,.LCFI1-.LCFI0 - .long L$set$4 - .byte 0xd - .byte 0x4 - .align 2 -LEFDE1: -.globl _ffi_closure_SYSV.eh -_ffi_closure_SYSV.eh: -LSFDE2: - .set L$set$5,LEFDE2-LASFDE2 - .long L$set$5 -LASFDE2: - .long LASFDE2-EH_frame1 - .long .LFB2-. - .set L$set$6,.LFE2-.LFB2 - .long L$set$6 - .byte 0x0 - .byte 0x4 - .set L$set$7,.LCFI2-.LFB2 - .long L$set$7 - .byte 0xe - .byte 0x8 - .byte 0x84 - .byte 0x2 - .byte 0x4 - .set L$set$8,.LCFI3-.LCFI2 - .long L$set$8 - .byte 0xd - .byte 0x4 - .align 2 -LEFDE2: - -#if !FFI_NO_RAW_API - -.globl _ffi_closure_raw_SYSV.eh -_ffi_closure_raw_SYSV.eh: -LSFDE3: - .set L$set$10,LEFDE3-LASFDE3 - .long L$set$10 -LASFDE3: - .long LASFDE3-EH_frame1 - .long .LFB3-. - .set L$set$11,.LFE3-.LFB3 - .long L$set$11 - .byte 0x0 - .byte 0x4 - .set L$set$12,.LCFI4-.LFB3 - .long L$set$12 - .byte 0xe - .byte 0x8 - .byte 0x84 - .byte 0x2 - .byte 0x4 - .set L$set$13,.LCFI5-.LCFI4 - .long L$set$13 - .byte 0xd - .byte 0x4 - .byte 0x4 - .set L$set$14,.LCFI6-.LCFI5 - .long L$set$14 - .byte 0x85 - .byte 0x3 - .align 2 -LEFDE3: - -#endif - -#endif /* ifndef __x86_64__ */ diff --git a/src/x86/darwin64.S b/src/x86/darwin64.S deleted file mode 100644 index 2f7394e..0000000 --- a/src/x86/darwin64.S +++ /dev/null @@ -1,416 +0,0 @@ -/* ----------------------------------------------------------------------- - darwin64.S - Copyright (c) 2006 Free Software Foundation, Inc. - Copyright (c) 2008 Red Hat, Inc. - derived from unix64.S - - x86-64 Foreign Function Interface for Darwin. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - ``Software''), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be included - in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR - OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - OTHER DEALINGS IN THE SOFTWARE. - ----------------------------------------------------------------------- */ - -#ifdef __x86_64__ -#define LIBFFI_ASM -#include <fficonfig.h> -#include <ffi.h> - - .file "darwin64.S" -.text - -/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags, - void *raddr, void (*fnaddr)(void)); - - Bit o trickiness here -- ARGS+BYTES is the base of the stack frame - for this function. This has been allocated by ffi_call. We also - deallocate some of the stack that has been alloca'd. */ - - .align 3 - .globl _ffi_call_unix64 - -_ffi_call_unix64: -LUW0: - movq (%rsp), %r10 /* Load return address. */ - leaq (%rdi, %rsi), %rax /* Find local stack base. */ - movq %rdx, (%rax) /* Save flags. */ - movq %rcx, 8(%rax) /* Save raddr. */ - movq %rbp, 16(%rax) /* Save old frame pointer. 
*/ - movq %r10, 24(%rax) /* Relocate return address. */ - movq %rax, %rbp /* Finalize local stack frame. */ -LUW1: - movq %rdi, %r10 /* Save a copy of the register area. */ - movq %r8, %r11 /* Save a copy of the target fn. */ - movl %r9d, %eax /* Set number of SSE registers. */ - - /* Load up all argument registers. */ - movq (%r10), %rdi - movq 8(%r10), %rsi - movq 16(%r10), %rdx - movq 24(%r10), %rcx - movq 32(%r10), %r8 - movq 40(%r10), %r9 - testl %eax, %eax - jnz Lload_sse -Lret_from_load_sse: - - /* Deallocate the reg arg area. */ - leaq 176(%r10), %rsp - - /* Call the user function. */ - call *%r11 - - /* Deallocate stack arg area; local stack frame in redzone. */ - leaq 24(%rbp), %rsp - - movq 0(%rbp), %rcx /* Reload flags. */ - movq 8(%rbp), %rdi /* Reload raddr. */ - movq 16(%rbp), %rbp /* Reload old frame pointer. */ -LUW2: - - /* The first byte of the flags contains the FFI_TYPE. */ - movzbl %cl, %r10d - leaq Lstore_table(%rip), %r11 - movslq (%r11, %r10, 4), %r10 - addq %r11, %r10 - jmp *%r10 - -Lstore_table: - .long Lst_void-Lstore_table /* FFI_TYPE_VOID */ - .long Lst_sint32-Lstore_table /* FFI_TYPE_INT */ - .long Lst_float-Lstore_table /* FFI_TYPE_FLOAT */ - .long Lst_double-Lstore_table /* FFI_TYPE_DOUBLE */ - .long Lst_ldouble-Lstore_table /* FFI_TYPE_LONGDOUBLE */ - .long Lst_uint8-Lstore_table /* FFI_TYPE_UINT8 */ - .long Lst_sint8-Lstore_table /* FFI_TYPE_SINT8 */ - .long Lst_uint16-Lstore_table /* FFI_TYPE_UINT16 */ - .long Lst_sint16-Lstore_table /* FFI_TYPE_SINT16 */ - .long Lst_uint32-Lstore_table /* FFI_TYPE_UINT32 */ - .long Lst_sint32-Lstore_table /* FFI_TYPE_SINT32 */ - .long Lst_int64-Lstore_table /* FFI_TYPE_UINT64 */ - .long Lst_int64-Lstore_table /* FFI_TYPE_SINT64 */ - .long Lst_struct-Lstore_table /* FFI_TYPE_STRUCT */ - .long Lst_int64-Lstore_table /* FFI_TYPE_POINTER */ - - .text - .align 3 -Lst_void: - ret - .align 3 -Lst_uint8: - movzbq %al, %rax - movq %rax, (%rdi) - ret - .align 3 -Lst_sint8: - movsbq %al, %rax - movq %rax, (%rdi) - ret - .align 3 -Lst_uint16: - movzwq %ax, %rax - movq %rax, (%rdi) - .align 3 -Lst_sint16: - movswq %ax, %rax - movq %rax, (%rdi) - ret - .align 3 -Lst_uint32: - movl %eax, %eax - movq %rax, (%rdi) - .align 3 -Lst_sint32: - cltq - movq %rax, (%rdi) - ret - .align 3 -Lst_int64: - movq %rax, (%rdi) - ret - .align 3 -Lst_float: - movss %xmm0, (%rdi) - ret - .align 3 -Lst_double: - movsd %xmm0, (%rdi) - ret -Lst_ldouble: - fstpt (%rdi) - ret - .align 3 -Lst_struct: - leaq -20(%rsp), %rsi /* Scratch area in redzone. */ - - /* We have to locate the values now, and since we don't want to - write too much data into the user's return value, we spill the - value to a 16 byte scratch area first. Bits 8, 9, and 10 - control where the values are located. Only one of the three - bits will be set; see ffi_prep_cif_machdep for the pattern. */ - movd %xmm0, %r10 - movd %xmm1, %r11 - testl $0x100, %ecx - cmovnz %rax, %rdx - cmovnz %r10, %rax - testl $0x200, %ecx - cmovnz %r10, %rdx - testl $0x400, %ecx - cmovnz %r10, %rax - cmovnz %r11, %rdx - movq %rax, (%rsi) - movq %rdx, 8(%rsi) - - /* Bits 12-31 contain the true size of the structure. Copy from - the scratch area to the true destination. */ - shrl $12, %ecx - rep movsb - ret - - /* Many times we can avoid loading any SSE registers at all. - It's not worth an indirect jump to load the exact set of - SSE registers needed; zero or all is a good compromise. 
*/ - .align 3 -LUW3: -Lload_sse: - movdqa 48(%r10), %xmm0 - movdqa 64(%r10), %xmm1 - movdqa 80(%r10), %xmm2 - movdqa 96(%r10), %xmm3 - movdqa 112(%r10), %xmm4 - movdqa 128(%r10), %xmm5 - movdqa 144(%r10), %xmm6 - movdqa 160(%r10), %xmm7 - jmp Lret_from_load_sse - -LUW4: - .align 3 - .globl _ffi_closure_unix64 - -_ffi_closure_unix64: -LUW5: - /* The carry flag is set by the trampoline iff SSE registers - are used. Don't clobber it before the branch instruction. */ - leaq -200(%rsp), %rsp -LUW6: - movq %rdi, (%rsp) - movq %rsi, 8(%rsp) - movq %rdx, 16(%rsp) - movq %rcx, 24(%rsp) - movq %r8, 32(%rsp) - movq %r9, 40(%rsp) - jc Lsave_sse -Lret_from_save_sse: - - movq %r10, %rdi - leaq 176(%rsp), %rsi - movq %rsp, %rdx - leaq 208(%rsp), %rcx - call _ffi_closure_unix64_inner - - /* Deallocate stack frame early; return value is now in redzone. */ - addq $200, %rsp -LUW7: - - /* The first byte of the return value contains the FFI_TYPE. */ - movzbl %al, %r10d - leaq Lload_table(%rip), %r11 - movslq (%r11, %r10, 4), %r10 - addq %r11, %r10 - jmp *%r10 - -Lload_table: - .long Lld_void-Lload_table /* FFI_TYPE_VOID */ - .long Lld_int32-Lload_table /* FFI_TYPE_INT */ - .long Lld_float-Lload_table /* FFI_TYPE_FLOAT */ - .long Lld_double-Lload_table /* FFI_TYPE_DOUBLE */ - .long Lld_ldouble-Lload_table /* FFI_TYPE_LONGDOUBLE */ - .long Lld_int8-Lload_table /* FFI_TYPE_UINT8 */ - .long Lld_int8-Lload_table /* FFI_TYPE_SINT8 */ - .long Lld_int16-Lload_table /* FFI_TYPE_UINT16 */ - .long Lld_int16-Lload_table /* FFI_TYPE_SINT16 */ - .long Lld_int32-Lload_table /* FFI_TYPE_UINT32 */ - .long Lld_int32-Lload_table /* FFI_TYPE_SINT32 */ - .long Lld_int64-Lload_table /* FFI_TYPE_UINT64 */ - .long Lld_int64-Lload_table /* FFI_TYPE_SINT64 */ - .long Lld_struct-Lload_table /* FFI_TYPE_STRUCT */ - .long Lld_int64-Lload_table /* FFI_TYPE_POINTER */ - - .text - .align 3 -Lld_void: - ret - .align 3 -Lld_int8: - movzbl -24(%rsp), %eax - ret - .align 3 -Lld_int16: - movzwl -24(%rsp), %eax - ret - .align 3 -Lld_int32: - movl -24(%rsp), %eax - ret - .align 3 -Lld_int64: - movq -24(%rsp), %rax - ret - .align 3 -Lld_float: - movss -24(%rsp), %xmm0 - ret - .align 3 -Lld_double: - movsd -24(%rsp), %xmm0 - ret - .align 3 -Lld_ldouble: - fldt -24(%rsp) - ret - .align 3 -Lld_struct: - /* There are four possibilities here, %rax/%rdx, %xmm0/%rax, - %rax/%xmm0, %xmm0/%xmm1. We collapse two by always loading - both rdx and xmm1 with the second word. For the remaining, - bit 8 set means xmm0 gets the second word, and bit 9 means - that rax gets the second word. */ - movq -24(%rsp), %rcx - movq -16(%rsp), %rdx - movq -16(%rsp), %xmm1 - testl $0x100, %eax - cmovnz %rdx, %rcx - movd %rcx, %xmm0 - testl $0x200, %eax - movq -24(%rsp), %rax - cmovnz %rdx, %rax - ret - - /* See the comment above Lload_sse; the same logic applies here. 
*/ - .align 3 -LUW8: -Lsave_sse: - movdqa %xmm0, 48(%rsp) - movdqa %xmm1, 64(%rsp) - movdqa %xmm2, 80(%rsp) - movdqa %xmm3, 96(%rsp) - movdqa %xmm4, 112(%rsp) - movdqa %xmm5, 128(%rsp) - movdqa %xmm6, 144(%rsp) - movdqa %xmm7, 160(%rsp) - jmp Lret_from_save_sse - -LUW9: -.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support -EH_frame1: - .set L$set$0,LECIE1-LSCIE1 /* CIE Length */ - .long L$set$0 -LSCIE1: - .long 0x0 /* CIE Identifier Tag */ - .byte 0x1 /* CIE Version */ - .ascii "zR\0" /* CIE Augmentation */ - .byte 0x1 /* uleb128 0x1; CIE Code Alignment Factor */ - .byte 0x78 /* sleb128 -8; CIE Data Alignment Factor */ - .byte 0x10 /* CIE RA Column */ - .byte 0x1 /* uleb128 0x1; Augmentation size */ - .byte 0x10 /* FDE Encoding (pcrel sdata4) */ - .byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */ - .byte 0x7 /* uleb128 0x7 */ - .byte 0x8 /* uleb128 0x8 */ - .byte 0x90 /* DW_CFA_offset, column 0x10 */ - .byte 0x1 - .align 3 -LECIE1: - .globl _ffi_call_unix64.eh -_ffi_call_unix64.eh: -LSFDE1: - .set L$set$1,LEFDE1-LASFDE1 /* FDE Length */ - .long L$set$1 -LASFDE1: - .long LASFDE1-EH_frame1 /* FDE CIE offset */ - .quad LUW0-. /* FDE initial location */ - .set L$set$2,LUW4-LUW0 /* FDE address range */ - .quad L$set$2 - .byte 0x0 /* Augmentation size */ - .byte 0x4 /* DW_CFA_advance_loc4 */ - .set L$set$3,LUW1-LUW0 - .long L$set$3 - - /* New stack frame based off rbp. This is a itty bit of unwind - trickery in that the CFA *has* changed. There is no easy way - to describe it correctly on entry to the function. Fortunately, - it doesn't matter too much since at all points we can correctly - unwind back to ffi_call. Note that the location to which we - moved the return address is (the new) CFA-8, so from the - perspective of the unwind info, it hasn't moved. */ - .byte 0xc /* DW_CFA_def_cfa, %rbp offset 32 */ - .byte 0x6 - .byte 0x20 - .byte 0x80+6 /* DW_CFA_offset, %rbp offset 2*-8 */ - .byte 0x2 - .byte 0xa /* DW_CFA_remember_state */ - - .byte 0x4 /* DW_CFA_advance_loc4 */ - .set L$set$4,LUW2-LUW1 - .long L$set$4 - .byte 0xc /* DW_CFA_def_cfa, %rsp offset 8 */ - .byte 0x7 - .byte 0x8 - .byte 0xc0+6 /* DW_CFA_restore, %rbp */ - - .byte 0x4 /* DW_CFA_advance_loc4 */ - .set L$set$5,LUW3-LUW2 - .long L$set$5 - .byte 0xb /* DW_CFA_restore_state */ - - .align 3 -LEFDE1: - .globl _ffi_closure_unix64.eh -_ffi_closure_unix64.eh: -LSFDE3: - .set L$set$6,LEFDE3-LASFDE3 /* FDE Length */ - .long L$set$6 -LASFDE3: - .long LASFDE3-EH_frame1 /* FDE CIE offset */ - .quad LUW5-. /* FDE initial location */ - .set L$set$7,LUW9-LUW5 /* FDE address range */ - .quad L$set$7 - .byte 0x0 /* Augmentation size */ - - .byte 0x4 /* DW_CFA_advance_loc4 */ - .set L$set$8,LUW6-LUW5 - .long L$set$8 - .byte 0xe /* DW_CFA_def_cfa_offset */ - .byte 208,1 /* uleb128 208 */ - .byte 0xa /* DW_CFA_remember_state */ - - .byte 0x4 /* DW_CFA_advance_loc4 */ - .set L$set$9,LUW7-LUW6 - .long L$set$9 - .byte 0xe /* DW_CFA_def_cfa_offset */ - .byte 0x8 - - .byte 0x4 /* DW_CFA_advance_loc4 */ - .set L$set$10,LUW8-LUW7 - .long L$set$10 - .byte 0xb /* DW_CFA_restore_state */ - - .align 3 -LEFDE3: - .subsections_via_symbols - -#endif /* __x86_64__ */ diff --git a/src/x86/ffi.c b/src/x86/ffi.c index 4c96c6d..c4d740a 100644 --- a/src/x86/ffi.c +++ b/src/x86/ffi.c @@ -332,13 +332,28 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue, else { size_t za = ALIGN (z, FFI_SIZEOF_ARG); + size_t align = FFI_SIZEOF_ARG; + + /* Alignment rules for arguments are quite complex. 
Vectors and + structures with 16 byte alignment get it. Note that long double + on Darwin does have 16 byte alignment, and does not get this + alignment if passed directly; a structure with a long double + inside, however, would get 16 byte alignment. Since libffi does + not support vectors, we need non concern ourselves with other + cases. */ + if (t == FFI_TYPE_STRUCT && ty->alignment >= 16) + align = 16; + if (dir < 0) { + /* ??? These reverse argument ABIs are probably too old + to have cared about alignment. Someone should check. */ argp -= za; memcpy (argp, valp, z); } else { + argp = (char *)ALIGN (argp, align); memcpy (argp, valp, z); argp += za; } @@ -419,8 +434,9 @@ ffi_closure_inner (struct closure_frame *frame, char *stack) arg_types = cif->arg_types; for (i = 0; i < n; ++i) { - size_t z = arg_types[i]->size; - int t = arg_types[i]->type; + ffi_type *ty = arg_types[i]; + size_t z = ty->size; + int t = ty->type; void *valp; if (z <= FFI_SIZEOF_ARG && t != FFI_TYPE_STRUCT) @@ -441,13 +457,22 @@ ffi_closure_inner (struct closure_frame *frame, char *stack) else { size_t za = ALIGN (z, FFI_SIZEOF_ARG); + size_t align = FFI_SIZEOF_ARG; + + /* See the comment in ffi_call_int. */ + if (t == FFI_TYPE_STRUCT && ty->alignment >= 16) + align = 16; + if (dir < 0) { + /* ??? These reverse argument ABIs are probably too old + to have cared about alignment. Someone should check. */ argp -= za; valp = argp; } else { + argp = (char *)ALIGN (argp, align); valp = argp; argp += za; } diff --git a/src/x86/sysv.S b/src/x86/sysv.S index ebd1693..6043c67 100644 --- a/src/x86/sysv.S +++ b/src/x86/sysv.S @@ -30,7 +30,6 @@ #define LIBFFI_ASM #include <fficonfig.h> #include <ffi.h> -#include <ffi_cfi.h> #include "internal.h" #define C2(X, Y) X ## Y @@ -41,6 +40,12 @@ # define C(X) X #endif +#ifdef X86_DARWIN +# define L(X) C1(L, X) +#else +# define L(X) C1(.L, X) +#endif + #ifdef __ELF__ # define ENDF(X) .type X,@function; .size X, . - X #else @@ -60,14 +65,14 @@ actual table. The entry points into the table are all 8 bytes. The use of ORG asserts that we're at the correct location. */ /* ??? The clang assembler doesn't handle .org with symbolic expressions. */ -#ifdef __clang__ -# define E(X) .align 8 +#if defined(__clang__) || defined(__APPLE__) +# define E(BASE, X) .balign 8 #else -# define E(X) .align 8; .org 0b + X * 8 +# define E(BASE, X) .balign 8; .org BASE + X * 8 #endif .text - .align 16 + .balign 16 .globl ffi_call_i386 FFI_HIDDEN(ffi_call_i386) @@ -83,7 +88,8 @@ */ ffi_call_i386: - cfi_startproc +L(UW0): + # cfi_startproc movl (%esp), %eax /* move the return address */ movl %ebp, (%ecx) /* store %ebp into local frame */ movl %eax, 4(%ecx) /* store retaddr into local frame */ @@ -96,8 +102,9 @@ ffi_call_i386: moved the return address is (the new) CFA-4, so from the perspective of the unwind info, it hasn't moved. 
*/ movl %ecx, %ebp - cfi_def_cfa(%ebp, 8) - cfi_rel_offset(%ebp, 0) +L(UW1): + # cfi_def_cfa(%ebp, 8) + # cfi_rel_offset(%ebp, 0) movl %edx, %esp /* set outgoing argument stack */ movl 20+R_EAX*4(%ebp), %eax /* set register arguments */ @@ -108,80 +115,86 @@ ffi_call_i386: movl 12(%ebp), %ecx /* load return type code */ movl %ebx, 8(%ebp) /* preserve %ebx */ - cfi_rel_offset(%ebx, 8) +L(UW2): + # cfi_rel_offset(%ebx, 8) andl $X86_RET_TYPE_MASK, %ecx #ifdef __PIC__ - call __x86.get_pc_thunk.bx -1: leal 0f-1b(%ebx, %ecx, 8), %ebx + call C(__x86.get_pc_thunk.bx) +L(pc1): + leal L(store_table)-L(pc1)(%ebx, %ecx, 8), %ebx #else - leal 0f(,%ecx, 8), %ebx + leal L(store_table)(,%ecx, 8), %ebx #endif movl 16(%ebp), %ecx /* load result address */ jmp *%ebx - .align 8 -0: -E(X86_RET_FLOAT) + .balign 8 +L(store_table): +E(L(store_table), X86_RET_FLOAT) fstps (%ecx) - jmp 9f -E(X86_RET_DOUBLE) + jmp L(e1) +E(L(store_table), X86_RET_DOUBLE) fstpl (%ecx) - jmp 9f -E(X86_RET_LDOUBLE) + jmp L(e1) +E(L(store_table), X86_RET_LDOUBLE) fstpt (%ecx) - jmp 9f -E(X86_RET_SINT8) + jmp L(e1) +E(L(store_table), X86_RET_SINT8) movsbl %al, %eax mov %eax, (%ecx) - jmp 9f -E(X86_RET_SINT16) + jmp L(e1) +E(L(store_table), X86_RET_SINT16) movswl %ax, %eax mov %eax, (%ecx) - jmp 9f -E(X86_RET_UINT8) + jmp L(e1) +E(L(store_table), X86_RET_UINT8) movzbl %al, %eax mov %eax, (%ecx) - jmp 9f -E(X86_RET_UINT16) + jmp L(e1) +E(L(store_table), X86_RET_UINT16) movzwl %ax, %eax mov %eax, (%ecx) - jmp 9f -E(X86_RET_INT64) + jmp L(e1) +E(L(store_table), X86_RET_INT64) movl %edx, 4(%ecx) /* fallthru */ -E(X86_RET_INT32) +E(L(store_table), X86_RET_INT32) movl %eax, (%ecx) /* fallthru */ -E(X86_RET_VOID) -9: movl 8(%ebp), %ebx +E(L(store_table), X86_RET_VOID) +L(e1): + movl 8(%ebp), %ebx movl %ebp, %esp popl %ebp - cfi_remember_state - cfi_def_cfa(%esp, 4) - cfi_restore(%ebx) - cfi_restore(%ebp) +L(UW3): + # cfi_remember_state + # cfi_def_cfa(%esp, 4) + # cfi_restore(%ebx) + # cfi_restore(%ebp) ret - cfi_restore_state - -E(X86_RET_STRUCTPOP) - jmp 9b -E(X86_RET_STRUCTARG) - jmp 9b -E(X86_RET_STRUCT_1B) +L(UW4): + # cfi_restore_state + +E(L(store_table), X86_RET_STRUCTPOP) + jmp L(e1) +E(L(store_table), X86_RET_STRUCTARG) + jmp L(e1) +E(L(store_table), X86_RET_STRUCT_1B) movb %al, (%ecx) - jmp 9b -E(X86_RET_STRUCT_2B) + jmp L(e1) +E(L(store_table), X86_RET_STRUCT_2B) movw %ax, (%ecx) - jmp 9b + jmp L(e1) /* Fill out the table so that bad values are predictable. */ -E(X86_RET_UNUSED14) +E(L(store_table), X86_RET_UNUSED14) ud2 -E(X86_RET_UNUSED15) +E(L(store_table), X86_RET_UNUSED15) ud2 - cfi_endproc +L(UW5): + # cfi_endproc ENDF(ffi_call_i386) /* The inner helper is declared as @@ -212,190 +225,242 @@ ENDF(ffi_call_i386) movl %ecx, 32(%esp); \ movl %eax, 36(%esp) +# define FFI_CLOSURE_CALL_INNER(UW) \ + movl %esp, %ecx; /* load closure_data */ \ + leal closure_FS+4(%esp), %edx; /* load incoming stack */ \ + call ffi_closure_inner +#define FFI_CLOSURE_MASK_AND_JUMP(N, UW) \ + andl $X86_RET_TYPE_MASK, %eax; \ + leal L(C1(load_table,N))(, %eax, 8), %eax; \ + jmp *%eax #ifdef __PIC__ -/* We're going to always load the got register here, even if .hidden says - we're going to avoid the PLT call. We'll use the got register in - FFI_CLOSURE_MASK_AND_JUMP. 
*/ -# if defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE -# define PLT(X) X +# if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE +# undef FFI_CLOSURE_MASK_AND_JUMP +# define FFI_CLOSURE_MASK_AND_JUMP(N, UW) \ + andl $X86_RET_TYPE_MASK, %eax; \ + call C(__x86.get_pc_thunk.dx); \ +L(C1(pc,N)): \ + leal L(C1(load_table,N))-L(C1(pc,N))(%edx, %eax, 8), %eax; \ + jmp *%eax # else -# define PLT(X) X@PLT -# endif -# define FFI_CLOSURE_CALL_INNER \ +# define FFI_CLOSURE_CALL_INNER_SAVE_EBX +# undef FFI_CLOSURE_CALL_INNER +# define FFI_CLOSURE_CALL_INNER(UWN) \ movl %esp, %ecx; /* load closure_data */ \ leal closure_FS+4(%esp), %edx; /* load incoming stack */ \ movl %ebx, 40(%esp); /* save ebx */ \ - cfi_rel_offset(%ebx, 40); \ - call __x86.get_pc_thunk.bx; /* load got register */ \ +L(C1(UW,UWN)): \ + # cfi_rel_offset(%ebx, 40); \ + call C(__x86.get_pc_thunk.bx); /* load got register */ \ addl $C(_GLOBAL_OFFSET_TABLE_), %ebx; \ - call PLT(ffi_closure_inner) -#define FFI_CLOSURE_MASK_AND_JUMP \ + call ffi_closure_inner@PLT +# undef FFI_CLOSURE_MASK_AND_JUMP +# define FFI_CLOSURE_MASK_AND_JUMP(N, UWN) \ andl $X86_RET_TYPE_MASK, %eax; \ - leal 0f@GOTOFF(%ebx, %eax, 8), %eax; \ + leal L(C1(load_table,N))@GOTOFF(%ebx, %eax, 8), %eax; \ movl 40(%esp), %ebx; /* restore ebx */ \ - cfi_restore(%ebx); \ - jmp *%eax -#else -# define FFI_CLOSURE_CALL_INNER \ - movl %esp, %ecx; /* load closure_data */ \ - leal closure_FS+4(%esp), %edx; /* load incoming stack */ \ - call ffi_closure_inner -#define FFI_CLOSURE_MASK_AND_JUMP \ - andl $X86_RET_TYPE_MASK, %eax; \ - leal 0f(, %eax, 8), %eax; \ +L(C1(UW,UWN)): \ + # cfi_restore(%ebx); \ jmp *%eax +# endif /* DARWIN || HIDDEN */ #endif /* __PIC__ */ -#define FFI_GO_CLOSURE(suffix, chain, t1, t2) \ - .align 16; \ - .globl C(C1(ffi_go_closure_,suffix)); \ - FFI_HIDDEN(C(C1(ffi_go_closure_,suffix))); \ -C(C1(ffi_go_closure_,suffix)): \ - cfi_startproc; \ - subl $closure_FS, %esp; \ - /* Note clang bug 21515: adjust_cfa_offset error across endproc. */ \ - cfi_def_cfa_offset(closure_FS + 4); \ - FFI_CLOSURE_SAVE_REGS; \ - movl 4(chain), t1; /* copy cif */ \ - movl 8(chain), t2; /* copy fun */ \ - movl t1, 28(%esp); \ - movl t2, 32(%esp); \ - movl chain, 36(%esp); /* closure is user_data */ \ - jmp 88f; \ - cfi_endproc; \ -ENDF(C(C1(ffi_go_closure_,suffix))) - -FFI_GO_CLOSURE(EAX, %eax, %edx, %ecx) -FFI_GO_CLOSURE(ECX, %ecx, %edx, %eax) + .balign 16 + .globl C(ffi_go_closure_EAX) + FFI_HIDDEN(C(ffi_go_closure_EAX)) +C(ffi_go_closure_EAX): +L(UW6): + # cfi_startproc + subl $closure_FS, %esp +L(UW7): + # cfi_def_cfa_offset(closure_FS + 4) + FFI_CLOSURE_SAVE_REGS + movl 4(%eax), %edx /* copy cif */ + movl 8(%eax), %ecx /* copy fun */ + movl %edx, 28(%esp) + movl %ecx, 32(%esp) + movl %eax, 36(%esp) /* closure is user_data */ + jmp L(do_closure_i386) +L(UW8): + # cfi_endproc +ENDF(C(ffi_go_closure_EAX)) + + .balign 16 + .globl C(ffi_go_closure_ECX) + FFI_HIDDEN(C(ffi_go_closure_ECX)) +C(ffi_go_closure_ECX): +L(UW9): + # cfi_startproc + subl $closure_FS, %esp +L(UW10): + # cfi_def_cfa_offset(closure_FS + 4) + FFI_CLOSURE_SAVE_REGS + movl 4(%ecx), %edx /* copy cif */ + movl 8(%ecx), %eax /* copy fun */ + movl %edx, 28(%esp) + movl %eax, 32(%esp) + movl %ecx, 36(%esp) /* closure is user_data */ + jmp L(do_closure_i386) +L(UW11): + # cfi_endproc +ENDF(C(ffi_go_closure_ECX)) /* The closure entry points are reached from the ffi_closure trampoline. On entry, %eax contains the address of the ffi_closure. 
*/ - .align 16 + .balign 16 .globl C(ffi_closure_i386) FFI_HIDDEN(C(ffi_closure_i386)) C(ffi_closure_i386): - cfi_startproc +L(UW12): + # cfi_startproc subl $closure_FS, %esp - /* Note clang bug 21515: adjust_cfa_offset error across endproc. */ - cfi_def_cfa_offset(closure_FS + 4) +L(UW13): + # cfi_def_cfa_offset(closure_FS + 4) FFI_CLOSURE_SAVE_REGS FFI_CLOSURE_COPY_TRAMP_DATA -88: /* Entry point from preceeding Go closures. */ + /* Entry point from preceeding Go closures. */ +L(do_closure_i386): - FFI_CLOSURE_CALL_INNER - FFI_CLOSURE_MASK_AND_JUMP + FFI_CLOSURE_CALL_INNER(14) + FFI_CLOSURE_MASK_AND_JUMP(2, 15) - .align 8 -0: -E(X86_RET_FLOAT) + .balign 8 +L(load_table2): +E(L(load_table2), X86_RET_FLOAT) flds (%esp) - jmp 9f -E(X86_RET_DOUBLE) + jmp L(e2) +E(L(load_table2), X86_RET_DOUBLE) fldl (%esp) - jmp 9f -E(X86_RET_LDOUBLE) + jmp L(e2) +E(L(load_table2), X86_RET_LDOUBLE) fldt (%esp) - jmp 9f -E(X86_RET_SINT8) + jmp L(e2) +E(L(load_table2), X86_RET_SINT8) movsbl (%esp), %eax - jmp 9f -E(X86_RET_SINT16) + jmp L(e2) +E(L(load_table2), X86_RET_SINT16) movswl (%esp), %eax - jmp 9f -E(X86_RET_UINT8) + jmp L(e2) +E(L(load_table2), X86_RET_UINT8) movzbl (%esp), %eax - jmp 9f -E(X86_RET_UINT16) + jmp L(e2) +E(L(load_table2), X86_RET_UINT16) movzwl (%esp), %eax - jmp 9f -E(X86_RET_INT64) + jmp L(e2) +E(L(load_table2), X86_RET_INT64) movl 4(%esp), %edx /* fallthru */ -E(X86_RET_INT32) +E(L(load_table2), X86_RET_INT32) movl (%esp), %eax /* fallthru */ -E(X86_RET_VOID) -9: addl $closure_FS, %esp - cfi_adjust_cfa_offset(-closure_FS) +E(L(load_table2), X86_RET_VOID) +L(e2): + addl $closure_FS, %esp +L(UW16): + # cfi_adjust_cfa_offset(-closure_FS) ret - cfi_adjust_cfa_offset(closure_FS) -E(X86_RET_STRUCTPOP) +L(UW17): + # cfi_adjust_cfa_offset(closure_FS) +E(L(load_table2), X86_RET_STRUCTPOP) addl $closure_FS, %esp - cfi_adjust_cfa_offset(-closure_FS) +L(UW18): + # cfi_adjust_cfa_offset(-closure_FS) ret $4 - cfi_adjust_cfa_offset(closure_FS) -E(X86_RET_STRUCTARG) +L(UW19): + # cfi_adjust_cfa_offset(closure_FS) +E(L(load_table2), X86_RET_STRUCTARG) movl (%esp), %eax - jmp 9b -E(X86_RET_STRUCT_1B) + jmp L(e2) +E(L(load_table2), X86_RET_STRUCT_1B) movzbl (%esp), %eax - jmp 9b -E(X86_RET_STRUCT_2B) + jmp L(e2) +E(L(load_table2), X86_RET_STRUCT_2B) movzwl (%esp), %eax - jmp 9b + jmp L(e2) /* Fill out the table so that bad values are predictable. */ -E(X86_RET_UNUSED14) +E(L(load_table2), X86_RET_UNUSED14) ud2 -E(X86_RET_UNUSED15) +E(L(load_table2), X86_RET_UNUSED15) ud2 - cfi_endproc +L(UW20): + # cfi_endproc ENDF(C(ffi_closure_i386)) -FFI_GO_CLOSURE(STDCALL, %ecx, %edx, %eax) + .balign 16 + .globl C(ffi_go_closure_STDCALL) + FFI_HIDDEN(C(ffi_go_closure_STDCALL)) +C(ffi_go_closure_STDCALL): +L(UW21): + # cfi_startproc + subl $closure_FS, %esp +L(UW22): + # cfi_def_cfa_offset(closure_FS + 4) + FFI_CLOSURE_SAVE_REGS + movl 4(%ecx), %edx /* copy cif */ + movl 8(%ecx), %eax /* copy fun */ + movl %edx, 28(%esp) + movl %eax, 32(%esp) + movl %ecx, 36(%esp) /* closure is user_data */ + jmp L(do_closure_STDCALL) +L(UW23): + # cfi_endproc +ENDF(C(ffi_go_closure_STDCALL)) /* For REGISTER, we have no available parameter registers, and so we enter here having pushed the closure onto the stack. 
*/ - .align 16 + .balign 16 .globl C(ffi_closure_REGISTER) FFI_HIDDEN(C(ffi_closure_REGISTER)) C(ffi_closure_REGISTER): - cfi_startproc - cfi_def_cfa(%esp, 8) - cfi_offset(%eip, -8) +L(UW24): + # cfi_startproc + # cfi_def_cfa(%esp, 8) + # cfi_offset(%eip, -8) subl $closure_FS-4, %esp - /* Note clang bug 21515: adjust_cfa_offset error across endproc. */ - cfi_def_cfa_offset(closure_FS + 4) - +L(UW25): + # cfi_def_cfa_offset(closure_FS + 4) FFI_CLOSURE_SAVE_REGS - movl closure_FS-4(%esp), %ecx /* load retaddr */ movl closure_FS(%esp), %eax /* load closure */ movl %ecx, closure_FS(%esp) /* move retaddr */ - jmp 0f - - cfi_endproc + jmp L(do_closure_REGISTER) +L(UW26): + # cfi_endproc ENDF(C(ffi_closure_REGISTER)) /* For STDCALL (and others), we need to pop N bytes of arguments off the stack following the closure. The amount needing to be popped is returned to us from ffi_closure_inner. */ - .align 16 + .balign 16 .globl C(ffi_closure_STDCALL) FFI_HIDDEN(C(ffi_closure_STDCALL)) C(ffi_closure_STDCALL): - cfi_startproc +L(UW27): + # cfi_startproc subl $closure_FS, %esp - /* Note clang bug 21515: adjust_cfa_offset error across endproc. */ - cfi_def_cfa_offset(closure_FS + 4) +L(UW28): + # cfi_def_cfa_offset(closure_FS + 4) FFI_CLOSURE_SAVE_REGS -0: /* Entry point from ffi_closure_REGISTER. */ + /* Entry point from ffi_closure_REGISTER. */ +L(do_closure_REGISTER): FFI_CLOSURE_COPY_TRAMP_DATA -88: /* Entry point from preceeding Go closure. */ + /* Entry point from preceeding Go closure. */ +L(do_closure_STDCALL): - FFI_CLOSURE_CALL_INNER + FFI_CLOSURE_CALL_INNER(29) movl %eax, %ecx shrl $X86_RET_POP_SHIFT, %ecx /* isolate pop count */ @@ -403,100 +468,98 @@ C(ffi_closure_STDCALL): movl closure_FS(%esp), %edx /* move return address */ movl %edx, (%ecx) - /* New pseudo-stack frame based off ecx. This is unwind trickery - in that the CFA *has* changed, to the proper popped stack address. - Note that the location to which we moved the return address - is the new CFA-4, so that's unchanged. */ - cfi_def_cfa(%ecx, 4) - /* Normally esp is unwound to CFA + the caller's ARGS_SIZE. - We've just set the CFA to that final value. Tell the unwinder - to restore esp from CFA without the ARGS_SIZE: - DW_CFA_val_expression %esp, DW_OP_call_frame_cfa. */ - cfi_escape(0x16, 4, 1, 0x9c) - - FFI_CLOSURE_MASK_AND_JUMP - - .align 8 -0: -E(X86_RET_FLOAT) + /* From this point on, the value of %esp upon return is %ecx+4, + and we've copied the return address to %ecx to make return easy. + There's no point in representing this in the unwind info, as + there is always a window between the mov and the ret which + will be wrong from one point of view or another. 
*/ + + FFI_CLOSURE_MASK_AND_JUMP(3, 30) + + .balign 8 +L(load_table3): +E(L(load_table3), X86_RET_FLOAT) flds (%esp) movl %ecx, %esp ret -E(X86_RET_DOUBLE) +E(L(load_table3), X86_RET_DOUBLE) fldl (%esp) movl %ecx, %esp ret -E(X86_RET_LDOUBLE) +E(L(load_table3), X86_RET_LDOUBLE) fldt (%esp) movl %ecx, %esp ret -E(X86_RET_SINT8) +E(L(load_table3), X86_RET_SINT8) movsbl (%esp), %eax movl %ecx, %esp ret -E(X86_RET_SINT16) +E(L(load_table3), X86_RET_SINT16) movswl (%esp), %eax movl %ecx, %esp ret -E(X86_RET_UINT8) +E(L(load_table3), X86_RET_UINT8) movzbl (%esp), %eax movl %ecx, %esp ret -E(X86_RET_UINT16) +E(L(load_table3), X86_RET_UINT16) movzwl (%esp), %eax movl %ecx, %esp ret -E(X86_RET_INT64) +E(L(load_table3), X86_RET_INT64) popl %eax popl %edx movl %ecx, %esp ret -E(X86_RET_INT32) +E(L(load_table3), X86_RET_INT32) movl (%esp), %eax movl %ecx, %esp ret -E(X86_RET_VOID) +E(L(load_table3), X86_RET_VOID) movl %ecx, %esp ret -E(X86_RET_STRUCTPOP) +E(L(load_table3), X86_RET_STRUCTPOP) movl %ecx, %esp ret -E(X86_RET_STRUCTARG) +E(L(load_table3), X86_RET_STRUCTARG) movl (%esp), %eax movl %ecx, %esp ret -E(X86_RET_STRUCT_1B) +E(L(load_table3), X86_RET_STRUCT_1B) movzbl (%esp), %eax movl %ecx, %esp ret -E(X86_RET_STRUCT_2B) +E(L(load_table3), X86_RET_STRUCT_2B) movzwl (%esp), %eax movl %ecx, %esp ret /* Fill out the table so that bad values are predictable. */ -E(X86_RET_UNUSED14) +E(L(load_table3), X86_RET_UNUSED14) ud2 -E(X86_RET_UNUSED15) +E(L(load_table3), X86_RET_UNUSED15) ud2 - cfi_endproc +L(UW31): + # cfi_endproc ENDF(C(ffi_closure_STDCALL)) #if !FFI_NO_RAW_API #define raw_closure_S_FS (16+16+12) - .align 16 + .balign 16 .globl C(ffi_closure_raw_SYSV) FFI_HIDDEN(C(ffi_closure_raw_SYSV)) C(ffi_closure_raw_SYSV): - cfi_startproc +L(UW32): + # cfi_startproc subl $raw_closure_S_FS, %esp - /* Note clang bug 21515: adjust_cfa_offset error across endproc. 
*/ - cfi_def_cfa_offset(raw_closure_S_FS + 4) +L(UW33): + # cfi_def_cfa_offset(raw_closure_S_FS + 4) movl %ebx, raw_closure_S_FS-4(%esp) - cfi_rel_offset(%ebx, raw_closure_S_FS-4) +L(UW34): + # cfi_rel_offset(%ebx, raw_closure_S_FS-4) movl FFI_TRAMPOLINE_SIZE+8(%eax), %edx /* load cl->user_data */ movl %edx, 12(%esp) @@ -511,96 +574,108 @@ C(ffi_closure_raw_SYSV): movl 20(%ebx), %eax /* load cif->flags */ andl $X86_RET_TYPE_MASK, %eax #ifdef __PIC__ - call __x86.get_pc_thunk.bx -1: leal 0f-1b(%ebx, %eax, 8), %eax + call C(__x86.get_pc_thunk.bx) +L(pc4): + leal L(load_table4)-L(pc4)(%ebx, %eax, 8), %eax #else - leal 0f(,%eax, 8), %eax + leal L(load_table4)(,%eax, 8), %eax #endif movl raw_closure_S_FS-4(%esp), %ebx - cfi_restore(%ebx) +L(UW35): + # cfi_restore(%ebx) jmp *%eax - .align 8 -0: -E(X86_RET_FLOAT) + .balign 8 +L(load_table4): +E(L(load_table4), X86_RET_FLOAT) flds 16(%esp) - jmp 9f -E(X86_RET_DOUBLE) + jmp L(e4) +E(L(load_table4), X86_RET_DOUBLE) fldl 16(%esp) - jmp 9f -E(X86_RET_LDOUBLE) + jmp L(e4) +E(L(load_table4), X86_RET_LDOUBLE) fldt 16(%esp) - jmp 9f -E(X86_RET_SINT8) + jmp L(e4) +E(L(load_table4), X86_RET_SINT8) movsbl 16(%esp), %eax - jmp 9f -E(X86_RET_SINT16) + jmp L(e4) +E(L(load_table4), X86_RET_SINT16) movswl 16(%esp), %eax - jmp 9f -E(X86_RET_UINT8) + jmp L(e4) +E(L(load_table4), X86_RET_UINT8) movzbl 16(%esp), %eax - jmp 9f -E(X86_RET_UINT16) + jmp L(e4) +E(L(load_table4), X86_RET_UINT16) movzwl 16(%esp), %eax - jmp 9f -E(X86_RET_INT64) + jmp L(e4) +E(L(load_table4), X86_RET_INT64) movl 16+4(%esp), %edx /* fallthru */ -E(X86_RET_INT32) +E(L(load_table4), X86_RET_INT32) movl 16(%esp), %eax /* fallthru */ -E(X86_RET_VOID) -9: addl $raw_closure_S_FS, %esp - cfi_adjust_cfa_offset(-raw_closure_S_FS) +E(L(load_table4), X86_RET_VOID) +L(e4): + addl $raw_closure_S_FS, %esp +L(UW36): + # cfi_adjust_cfa_offset(-raw_closure_S_FS) ret - cfi_adjust_cfa_offset(raw_closure_S_FS) -E(X86_RET_STRUCTPOP) +L(UW37): + # cfi_adjust_cfa_offset(raw_closure_S_FS) +E(L(load_table4), X86_RET_STRUCTPOP) addl $raw_closure_S_FS, %esp - cfi_adjust_cfa_offset(-raw_closure_S_FS) +L(UW38): + # cfi_adjust_cfa_offset(-raw_closure_S_FS) ret $4 - cfi_adjust_cfa_offset(raw_closure_S_FS) -E(X86_RET_STRUCTARG) +L(UW39): + # cfi_adjust_cfa_offset(raw_closure_S_FS) +E(L(load_table4), X86_RET_STRUCTARG) movl 16(%esp), %eax - jmp 9b -E(X86_RET_STRUCT_1B) + jmp L(e4) +E(L(load_table4), X86_RET_STRUCT_1B) movzbl 16(%esp), %eax - jmp 9b -E(X86_RET_STRUCT_2B) + jmp L(e4) +E(L(load_table4), X86_RET_STRUCT_2B) movzwl 16(%esp), %eax - jmp 9b + jmp L(e4) /* Fill out the table so that bad values are predictable. */ -E(X86_RET_UNUSED14) +E(L(load_table4), X86_RET_UNUSED14) ud2 -E(X86_RET_UNUSED15) +E(L(load_table4), X86_RET_UNUSED15) ud2 - cfi_endproc +L(UW40): + # cfi_endproc ENDF(C(ffi_closure_raw_SYSV)) -#undef raw_closure_S_FS #define raw_closure_T_FS (16+16+8) - .align 16 + .balign 16 .globl C(ffi_closure_raw_THISCALL) FFI_HIDDEN(C(ffi_closure_raw_THISCALL)) C(ffi_closure_raw_THISCALL): - cfi_startproc +L(UW41): + # cfi_startproc /* Rearrange the stack such that %ecx is the first argument. This means moving the return address. */ popl %edx - /* Note clang bug 21515: adjust_cfa_offset error across endproc. 
*/ - cfi_def_cfa_offset(0) - cfi_register(%eip, %edx) +L(UW42): + # cfi_def_cfa_offset(0) + # cfi_register(%eip, %edx) pushl %ecx - cfi_adjust_cfa_offset(4) +L(UW43): + # cfi_adjust_cfa_offset(4) pushl %edx - cfi_adjust_cfa_offset(4) - cfi_rel_offset(%eip, 0) +L(UW44): + # cfi_adjust_cfa_offset(4) + # cfi_rel_offset(%eip, 0) subl $raw_closure_T_FS, %esp - cfi_adjust_cfa_offset(raw_closure_T_FS) +L(UW45): + # cfi_adjust_cfa_offset(raw_closure_T_FS) movl %ebx, raw_closure_T_FS-4(%esp) - cfi_rel_offset(%ebx, raw_closure_T_FS-4) +L(UW46): + # cfi_rel_offset(%ebx, raw_closure_T_FS-4) movl FFI_TRAMPOLINE_SIZE+8(%eax), %edx /* load cl->user_data */ movl %edx, 12(%esp) @@ -615,90 +690,329 @@ C(ffi_closure_raw_THISCALL): movl 20(%ebx), %eax /* load cif->flags */ andl $X86_RET_TYPE_MASK, %eax #ifdef __PIC__ - call __x86.get_pc_thunk.bx -1: leal 0f-1b(%ebx, %eax, 8), %eax + call C(__x86.get_pc_thunk.bx) +L(pc5): + leal L(load_table5)-L(pc5)(%ebx, %eax, 8), %eax #else - leal 0f(,%eax, 8), %eax + leal L(load_table5)(,%eax, 8), %eax #endif movl raw_closure_T_FS-4(%esp), %ebx - cfi_restore(%ebx) +L(UW47): + # cfi_restore(%ebx) jmp *%eax - .align 8 -0: -E(X86_RET_FLOAT) + .balign 8 +L(load_table5): +E(L(load_table5), X86_RET_FLOAT) flds 16(%esp) - jmp 9f -E(X86_RET_DOUBLE) + jmp L(e5) +E(L(load_table5), X86_RET_DOUBLE) fldl 16(%esp) - jmp 9f -E(X86_RET_LDOUBLE) + jmp L(e5) +E(L(load_table5), X86_RET_LDOUBLE) fldt 16(%esp) - jmp 9f -E(X86_RET_SINT8) + jmp L(e5) +E(L(load_table5), X86_RET_SINT8) movsbl 16(%esp), %eax - jmp 9f -E(X86_RET_SINT16) + jmp L(e5) +E(L(load_table5), X86_RET_SINT16) movswl 16(%esp), %eax - jmp 9f -E(X86_RET_UINT8) + jmp L(e5) +E(L(load_table5), X86_RET_UINT8) movzbl 16(%esp), %eax - jmp 9f -E(X86_RET_UINT16) + jmp L(e5) +E(L(load_table5), X86_RET_UINT16) movzwl 16(%esp), %eax - jmp 9f -E(X86_RET_INT64) + jmp L(e5) +E(L(load_table5), X86_RET_INT64) movl 16+4(%esp), %edx /* fallthru */ -E(X86_RET_INT32) +E(L(load_table5), X86_RET_INT32) movl 16(%esp), %eax /* fallthru */ -E(X86_RET_VOID) -9: addl $raw_closure_T_FS, %esp - cfi_adjust_cfa_offset(-raw_closure_T_FS) +E(L(load_table5), X86_RET_VOID) +L(e5): + addl $raw_closure_T_FS, %esp +L(UW48): + # cfi_adjust_cfa_offset(-raw_closure_T_FS) /* Remove the extra %ecx argument we pushed. */ ret $4 - cfi_adjust_cfa_offset(raw_closure_T_FS) -E(X86_RET_STRUCTPOP) +L(UW49): + # cfi_adjust_cfa_offset(raw_closure_T_FS) +E(L(load_table5), X86_RET_STRUCTPOP) addl $raw_closure_T_FS, %esp - cfi_adjust_cfa_offset(-raw_closure_T_FS) +L(UW50): + # cfi_adjust_cfa_offset(-raw_closure_T_FS) ret $8 - cfi_adjust_cfa_offset(raw_closure_T_FS) -E(X86_RET_STRUCTARG) +L(UW51): + # cfi_adjust_cfa_offset(raw_closure_T_FS) +E(L(load_table5), X86_RET_STRUCTARG) movl 16(%esp), %eax - jmp 9b -E(X86_RET_STRUCT_1B) + jmp L(e5) +E(L(load_table5), X86_RET_STRUCT_1B) movzbl 16(%esp), %eax - jmp 9b -E(X86_RET_STRUCT_2B) + jmp L(e5) +E(L(load_table5), X86_RET_STRUCT_2B) movzwl 16(%esp), %eax - jmp 9b + jmp L(e5) /* Fill out the table so that bad values are predictable. 
*/ -E(X86_RET_UNUSED14) +E(L(load_table5), X86_RET_UNUSED14) ud2 -E(X86_RET_UNUSED15) +E(L(load_table5), X86_RET_UNUSED15) ud2 - cfi_endproc +L(UW52): + # cfi_endproc ENDF(C(ffi_closure_raw_THISCALL)) #endif /* !FFI_NO_RAW_API */ +#ifdef X86_DARWIN +# define COMDAT(X) \ + .section __TEXT,__textcoal_nt,coalesced,pure_instructions; \ + .weak_definition X; \ + .private_extern X +#elif defined __ELF__ +# define COMDAT(X) \ + .section .text.X,"axG",@progbits,X,comdat; \ + .globl X; \ + FFI_HIDDEN(X) +#else +# define COMDAT(X) +#endif + #if defined(__PIC__) - .section .text.__x86.get_pc_thunk.bx,"axG",@progbits,__x86.get_pc_thunk.bx,comdat - .globl __x86.get_pc_thunk.bx - .hidden __x86.get_pc_thunk.bx - .type __x86.get_pc_thunk.bx,@function -__x86.get_pc_thunk.bx: - cfi_startproc + COMDAT(C(__x86.get_pc_thunk.bx)) +C(__x86.get_pc_thunk.bx): movl (%esp), %ebx ret - cfi_endproc - .size __x86.get_pc_thunk.bx, . - __x86.get_pc_thunk.bx +ENDF(C(__x86.get_pc_thunk.bx)) +# if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE + COMDAT(C(__x86.get_pc_thunk.dx)) +C(__x86.get_pc_thunk.dx): + movl (%esp), %edx + ret +ENDF(C(__x86.get_pc_thunk.dx)) +#endif /* DARWIN || HIDDEN */ #endif /* __PIC__ */ +/* Sadly, OSX cctools-as doesn't understand .cfi directives at all. */ + +#ifdef __APPLE__ +.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support +EHFrame0: +#elif defined(HAVE_AS_X86_64_UNWIND_SECTION_TYPE) +.section .eh_frame,"a",@unwind +#else +.section .eh_frame,"a",@progbits +#endif + +#ifdef HAVE_AS_X86_PCREL +# define PCREL(X) X - . +#else +# define PCREL(X) X@rel +#endif + +/* Simplify advancing between labels. Assume DW_CFA_advance_loc1 fits. */ +#define ADV(N, P) .byte 2, L(N)-L(P) + + .balign 4 +L(CIE): + .set L(set0),L(ECIE)-L(SCIE) + .long L(set0) /* CIE Length */ +L(SCIE): + .long 0 /* CIE Identifier Tag */ + .byte 1 /* CIE Version */ + .ascii "zR\0" /* CIE Augmentation */ + .byte 1 /* CIE Code Alignment Factor */ + .byte 0x7c /* CIE Data Alignment Factor */ + .byte 0x8 /* CIE RA Column */ + .byte 1 /* Augmentation size */ + .byte 0x1b /* FDE Encoding (pcrel sdata4) */ + .byte 0xc, 4, 4 /* DW_CFA_def_cfa, %esp offset 4 */ + .byte 0x80+8, 1 /* DW_CFA_offset, %eip offset 1*-4 */ + .balign 4 +L(ECIE): + + .set L(set1),L(EFDE1)-L(SFDE1) + .long L(set1) /* FDE Length */ +L(SFDE1): + .long L(SFDE1)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW0)) /* Initial location */ + .long L(UW5)-L(UW0) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW1, UW0) + .byte 0xc, 5, 8 /* DW_CFA_def_cfa, %ebp 8 */ + .byte 0x80+5, 2 /* DW_CFA_offset, %ebp 2*-4 */ + ADV(UW2, UW1) + .byte 0x80+3, 0 /* DW_CFA_offset, %ebx 0*-4 */ + ADV(UW3, UW2) + .byte 0xa /* DW_CFA_remember_state */ + .byte 0xc, 4, 4 /* DW_CFA_def_cfa, %esp 4 */ + .byte 0xc0+3 /* DW_CFA_restore, %ebx */ + .byte 0xc0+5 /* DW_CFA_restore, %ebp */ + ADV(UW4, UW3) + .byte 0xb /* DW_CFA_restore_state */ + .balign 4 +L(EFDE1): + + .set L(set2),L(EFDE2)-L(SFDE2) + .long L(set2) /* FDE Length */ +L(SFDE2): + .long L(SFDE2)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW6)) /* Initial location */ + .long L(UW8)-L(UW6) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW7, UW6) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ + .balign 4 +L(EFDE2): + + .set L(set3),L(EFDE3)-L(SFDE3) + .long L(set3) /* FDE Length */ +L(SFDE3): + .long L(SFDE3)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW9)) /* Initial location */ + .long L(UW11)-L(UW9) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW10, UW9) + 
.byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ + .balign 4 +L(EFDE3): + + .set L(set4),L(EFDE4)-L(SFDE4) + .long L(set4) /* FDE Length */ +L(SFDE4): + .long L(SFDE4)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW12)) /* Initial location */ + .long L(UW20)-L(UW12) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW13, UW12) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ +#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX + ADV(UW14, UW13) + .byte 0x80+3, (40-(closure_FS+4))/-4 /* DW_CFA_offset %ebx */ + ADV(UW15, UW14) + .byte 0xc0+3 /* DW_CFA_restore %ebx */ + ADV(UW16, UW15) +#else + ADV(UW16, UW13) +#endif + .byte 0xe, 4 /* DW_CFA_def_cfa_offset */ + ADV(UW17, UW16) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ + ADV(UW18, UW17) + .byte 0xe, 4 /* DW_CFA_def_cfa_offset */ + ADV(UW19, UW18) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ + .balign 4 +L(EFDE4): + + .set L(set5),L(EFDE5)-L(SFDE5) + .long L(set5) /* FDE Length */ +L(SFDE5): + .long L(SFDE5)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW21)) /* Initial location */ + .long L(UW23)-L(UW21) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW22, UW21) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ + .balign 4 +L(EFDE5): + + .set L(set6),L(EFDE6)-L(SFDE6) + .long L(set6) /* FDE Length */ +L(SFDE6): + .long L(SFDE6)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW24)) /* Initial location */ + .long L(UW26)-L(UW24) /* Address range */ + .byte 0 /* Augmentation size */ + .byte 0xe, 8 /* DW_CFA_def_cfa_offset */ + .byte 0x80+8, 2 /* DW_CFA_offset %eip, 2*-4 */ + ADV(UW25, UW24) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ + .balign 4 +L(EFDE6): + + .set L(set7),L(EFDE7)-L(SFDE7) + .long L(set7) /* FDE Length */ +L(SFDE7): + .long L(SFDE7)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW27)) /* Initial location */ + .long L(UW31)-L(UW27) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW28, UW27) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ +#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX + ADV(UW29, UW28) + .byte 0x80+3, (40-(closure_FS+4))/-4 /* DW_CFA_offset %ebx */ + ADV(UW30, UW29) + .byte 0xc0+3 /* DW_CFA_restore %ebx */ +#endif + .balign 4 +L(EFDE7): + +#if !FFI_NO_RAW_API + .set L(set8),L(EFDE8)-L(SFDE8) + .long L(set8) /* FDE Length */ +L(SFDE8): + .long L(SFDE8)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW32)) /* Initial location */ + .long L(UW40)-L(UW32) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW33, UW32) + .byte 0xe, raw_closure_S_FS+4 /* DW_CFA_def_cfa_offset */ + ADV(UW34, UW33) + .byte 0x80+3, 2 /* DW_CFA_offset %ebx 2*-4 */ + ADV(UW35, UW34) + .byte 0xc0+3 /* DW_CFA_restore %ebx */ + ADV(UW36, UW35) + .byte 0xe, 4 /* DW_CFA_def_cfa_offset */ + ADV(UW37, UW36) + .byte 0xe, raw_closure_S_FS+4 /* DW_CFA_def_cfa_offset */ + ADV(UW38, UW37) + .byte 0xe, 4 /* DW_CFA_def_cfa_offset */ + ADV(UW39, UW38) + .byte 0xe, raw_closure_S_FS+4 /* DW_CFA_def_cfa_offset */ + .balign 4 +L(EFDE8): + + .set L(set9),L(EFDE9)-L(SFDE9) + .long L(set9) /* FDE Length */ +L(SFDE9): + .long L(SFDE9)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW41)) /* Initial location */ + .long L(UW52)-L(UW41) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW42, UW41) + .byte 0xe, 0 /* DW_CFA_def_cfa_offset */ + .byte 0x9, 8, 2 /* DW_CFA_register %eip, %edx */ + ADV(UW43, UW42) + .byte 0xe, 4 /* DW_CFA_def_cfa_offset */ + ADV(UW44, UW43) + .byte 0xe, 8 /* DW_CFA_def_cfa_offset */ + .byte 0x80+8, 2 /* DW_CFA_offset %eip 2*-4 */ + ADV(UW45, UW44) + .byte 0xe, 
raw_closure_T_FS+8 /* DW_CFA_def_cfa_offset */ + ADV(UW46, UW45) + .byte 0x80+3, 3 /* DW_CFA_offset %ebx 3*-4 */ + ADV(UW47, UW46) + .byte 0xc0+3 /* DW_CFA_restore %ebx */ + ADV(UW48, UW47) + .byte 0xe, 8 /* DW_CFA_def_cfa_offset */ + ADV(UW49, UW48) + .byte 0xe, raw_closure_T_FS+8 /* DW_CFA_def_cfa_offset */ + ADV(UW50, UW49) + .byte 0xe, 8 /* DW_CFA_def_cfa_offset */ + ADV(UW51, UW50) + .byte 0xe, raw_closure_T_FS+8 /* DW_CFA_def_cfa_offset */ + .balign 4 +L(EFDE9): +#endif /* !FFI_NO_RAW_API */ + #endif /* ifndef __x86_64__ */ + #if defined __ELF__ && defined __linux__ .section .note.GNU-stack,"",@progbits #endif diff --git a/src/x86/unix64.S b/src/x86/unix64.S index 58cb153..f9f9163 100644 --- a/src/x86/unix64.S +++ b/src/x86/unix64.S @@ -30,21 +30,41 @@ #define LIBFFI_ASM #include <fficonfig.h> #include <ffi.h> -#include <ffi_cfi.h> #include "internal64.h" .text +#define C2(X, Y) X ## Y +#define C1(X, Y) C2(X, Y) +#ifdef __USER_LABEL_PREFIX__ +# define C(X) C1(__USER_LABEL_PREFIX__, X) +#else +# define C(X) X +#endif + +#ifdef __APPLE__ +# define L(X) C1(L, X) +#else +# define L(X) C1(.L, X) +#endif + +#ifdef __ELF__ +# define PLT(X) X@PLT +# define ENDF(X) .type X,@function; .size X, . - X +#else +# define PLT(X) X +# define ENDF(X) +#endif + /* This macro allows the safe creation of jump tables without an actual table. The entry points into the table are all 8 bytes. The use of ORG asserts that we're at the correct location. */ /* ??? The clang assembler doesn't handle .org with symbolic expressions. */ -.macro E index - .align 8 -#ifndef __clang__ - .org 0b + \index * 8, 0x90 +#if defined(__clang__) || defined(__APPLE__) +# define E(BASE, X) .balign 8 +#else +# define E(BASE, X) .balign 8; .org BASE + X * 8 #endif -.endm /* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags, void *raddr, void (*fnaddr)(void)); @@ -53,13 +73,12 @@ for this function. This has been allocated by ffi_call. We also deallocate some of the stack that has been alloca'd. */ - .align 8 - .globl ffi_call_unix64 - .type ffi_call_unix64,@function - FFI_HIDDEN(ffi_call_unix64) + .balign 8 + .globl C(ffi_call_unix64) + FFI_HIDDEN(C(ffi_call_unix64)) -ffi_call_unix64: - cfi_startproc +C(ffi_call_unix64): +L(UW0): movq (%rsp), %r10 /* Load return address. */ leaq (%rdi, %rsi), %rax /* Find local stack base. */ movq %rdx, (%rax) /* Save flags. */ @@ -75,8 +94,9 @@ ffi_call_unix64: unwind back to ffi_call. Note that the location to which we moved the return address is (the new) CFA-8, so from the perspective of the unwind info, it hasn't moved. */ - cfi_def_cfa(%rbp, 32) - cfi_rel_offset(%rbp, 16) +L(UW1): + /* cfi_def_cfa(%rbp, 32) */ + /* cfi_rel_offset(%rbp, 16) */ movq %rdi, %r10 /* Save a copy of the register area. */ movq %r8, %r11 /* Save a copy of the target fn. */ @@ -91,8 +111,8 @@ ffi_call_unix64: movq 0x28(%r10), %r9 movl 0xb0(%r10), %eax testl %eax, %eax - jnz .Lload_sse -.Lret_from_load_sse: + jnz L(load_sse) +L(ret_from_load_sse): /* Deallocate the reg arg area, except for r10, then load via pop. */ leaq 0xb8(%r10), %rsp @@ -107,94 +127,98 @@ ffi_call_unix64: movq 0(%rbp), %rcx /* Reload flags. */ movq 8(%rbp), %rdi /* Reload raddr. */ movq 16(%rbp), %rbp /* Reload old frame pointer. */ - cfi_remember_state - cfi_def_cfa(%rsp, 8) - cfi_restore(%rbp) +L(UW2): + /* cfi_remember_state */ + /* cfi_def_cfa(%rsp, 8) */ + /* cfi_restore(%rbp) */ /* The first byte of the flags contains the FFI_TYPE. 
*/ cmpb $UNIX64_RET_LAST, %cl movzbl %cl, %r10d - leaq 0f(%rip), %r11 - ja 9f + leaq L(store_table)(%rip), %r11 + ja L(sa) leaq (%r11, %r10, 8), %r10 /* Prep for the structure cases: scratch area in redzone. */ leaq -20(%rsp), %rsi jmp *%r10 - .align 8 -0: -E UNIX64_RET_VOID + .balign 8 +L(store_table): +E(L(store_table), UNIX64_RET_VOID) ret -E UNIX64_RET_UINT8 +E(L(store_table), UNIX64_RET_UINT8) movzbl %al, %eax movq %rax, (%rdi) ret -E UNIX64_RET_UINT16 +E(L(store_table), UNIX64_RET_UINT16) movzwl %ax, %eax movq %rax, (%rdi) ret -E UNIX64_RET_UINT32 +E(L(store_table), UNIX64_RET_UINT32) movl %eax, %eax movq %rax, (%rdi) ret -E UNIX64_RET_SINT8 +E(L(store_table), UNIX64_RET_SINT8) movsbq %al, %rax movq %rax, (%rdi) ret -E UNIX64_RET_SINT16 +E(L(store_table), UNIX64_RET_SINT16) movswq %ax, %rax movq %rax, (%rdi) ret -E UNIX64_RET_SINT32 +E(L(store_table), UNIX64_RET_SINT32) cltq movq %rax, (%rdi) ret -E UNIX64_RET_INT64 +E(L(store_table), UNIX64_RET_INT64) movq %rax, (%rdi) ret -E UNIX64_RET_XMM32 +E(L(store_table), UNIX64_RET_XMM32) movd %xmm0, (%rdi) ret -E UNIX64_RET_XMM64 +E(L(store_table), UNIX64_RET_XMM64) movq %xmm0, (%rdi) ret -E UNIX64_RET_X87 +E(L(store_table), UNIX64_RET_X87) fstpt (%rdi) ret -E UNIX64_RET_X87_2 +E(L(store_table), UNIX64_RET_X87_2) fstpt (%rdi) fstpt 16(%rdi) ret -E UNIX64_RET_ST_XMM0_RAX +E(L(store_table), UNIX64_RET_ST_XMM0_RAX) movq %rax, 8(%rsi) - jmp 3f -E UNIX64_RET_ST_RAX_XMM0 + jmp L(s3) +E(L(store_table), UNIX64_RET_ST_RAX_XMM0) movq %xmm0, 8(%rsi) - jmp 2f -E UNIX64_RET_ST_XMM0_XMM1 + jmp L(s2) +E(L(store_table), UNIX64_RET_ST_XMM0_XMM1) movq %xmm1, 8(%rsi) - jmp 3f -E UNIX64_RET_ST_RAX_RDX + jmp L(s3) +E(L(store_table), UNIX64_RET_ST_RAX_RDX) movq %rdx, 8(%rsi) -2: movq %rax, (%rsi) +L(s2): + movq %rax, (%rsi) shrl $UNIX64_SIZE_SHIFT, %ecx rep movsb ret - .align 8 -3: movq %xmm0, (%rsi) + .balign 8 +L(s3): + movq %xmm0, (%rsi) shrl $UNIX64_SIZE_SHIFT, %ecx rep movsb ret -9: call abort@PLT +L(sa): call PLT(C(abort)) /* Many times we can avoid loading any SSE registers at all. It's not worth an indirect jump to load the exact set of SSE registers needed; zero or all is a good compromise. */ - .align 2 - cfi_restore_state -.Lload_sse: + .balign 2 +L(UW3): + /* cfi_restore_state */ +L(load_sse): movdqa 0x30(%r10), %xmm0 movdqa 0x40(%r10), %xmm1 movdqa 0x50(%r10), %xmm2 @@ -203,10 +227,10 @@ E UNIX64_RET_ST_RAX_RDX movdqa 0x80(%r10), %xmm5 movdqa 0x90(%r10), %xmm6 movdqa 0xa0(%r10), %xmm7 - jmp .Lret_from_load_sse + jmp L(ret_from_load_sse) - cfi_endproc - .size ffi_call_unix64,.-ffi_call_unix64 +L(UW4): +ENDF(C(ffi_call_unix64)) /* 6 general registers, 8 vector registers, 32 bytes of rvalue, 8 bytes of alignment. */ @@ -218,16 +242,15 @@ E UNIX64_RET_ST_RAX_RDX /* The location of rvalue within the red zone after deallocating the frame. */ #define ffi_closure_RED_RVALUE (ffi_closure_OFS_RVALUE - ffi_closure_FS) - .align 2 - .globl ffi_closure_unix64_sse - .type ffi_closure_unix64_sse,@function - FFI_HIDDEN(ffi_closure_unix64_sse) + .balign 2 + .globl C(ffi_closure_unix64_sse) + FFI_HIDDEN(C(ffi_closure_unix64_sse)) -ffi_closure_unix64_sse: - cfi_startproc +C(ffi_closure_unix64_sse): +L(UW5): subq $ffi_closure_FS, %rsp - /* Note clang bug 21515: adjust_cfa_offset error across endproc. 
*/ - cfi_def_cfa_offset(ffi_closure_FS + 8) +L(UW6): + /* cfi_adjust_cfa_offset(ffi_closure_FS) */ movdqa %xmm0, ffi_closure_OFS_V+0x00(%rsp) movdqa %xmm1, ffi_closure_OFS_V+0x10(%rsp) @@ -237,22 +260,21 @@ ffi_closure_unix64_sse: movdqa %xmm5, ffi_closure_OFS_V+0x50(%rsp) movdqa %xmm6, ffi_closure_OFS_V+0x60(%rsp) movdqa %xmm7, ffi_closure_OFS_V+0x70(%rsp) - jmp 0f + jmp L(sse_entry1) - cfi_endproc - .size ffi_closure_unix64_sse,.-ffi_closure_unix64_sse +L(UW7): +ENDF(C(ffi_closure_unix64_sse)) - .align 2 - .globl ffi_closure_unix64 - .type ffi_closure_unix64,@function - FFI_HIDDEN(ffi_closure_unix64) + .balign 2 + .globl C(ffi_closure_unix64) + FFI_HIDDEN(C(ffi_closure_unix64)) -ffi_closure_unix64: - cfi_startproc +C(ffi_closure_unix64): +L(UW8): subq $ffi_closure_FS, %rsp - /* Note clang bug 21515: adjust_cfa_offset error across endproc. */ - cfi_def_cfa_offset(ffi_closure_FS + 8) -0: +L(UW9): + /* cfi_adjust_cfa_offset(ffi_closure_FS) */ +L(sse_entry1): movq %rdi, ffi_closure_OFS_G+0x00(%rsp) movq %rsi, ffi_closure_OFS_G+0x08(%rsp) movq %rdx, ffi_closure_OFS_G+0x10(%rsp) @@ -269,95 +291,97 @@ ffi_closure_unix64: movq FFI_TRAMPOLINE_SIZE+8(%r10), %rsi /* Load fun */ movq FFI_TRAMPOLINE_SIZE+16(%r10), %rdx /* Load user_data */ #endif -.Ldo_closure: +L(do_closure): leaq ffi_closure_OFS_RVALUE(%rsp), %rcx /* Load rvalue */ movq %rsp, %r8 /* Load reg_args */ leaq ffi_closure_FS+8(%rsp), %r9 /* Load argp */ - call ffi_closure_unix64_inner + call C(ffi_closure_unix64_inner) /* Deallocate stack frame early; return value is now in redzone. */ addq $ffi_closure_FS, %rsp - cfi_adjust_cfa_offset(-ffi_closure_FS) +L(UW10): + /* cfi_adjust_cfa_offset(-ffi_closure_FS) */ /* The first byte of the return value contains the FFI_TYPE. */ cmpb $UNIX64_RET_LAST, %al movzbl %al, %r10d - leaq 0f(%rip), %r11 - ja 9f + leaq L(load_table)(%rip), %r11 + ja L(la) leaq (%r11, %r10, 8), %r10 leaq ffi_closure_RED_RVALUE(%rsp), %rsi jmp *%r10 - .align 8 -0: -E UNIX64_RET_VOID + .balign 8 +L(load_table): +E(L(load_table), UNIX64_RET_VOID) ret -E UNIX64_RET_UINT8 +E(L(load_table), UNIX64_RET_UINT8) movzbl (%rsi), %eax ret -E UNIX64_RET_UINT16 +E(L(load_table), UNIX64_RET_UINT16) movzwl (%rsi), %eax ret -E UNIX64_RET_UINT32 +E(L(load_table), UNIX64_RET_UINT32) movl (%rsi), %eax ret -E UNIX64_RET_SINT8 +E(L(load_table), UNIX64_RET_SINT8) movsbl (%rsi), %eax ret -E UNIX64_RET_SINT16 +E(L(load_table), UNIX64_RET_SINT16) movswl (%rsi), %eax ret -E UNIX64_RET_SINT32 +E(L(load_table), UNIX64_RET_SINT32) movl (%rsi), %eax ret -E UNIX64_RET_INT64 +E(L(load_table), UNIX64_RET_INT64) movq (%rsi), %rax ret -E UNIX64_RET_XMM32 +E(L(load_table), UNIX64_RET_XMM32) movd (%rsi), %xmm0 ret -E UNIX64_RET_XMM64 +E(L(load_table), UNIX64_RET_XMM64) movq (%rsi), %xmm0 ret -E UNIX64_RET_X87 +E(L(load_table), UNIX64_RET_X87) fldt (%rsi) ret -E UNIX64_RET_X87_2 +E(L(load_table), UNIX64_RET_X87_2) fldt 16(%rsi) fldt (%rsi) ret -E UNIX64_RET_ST_XMM0_RAX +E(L(load_table), UNIX64_RET_ST_XMM0_RAX) movq 8(%rsi), %rax - jmp 3f -E UNIX64_RET_ST_RAX_XMM0 + jmp L(l3) +E(L(load_table), UNIX64_RET_ST_RAX_XMM0) movq 8(%rsi), %xmm0 - jmp 2f -E UNIX64_RET_ST_XMM0_XMM1 + jmp L(l2) +E(L(load_table), UNIX64_RET_ST_XMM0_XMM1) movq 8(%rsi), %xmm1 - jmp 3f -E UNIX64_RET_ST_RAX_RDX + jmp L(l3) +E(L(load_table), UNIX64_RET_ST_RAX_RDX) movq 8(%rsi), %rdx -2: movq (%rsi), %rax +L(l2): + movq (%rsi), %rax ret - .align 8 -3: movq (%rsi), %xmm0 + .balign 8 +L(l3): + movq (%rsi), %xmm0 ret -9: call abort@PLT +L(la): call PLT(C(abort)) - cfi_endproc - .size 
ffi_closure_unix64,.-ffi_closure_unix64 +L(UW11): +ENDF(C(ffi_closure_unix64)) - .align 2 - .globl ffi_go_closure_unix64_sse - .type ffi_go_closure_unix64_sse,@function - FFI_HIDDEN(ffi_go_closure_unix64_sse) + .balign 2 + .globl C(ffi_go_closure_unix64_sse) + FFI_HIDDEN(C(ffi_go_closure_unix64_sse)) -ffi_go_closure_unix64_sse: - cfi_startproc +C(ffi_go_closure_unix64_sse): +L(UW12): subq $ffi_closure_FS, %rsp - /* Note clang bug 21515: adjust_cfa_offset error across endproc. */ - cfi_def_cfa_offset(ffi_closure_FS + 8) +L(UW13): + /* cfi_adjust_cfa_offset(ffi_closure_FS) */ movdqa %xmm0, ffi_closure_OFS_V+0x00(%rsp) movdqa %xmm1, ffi_closure_OFS_V+0x10(%rsp) @@ -367,22 +391,21 @@ ffi_go_closure_unix64_sse: movdqa %xmm5, ffi_closure_OFS_V+0x50(%rsp) movdqa %xmm6, ffi_closure_OFS_V+0x60(%rsp) movdqa %xmm7, ffi_closure_OFS_V+0x70(%rsp) - jmp 0f + jmp L(sse_entry2) - cfi_endproc - .size ffi_go_closure_unix64_sse,.-ffi_go_closure_unix64_sse +L(UW14): +ENDF(C(ffi_go_closure_unix64_sse)) - .align 2 - .globl ffi_go_closure_unix64 - .type ffi_go_closure_unix64,@function - FFI_HIDDEN(ffi_go_closure_unix64) + .balign 2 + .globl C(ffi_go_closure_unix64) + FFI_HIDDEN(C(ffi_go_closure_unix64)) -ffi_go_closure_unix64: - cfi_startproc +C(ffi_go_closure_unix64): +L(UW15): subq $ffi_closure_FS, %rsp - /* Note clang bug 21515: adjust_cfa_offset error across endproc. */ - cfi_def_cfa_offset(ffi_closure_FS + 8) -0: +L(UW16): + /* cfi_adjust_cfa_offset(ffi_closure_FS) */ +L(sse_entry2): movq %rdi, ffi_closure_OFS_G+0x00(%rsp) movq %rsi, ffi_closure_OFS_G+0x08(%rsp) movq %rdx, ffi_closure_OFS_G+0x10(%rsp) @@ -399,10 +422,123 @@ ffi_go_closure_unix64: movq 16(%r10), %rsi /* Load fun */ movq %r10, %rdx /* Load closure (user_data) */ #endif - jmp .Ldo_closure + jmp L(do_closure) + +L(UW17): +ENDF(C(ffi_go_closure_unix64)) + +/* Sadly, OSX cctools-as doesn't understand .cfi directives at all. */ + +#ifdef __APPLE__ +.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support +EHFrame0: +#elif defined(HAVE_AS_X86_64_UNWIND_SECTION_TYPE) +.section .eh_frame,"a",@unwind +#else +.section .eh_frame,"a",@progbits +#endif - cfi_endproc - .size ffi_go_closure_unix64,.-ffi_go_closure_unix64 +#ifdef HAVE_AS_X86_PCREL +# define PCREL(X) X - . +#else +# define PCREL(X) X@rel +#endif + +/* Simplify advancing between labels. Assume DW_CFA_advance_loc1 fits. 
*/ +#define ADV(N, P) .byte 2, L(N)-L(P) + + .balign 8 +L(CIE): + .set L(set0),L(ECIE)-L(SCIE) + .long L(set0) /* CIE Length */ +L(SCIE): + .long 0 /* CIE Identifier Tag */ + .byte 1 /* CIE Version */ + .ascii "zR\0" /* CIE Augmentation */ + .byte 1 /* CIE Code Alignment Factor */ + .byte 0x78 /* CIE Data Alignment Factor */ + .byte 0x10 /* CIE RA Column */ + .byte 1 /* Augmentation size */ + .byte 0x1b /* FDE Encoding (pcrel sdata4) */ + .byte 0xc, 7, 8 /* DW_CFA_def_cfa, %rsp offset 8 */ + .byte 0x80+16, 1 /* DW_CFA_offset, %rip offset 1*-8 */ + .balign 8 +L(ECIE): + + .set L(set1),L(EFDE1)-L(SFDE1) + .long L(set1) /* FDE Length */ +L(SFDE1): + .long L(SFDE1)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW0)) /* Initial location */ + .long L(UW4)-L(UW0) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW1, UW0) + .byte 0xc, 6, 32 /* DW_CFA_def_cfa, %rbp 32 */ + .byte 0x80+6, 2 /* DW_CFA_offset, %rbp 2*-8 */ + ADV(UW2, UW1) + .byte 0xa /* DW_CFA_remember_state */ + .byte 0xc, 7, 8 /* DW_CFA_def_cfa, %rsp 8 */ + .byte 0xc0+6 /* DW_CFA_restore, %rbp */ + ADV(UW3, UW2) + .byte 0xb /* DW_CFA_restore_state */ + .balign 8 +L(EFDE1): + + .set L(set2),L(EFDE2)-L(SFDE2) + .long L(set2) /* FDE Length */ +L(SFDE2): + .long L(SFDE2)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW5)) /* Initial location */ + .long L(UW7)-L(UW5) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW6, UW5) + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte ffi_closure_FS + 8, 1 /* uleb128, assuming 128 <= FS < 255 */ + .balign 8 +L(EFDE2): + + .set L(set3),L(EFDE3)-L(SFDE3) + .long L(set3) /* FDE Length */ +L(SFDE3): + .long L(SFDE3)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW8)) /* Initial location */ + .long L(UW11)-L(UW8) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW9, UW8) + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte ffi_closure_FS + 8, 1 /* uleb128, assuming 128 <= FS < 255 */ + ADV(UW10, UW9) + .byte 0xe, 8 /* DW_CFA_def_cfa_offset 8 */ +L(EFDE3): + + .set L(set4),L(EFDE4)-L(SFDE4) + .long L(set4) /* FDE Length */ +L(SFDE4): + .long L(SFDE4)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW12)) /* Initial location */ + .long L(UW14)-L(UW12) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW13, UW12) + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte ffi_closure_FS + 8, 1 /* uleb128, assuming 128 <= FS < 255 */ + .balign 8 +L(EFDE4): + + .set L(set5),L(EFDE5)-L(SFDE5) + .long L(set5) /* FDE Length */ +L(SFDE5): + .long L(SFDE5)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW15)) /* Initial location */ + .long L(UW17)-L(UW15) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW16, UW15) + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte ffi_closure_FS + 8, 1 /* uleb128, assuming 128 <= FS < 255 */ + .balign 8 +L(EFDE5): +#ifdef __APPLE__ + .subsections_via_symbols +#endif #endif /* __x86_64__ */ #if defined __ELF__ && defined __linux__ |
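Illustrative sketch, not part of the patch: because the OS X cctools assembler has no .cfi support, unix64.S now spells out its .eh_frame CIE and FDEs by hand, exactly as above. The fragment below reduces that technique to one CIE and one FDE for a trivial, invented function (tiny_fn), so the raw opcodes used throughout the patch can be read in one place: 0x0c = DW_CFA_def_cfa, 0x0e = DW_CFA_def_cfa_offset, 0x80+reg = DW_CFA_offset, 0xc0+reg = DW_CFA_restore, and 2 = DW_CFA_advance_loc1 (what the ADV macro emits). An ELF target is assumed; a Darwin build would instead use the __TEXT,__eh_frame section and L-prefixed labels, as the patch does.

        .text
        .globl  tiny_fn                  /* invented example function */
tiny_fn:
.Lfn0:
        pushq   %rbp
.Lfn1:
        movq    %rsp, %rbp
        xorl    %eax, %eax
        popq    %rbp
.Lfn2:
        ret
.Lfn3:

        .section .eh_frame,"a",@progbits
        .balign 8
.Lcie:
        .set    .Lset0, .Lecie-.Lscie
        .long   .Lset0                   /* CIE length */
.Lscie:
        .long   0                        /* CIE identifier tag */
        .byte   1                        /* CIE version */
        .ascii  "zR\0"                   /* augmentation */
        .byte   1                        /* code alignment factor */
        .byte   0x78                     /* data alignment factor (-8) */
        .byte   0x10                     /* return-address column (%rip) */
        .byte   1                        /* augmentation size */
        .byte   0x1b                     /* FDE encoding: pcrel sdata4 */
        .byte   0xc, 7, 8                /* DW_CFA_def_cfa %rsp, offset 8 */
        .byte   0x80+16, 1               /* DW_CFA_offset %rip at cfa-8 */
        .balign 8
.Lecie:

        .set    .Lset1, .Lefde-.Lsfde
        .long   .Lset1                   /* FDE length */
.Lsfde:
        .long   .Lsfde-.Lcie             /* CIE offset */
        .long   .Lfn0-.                  /* initial location (pcrel) */
        .long   .Lfn3-.Lfn0              /* address range */
        .byte   0                        /* augmentation size */
        .byte   2, .Lfn1-.Lfn0           /* DW_CFA_advance_loc1 to .Lfn1 */
        .byte   0xe, 16                  /* DW_CFA_def_cfa_offset 16 */
        .byte   0x80+6, 2                /* DW_CFA_offset %rbp at cfa-16 */
        .byte   2, .Lfn2-.Lfn1           /* DW_CFA_advance_loc1 to .Lfn2 */
        .byte   0xe, 8                   /* DW_CFA_def_cfa_offset 8 */
        .byte   0xc0+6                   /* DW_CFA_restore %rbp */
        .balign 8
.Lefde:

The 0x78 data alignment factor is the SLEB128 encoding of -8, so a factored operand of 2 in DW_CFA_offset means the register was saved at CFA-16, matching the comments in the hand-written FDEs above.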
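Illustrative sketch, not part of the patch: the E(BASE, X) preprocessor macro above replaces the old ".macro E" so that clang and the Apple assembler, which cannot handle .org with symbolic expressions, still build. Each handler is padded to a fixed 8-byte stride, letting the dispatcher compute base + index*8 instead of loading a pointer from a real table, while .org asserts at assembly time that no handler has overflowed its slot. The names dispatch_example and .Ltable below are invented; assemble with cc -c so the C preprocessor expands the macro.

/* GNU as branch of the macro; the clang/Apple branch keeps only .balign. */
#define E(BASE, X)  .balign 8; .org BASE + (X) * 8

        .text
        .globl  dispatch_example         /* int dispatch_example(unsigned idx) */
dispatch_example:
        movl    %edi, %eax               /* index, assumed 0 or 1; no bounds
                                            check in this sketch (the patch
                                            checks the range before jumping) */
        leaq    .Ltable(%rip), %r11      /* base of the padded entries */
        leaq    (%r11, %rax, 8), %r10    /* entry = base + index*8 */
        jmp     *%r10

        .balign 8
.Ltable:
E(.Ltable, 0)                            /* index 0: return 100 */
        movl    $100, %eax
        ret
E(.Ltable, 1)                            /* index 1: return 200 */
        movl    $200, %eax
        ret

On GNU as the .org line is the overflow check; the clang/Apple branch in the patch drops it, trading the assertion for portability.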
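Illustrative sketch, not part of the patch: the C() and L() wrappers added at the top of unix64.S let a single source file name both ELF and Mach-O symbols. On Darwin, __USER_LABEL_PREFIX__ is "_" and local labels begin with "L"; ELF uses no prefix and ".L" locals. The macro definitions below are copied from the patch; the function name is_nonzero is invented for the example.

#define C2(X, Y)  X ## Y
#define C1(X, Y)  C2(X, Y)
#ifdef __USER_LABEL_PREFIX__
# define C(X)  C1(__USER_LABEL_PREFIX__, X)   /* _X on Darwin, X on ELF */
#else
# define C(X)  X
#endif
#ifdef __APPLE__
# define L(X)  C1(L, X)                       /* Mach-O local label: LX  */
#else
# define L(X)  C1(.L, X)                      /* ELF local label: .LX    */
#endif

        .text
        .globl  C(is_nonzero)            /* int is_nonzero(int x) */
C(is_nonzero):
        testl   %edi, %edi
        jz      L(zero)
        movl    $1, %eax
        ret
L(zero):
        xorl    %eax, %eax
        ret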