diff options
author | Richard Henderson <rth@twiddle.net> | 2014-11-24 16:26:50 +0100 |
---|---|---|
committer | Richard Henderson <rth@twiddle.net> | 2015-01-05 10:01:37 -0800 |
commit | 5f35e0ffcc05a72ce0aacf228dc06d1262754660 (patch) | |
tree | 69953142d706397487f6de666460b3905ea90b2a /src/x86 | |
parent | ed1ca2777c35fe2f2751de255df3e16f17bdbd8d (diff) | |
download | libffi-5f35e0ffcc05a72ce0aacf228dc06d1262754660.tar.gz |
x86: Avoid using gas local labels
These local labels are unsupported by the Darwin cctools assembler (`as`).
Thankfully this doesn't uglify the source too much.
Diffstat (limited to 'src/x86')
-rw-r--r-- | src/x86/sysv.S | 329 | ||||
-rw-r--r-- | src/x86/unix64.S | 139 |
2 files changed, 247 insertions, 221 deletions
diff --git a/src/x86/sysv.S b/src/x86/sysv.S index f737371..49dc38d 100644 --- a/src/x86/sysv.S +++ b/src/x86/sysv.S @@ -41,6 +41,12 @@ # define C(X) X #endif +#ifdef X86_DARWIN +# define L(X) C1(L, X) +#else +# define L(X) C1(.L, X) +#endif + #ifdef __ELF__ # define ENDF(X) .type X,@function; .size X, . - X #else @@ -61,9 +67,9 @@ The use of ORG asserts that we're at the correct location. */ /* ??? The clang assembler doesn't handle .org with symbolic expressions. */ #if defined(__clang__) || defined(__APPLE__) -# define E(X) .balign 8 +# define E(BASE, X) .balign 8 #else -# define E(X) .balign 8; .org 0b + X * 8 +# define E(BASE, X) .balign 8; .org BASE + X * 8 #endif .text @@ -113,48 +119,50 @@ ffi_call_i386: andl $X86_RET_TYPE_MASK, %ecx #ifdef __PIC__ call C(__x86.get_pc_thunk.bx) -1: leal 0f-1b(%ebx, %ecx, 8), %ebx +L(pc1): + leal L(store_table)-L(pc1)(%ebx, %ecx, 8), %ebx #else - leal 0f(,%ecx, 8), %ebx + leal L(store_table)(,%ecx, 8), %ebx #endif movl 16(%ebp), %ecx /* load result address */ jmp *%ebx .balign 8 -0: -E(X86_RET_FLOAT) +L(store_table): +E(L(store_table), X86_RET_FLOAT) fstps (%ecx) - jmp 9f -E(X86_RET_DOUBLE) + jmp L(e1) +E(L(store_table), X86_RET_DOUBLE) fstpl (%ecx) - jmp 9f -E(X86_RET_LDOUBLE) + jmp L(e1) +E(L(store_table), X86_RET_LDOUBLE) fstpt (%ecx) - jmp 9f -E(X86_RET_SINT8) + jmp L(e1) +E(L(store_table), X86_RET_SINT8) movsbl %al, %eax mov %eax, (%ecx) - jmp 9f -E(X86_RET_SINT16) + jmp L(e1) +E(L(store_table), X86_RET_SINT16) movswl %ax, %eax mov %eax, (%ecx) - jmp 9f -E(X86_RET_UINT8) + jmp L(e1) +E(L(store_table), X86_RET_UINT8) movzbl %al, %eax mov %eax, (%ecx) - jmp 9f -E(X86_RET_UINT16) + jmp L(e1) +E(L(store_table), X86_RET_UINT16) movzwl %ax, %eax mov %eax, (%ecx) - jmp 9f -E(X86_RET_INT64) + jmp L(e1) +E(L(store_table), X86_RET_INT64) movl %edx, 4(%ecx) /* fallthru */ -E(X86_RET_INT32) +E(L(store_table), X86_RET_INT32) movl %eax, (%ecx) /* fallthru */ -E(X86_RET_VOID) -9: movl 8(%ebp), %ebx +E(L(store_table), X86_RET_VOID) 
+L(e1): + movl 8(%ebp), %ebx movl %ebp, %esp popl %ebp cfi_remember_state @@ -164,21 +172,21 @@ E(X86_RET_VOID) ret cfi_restore_state -E(X86_RET_STRUCTPOP) - jmp 9b -E(X86_RET_STRUCTARG) - jmp 9b -E(X86_RET_STRUCT_1B) +E(L(store_table), X86_RET_STRUCTPOP) + jmp L(e1) +E(L(store_table), X86_RET_STRUCTARG) + jmp L(e1) +E(L(store_table), X86_RET_STRUCT_1B) movb %al, (%ecx) - jmp 9b -E(X86_RET_STRUCT_2B) + jmp L(e1) +E(L(store_table), X86_RET_STRUCT_2B) movw %ax, (%ecx) - jmp 9b + jmp L(e1) /* Fill out the table so that bad values are predictable. */ -E(X86_RET_UNUSED14) +E(L(store_table), X86_RET_UNUSED14) ud2 -E(X86_RET_UNUSED15) +E(L(store_table), X86_RET_UNUSED15) ud2 cfi_endproc @@ -216,18 +224,19 @@ ENDF(ffi_call_i386) movl %esp, %ecx; /* load closure_data */ \ leal closure_FS+4(%esp), %edx; /* load incoming stack */ \ call ffi_closure_inner -#define FFI_CLOSURE_MASK_AND_JUMP \ +#define FFI_CLOSURE_MASK_AND_JUMP(N) \ andl $X86_RET_TYPE_MASK, %eax; \ - leal 0f(, %eax, 8), %eax; \ + leal L(C1(load_table,N))(, %eax, 8), %eax; \ jmp *%eax #ifdef __PIC__ # if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE # undef FFI_CLOSURE_MASK_AND_JUMP -# define FFI_CLOSURE_MASK_AND_JUMP \ +# define FFI_CLOSURE_MASK_AND_JUMP(N) \ andl $X86_RET_TYPE_MASK, %eax; \ call C(__x86.get_pc_thunk.dx); \ -1: leal 0f-1b(%edx, %eax, 8), %eax; \ +L(C1(pc,N)): \ + leal L(C1(load_table,N))-L(C1(pc,N))(%edx, %eax, 8), %eax; \ jmp *%eax # else # undef FFI_CLOSURE_CALL_INNER @@ -237,19 +246,19 @@ ENDF(ffi_call_i386) movl %ebx, 40(%esp); /* save ebx */ \ cfi_rel_offset(%ebx, 40); \ call C(__x86.get_pc_thunk.bx); /* load got register */ \ -1: addl $C(_GLOBAL_OFFSET_TABLE_), %ebx; \ + addl $C(_GLOBAL_OFFSET_TABLE_), %ebx; \ call ffi_closure_inner@PLT # undef FFI_CLOSURE_MASK_AND_JUMP -# define FFI_CLOSURE_MASK_AND_JUMP \ +# define FFI_CLOSURE_MASK_AND_JUMP(N) \ andl $X86_RET_TYPE_MASK, %eax; \ - leal 0f@GOTOFF(%ebx, %eax, 8), %eax; \ + leal L(C1(load_table,N))@GOTOFF(%ebx, %eax, 8), 
%eax; \ movl 40(%esp), %ebx; /* restore ebx */ \ cfi_restore(%ebx); \ jmp *%eax # endif /* DARWIN || HIDDEN */ #endif /* __PIC__ */ -#define FFI_GO_CLOSURE(suffix, chain, t1, t2) \ +#define FFI_GO_CLOSURE(suffix, chain, t1, t2, entry) \ .balign 16; \ .globl C(C1(ffi_go_closure_,suffix)); \ FFI_HIDDEN(C(C1(ffi_go_closure_,suffix))); \ @@ -264,12 +273,12 @@ C(C1(ffi_go_closure_,suffix)): \ movl t1, 28(%esp); \ movl t2, 32(%esp); \ movl chain, 36(%esp); /* closure is user_data */ \ - jmp 88f; \ + jmp entry; \ cfi_endproc; \ ENDF(C(C1(ffi_go_closure_,suffix))) -FFI_GO_CLOSURE(EAX, %eax, %edx, %ecx) -FFI_GO_CLOSURE(ECX, %ecx, %edx, %eax) +FFI_GO_CLOSURE(EAX, %eax, %edx, %ecx, L(do_closure_i386)) +FFI_GO_CLOSURE(ECX, %ecx, %edx, %eax, L(do_closure_i386)) /* The closure entry points are reached from the ffi_closure trampoline. On entry, %eax contains the address of the ffi_closure. */ @@ -287,70 +296,72 @@ C(ffi_closure_i386): FFI_CLOSURE_SAVE_REGS FFI_CLOSURE_COPY_TRAMP_DATA -88: /* Entry point from preceeding Go closures. */ + /* Entry point from preceeding Go closures. 
*/ +L(do_closure_i386): FFI_CLOSURE_CALL_INNER - FFI_CLOSURE_MASK_AND_JUMP + FFI_CLOSURE_MASK_AND_JUMP(2) .balign 8 -0: -E(X86_RET_FLOAT) +L(load_table2): +E(L(load_table2), X86_RET_FLOAT) flds (%esp) - jmp 9f -E(X86_RET_DOUBLE) + jmp L(e2) +E(L(load_table2), X86_RET_DOUBLE) fldl (%esp) - jmp 9f -E(X86_RET_LDOUBLE) + jmp L(e2) +E(L(load_table2), X86_RET_LDOUBLE) fldt (%esp) - jmp 9f -E(X86_RET_SINT8) + jmp L(e2) +E(L(load_table2), X86_RET_SINT8) movsbl (%esp), %eax - jmp 9f -E(X86_RET_SINT16) + jmp L(e2) +E(L(load_table2), X86_RET_SINT16) movswl (%esp), %eax - jmp 9f -E(X86_RET_UINT8) + jmp L(e2) +E(L(load_table2), X86_RET_UINT8) movzbl (%esp), %eax - jmp 9f -E(X86_RET_UINT16) + jmp L(e2) +E(L(load_table2), X86_RET_UINT16) movzwl (%esp), %eax - jmp 9f -E(X86_RET_INT64) + jmp L(e2) +E(L(load_table2), X86_RET_INT64) movl 4(%esp), %edx /* fallthru */ -E(X86_RET_INT32) +E(L(load_table2), X86_RET_INT32) movl (%esp), %eax /* fallthru */ -E(X86_RET_VOID) -9: addl $closure_FS, %esp +E(L(load_table2), X86_RET_VOID) +L(e2): + addl $closure_FS, %esp cfi_adjust_cfa_offset(-closure_FS) ret cfi_adjust_cfa_offset(closure_FS) -E(X86_RET_STRUCTPOP) +E(L(load_table2), X86_RET_STRUCTPOP) addl $closure_FS, %esp cfi_adjust_cfa_offset(-closure_FS) ret $4 cfi_adjust_cfa_offset(closure_FS) -E(X86_RET_STRUCTARG) +E(L(load_table2), X86_RET_STRUCTARG) movl (%esp), %eax - jmp 9b -E(X86_RET_STRUCT_1B) + jmp L(e2) +E(L(load_table2), X86_RET_STRUCT_1B) movzbl (%esp), %eax - jmp 9b -E(X86_RET_STRUCT_2B) + jmp L(e2) +E(L(load_table2), X86_RET_STRUCT_2B) movzwl (%esp), %eax - jmp 9b + jmp L(e2) /* Fill out the table so that bad values are predictable. 
*/ -E(X86_RET_UNUSED14) +E(L(load_table2), X86_RET_UNUSED14) ud2 -E(X86_RET_UNUSED15) +E(L(load_table2), X86_RET_UNUSED15) ud2 cfi_endproc ENDF(C(ffi_closure_i386)) -FFI_GO_CLOSURE(STDCALL, %ecx, %edx, %eax) +FFI_GO_CLOSURE(STDCALL, %ecx, %edx, %eax, L(do_closure_STDCALL)) /* For REGISTER, we have no available parameter registers, and so we enter here having pushed the closure onto the stack. */ @@ -371,7 +382,7 @@ C(ffi_closure_REGISTER): movl closure_FS-4(%esp), %ecx /* load retaddr */ movl closure_FS(%esp), %eax /* load closure */ movl %ecx, closure_FS(%esp) /* move retaddr */ - jmp 0f + jmp L(do_closure_REGISTER) cfi_endproc ENDF(C(ffi_closure_REGISTER)) @@ -391,11 +402,13 @@ C(ffi_closure_STDCALL): FFI_CLOSURE_SAVE_REGS -0: /* Entry point from ffi_closure_REGISTER. */ + /* Entry point from ffi_closure_REGISTER. */ +L(do_closure_REGISTER): FFI_CLOSURE_COPY_TRAMP_DATA -88: /* Entry point from preceeding Go closure. */ + /* Entry point from preceeding Go closure. */ +L(do_closure_STDCALL): FFI_CLOSURE_CALL_INNER @@ -411,70 +424,70 @@ C(ffi_closure_STDCALL): there is always a window between the mov and the ret which will be wrong from one point of view or another. 
*/ - FFI_CLOSURE_MASK_AND_JUMP + FFI_CLOSURE_MASK_AND_JUMP(3) .balign 8 -0: -E(X86_RET_FLOAT) +L(load_table3): +E(L(load_table3), X86_RET_FLOAT) flds (%esp) movl %ecx, %esp ret -E(X86_RET_DOUBLE) +E(L(load_table3), X86_RET_DOUBLE) fldl (%esp) movl %ecx, %esp ret -E(X86_RET_LDOUBLE) +E(L(load_table3), X86_RET_LDOUBLE) fldt (%esp) movl %ecx, %esp ret -E(X86_RET_SINT8) +E(L(load_table3), X86_RET_SINT8) movsbl (%esp), %eax movl %ecx, %esp ret -E(X86_RET_SINT16) +E(L(load_table3), X86_RET_SINT16) movswl (%esp), %eax movl %ecx, %esp ret -E(X86_RET_UINT8) +E(L(load_table3), X86_RET_UINT8) movzbl (%esp), %eax movl %ecx, %esp ret -E(X86_RET_UINT16) +E(L(load_table3), X86_RET_UINT16) movzwl (%esp), %eax movl %ecx, %esp ret -E(X86_RET_INT64) +E(L(load_table3), X86_RET_INT64) popl %eax popl %edx movl %ecx, %esp ret -E(X86_RET_INT32) +E(L(load_table3), X86_RET_INT32) movl (%esp), %eax movl %ecx, %esp ret -E(X86_RET_VOID) +E(L(load_table3), X86_RET_VOID) movl %ecx, %esp ret -E(X86_RET_STRUCTPOP) +E(L(load_table3), X86_RET_STRUCTPOP) movl %ecx, %esp ret -E(X86_RET_STRUCTARG) +E(L(load_table3), X86_RET_STRUCTARG) movl (%esp), %eax movl %ecx, %esp ret -E(X86_RET_STRUCT_1B) +E(L(load_table3), X86_RET_STRUCT_1B) movzbl (%esp), %eax movl %ecx, %esp ret -E(X86_RET_STRUCT_2B) +E(L(load_table3), X86_RET_STRUCT_2B) movzwl (%esp), %eax movl %ecx, %esp ret /* Fill out the table so that bad values are predictable. 
*/ -E(X86_RET_UNUSED14) +E(L(load_table3), X86_RET_UNUSED14) ud2 -E(X86_RET_UNUSED15) +E(L(load_table3), X86_RET_UNUSED15) ud2 cfi_endproc @@ -509,67 +522,69 @@ C(ffi_closure_raw_SYSV): andl $X86_RET_TYPE_MASK, %eax #ifdef __PIC__ call C(__x86.get_pc_thunk.bx) -1: leal 0f-1b(%ebx, %eax, 8), %eax +L(pc4): + leal L(load_table4)-L(pc4)(%ebx, %eax, 8), %eax #else - leal 0f(,%eax, 8), %eax + leal L(load_table4)(,%eax, 8), %eax #endif movl raw_closure_S_FS-4(%esp), %ebx cfi_restore(%ebx) jmp *%eax .balign 8 -0: -E(X86_RET_FLOAT) +L(load_table4): +E(L(load_table4), X86_RET_FLOAT) flds 16(%esp) - jmp 9f -E(X86_RET_DOUBLE) + jmp L(e4) +E(L(load_table4), X86_RET_DOUBLE) fldl 16(%esp) - jmp 9f -E(X86_RET_LDOUBLE) + jmp L(e4) +E(L(load_table4), X86_RET_LDOUBLE) fldt 16(%esp) - jmp 9f -E(X86_RET_SINT8) + jmp L(e4) +E(L(load_table4), X86_RET_SINT8) movsbl 16(%esp), %eax - jmp 9f -E(X86_RET_SINT16) + jmp L(e4) +E(L(load_table4), X86_RET_SINT16) movswl 16(%esp), %eax - jmp 9f -E(X86_RET_UINT8) + jmp L(e4) +E(L(load_table4), X86_RET_UINT8) movzbl 16(%esp), %eax - jmp 9f -E(X86_RET_UINT16) + jmp L(e4) +E(L(load_table4), X86_RET_UINT16) movzwl 16(%esp), %eax - jmp 9f -E(X86_RET_INT64) + jmp L(e4) +E(L(load_table4), X86_RET_INT64) movl 16+4(%esp), %edx /* fallthru */ -E(X86_RET_INT32) +E(L(load_table4), X86_RET_INT32) movl 16(%esp), %eax /* fallthru */ -E(X86_RET_VOID) -9: addl $raw_closure_S_FS, %esp +E(L(load_table4), X86_RET_VOID) +L(e4): + addl $raw_closure_S_FS, %esp cfi_adjust_cfa_offset(-raw_closure_S_FS) ret cfi_adjust_cfa_offset(raw_closure_S_FS) -E(X86_RET_STRUCTPOP) +E(L(load_table4), X86_RET_STRUCTPOP) addl $raw_closure_S_FS, %esp cfi_adjust_cfa_offset(-raw_closure_S_FS) ret $4 cfi_adjust_cfa_offset(raw_closure_S_FS) -E(X86_RET_STRUCTARG) +E(L(load_table4), X86_RET_STRUCTARG) movl 16(%esp), %eax - jmp 9b -E(X86_RET_STRUCT_1B) + jmp L(e4) +E(L(load_table4), X86_RET_STRUCT_1B) movzbl 16(%esp), %eax - jmp 9b -E(X86_RET_STRUCT_2B) + jmp L(e4) +E(L(load_table4), 
X86_RET_STRUCT_2B) movzwl 16(%esp), %eax - jmp 9b + jmp L(e4) /* Fill out the table so that bad values are predictable. */ -E(X86_RET_UNUSED14) +E(L(load_table4), X86_RET_UNUSED14) ud2 -E(X86_RET_UNUSED15) +E(L(load_table4), X86_RET_UNUSED15) ud2 cfi_endproc @@ -613,68 +628,70 @@ C(ffi_closure_raw_THISCALL): andl $X86_RET_TYPE_MASK, %eax #ifdef __PIC__ call C(__x86.get_pc_thunk.bx) -1: leal 0f-1b(%ebx, %eax, 8), %eax +L(pc5): + leal L(load_table5)-L(pc5)(%ebx, %eax, 8), %eax #else - leal 0f(,%eax, 8), %eax + leal L(load_table5)(,%eax, 8), %eax #endif movl raw_closure_T_FS-4(%esp), %ebx cfi_restore(%ebx) jmp *%eax .balign 8 -0: -E(X86_RET_FLOAT) +L(load_table5): +E(L(load_table5), X86_RET_FLOAT) flds 16(%esp) - jmp 9f -E(X86_RET_DOUBLE) + jmp L(e5) +E(L(load_table5), X86_RET_DOUBLE) fldl 16(%esp) - jmp 9f -E(X86_RET_LDOUBLE) + jmp L(e5) +E(L(load_table5), X86_RET_LDOUBLE) fldt 16(%esp) - jmp 9f -E(X86_RET_SINT8) + jmp L(e5) +E(L(load_table5), X86_RET_SINT8) movsbl 16(%esp), %eax - jmp 9f -E(X86_RET_SINT16) + jmp L(e5) +E(L(load_table5), X86_RET_SINT16) movswl 16(%esp), %eax - jmp 9f -E(X86_RET_UINT8) + jmp L(e5) +E(L(load_table5), X86_RET_UINT8) movzbl 16(%esp), %eax - jmp 9f -E(X86_RET_UINT16) + jmp L(e5) +E(L(load_table5), X86_RET_UINT16) movzwl 16(%esp), %eax - jmp 9f -E(X86_RET_INT64) + jmp L(e5) +E(L(load_table5), X86_RET_INT64) movl 16+4(%esp), %edx /* fallthru */ -E(X86_RET_INT32) +E(L(load_table5), X86_RET_INT32) movl 16(%esp), %eax /* fallthru */ -E(X86_RET_VOID) -9: addl $raw_closure_T_FS, %esp +E(L(load_table5), X86_RET_VOID) +L(e5): + addl $raw_closure_T_FS, %esp cfi_adjust_cfa_offset(-raw_closure_T_FS) /* Remove the extra %ecx argument we pushed. 
*/ ret $4 cfi_adjust_cfa_offset(raw_closure_T_FS) -E(X86_RET_STRUCTPOP) +E(L(load_table5), X86_RET_STRUCTPOP) addl $raw_closure_T_FS, %esp cfi_adjust_cfa_offset(-raw_closure_T_FS) ret $8 cfi_adjust_cfa_offset(raw_closure_T_FS) -E(X86_RET_STRUCTARG) +E(L(load_table5), X86_RET_STRUCTARG) movl 16(%esp), %eax - jmp 9b -E(X86_RET_STRUCT_1B) + jmp L(e5) +E(L(load_table5), X86_RET_STRUCT_1B) movzbl 16(%esp), %eax - jmp 9b -E(X86_RET_STRUCT_2B) + jmp L(e5) +E(L(load_table5), X86_RET_STRUCT_2B) movzwl 16(%esp), %eax - jmp 9b + jmp L(e5) /* Fill out the table so that bad values are predictable. */ -E(X86_RET_UNUSED14) +E(L(load_table5), X86_RET_UNUSED14) ud2 -E(X86_RET_UNUSED15) +E(L(load_table5), X86_RET_UNUSED15) ud2 cfi_endproc diff --git a/src/x86/unix64.S b/src/x86/unix64.S index 42880d5..4c32213 100644 --- a/src/x86/unix64.S +++ b/src/x86/unix64.S @@ -43,6 +43,12 @@ # define C(X) X #endif +#ifdef __APPLE__ +# define L(X) C1(L, X) +#else +# define L(X) C1(.L, X) +#endif + #ifdef __ELF__ # define PLT(X) X@PLT # define ENDF(X) .type X,@function; .size X, . - X @@ -55,12 +61,11 @@ actual table. The entry points into the table are all 8 bytes. The use of ORG asserts that we're at the correct location. */ /* ??? The clang assembler doesn't handle .org with symbolic expressions. */ -.macro E index - .balign 8 -#if !defined(__clang__) && !defined(__APPLE__) - .org 0b + \index * 8, 0x90 +#if defined(__clang__) || defined(__APPLE__) +# define E(BASE, X) .balign 8 +#else +# define E(BASE, X) .balign 8; .org BASE + X * 8 #endif -.endm /* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags, void *raddr, void (*fnaddr)(void)); @@ -106,8 +111,8 @@ C(ffi_call_unix64): movq 0x28(%r10), %r9 movl 0xb0(%r10), %eax testl %eax, %eax - jnz .Lload_sse -.Lret_from_load_sse: + jnz L(load_sse) +L(ret_from_load_sse): /* Deallocate the reg arg area, except for r10, then load via pop. 
*/ leaq 0xb8(%r10), %rsp @@ -129,8 +134,8 @@ C(ffi_call_unix64): /* The first byte of the flags contains the FFI_TYPE. */ cmpb $UNIX64_RET_LAST, %cl movzbl %cl, %r10d - leaq 0f(%rip), %r11 - ja 9f + leaq L(store_table)(%rip), %r11 + ja L(sa) leaq (%r11, %r10, 8), %r10 /* Prep for the structure cases: scratch area in redzone. */ @@ -138,78 +143,80 @@ C(ffi_call_unix64): jmp *%r10 .balign 8 -0: -E UNIX64_RET_VOID +L(store_table): +E(L(store_table), UNIX64_RET_VOID) ret -E UNIX64_RET_UINT8 +E(L(store_table), UNIX64_RET_UINT8) movzbl %al, %eax movq %rax, (%rdi) ret -E UNIX64_RET_UINT16 +E(L(store_table), UNIX64_RET_UINT16) movzwl %ax, %eax movq %rax, (%rdi) ret -E UNIX64_RET_UINT32 +E(L(store_table), UNIX64_RET_UINT32) movl %eax, %eax movq %rax, (%rdi) ret -E UNIX64_RET_SINT8 +E(L(store_table), UNIX64_RET_SINT8) movsbq %al, %rax movq %rax, (%rdi) ret -E UNIX64_RET_SINT16 +E(L(store_table), UNIX64_RET_SINT16) movswq %ax, %rax movq %rax, (%rdi) ret -E UNIX64_RET_SINT32 +E(L(store_table), UNIX64_RET_SINT32) cltq movq %rax, (%rdi) ret -E UNIX64_RET_INT64 +E(L(store_table), UNIX64_RET_INT64) movq %rax, (%rdi) ret -E UNIX64_RET_XMM32 +E(L(store_table), UNIX64_RET_XMM32) movd %xmm0, (%rdi) ret -E UNIX64_RET_XMM64 +E(L(store_table), UNIX64_RET_XMM64) movq %xmm0, (%rdi) ret -E UNIX64_RET_X87 +E(L(store_table), UNIX64_RET_X87) fstpt (%rdi) ret -E UNIX64_RET_X87_2 +E(L(store_table), UNIX64_RET_X87_2) fstpt (%rdi) fstpt 16(%rdi) ret -E UNIX64_RET_ST_XMM0_RAX +E(L(store_table), UNIX64_RET_ST_XMM0_RAX) movq %rax, 8(%rsi) - jmp 3f -E UNIX64_RET_ST_RAX_XMM0 + jmp L(s3) +E(L(store_table), UNIX64_RET_ST_RAX_XMM0) movq %xmm0, 8(%rsi) - jmp 2f -E UNIX64_RET_ST_XMM0_XMM1 + jmp L(s2) +E(L(store_table), UNIX64_RET_ST_XMM0_XMM1) movq %xmm1, 8(%rsi) - jmp 3f -E UNIX64_RET_ST_RAX_RDX + jmp L(s3) +E(L(store_table), UNIX64_RET_ST_RAX_RDX) movq %rdx, 8(%rsi) -2: movq %rax, (%rsi) +L(s2): + movq %rax, (%rsi) shrl $UNIX64_SIZE_SHIFT, %ecx rep movsb ret .balign 8 -3: movq %xmm0, (%rsi) +L(s3): + movq 
%xmm0, (%rsi) shrl $UNIX64_SIZE_SHIFT, %ecx rep movsb ret -9: call PLT(C(abort)) +L(sa): call PLT(C(abort)) /* Many times we can avoid loading any SSE registers at all. It's not worth an indirect jump to load the exact set of SSE registers needed; zero or all is a good compromise. */ .balign 2 cfi_restore_state -.Lload_sse: +L(load_sse): movdqa 0x30(%r10), %xmm0 movdqa 0x40(%r10), %xmm1 movdqa 0x50(%r10), %xmm2 @@ -218,7 +225,7 @@ E UNIX64_RET_ST_RAX_RDX movdqa 0x80(%r10), %xmm5 movdqa 0x90(%r10), %xmm6 movdqa 0xa0(%r10), %xmm7 - jmp .Lret_from_load_sse + jmp L(ret_from_load_sse) cfi_endproc ENDF(C(ffi_call_unix64)) @@ -251,7 +258,7 @@ C(ffi_closure_unix64_sse): movdqa %xmm5, ffi_closure_OFS_V+0x50(%rsp) movdqa %xmm6, ffi_closure_OFS_V+0x60(%rsp) movdqa %xmm7, ffi_closure_OFS_V+0x70(%rsp) - jmp 0f + jmp L(sse_entry1) cfi_endproc ENDF(C(ffi_closure_unix64_sse)) @@ -265,7 +272,7 @@ C(ffi_closure_unix64): subq $ffi_closure_FS, %rsp /* Note clang bug 21515: adjust_cfa_offset error across endproc. */ cfi_def_cfa_offset(ffi_closure_FS + 8) -0: +L(sse_entry1): movq %rdi, ffi_closure_OFS_G+0x00(%rsp) movq %rsi, ffi_closure_OFS_G+0x08(%rsp) movq %rdx, ffi_closure_OFS_G+0x10(%rsp) @@ -282,7 +289,7 @@ C(ffi_closure_unix64): movq FFI_TRAMPOLINE_SIZE+8(%r10), %rsi /* Load fun */ movq FFI_TRAMPOLINE_SIZE+16(%r10), %rdx /* Load user_data */ #endif -.Ldo_closure: +L(do_closure): leaq ffi_closure_OFS_RVALUE(%rsp), %rcx /* Load rvalue */ movq %rsp, %r8 /* Load reg_args */ leaq ffi_closure_FS+8(%rsp), %r9 /* Load argp */ @@ -295,68 +302,70 @@ C(ffi_closure_unix64): /* The first byte of the return value contains the FFI_TYPE. 
*/ cmpb $UNIX64_RET_LAST, %al movzbl %al, %r10d - leaq 0f(%rip), %r11 - ja 9f + leaq L(load_table)(%rip), %r11 + ja L(la) leaq (%r11, %r10, 8), %r10 leaq ffi_closure_RED_RVALUE(%rsp), %rsi jmp *%r10 .balign 8 -0: -E UNIX64_RET_VOID +L(load_table): +E(L(load_table), UNIX64_RET_VOID) ret -E UNIX64_RET_UINT8 +E(L(load_table), UNIX64_RET_UINT8) movzbl (%rsi), %eax ret -E UNIX64_RET_UINT16 +E(L(load_table), UNIX64_RET_UINT16) movzwl (%rsi), %eax ret -E UNIX64_RET_UINT32 +E(L(load_table), UNIX64_RET_UINT32) movl (%rsi), %eax ret -E UNIX64_RET_SINT8 +E(L(load_table), UNIX64_RET_SINT8) movsbl (%rsi), %eax ret -E UNIX64_RET_SINT16 +E(L(load_table), UNIX64_RET_SINT16) movswl (%rsi), %eax ret -E UNIX64_RET_SINT32 +E(L(load_table), UNIX64_RET_SINT32) movl (%rsi), %eax ret -E UNIX64_RET_INT64 +E(L(load_table), UNIX64_RET_INT64) movq (%rsi), %rax ret -E UNIX64_RET_XMM32 +E(L(load_table), UNIX64_RET_XMM32) movd (%rsi), %xmm0 ret -E UNIX64_RET_XMM64 +E(L(load_table), UNIX64_RET_XMM64) movq (%rsi), %xmm0 ret -E UNIX64_RET_X87 +E(L(load_table), UNIX64_RET_X87) fldt (%rsi) ret -E UNIX64_RET_X87_2 +E(L(load_table), UNIX64_RET_X87_2) fldt 16(%rsi) fldt (%rsi) ret -E UNIX64_RET_ST_XMM0_RAX +E(L(load_table), UNIX64_RET_ST_XMM0_RAX) movq 8(%rsi), %rax - jmp 3f -E UNIX64_RET_ST_RAX_XMM0 + jmp L(l3) +E(L(load_table), UNIX64_RET_ST_RAX_XMM0) movq 8(%rsi), %xmm0 - jmp 2f -E UNIX64_RET_ST_XMM0_XMM1 + jmp L(l2) +E(L(load_table), UNIX64_RET_ST_XMM0_XMM1) movq 8(%rsi), %xmm1 - jmp 3f -E UNIX64_RET_ST_RAX_RDX + jmp L(l3) +E(L(load_table), UNIX64_RET_ST_RAX_RDX) movq 8(%rsi), %rdx -2: movq (%rsi), %rax +L(l2): + movq (%rsi), %rax ret .balign 8 -3: movq (%rsi), %xmm0 +L(l3): + movq (%rsi), %xmm0 ret -9: call PLT(C(abort)) +L(la): call PLT(C(abort)) cfi_endproc ENDF(C(ffi_closure_unix64)) @@ -379,7 +388,7 @@ C(ffi_go_closure_unix64_sse): movdqa %xmm5, ffi_closure_OFS_V+0x50(%rsp) movdqa %xmm6, ffi_closure_OFS_V+0x60(%rsp) movdqa %xmm7, ffi_closure_OFS_V+0x70(%rsp) - jmp 0f + jmp L(sse_entry2) 
cfi_endproc ENDF(C(ffi_go_closure_unix64_sse)) @@ -393,7 +402,7 @@ C(ffi_go_closure_unix64): subq $ffi_closure_FS, %rsp /* Note clang bug 21515: adjust_cfa_offset error across endproc. */ cfi_def_cfa_offset(ffi_closure_FS + 8) -0: +L(sse_entry2): movq %rdi, ffi_closure_OFS_G+0x00(%rsp) movq %rsi, ffi_closure_OFS_G+0x08(%rsp) movq %rdx, ffi_closure_OFS_G+0x10(%rsp) @@ -410,7 +419,7 @@ C(ffi_go_closure_unix64): movq 16(%r10), %rsi /* Load fun */ movq %r10, %rdx /* Load closure (user_data) */ #endif - jmp .Ldo_closure + jmp L(do_closure) cfi_endproc ENDF(C(ffi_go_closure_unix64)) |