diff options
author | Richard Henderson <rth@twiddle.net> | 2014-11-24 16:26:50 +0100 |
---|---|---|
committer | Richard Henderson <rth@twiddle.net> | 2015-01-05 10:01:37 -0800 |
commit | 5f35e0ffcc05a72ce0aacf228dc06d1262754660 (patch) | |
tree | 69953142d706397487f6de666460b3905ea90b2a /src/x86 | |
parent | ed1ca2777c35fe2f2751de255df3e16f17bdbd8d (diff) | |
download | libffi-5f35e0ffcc05a72ce0aacf228dc06d1262754660.tar.gz |
x86: Avoid using gas local labels
These local labels are unsupported by the Darwin cctools assembler (`as`).
Thankfully this doesn't uglify the source too much.
Diffstat (limited to 'src/x86')
-rw-r--r-- | src/x86/sysv.S | 329 | ||||
-rw-r--r-- | src/x86/unix64.S | 139 |
2 files changed, 247 insertions, 221 deletions
diff --git a/src/x86/sysv.S b/src/x86/sysv.S index f737371..49dc38d 100644 --- a/src/x86/sysv.S +++ b/src/x86/sysv.S @@ -41,6 +41,12 @@ # define C(X) X #endif +#ifdef X86_DARWIN +# define L(X) C1(L, X) +#else +# define L(X) C1(.L, X) +#endif + #ifdef __ELF__ # define ENDF(X) .type X,@function; .size X, . - X #else @@ -61,9 +67,9 @@ The use of ORG asserts that we're at the correct location. */ /* ??? The clang assembler doesn't handle .org with symbolic expressions. */ #if defined(__clang__) || defined(__APPLE__) -# define E(X) .balign 8 +# define E(BASE, X) .balign 8 #else -# define E(X) .balign 8; .org 0b + X * 8 +# define E(BASE, X) .balign 8; .org BASE + X * 8 #endif .text @@ -113,48 +119,50 @@ ffi_call_i386: andl $X86_RET_TYPE_MASK, %ecx #ifdef __PIC__ call C(__x86.get_pc_thunk.bx) -1: leal 0f-1b(%ebx, %ecx, 8), %ebx +L(pc1): + leal L(store_table)-L(pc1)(%ebx, %ecx, 8), %ebx #else - leal 0f(,%ecx, 8), %ebx + leal L(store_table)(,%ecx, 8), %ebx #endif movl 16(%ebp), %ecx /* load result address */ jmp *%ebx .balign 8 -0: -E(X86_RET_FLOAT) +L(store_table): +E(L(store_table), X86_RET_FLOAT) fstps (%ecx) - jmp 9f -E(X86_RET_DOUBLE) + jmp L(e1) +E(L(store_table), X86_RET_DOUBLE) fstpl (%ecx) - jmp 9f -E(X86_RET_LDOUBLE) + jmp L(e1) +E(L(store_table), X86_RET_LDOUBLE) fstpt (%ecx) - jmp 9f -E(X86_RET_SINT8) + jmp L(e1) +E(L(store_table), X86_RET_SINT8) movsbl %al, %eax mov %eax, (%ecx) - jmp 9f -E(X86_RET_SINT16) + jmp L(e1) +E(L(store_table), X86_RET_SINT16) movswl %ax, %eax mov %eax, (%ecx) - jmp 9f -E(X86_RET_UINT8) + jmp L(e1) +E(L(store_table), X86_RET_UINT8) movzbl %al, %eax mov %eax, (%ecx) - jmp 9f -E(X86_RET_UINT16) + jmp L(e1) +E(L(store_table), X86_RET_UINT16) movzwl %ax, %eax mov %eax, (%ecx) - jmp 9f -E(X86_RET_INT64) + jmp L(e1) +E(L(store_table), X86_RET_INT64) movl %edx, 4(%ecx) /* fallthru */ -E(X86_RET_INT32) +E(L(store_table), X86_RET_INT32) movl %eax, (%ecx) /* fallthru */ -E(X86_RET_VOID) -9: movl 8(%ebp), %ebx +E(L(store_table), X86_RET_VOID) 
+L(e1): + movl 8(%ebp), %ebx movl %ebp, %esp popl %ebp cfi_remember_state @@ -164,21 +172,21 @@ E(X86_RET_VOID) ret cfi_restore_state -E(X86_RET_STRUCTPOP) - jmp 9b -E(X86_RET_STRUCTARG) - jmp 9b -E(X86_RET_STRUCT_1B) +E(L(store_table), X86_RET_STRUCTPOP) + jmp L(e1) +E(L(store_table), X86_RET_STRUCTARG) + jmp L(e1) +E(L(store_table), X86_RET_STRUCT_1B) movb %al, (%ecx) - jmp 9b -E(X86_RET_STRUCT_2B) + jmp L(e1) +E(L(store_table), X86_RET_STRUCT_2B) movw %ax, (%ecx) - jmp 9b + jmp L(e1) /* Fill out the table so that bad values are predictable. */ -E(X86_RET_UNUSED14) +E(L(store_table), X86_RET_UNUSED14) ud2 -E(X86_RET_UNUSED15) +E(L(store_table), X86_RET_UNUSED15) ud2 cfi_endproc @@ -216,18 +224,19 @@ ENDF(ffi_call_i386) movl %esp, %ecx; /* load closure_data */ \ leal closure_FS+4(%esp), %edx; /* load incoming stack */ \ call ffi_closure_inner -#define FFI_CLOSURE_MASK_AND_JUMP \ +#define FFI_CLOSURE_MASK_AND_JUMP(N) \ andl $X86_RET_TYPE_MASK, %eax; \ - leal 0f(, %eax, 8), %eax; \ + leal L(C1(load_table,N))(, %eax, 8), %eax; \ jmp *%eax #ifdef __PIC__ # if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE # undef FFI_CLOSURE_MASK_AND_JUMP -# define FFI_CLOSURE_MASK_AND_JUMP \ +# define FFI_CLOSURE_MASK_AND_JUMP(N) \ andl $X86_RET_TYPE_MASK, %eax; \ call C(__x86.get_pc_thunk.dx); \ -1: leal 0f-1b(%edx, %eax, 8), %eax; \ +L(C1(pc,N)): \ + leal L(C1(load_table,N))-L(C1(pc,N))(%edx, %eax, 8), %eax; \ jmp *%eax # else # undef FFI_CLOSURE_CALL_INNER @@ -237,19 +246,19 @@ ENDF(ffi_call_i386) movl %ebx, 40(%esp); /* save ebx */ \ cfi_rel_offset(%ebx, 40); \ call C(__x86.get_pc_thunk.bx); /* load got register */ \ -1: addl $C(_GLOBAL_OFFSET_TABLE_), %ebx; \ + addl $C(_GLOBAL_OFFSET_TABLE_), %ebx; \ call ffi_closure_inner@PLT # undef FFI_CLOSURE_MASK_AND_JUMP -# define FFI_CLOSURE_MASK_AND_JUMP \ +# define FFI_CLOSURE_MASK_AND_JUMP(N) \ andl $X86_RET_TYPE_MASK, %eax; \ - leal 0f@GOTOFF(%ebx, %eax, 8), %eax; \ + leal L(C1(load_table,N))@GOTOFF(%ebx, %eax, 8), 
%eax; \ movl 40(%esp), %ebx; /* restore ebx */ \ cfi_restore(%ebx); \ jmp *%eax # endif /* DARWIN || HIDDEN */ #endif /* __PIC__ */ -#define FFI_GO_CLOSURE(suffix, chain, t1, t2) \ +#define FFI_GO_CLOSURE(suffix, chain, t1, t2, entry) \ .balign 16; \ .globl C(C1(ffi_go_closure_,suffix)); \ FFI_HIDDEN(C(C1(ffi_go_closure_,suffix))); \ @@ -264,12 +273,12 @@ C(C1(ffi_go_closure_,suffix)): \ movl t1, 28(%esp); \ movl t2, 32(%esp); \ movl chain, 36(%esp); /* closure is user_data */ \ - jmp 88f; \ + jmp entry; \ cfi_endproc; \ ENDF(C(C1(ffi_go_closure_,suffix))) -FFI_GO_CLOSURE(EAX, %eax, %edx, %ecx) -FFI_GO_CLOSURE(ECX, %ecx, %edx, %eax) +FFI_GO_CLOSURE(EAX, %eax, %edx, %ecx, L(do_closure_i386)) +FFI_GO_CLOSURE(ECX, %ecx, %edx, %eax, L(do_closure_i386)) /* The closure entry points are reached from the ffi_closure trampoline. On entry, %eax contains the address of the ffi_closure. */ @@ -287,70 +296,72 @@ C(ffi_closure_i386): FFI_CLOSURE_SAVE_REGS FFI_CLOSURE_COPY_TRAMP_DATA -88: /* Entry point from preceeding Go closures. */ + /* Entry point from preceeding Go closures. 
*/ +L(do_closure_i386): FFI_CLOSURE_CALL_INNER - FFI_CLOSURE_MASK_AND_JUMP + FFI_CLOSURE_MASK_AND_JUMP(2) .balign 8 -0: -E(X86_RET_FLOAT) +L(load_table2): +E(L(load_table2), X86_RET_FLOAT) flds (%esp) - jmp 9f -E(X86_RET_DOUBLE) + jmp L(e2) +E(L(load_table2), X86_RET_DOUBLE) fldl (%esp) - jmp 9f -E(X86_RET_LDOUBLE) + jmp L(e2) +E(L(load_table2), X86_RET_LDOUBLE) fldt (%esp) - jmp 9f -E(X86_RET_SINT8) + jmp L(e2) +E(L(load_table2), X86_RET_SINT8) movsbl (%esp), %eax - jmp 9f -E(X86_RET_SINT16) + jmp L(e2) +E(L(load_table2), X86_RET_SINT16) movswl (%esp), %eax - jmp 9f -E(X86_RET_UINT8) + jmp L(e2) +E(L(load_table2), X86_RET_UINT8) movzbl (%esp), %eax - jmp 9f -E(X86_RET_UINT16) + jmp L(e2) +E(L(load_table2), X86_RET_UINT16) movzwl (%esp), %eax - jmp 9f -E(X86_RET_INT64) + jmp L(e2) +E(L(load_table2), X86_RET_INT64) movl 4(%esp), %edx /* fallthru */ -E(X86_RET_INT32) +E(L(load_table2), X86_RET_INT32) movl (%esp), %eax /* fallthru */ -E(X86_RET_VOID) -9: addl $closure_FS, %esp +E(L(load_table2), X86_RET_VOID) +L(e2): + addl $closure_FS, %esp cfi_adjust_cfa_offset(-closure_FS) ret cfi_adjust_cfa_offset(closure_FS) -E(X86_RET_STRUCTPOP) +E(L(load_table2), X86_RET_STRUCTPOP) addl $closure_FS, %esp cfi_adjust_cfa_offset(-closure_FS) ret $4 cfi_adjust_cfa_offset(closure_FS) -E(X86_RET_STRUCTARG) +E(L(load_table2), X86_RET_STRUCTARG) movl (%esp), %eax - jmp 9b -E(X86_RET_STRUCT_1B) + jmp L(e2) +E(L(load_table2), X86_RET_STRUCT_1B) movzbl (%esp), %eax - jmp 9b -E(X86_RET_STRUCT_2B) + jmp L(e2) +E(L(load_table2), X86_RET_STRUCT_2B) movzwl (%esp), %eax - jmp 9b + jmp L(e2) /* Fill out the table so that bad values are predictable. 
*/ -E(X86_RET_UNUSED14) +E(L(load_table2), X86_RET_UNUSED14) ud2 -E(X86_RET_UNUSED15) +E(L(load_table2), X86_RET_UNUSED15) ud2 cfi_endproc ENDF(C(ffi_closure_i386)) -FFI_GO_CLOSURE(STDCALL, %ecx, %edx, %eax) +FFI_GO_CLOSURE(STDCALL, %ecx, %edx, %eax, L(do_closure_STDCALL)) /* For REGISTER, we have no available parameter registers, and so we enter here having pushed the closure onto the stack. */ @@ -371,7 +382,7 @@ C(ffi_closure_REGISTER): movl closure_FS-4(%esp), %ecx /* load retaddr */ movl closure_FS(%esp), %eax /* load closure */ movl %ecx, closure_FS(%esp) /* move retaddr */ - jmp 0f + jmp L(do_closure_REGISTER) cfi_endproc ENDF(C(ffi_closure_REGISTER)) @@ -391,11 +402,13 @@ C(ffi_closure_STDCALL): FFI_CLOSURE_SAVE_REGS -0: /* Entry point from ffi_closure_REGISTER. */ + /* Entry point from ffi_closure_REGISTER. */ +L(do_closure_REGISTER): FFI_CLOSURE_COPY_TRAMP_DATA -88: /* Entry point from preceeding Go closure. */ + /* Entry point from preceeding Go closure. */ +L(do_closure_STDCALL): FFI_CLOSURE_CALL_INNER @@ -411,70 +424,70 @@ C(ffi_closure_STDCALL): there is always a window between the mov and the ret which will be wrong from one point of view or another. 
*/ - FFI_CLOSURE_MASK_AND_JUMP + FFI_CLOSURE_MASK_AND_JUMP(3) .balign 8 -0: -E(X86_RET_FLOAT) +L(load_table3): +E(L(load_table3), X86_RET_FLOAT) flds (%esp) movl %ecx, %esp ret -E(X86_RET_DOUBLE) +E(L(load_table3), X86_RET_DOUBLE) fldl (%esp) movl %ecx, %esp ret -E(X86_RET_LDOUBLE) +E(L(load_table3), X86_RET_LDOUBLE) fldt (%esp) movl %ecx, %esp ret -E(X86_RET_SINT8) +E(L(load_table3), X86_RET_SINT8) movsbl (%esp), %eax movl %ecx, %esp ret -E(X86_RET_SINT16) +E(L(load_table3), X86_RET_SINT16) movswl (%esp), %eax movl %ecx, %esp ret -E(X86_RET_UINT8) +E(L(load_table3), X86_RET_UINT8) movzbl (%esp), %eax movl %ecx, %esp ret -E(X86_RET_UINT16) +E(L(load_table3), X86_RET_UINT16) movzwl (%esp), %eax movl %ecx, %esp ret -E(X86_RET_INT64) +E(L(load_table3), X86_RET_INT64) popl %eax popl %edx movl %ecx, %esp ret -E(X86_RET_INT32) +E(L(load_table3), X86_RET_INT32) movl (%esp), %eax movl %ecx, %esp ret -E(X86_RET_VOID) +E(L(load_table3), X86_RET_VOID) movl %ecx, %esp ret -E(X86_RET_STRUCTPOP) +E(L(load_table3), X86_RET_STRUCTPOP) movl %ecx, %esp ret -E(X86_RET_STRUCTARG) +E(L(load_table3), X86_RET_STRUCTARG) movl (%esp), %eax movl %ecx, %esp ret -E(X86_RET_STRUCT_1B) +E(L(load_table3), X86_RET_STRUCT_1B) movzbl (%esp), %eax movl %ecx, %esp ret -E(X86_RET_STRUCT_2B) +E(L(load_table3), X86_RET_STRUCT_2B) movzwl (%esp), %eax movl %ecx, %esp ret /* Fill out the table so that bad values are predictable. 
*/ -E(X86_RET_UNUSED14) +E(L(load_table3), X86_RET_UNUSED14) ud2 -E(X86_RET_UNUSED15) +E(L(load_table3), X86_RET_UNUSED15) ud2 cfi_endproc @@ -509,67 +522,69 @@ C(ffi_closure_raw_SYSV): andl $X86_RET_TYPE_MASK, %eax #ifdef __PIC__ call C(__x86.get_pc_thunk.bx) -1: leal 0f-1b(%ebx, %eax, 8), %eax +L(pc4): + leal L(load_table4)-L(pc4)(%ebx, %eax, 8), %eax #else - leal 0f(,%eax, 8), %eax + leal L(load_table4)(,%eax, 8), %eax #endif movl raw_closure_S_FS-4(%esp), %ebx cfi_restore(%ebx) jmp *%eax .balign 8 -0: -E(X86_RET_FLOAT) +L(load_table4): +E(L(load_table4), X86_RET_FLOAT) flds 16(%esp) - jmp 9f -E(X86_RET_DOUBLE) + jmp L(e4) +E(L(load_table4), X86_RET_DOUBLE) fldl 16(%esp) - jmp 9f -E(X86_RET_LDOUBLE) + jmp L(e4) +E(L(load_table4), X86_RET_LDOUBLE) fldt 16(%esp) - jmp 9f -E(X86_RET_SINT8) + jmp L(e4) +E(L(load_table4), X86_RET_SINT8) movsbl 16(%esp), %eax - jmp 9f -E(X86_RET_SINT16) + jmp L(e4) +E(L(load_table4), X86_RET_SINT16) movswl 16(%esp), %eax - jmp 9f -E(X86_RET_UINT8) + jmp L(e4) +E(L(load_table4), X86_RET_UINT8) movzbl 16(%esp), %eax - jmp 9f -E(X86_RET_UINT16) + jmp L(e4) +E(L(load_table4), X86_RET_UINT16) movzwl 16(%esp), %eax - jmp 9f -E(X86_RET_INT64) + jmp L(e4) +E(L(load_table4), X86_RET_INT64) movl 16+4(%esp), %edx /* fallthru */ -E(X86_RET_INT32) +E(L(load_table4), X86_RET_INT32) movl 16(%esp), %eax /* fallthru */ -E(X86_RET_VOID) -9: addl $raw_closure_S_FS, %esp +E(L(load_table4), X86_RET_VOID) +L(e4): + addl $raw_closure_S_FS, %esp cfi_adjust_cfa_offset(-raw_closure_S_FS) ret cfi_adjust_cfa_offset(raw_closure_S_FS) -E(X86_RET_STRUCTPOP) +E(L(load_table4), X86_RET_STRUCTPOP) addl $raw_closure_S_FS, %esp cfi_adjust_cfa_offset(-raw_closure_S_FS) ret $4 cfi_adjust_cfa_offset(raw_closure_S_FS) -E(X86_RET_STRUCTARG) +E(L(load_table4), X86_RET_STRUCTARG) movl 16(%esp), %eax - jmp 9b -E(X86_RET_STRUCT_1B) + jmp L(e4) +E(L(load_table4), X86_RET_STRUCT_1B) movzbl 16(%esp), %eax - jmp 9b -E(X86_RET_STRUCT_2B) + jmp L(e4) +E(L(load_table4), 
X86_RET_STRUCT_2B) movzwl 16(%esp), %eax - jmp 9b + jmp L(e4) /* Fill out the table so that bad values are predictable. */ -E(X86_RET_UNUSED14) +E(L(load_table4), X86_RET_UNUSED14) ud2 -E(X86_RET_UNUSED15) +E(L(load_table4), X86_RET_UNUSED15) ud2 cfi_endproc @@ -613,68 +628,70 @@ C(ffi_closure_raw_THISCALL): andl $X86_RET_TYPE_MASK, %eax #ifdef __PIC__ call C(__x86.get_pc_thunk.bx) -1: leal 0f-1b(%ebx, %eax, 8), %eax +L(pc5): + leal L(load_table5)-L(pc5)(%ebx, %eax, 8), %eax #else - leal 0f(,%eax, 8), %eax + leal L(load_table5)(,%eax, 8), %eax #endif movl raw_closure_T_FS-4(%esp), %ebx cfi_restore(%ebx) jmp *%eax .balign 8 -0: -E(X86_RET_FLOAT) +L(load_table5): +E(L(load_table5), X86_RET_FLOAT) flds 16(%esp) - jmp 9f -E(X86_RET_DOUBLE) + jmp L(e5) +E(L(load_table5), X86_RET_DOUBLE) fldl 16(%esp) - jmp 9f -E(X86_RET_LDOUBLE) + jmp L(e5) +E(L(load_table5), X86_RET_LDOUBLE) fldt 16(%esp) - jmp 9f -E(X86_RET_SINT8) + jmp L(e5) +E(L(load_table5), X86_RET_SINT8) movsbl 16(%esp), %eax - jmp 9f -E(X86_RET_SINT16) + jmp L(e5) +E(L(load_table5), X86_RET_SINT16) movswl 16(%esp), %eax - jmp 9f -E(X86_RET_UINT8) + jmp L(e5) +E(L(load_table5), X86_RET_UINT8) movzbl 16(%esp), %eax - jmp 9f -E(X86_RET_UINT16) + jmp L(e5) +E(L(load_table5), X86_RET_UINT16) movzwl 16(%esp), %eax - jmp 9f -E(X86_RET_INT64) + jmp L(e5) +E(L(load_table5), X86_RET_INT64) movl 16+4(%esp), %edx /* fallthru */ -E(X86_RET_INT32) +E(L(load_table5), X86_RET_INT32) movl 16(%esp), %eax /* fallthru */ -E(X86_RET_VOID) -9: addl $raw_closure_T_FS, %esp +E(L(load_table5), X86_RET_VOID) +L(e5): + addl $raw_closure_T_FS, %esp cfi_adjust_cfa_offset(-raw_closure_T_FS) /* Remove the extra %ecx argument we pushed. 
*/ ret $4 cfi_adjust_cfa_offset(raw_closure_T_FS) -E(X86_RET_STRUCTPOP) +E(L(load_table5), X86_RET_STRUCTPOP) addl $raw_closure_T_FS, %esp cfi_adjust_cfa_offset(-raw_closure_T_FS) ret $8 cfi_adjust_cfa_offset(raw_closure_T_FS) -E(X86_RET_STRUCTARG) +E(L(load_table5), X86_RET_STRUCTARG) movl 16(%esp), %eax - jmp 9b -E(X86_RET_STRUCT_1B) + jmp L(e5) +E(L(load_table5), X86_RET_STRUCT_1B) movzbl 16(%esp), %eax - jmp 9b -E(X86_RET_STRUCT_2B) + jmp L(e5) +E(L(load_table5), X86_RET_STRUCT_2B) movzwl 16(%esp), %eax - jmp 9b + jmp L(e5) /* Fill out the table so that bad values are predictable. */ -E(X86_RET_UNUSED14) +E(L(load_table5), X86_RET_UNUSED14) ud2 -E(X86_RET_UNUSED15) +E(L(load_table5), X86_RET_UNUSED15) ud2 cfi_endproc diff --git a/src/x86/unix64.S b/src/x86/unix64.S index 42880d5..4c32213 100644 --- a/src/x86/unix64.S +++ b/src/x86/unix64.S @@ -43,6 +43,12 @@ # define C(X) X #endif +#ifdef __APPLE__ +# define L(X) C1(L, X) +#else +# define L(X) C1(.L, X) +#endif + #ifdef __ELF__ # define PLT(X) X@PLT # define ENDF(X) .type X,@function; .size X, . - X @@ -55,12 +61,11 @@ actual table. The entry points into the table are all 8 bytes. The use of ORG asserts that we're at the correct location. */ /* ??? The clang assembler doesn't handle .org with symbolic expressions. */ -.macro E index - .balign 8 -#if !defined(__clang__) && !defined(__APPLE__) - .org 0b + \index * 8, 0x90 +#if defined(__clang__) || defined(__APPLE__) +# define E(BASE, X) .balign 8 +#else +# define E(BASE, X) .balign 8; .org BASE + X * 8 #endif -.endm /* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags, void *raddr, void (*fnaddr)(void)); @@ -106,8 +111,8 @@ C(ffi_call_unix64): movq 0x28(%r10), %r9 movl 0xb0(%r10), %eax testl %eax, %eax - jnz .Lload_sse -.Lret_from_load_sse: + jnz L(load_sse) +L(ret_from_load_sse): /* Deallocate the reg arg area, except for r10, then load via pop. 
*/ leaq 0xb8(%r10), %rsp @@ -129,8 +134,8 @@ C(ffi_call_unix64): /* The first byte of the flags contains the FFI_TYPE. */ cmpb $UNIX64_RET_LAST, %cl movzbl %cl, %r10d - leaq 0f(%rip), %r11 - ja 9f + leaq L(store_table)(%rip), %r11 + ja L(sa) leaq (%r11, %r10, 8), %r10 /* Prep for the structure cases: scratch area in redzone. */ @@ -138,78 +143,80 @@ C(ffi_call_unix64): jmp *%r10 .balign 8 -0: -E UNIX64_RET_VOID +L(store_table): +E(L(store_table), UNIX64_RET_VOID) ret -E UNIX64_RET_UINT8 +E(L(store_table), UNIX64_RET_UINT8) movzbl %al, %eax movq %rax, (%rdi) ret -E UNIX64_RET_UINT16 +E(L(store_table), UNIX64_RET_UINT16) movzwl %ax, %eax movq %rax, (%rdi) ret -E UNIX64_RET_UINT32 +E(L(store_table), UNIX64_RET_UINT32) movl %eax, %eax movq %rax, (%rdi) ret -E UNIX64_RET_SINT8 +E(L(store_table), UNIX64_RET_SINT8) movsbq %al, %rax movq %rax, (%rdi) ret -E UNIX64_RET_SINT16 +E(L(store_table), UNIX64_RET_SINT16) movswq %ax, %rax movq %rax, (%rdi) ret -E UNIX64_RET_SINT32 +E(L(store_table), UNIX64_RET_SINT32) cltq movq %rax, (%rdi) ret -E UNIX64_RET_INT64 +E(L(store_table), UNIX64_RET_INT64) movq %rax, (%rdi) ret -E UNIX64_RET_XMM32 +E(L(store_table), UNIX64_RET_XMM32) movd %xmm0, (%rdi) ret -E UNIX64_RET_XMM64 +E(L(store_table), UNIX64_RET_XMM64) movq %xmm0, (%rdi) ret -E UNIX64_RET_X87 +E(L(store_table), UNIX64_RET_X87) fstpt (%rdi) ret -E UNIX64_RET_X87_2 +E(L(store_table), UNIX64_RET_X87_2) fstpt (%rdi) fstpt 16(%rdi) ret -E UNIX64_RET_ST_XMM0_RAX +E(L(store_table), UNIX64_RET_ST_XMM0_RAX) movq %rax, 8(%rsi) - jmp 3f -E UNIX64_RET_ST_RAX_XMM0 + jmp L(s3) +E(L(store_table), UNIX64_RET_ST_RAX_XMM0) movq %xmm0, 8(%rsi) - jmp 2f -E UNIX64_RET_ST_XMM0_XMM1 + jmp L(s2) +E(L(store_table), UNIX64_RET_ST_XMM0_XMM1) movq %xmm1, 8(%rsi) - jmp 3f -E UNIX64_RET_ST_RAX_RDX + jmp L(s3) +E(L(store_table), UNIX64_RET_ST_RAX_RDX) movq %rdx, 8(%rsi) -2: movq %rax, (%rsi) +L(s2): + movq %rax, (%rsi) shrl $UNIX64_SIZE_SHIFT, %ecx rep movsb ret .balign 8 -3: movq %xmm0, (%rsi) +L(s3): + movq 
%xmm0, (%rsi) shrl $UNIX64_SIZE_SHIFT, %ecx rep movsb ret -9: call PLT(C(abort)) +L(sa): call PLT(C(abort)) /* Many times we can avoid loading any SSE registers at all. It's not worth an indirect jump to load the exact set of SSE registers needed; zero or all is a good compromise. */ .balign 2 cfi_restore_state -.Lload_sse: +L(load_sse): movdqa 0x30(%r10), %xmm0 movdqa 0x40(%r10), %xmm1 movdqa 0x50(%r10), %xmm2 @@ -218,7 +225,7 @@ E UNIX64_RET_ST_RAX_RDX movdqa 0x80(%r10), %xmm5 movdqa 0x90(%r10), %xmm6 movdqa 0xa0(%r10), %xmm7 - jmp .Lret_from_load_sse + jmp L(ret_from_load_sse) cfi_endproc ENDF(C(ffi_call_unix64)) @@ -251,7 +258,7 @@ C(ffi_closure_unix64_sse): movdqa %xmm5, ffi_closure_OFS_V+0x50(%rsp) movdqa %xmm6, ffi_closure_OFS_V+0x60(%rsp) movdqa %xmm7, ffi_closure_OFS_V+0x70(%rsp) - jmp 0f + jmp L(sse_entry1) cfi_endproc ENDF(C(ffi_closure_unix64_sse)) @@ -265,7 +272,7 @@ C(ffi_closure_unix64): subq $ffi_closure_FS, %rsp /* Note clang bug 21515: adjust_cfa_offset error across endproc. */ cfi_def_cfa_offset(ffi_closure_FS + 8) -0: +L(sse_entry1): movq %rdi, ffi_closure_OFS_G+0x00(%rsp) movq %rsi, ffi_closure_OFS_G+0x08(%rsp) movq %rdx, ffi_closure_OFS_G+0x10(%rsp) @@ -282,7 +289,7 @@ C(ffi_closure_unix64): movq FFI_TRAMPOLINE_SIZE+8(%r10), %rsi /* Load fun */ movq FFI_TRAMPOLINE_SIZE+16(%r10), %rdx /* Load user_data */ #endif -.Ldo_closure: +L(do_closure): leaq ffi_closure_OFS_RVALUE(%rsp), %rcx /* Load rvalue */ movq %rsp, %r8 /* Load reg_args */ leaq ffi_closure_FS+8(%rsp), %r9 /* Load argp */ @@ -295,68 +302,70 @@ C(ffi_closure_unix64): /* The first byte of the return value contains the FFI_TYPE. 
*/ cmpb $UNIX64_RET_LAST, %al movzbl %al, %r10d - leaq 0f(%rip), %r11 - ja 9f + leaq L(load_table)(%rip), %r11 + ja L(la) leaq (%r11, %r10, 8), %r10 leaq ffi_closure_RED_RVALUE(%rsp), %rsi jmp *%r10 .balign 8 -0: -E UNIX64_RET_VOID +L(load_table): +E(L(load_table), UNIX64_RET_VOID) ret -E UNIX64_RET_UINT8 +E(L(load_table), UNIX64_RET_UINT8) movzbl (%rsi), %eax ret -E UNIX64_RET_UINT16 +E(L(load_table), UNIX64_RET_UINT16) movzwl (%rsi), %eax ret -E UNIX64_RET_UINT32 +E(L(load_table), UNIX64_RET_UINT32) movl (%rsi), %eax ret -E UNIX64_RET_SINT8 +E(L(load_table), UNIX64_RET_SINT8) movsbl (%rsi), %eax ret -E UNIX64_RET_SINT16 +E(L(load_table), UNIX64_RET_SINT16) movswl (%rsi), %eax ret -E UNIX64_RET_SINT32 +E(L(load_table), UNIX64_RET_SINT32) movl (%rsi), %eax ret -E UNIX64_RET_INT64 +E(L(load_table), UNIX64_RET_INT64) movq (%rsi), %rax ret -E UNIX64_RET_XMM32 +E(L(load_table), UNIX64_RET_XMM32) movd (%rsi), %xmm0 ret -E UNIX64_RET_XMM64 +E(L(load_table), UNIX64_RET_XMM64) movq (%rsi), %xmm0 ret -E UNIX64_RET_X87 +E(L(load_table), UNIX64_RET_X87) fldt (%rsi) ret -E UNIX64_RET_X87_2 +E(L(load_table), UNIX64_RET_X87_2) fldt 16(%rsi) fldt (%rsi) ret -E UNIX64_RET_ST_XMM0_RAX +E(L(load_table), UNIX64_RET_ST_XMM0_RAX) movq 8(%rsi), %rax - jmp 3f -E UNIX64_RET_ST_RAX_XMM0 + jmp L(l3) +E(L(load_table), UNIX64_RET_ST_RAX_XMM0) movq 8(%rsi), %xmm0 - jmp 2f -E UNIX64_RET_ST_XMM0_XMM1 + jmp L(l2) +E(L(load_table), UNIX64_RET_ST_XMM0_XMM1) movq 8(%rsi), %xmm1 - jmp 3f -E UNIX64_RET_ST_RAX_RDX + jmp L(l3) +E(L(load_table), UNIX64_RET_ST_RAX_RDX) movq 8(%rsi), %rdx -2: movq (%rsi), %rax +L(l2): + movq (%rsi), %rax ret .balign 8 -3: movq (%rsi), %xmm0 +L(l3): + movq (%rsi), %xmm0 ret -9: call PLT(C(abort)) +L(la): call PLT(C(abort)) cfi_endproc ENDF(C(ffi_closure_unix64)) @@ -379,7 +388,7 @@ C(ffi_go_closure_unix64_sse): movdqa %xmm5, ffi_closure_OFS_V+0x50(%rsp) movdqa %xmm6, ffi_closure_OFS_V+0x60(%rsp) movdqa %xmm7, ffi_closure_OFS_V+0x70(%rsp) - jmp 0f + jmp L(sse_entry2) 
cfi_endproc ENDF(C(ffi_go_closure_unix64_sse)) @@ -393,7 +402,7 @@ C(ffi_go_closure_unix64): subq $ffi_closure_FS, %rsp /* Note clang bug 21515: adjust_cfa_offset error across endproc. */ cfi_def_cfa_offset(ffi_closure_FS + 8) -0: +L(sse_entry2): movq %rdi, ffi_closure_OFS_G+0x00(%rsp) movq %rsi, ffi_closure_OFS_G+0x08(%rsp) movq %rdx, ffi_closure_OFS_G+0x10(%rsp) @@ -410,7 +419,7 @@ C(ffi_go_closure_unix64): movq 16(%r10), %rsi /* Load fun */ movq %r10, %rdx /* Load closure (user_data) */ #endif - jmp .Ldo_closure + jmp L(do_closure) cfi_endproc ENDF(C(ffi_go_closure_unix64)) |