summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMoxie Bot <bot@moxielogic.com>2020-02-21 22:13:14 -0500
committerMoxie Bot <bot@moxielogic.com>2020-02-21 22:13:14 -0500
commit624c7a35f7e1d12f917453d6c657cd5947ac57f7 (patch)
tree238839705a70c7667ebd5a68a8b57b86e50e591f
parent4c775d7cd6e914c6a2f66465497106cff360aeb5 (diff)
parent7855656148b96c7070ec362d2a73af840025a2b7 (diff)
downloadlibffi-624c7a35f7e1d12f917453d6c657cd5947ac57f7.tar.gz
Merge branch 'master' of github.com:/libffi/libffi
-rw-r--r--Makefile.am3
-rw-r--r--libtool-version2
-rw-r--r--src/powerpc/sysv.S12
-rw-r--r--src/x86/ffi.c11
-rw-r--r--src/x86/ffi64.c18
-rw-r--r--src/x86/ffitarget.h17
-rw-r--r--src/x86/ffiw64.c18
-rw-r--r--src/x86/sysv.S17
-rw-r--r--src/x86/unix64.S60
-rw-r--r--src/x86/win64.S5
10 files changed, 125 insertions, 38 deletions
diff --git a/Makefile.am b/Makefile.am
index 4fd6193..563e9f2 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -144,7 +144,8 @@ endif
libffi_version_info = -version-info `grep -v '^\#' $(srcdir)/libtool-version`
libffi.map: $(top_srcdir)/libffi.map.in
- $(COMPILE) -D$(TARGET) -E -x assembler-with-cpp -o $@ $<
+ $(COMPILE) -D$(TARGET) -DGENERATE_LIBFFI_MAP \
+ -E -x assembler-with-cpp -o $@ $<
libffi_la_LDFLAGS = -no-undefined $(libffi_version_info) $(libffi_version_script) $(LTLDFLAGS) $(AM_LTLDFLAGS)
libffi_la_DEPENDENCIES = $(libffi_la_LIBADD) $(libffi_version_dep)
diff --git a/libtool-version b/libtool-version
index e4f5aa2..607fee5 100644
--- a/libtool-version
+++ b/libtool-version
@@ -26,4 +26,4 @@
# release, then set age to 0.
#
# CURRENT:REVISION:AGE
-8:0:1
+9:0:1
diff --git a/src/powerpc/sysv.S b/src/powerpc/sysv.S
index 1474ce7..df97734 100644
--- a/src/powerpc/sysv.S
+++ b/src/powerpc/sysv.S
@@ -104,17 +104,16 @@ ENTRY(ffi_call_SYSV)
bctrl
/* Now, deal with the return value. */
- mtcrf 0x01,%r31 /* cr7 */
+ mtcrf 0x03,%r31 /* cr6-cr7 */
bt- 31,L(small_struct_return_value)
bt- 30,L(done_return_value)
#ifndef __NO_FPRS__
bt- 29,L(fp_return_value)
#endif
stw %r3,0(%r30)
- bf+ 28,L(done_return_value)
+ bf+ 27,L(done_return_value)
stw %r4,4(%r30)
- mtcrf 0x02,%r31 /* cr6 */
- bf 27,L(done_return_value)
+ bf 26,L(done_return_value)
stw %r5,8(%r30)
stw %r6,12(%r30)
/* Fall through... */
@@ -145,10 +144,9 @@ L(done_return_value):
#ifndef __NO_FPRS__
L(fp_return_value):
.cfi_restore_state
- bf 28,L(float_return_value)
+ bf 27,L(float_return_value)
stfd %f1,0(%r30)
- mtcrf 0x02,%r31 /* cr6 */
- bf 27,L(done_return_value)
+ bf 26,L(done_return_value)
stfd %f2,8(%r30)
b L(done_return_value)
L(float_return_value):
diff --git a/src/x86/ffi.c b/src/x86/ffi.c
index 9a59218..e247322 100644
--- a/src/x86/ffi.c
+++ b/src/x86/ffi.c
@@ -557,13 +557,16 @@ ffi_prep_closure_loc (ffi_closure* closure,
return FFI_BAD_ABI;
}
+ /* endbr32. */
+ *(UINT32 *) tramp = 0xfb1e0ff3;
+
/* movl or pushl immediate. */
- tramp[0] = op;
- *(void **)(tramp + 1) = codeloc;
+ tramp[4] = op;
+ *(void **)(tramp + 5) = codeloc;
/* jmp dest */
- tramp[5] = 0xe9;
- *(unsigned *)(tramp + 6) = (unsigned)dest - ((unsigned)codeloc + 10);
+ tramp[9] = 0xe9; /* rel32 below is relative to the end of this jmp: 4 (endbr32) + 5 (mov/push) + 5 (jmp) = codeloc + 14 */
+ *(unsigned *)(tramp + 10) = (unsigned)dest - ((unsigned)codeloc + 14);
closure->cif = cif;
closure->fun = fun;
diff --git a/src/x86/ffi64.c b/src/x86/ffi64.c
index dec331c..ed82e23 100644
--- a/src/x86/ffi64.c
+++ b/src/x86/ffi64.c
@@ -728,13 +728,15 @@ ffi_prep_closure_loc (ffi_closure* closure,
void *user_data,
void *codeloc)
{
- static const unsigned char trampoline[16] = {
- /* leaq -0x7(%rip),%r10 # 0x0 */
- 0x4c, 0x8d, 0x15, 0xf9, 0xff, 0xff, 0xff,
- /* jmpq *0x3(%rip) # 0x10 */
- 0xff, 0x25, 0x03, 0x00, 0x00, 0x00,
- /* nopl (%rax) */
- 0x0f, 0x1f, 0x00
+ static const unsigned char trampoline[24] = {
+ /* endbr64 */
+ 0xf3, 0x0f, 0x1e, 0xfa,
+ /* leaq -0xb(%rip),%r10 # 0x0 */
+ 0x4c, 0x8d, 0x15, 0xf5, 0xff, 0xff, 0xff,
+ /* jmpq *0x7(%rip) # 0x18 */
+ 0xff, 0x25, 0x07, 0x00, 0x00, 0x00,
+ /* nopl 0(%rax) */
+ 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00
};
void (*dest)(void);
char *tramp = closure->tramp;
@@ -752,7 +754,7 @@ ffi_prep_closure_loc (ffi_closure* closure,
dest = ffi_closure_unix64;
memcpy (tramp, trampoline, sizeof(trampoline));
- *(UINT64 *)(tramp + 16) = (uintptr_t)dest;
+ *(UINT64 *)(tramp + sizeof (trampoline)) = (uintptr_t)dest;
closure->cif = cif;
closure->fun = fun;
diff --git a/src/x86/ffitarget.h b/src/x86/ffitarget.h
index 85ccedf..a34f3e5 100644
--- a/src/x86/ffitarget.h
+++ b/src/x86/ffitarget.h
@@ -136,12 +136,25 @@ typedef enum ffi_abi {
#if defined (X86_64) || defined(X86_WIN64) \
|| (defined (__x86_64__) && defined (X86_DARWIN))
-# define FFI_TRAMPOLINE_SIZE 24
+/* 4 bytes of ENDBR64 + 7 bytes of LEA + 6 bytes of JMP + 7 bytes of NOP
+ + 8 bytes of pointer. */
+# define FFI_TRAMPOLINE_SIZE 32
# define FFI_NATIVE_RAW_API 0
#else
-# define FFI_TRAMPOLINE_SIZE 12
+/* 4 bytes of ENDBR32 + 5 bytes of MOV + 5 bytes of JMP + 2 unused
+ bytes. */
+# define FFI_TRAMPOLINE_SIZE 16
# define FFI_NATIVE_RAW_API 1 /* x86 has native raw api support */
#endif
+#if !defined(GENERATE_LIBFFI_MAP) && defined(__ASSEMBLER__) \
+ && defined(__CET__)
+# include <cet.h>
+# define _CET_NOTRACK notrack
+#else
+# define _CET_ENDBR
+# define _CET_NOTRACK
+#endif
+
#endif
diff --git a/src/x86/ffiw64.c b/src/x86/ffiw64.c
index b68f69c..034dffd 100644
--- a/src/x86/ffiw64.c
+++ b/src/x86/ffiw64.c
@@ -196,13 +196,15 @@ EFI64(ffi_prep_closure_loc)(ffi_closure* closure,
void *user_data,
void *codeloc)
{
- static const unsigned char trampoline[16] = {
- /* leaq -0x7(%rip),%r10 # 0x0 */
- 0x4c, 0x8d, 0x15, 0xf9, 0xff, 0xff, 0xff,
- /* jmpq *0x3(%rip) # 0x10 */
- 0xff, 0x25, 0x03, 0x00, 0x00, 0x00,
- /* nopl (%rax) */
- 0x0f, 0x1f, 0x00
+ static const unsigned char trampoline[FFI_TRAMPOLINE_SIZE - 8] = {
+ /* endbr64 */
+ 0xf3, 0x0f, 0x1e, 0xfa,
+ /* leaq -0xb(%rip),%r10 # 0x0 */
+ 0x4c, 0x8d, 0x15, 0xf5, 0xff, 0xff, 0xff,
+ /* jmpq *0x7(%rip) # 0x18 */
+ 0xff, 0x25, 0x07, 0x00, 0x00, 0x00,
+ /* nopl 0(%rax) */
+ 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00
};
char *tramp = closure->tramp;
@@ -216,7 +218,7 @@ EFI64(ffi_prep_closure_loc)(ffi_closure* closure,
}
memcpy (tramp, trampoline, sizeof(trampoline));
- *(UINT64 *)(tramp + 16) = (uintptr_t)ffi_closure_win64;
+ *(UINT64 *)(tramp + sizeof (trampoline)) = (uintptr_t)ffi_closure_win64;
closure->cif = cif;
closure->fun = fun;
diff --git a/src/x86/sysv.S b/src/x86/sysv.S
index 7c9598c..6d56483 100644
--- a/src/x86/sysv.S
+++ b/src/x86/sysv.S
@@ -92,6 +92,7 @@
ffi_call_i386:
L(UW0):
# cfi_startproc
+ _CET_ENDBR
#if !HAVE_FASTCALL
movl 4(%esp), %ecx
movl 8(%esp), %edx
@@ -133,7 +134,7 @@ L(pc1):
leal L(store_table)(,%ecx, 8), %ebx
#endif
movl 16(%ebp), %ecx /* load result address */
- jmp *%ebx
+ _CET_NOTRACK jmp *%ebx
.balign 8
L(store_table):
@@ -256,7 +257,7 @@ ENDF(ffi_call_i386)
andl $X86_RET_TYPE_MASK, %eax; \
leal L(C1(load_table,N))(, %eax, 8), %edx; \
movl closure_CF(%esp), %eax; /* optimiztic load */ \
- jmp *%edx
+ _CET_NOTRACK jmp *%edx
#ifdef __PIC__
# if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE
@@ -267,7 +268,7 @@ ENDF(ffi_call_i386)
L(C1(pc,N)): \
leal L(C1(load_table,N))-L(C1(pc,N))(%edx, %eax, 8), %edx; \
movl closure_CF(%esp), %eax; /* optimiztic load */ \
- jmp *%edx
+ _CET_NOTRACK jmp *%edx
# else
# define FFI_CLOSURE_CALL_INNER_SAVE_EBX
# undef FFI_CLOSURE_CALL_INNER
@@ -286,7 +287,7 @@ L(C1(UW,UWN)): \
L(C1(UW,UWN)): \
/* cfi_restore(%ebx); */ \
movl closure_CF(%esp), %eax; /* optimiztic load */ \
- jmp *%edx
+ _CET_NOTRACK jmp *%edx
# endif /* DARWIN || HIDDEN */
#endif /* __PIC__ */
@@ -296,6 +297,7 @@ L(C1(UW,UWN)): \
C(ffi_go_closure_EAX):
L(UW6):
# cfi_startproc
+ _CET_ENDBR
subl $closure_FS, %esp
L(UW7):
# cfi_def_cfa_offset(closure_FS + 4)
@@ -316,6 +318,7 @@ ENDF(C(ffi_go_closure_EAX))
C(ffi_go_closure_ECX):
L(UW9):
# cfi_startproc
+ _CET_ENDBR
subl $closure_FS, %esp
L(UW10):
# cfi_def_cfa_offset(closure_FS + 4)
@@ -340,6 +343,7 @@ ENDF(C(ffi_go_closure_ECX))
C(ffi_closure_i386):
L(UW12):
# cfi_startproc
+ _CET_ENDBR
subl $closure_FS, %esp
L(UW13):
# cfi_def_cfa_offset(closure_FS + 4)
@@ -423,6 +427,7 @@ ENDF(C(ffi_closure_i386))
C(ffi_go_closure_STDCALL):
L(UW21):
# cfi_startproc
+ _CET_ENDBR
subl $closure_FS, %esp
L(UW22):
# cfi_def_cfa_offset(closure_FS + 4)
@@ -448,6 +453,7 @@ L(UW24):
# cfi_startproc
# cfi_def_cfa(%esp, 8)
# cfi_offset(%eip, -8)
+ _CET_ENDBR
subl $closure_FS-4, %esp
L(UW25):
# cfi_def_cfa_offset(closure_FS + 4)
@@ -470,6 +476,7 @@ ENDF(C(ffi_closure_REGISTER))
C(ffi_closure_STDCALL):
L(UW27):
# cfi_startproc
+ _CET_ENDBR
subl $closure_FS, %esp
L(UW28):
# cfi_def_cfa_offset(closure_FS + 4)
@@ -576,6 +583,7 @@ ENDF(C(ffi_closure_STDCALL))
C(ffi_closure_raw_SYSV):
L(UW32):
# cfi_startproc
+ _CET_ENDBR
subl $raw_closure_S_FS, %esp
L(UW33):
# cfi_def_cfa_offset(raw_closure_S_FS + 4)
@@ -679,6 +687,7 @@ ENDF(C(ffi_closure_raw_SYSV))
C(ffi_closure_raw_THISCALL):
L(UW41):
# cfi_startproc
+ _CET_ENDBR
/* Rearrange the stack such that %ecx is the first argument.
This means moving the return address. */
popl %edx
diff --git a/src/x86/unix64.S b/src/x86/unix64.S
index 41563f5..ee3c04f 100644
--- a/src/x86/unix64.S
+++ b/src/x86/unix64.S
@@ -42,7 +42,11 @@
#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__))
# define E(BASE, X) .balign 8
#else
-# define E(BASE, X) .balign 8; .org BASE + X * 8
+# ifdef __CET__
+# define E(BASE, X) .balign 8; .org BASE + X * 16
+# else
+# define E(BASE, X) .balign 8; .org BASE + X * 8
+# endif
#endif
/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
@@ -58,6 +62,7 @@
C(ffi_call_unix64):
L(UW0):
+ _CET_ENDBR
movq (%rsp), %r10 /* Load return address. */
leaq (%rdi, %rsi), %rax /* Find local stack base. */
movq %rdx, (%rax) /* Save flags. */
@@ -116,6 +121,11 @@ L(UW2):
movzbl %cl, %r10d
leaq L(store_table)(%rip), %r11
ja L(sa)
+#ifdef __CET__
+ /* NB: Originally, each slot was 8 bytes. 4 bytes of ENDBR64 +
+ 4 bytes of NOP padding double the slot size to 16 bytes. */
+ addl %r10d, %r10d
+#endif
leaq (%r11, %r10, 8), %r10
/* Prep for the structure cases: scratch area in redzone. */
@@ -125,57 +135,73 @@ L(UW2):
.balign 8
L(store_table):
E(L(store_table), UNIX64_RET_VOID)
+ _CET_ENDBR
ret
E(L(store_table), UNIX64_RET_UINT8)
+ _CET_ENDBR
movzbl %al, %eax
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_UINT16)
+ _CET_ENDBR
movzwl %ax, %eax
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_UINT32)
+ _CET_ENDBR
movl %eax, %eax
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_SINT8)
+ _CET_ENDBR
movsbq %al, %rax
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_SINT16)
+ _CET_ENDBR
movswq %ax, %rax
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_SINT32)
+ _CET_ENDBR
cltq
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_INT64)
+ _CET_ENDBR
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_XMM32)
+ _CET_ENDBR
movd %xmm0, (%rdi)
ret
E(L(store_table), UNIX64_RET_XMM64)
+ _CET_ENDBR
movq %xmm0, (%rdi)
ret
E(L(store_table), UNIX64_RET_X87)
+ _CET_ENDBR
fstpt (%rdi)
ret
E(L(store_table), UNIX64_RET_X87_2)
+ _CET_ENDBR
fstpt (%rdi)
fstpt 16(%rdi)
ret
E(L(store_table), UNIX64_RET_ST_XMM0_RAX)
+ _CET_ENDBR
movq %rax, 8(%rsi)
jmp L(s3)
E(L(store_table), UNIX64_RET_ST_RAX_XMM0)
+ _CET_ENDBR
movq %xmm0, 8(%rsi)
jmp L(s2)
E(L(store_table), UNIX64_RET_ST_XMM0_XMM1)
+ _CET_ENDBR
movq %xmm1, 8(%rsi)
jmp L(s3)
E(L(store_table), UNIX64_RET_ST_RAX_RDX)
+ _CET_ENDBR
movq %rdx, 8(%rsi)
L(s2):
movq %rax, (%rsi)
@@ -227,6 +253,7 @@ ENDF(C(ffi_call_unix64))
C(ffi_closure_unix64_sse):
L(UW5):
+ _CET_ENDBR
subq $ffi_closure_FS, %rsp
L(UW6):
/* cfi_adjust_cfa_offset(ffi_closure_FS) */
@@ -250,6 +277,7 @@ ENDF(C(ffi_closure_unix64_sse))
C(ffi_closure_unix64):
L(UW8):
+ _CET_ENDBR
subq $ffi_closure_FS, %rsp
L(UW9):
/* cfi_adjust_cfa_offset(ffi_closure_FS) */
@@ -286,6 +314,11 @@ L(UW10):
movzbl %al, %r10d
leaq L(load_table)(%rip), %r11
ja L(la)
+#ifdef __CET__
+ /* NB: Originally, each slot was 8 bytes. 4 bytes of ENDBR64 +
+ 4 bytes of NOP padding double the slot size to 16 bytes. */
+ addl %r10d, %r10d
+#endif
leaq (%r11, %r10, 8), %r10
leaq ffi_closure_RED_RVALUE(%rsp), %rsi
jmp *%r10
@@ -293,51 +326,67 @@ L(UW10):
.balign 8
L(load_table):
E(L(load_table), UNIX64_RET_VOID)
+ _CET_ENDBR
ret
E(L(load_table), UNIX64_RET_UINT8)
+ _CET_ENDBR
movzbl (%rsi), %eax
ret
E(L(load_table), UNIX64_RET_UINT16)
+ _CET_ENDBR
movzwl (%rsi), %eax
ret
E(L(load_table), UNIX64_RET_UINT32)
+ _CET_ENDBR
movl (%rsi), %eax
ret
E(L(load_table), UNIX64_RET_SINT8)
+ _CET_ENDBR
movsbl (%rsi), %eax
ret
E(L(load_table), UNIX64_RET_SINT16)
+ _CET_ENDBR
movswl (%rsi), %eax
ret
E(L(load_table), UNIX64_RET_SINT32)
+ _CET_ENDBR
movl (%rsi), %eax
ret
E(L(load_table), UNIX64_RET_INT64)
+ _CET_ENDBR
movq (%rsi), %rax
ret
E(L(load_table), UNIX64_RET_XMM32)
+ _CET_ENDBR
movd (%rsi), %xmm0
ret
E(L(load_table), UNIX64_RET_XMM64)
+ _CET_ENDBR
movq (%rsi), %xmm0
ret
E(L(load_table), UNIX64_RET_X87)
+ _CET_ENDBR
fldt (%rsi)
ret
E(L(load_table), UNIX64_RET_X87_2)
+ _CET_ENDBR
fldt 16(%rsi)
fldt (%rsi)
ret
E(L(load_table), UNIX64_RET_ST_XMM0_RAX)
+ _CET_ENDBR
movq 8(%rsi), %rax
jmp L(l3)
E(L(load_table), UNIX64_RET_ST_RAX_XMM0)
+ _CET_ENDBR
movq 8(%rsi), %xmm0
jmp L(l2)
E(L(load_table), UNIX64_RET_ST_XMM0_XMM1)
+ _CET_ENDBR
movq 8(%rsi), %xmm1
jmp L(l3)
E(L(load_table), UNIX64_RET_ST_RAX_RDX)
+ _CET_ENDBR
movq 8(%rsi), %rdx
L(l2):
movq (%rsi), %rax
@@ -358,6 +407,7 @@ ENDF(C(ffi_closure_unix64))
C(ffi_go_closure_unix64_sse):
L(UW12):
+ _CET_ENDBR
subq $ffi_closure_FS, %rsp
L(UW13):
/* cfi_adjust_cfa_offset(ffi_closure_FS) */
@@ -381,6 +431,7 @@ ENDF(C(ffi_go_closure_unix64_sse))
C(ffi_go_closure_unix64):
L(UW15):
+ _CET_ENDBR
subq $ffi_closure_FS, %rsp
L(UW16):
/* cfi_adjust_cfa_offset(ffi_closure_FS) */
@@ -424,7 +475,12 @@ EHFrame0:
#endif
/* Simplify advancing between labels. Assume DW_CFA_advance_loc1 fits. */
-#define ADV(N, P) .byte 2, L(N)-L(P)
+#ifdef __CET__
+/* Use DW_CFA_advance_loc2 when IBT is enabled. */
+# define ADV(N, P) .byte 3; .2byte L(N)-L(P)
+#else
+# define ADV(N, P) .byte 2, L(N)-L(P)
+#endif
.balign 8
L(CIE):
diff --git a/src/x86/win64.S b/src/x86/win64.S
index 2c334c8..57c0e65 100644
--- a/src/x86/win64.S
+++ b/src/x86/win64.S
@@ -48,6 +48,7 @@
SEH(.seh_proc ffi_call_win64)
C(ffi_call_win64):
cfi_startproc
+ _CET_ENDBR
/* Set up the local stack frame and install it in rbp/rsp. */
movq (%rsp), %rax
movq %rbp, (arg1)
@@ -80,7 +81,7 @@ C(ffi_call_win64):
cmpl $FFI_TYPE_SMALL_STRUCT_4B, %ecx
leaq (%r10, %rcx, 8), %r10
ja 99f
- jmp *%r10
+ _CET_NOTRACK jmp *%r10
/* Below, we're space constrained most of the time. Thus we eschew the
modern "mov, pop, ret" sequence (5 bytes) for "leave, ret" (2 bytes). */
@@ -176,6 +177,7 @@ E(0b, FFI_TYPE_SMALL_STRUCT_4B)
SEH(.seh_proc ffi_go_closure_win64)
C(ffi_go_closure_win64):
cfi_startproc
+ _CET_ENDBR
/* Save all integer arguments into the incoming reg stack space. */
movq %rcx, 8(%rsp)
movq %rdx, 16(%rsp)
@@ -196,6 +198,7 @@ C(ffi_go_closure_win64):
SEH(.seh_proc ffi_closure_win64)
C(ffi_closure_win64):
cfi_startproc
+ _CET_ENDBR
/* Save all integer arguments into the incoming reg stack space. */
movq %rcx, 8(%rsp)
movq %rdx, 16(%rsp)