summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorhjl-tools <hjl.tools@gmail.com>2020-02-21 19:08:06 -0800
committerGitHub <noreply@github.com>2020-02-21 22:08:06 -0500
commit7855656148b96c7070ec362d2a73af840025a2b7 (patch)
tree8a22c616b7e88ea007d8c454f94f7a70f2aa1565
parent4d6d2866ae43e55325e8ee96561221804602cd7a (diff)
downloadlibffi-7855656148b96c7070ec362d2a73af840025a2b7.tar.gz
x86: Add indirect branch tracking support (#540)
Intel Control-flow Enforcement Technology (CET): https://software.intel.com/en-us/articles/intel-sdm contains shadow stack (SHSTK) and indirect branch tracking (IBT). When CET is enabled, ELF object files must be marked with a .note.gnu.property section. When Intel CET is enabled, include <cet.h> in assembly code to mark Intel CET support. Also, when IBT is enabled, all indirect branch targets must start with an ENDBR instruction, and the notrack prefix can be used to disable IBT on an indirect branch. <cet.h> defines _CET_ENDBR, which can be used in assembly code for the ENDBR instruction. If <cet.h> isn't included, define _CET_ENDBR as empty so that _CET_ENDBR can still be used in assembly code. The trampoline must be enlarged to add the ENDBR instruction unconditionally, which is a NOP on non-CET processors. This is required regardless of whether libffi is built with CET enabled, since libffi.so will be marked in the legacy bitmap, but the trampoline won't. Update the library version for the larger FFI_TRAMPOLINE_SIZE. This fixes: https://github.com/libffi/libffi/issues/474 Tested with $ CC="gcc -Wl,-z,cet-report=error -fcf-protection" CXX="g++ -Wl,-z,cet-report=error -fcf-protection" .../configure on Linux CET machines in i686, x32 and x86-64 modes.
-rw-r--r--Makefile.am3
-rw-r--r--libtool-version2
-rw-r--r--src/x86/ffi.c11
-rw-r--r--src/x86/ffi64.c18
-rw-r--r--src/x86/ffitarget.h17
-rw-r--r--src/x86/ffiw64.c18
-rw-r--r--src/x86/sysv.S17
-rw-r--r--src/x86/unix64.S60
-rw-r--r--src/x86/win64.S5
9 files changed, 120 insertions, 31 deletions
diff --git a/Makefile.am b/Makefile.am
index 4fd6193..563e9f2 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -144,7 +144,8 @@ endif
libffi_version_info = -version-info `grep -v '^\#' $(srcdir)/libtool-version`
libffi.map: $(top_srcdir)/libffi.map.in
- $(COMPILE) -D$(TARGET) -E -x assembler-with-cpp -o $@ $<
+ $(COMPILE) -D$(TARGET) -DGENERATE_LIBFFI_MAP \
+ -E -x assembler-with-cpp -o $@ $<
libffi_la_LDFLAGS = -no-undefined $(libffi_version_info) $(libffi_version_script) $(LTLDFLAGS) $(AM_LTLDFLAGS)
libffi_la_DEPENDENCIES = $(libffi_la_LIBADD) $(libffi_version_dep)
diff --git a/libtool-version b/libtool-version
index e4f5aa2..607fee5 100644
--- a/libtool-version
+++ b/libtool-version
@@ -26,4 +26,4 @@
# release, then set age to 0.
#
# CURRENT:REVISION:AGE
-8:0:1
+9:0:1
diff --git a/src/x86/ffi.c b/src/x86/ffi.c
index 9a59218..e247322 100644
--- a/src/x86/ffi.c
+++ b/src/x86/ffi.c
@@ -557,13 +557,16 @@ ffi_prep_closure_loc (ffi_closure* closure,
return FFI_BAD_ABI;
}
+ /* endbr32. */
+ *(UINT32 *) tramp = 0xfb1e0ff3;
+
/* movl or pushl immediate. */
- tramp[0] = op;
- *(void **)(tramp + 1) = codeloc;
+ tramp[4] = op;
+ *(void **)(tramp + 5) = codeloc;
/* jmp dest */
- tramp[5] = 0xe9;
- *(unsigned *)(tramp + 6) = (unsigned)dest - ((unsigned)codeloc + 10);
+ tramp[9] = 0xe9;
+ *(unsigned *)(tramp + 10) = (unsigned)dest - ((unsigned)codeloc + 10);
closure->cif = cif;
closure->fun = fun;
diff --git a/src/x86/ffi64.c b/src/x86/ffi64.c
index dec331c..ed82e23 100644
--- a/src/x86/ffi64.c
+++ b/src/x86/ffi64.c
@@ -728,13 +728,15 @@ ffi_prep_closure_loc (ffi_closure* closure,
void *user_data,
void *codeloc)
{
- static const unsigned char trampoline[16] = {
- /* leaq -0x7(%rip),%r10 # 0x0 */
- 0x4c, 0x8d, 0x15, 0xf9, 0xff, 0xff, 0xff,
- /* jmpq *0x3(%rip) # 0x10 */
- 0xff, 0x25, 0x03, 0x00, 0x00, 0x00,
- /* nopl (%rax) */
- 0x0f, 0x1f, 0x00
+ static const unsigned char trampoline[24] = {
+ /* endbr64 */
+ 0xf3, 0x0f, 0x1e, 0xfa,
+ /* leaq -0xb(%rip),%r10 # 0x0 */
+ 0x4c, 0x8d, 0x15, 0xf5, 0xff, 0xff, 0xff,
+ /* jmpq *0x7(%rip) # 0x18 */
+ 0xff, 0x25, 0x07, 0x00, 0x00, 0x00,
+ /* nopl 0(%rax) */
+ 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00
};
void (*dest)(void);
char *tramp = closure->tramp;
@@ -752,7 +754,7 @@ ffi_prep_closure_loc (ffi_closure* closure,
dest = ffi_closure_unix64;
memcpy (tramp, trampoline, sizeof(trampoline));
- *(UINT64 *)(tramp + 16) = (uintptr_t)dest;
+ *(UINT64 *)(tramp + sizeof (trampoline)) = (uintptr_t)dest;
closure->cif = cif;
closure->fun = fun;
diff --git a/src/x86/ffitarget.h b/src/x86/ffitarget.h
index 85ccedf..a34f3e5 100644
--- a/src/x86/ffitarget.h
+++ b/src/x86/ffitarget.h
@@ -136,12 +136,25 @@ typedef enum ffi_abi {
#if defined (X86_64) || defined(X86_WIN64) \
|| (defined (__x86_64__) && defined (X86_DARWIN))
-# define FFI_TRAMPOLINE_SIZE 24
+/* 4 bytes of ENDBR64 + 7 bytes of LEA + 6 bytes of JMP + 7 bytes of NOP
+ + 8 bytes of pointer. */
+# define FFI_TRAMPOLINE_SIZE 32
# define FFI_NATIVE_RAW_API 0
#else
-# define FFI_TRAMPOLINE_SIZE 12
+/* 4 bytes of ENDBR32 + 5 bytes of MOV + 5 bytes of JMP + 2 unused
+ bytes. */
+# define FFI_TRAMPOLINE_SIZE 16
# define FFI_NATIVE_RAW_API 1 /* x86 has native raw api support */
#endif
+#if !defined(GENERATE_LIBFFI_MAP) && defined(__ASSEMBLER__) \
+ && defined(__CET__)
+# include <cet.h>
+# define _CET_NOTRACK notrack
+#else
+# define _CET_ENDBR
+# define _CET_NOTRACK
+#endif
+
#endif
diff --git a/src/x86/ffiw64.c b/src/x86/ffiw64.c
index b68f69c..034dffd 100644
--- a/src/x86/ffiw64.c
+++ b/src/x86/ffiw64.c
@@ -196,13 +196,15 @@ EFI64(ffi_prep_closure_loc)(ffi_closure* closure,
void *user_data,
void *codeloc)
{
- static const unsigned char trampoline[16] = {
- /* leaq -0x7(%rip),%r10 # 0x0 */
- 0x4c, 0x8d, 0x15, 0xf9, 0xff, 0xff, 0xff,
- /* jmpq *0x3(%rip) # 0x10 */
- 0xff, 0x25, 0x03, 0x00, 0x00, 0x00,
- /* nopl (%rax) */
- 0x0f, 0x1f, 0x00
+ static const unsigned char trampoline[FFI_TRAMPOLINE_SIZE - 8] = {
+ /* endbr64 */
+ 0xf3, 0x0f, 0x1e, 0xfa,
+ /* leaq -0xb(%rip),%r10 # 0x0 */
+ 0x4c, 0x8d, 0x15, 0xf5, 0xff, 0xff, 0xff,
+ /* jmpq *0x7(%rip) # 0x18 */
+ 0xff, 0x25, 0x07, 0x00, 0x00, 0x00,
+ /* nopl 0(%rax) */
+ 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00
};
char *tramp = closure->tramp;
@@ -216,7 +218,7 @@ EFI64(ffi_prep_closure_loc)(ffi_closure* closure,
}
memcpy (tramp, trampoline, sizeof(trampoline));
- *(UINT64 *)(tramp + 16) = (uintptr_t)ffi_closure_win64;
+ *(UINT64 *)(tramp + sizeof (trampoline)) = (uintptr_t)ffi_closure_win64;
closure->cif = cif;
closure->fun = fun;
diff --git a/src/x86/sysv.S b/src/x86/sysv.S
index 7c9598c..6d56483 100644
--- a/src/x86/sysv.S
+++ b/src/x86/sysv.S
@@ -92,6 +92,7 @@
ffi_call_i386:
L(UW0):
# cfi_startproc
+ _CET_ENDBR
#if !HAVE_FASTCALL
movl 4(%esp), %ecx
movl 8(%esp), %edx
@@ -133,7 +134,7 @@ L(pc1):
leal L(store_table)(,%ecx, 8), %ebx
#endif
movl 16(%ebp), %ecx /* load result address */
- jmp *%ebx
+ _CET_NOTRACK jmp *%ebx
.balign 8
L(store_table):
@@ -256,7 +257,7 @@ ENDF(ffi_call_i386)
andl $X86_RET_TYPE_MASK, %eax; \
leal L(C1(load_table,N))(, %eax, 8), %edx; \
movl closure_CF(%esp), %eax; /* optimiztic load */ \
- jmp *%edx
+ _CET_NOTRACK jmp *%edx
#ifdef __PIC__
# if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE
@@ -267,7 +268,7 @@ ENDF(ffi_call_i386)
L(C1(pc,N)): \
leal L(C1(load_table,N))-L(C1(pc,N))(%edx, %eax, 8), %edx; \
movl closure_CF(%esp), %eax; /* optimiztic load */ \
- jmp *%edx
+ _CET_NOTRACK jmp *%edx
# else
# define FFI_CLOSURE_CALL_INNER_SAVE_EBX
# undef FFI_CLOSURE_CALL_INNER
@@ -286,7 +287,7 @@ L(C1(UW,UWN)): \
L(C1(UW,UWN)): \
/* cfi_restore(%ebx); */ \
movl closure_CF(%esp), %eax; /* optimiztic load */ \
- jmp *%edx
+ _CET_NOTRACK jmp *%edx
# endif /* DARWIN || HIDDEN */
#endif /* __PIC__ */
@@ -296,6 +297,7 @@ L(C1(UW,UWN)): \
C(ffi_go_closure_EAX):
L(UW6):
# cfi_startproc
+ _CET_ENDBR
subl $closure_FS, %esp
L(UW7):
# cfi_def_cfa_offset(closure_FS + 4)
@@ -316,6 +318,7 @@ ENDF(C(ffi_go_closure_EAX))
C(ffi_go_closure_ECX):
L(UW9):
# cfi_startproc
+ _CET_ENDBR
subl $closure_FS, %esp
L(UW10):
# cfi_def_cfa_offset(closure_FS + 4)
@@ -340,6 +343,7 @@ ENDF(C(ffi_go_closure_ECX))
C(ffi_closure_i386):
L(UW12):
# cfi_startproc
+ _CET_ENDBR
subl $closure_FS, %esp
L(UW13):
# cfi_def_cfa_offset(closure_FS + 4)
@@ -423,6 +427,7 @@ ENDF(C(ffi_closure_i386))
C(ffi_go_closure_STDCALL):
L(UW21):
# cfi_startproc
+ _CET_ENDBR
subl $closure_FS, %esp
L(UW22):
# cfi_def_cfa_offset(closure_FS + 4)
@@ -448,6 +453,7 @@ L(UW24):
# cfi_startproc
# cfi_def_cfa(%esp, 8)
# cfi_offset(%eip, -8)
+ _CET_ENDBR
subl $closure_FS-4, %esp
L(UW25):
# cfi_def_cfa_offset(closure_FS + 4)
@@ -470,6 +476,7 @@ ENDF(C(ffi_closure_REGISTER))
C(ffi_closure_STDCALL):
L(UW27):
# cfi_startproc
+ _CET_ENDBR
subl $closure_FS, %esp
L(UW28):
# cfi_def_cfa_offset(closure_FS + 4)
@@ -576,6 +583,7 @@ ENDF(C(ffi_closure_STDCALL))
C(ffi_closure_raw_SYSV):
L(UW32):
# cfi_startproc
+ _CET_ENDBR
subl $raw_closure_S_FS, %esp
L(UW33):
# cfi_def_cfa_offset(raw_closure_S_FS + 4)
@@ -679,6 +687,7 @@ ENDF(C(ffi_closure_raw_SYSV))
C(ffi_closure_raw_THISCALL):
L(UW41):
# cfi_startproc
+ _CET_ENDBR
/* Rearrange the stack such that %ecx is the first argument.
This means moving the return address. */
popl %edx
diff --git a/src/x86/unix64.S b/src/x86/unix64.S
index 41563f5..ee3c04f 100644
--- a/src/x86/unix64.S
+++ b/src/x86/unix64.S
@@ -42,7 +42,11 @@
#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__))
# define E(BASE, X) .balign 8
#else
-# define E(BASE, X) .balign 8; .org BASE + X * 8
+# ifdef __CET__
+# define E(BASE, X) .balign 8; .org BASE + X * 16
+# else
+# define E(BASE, X) .balign 8; .org BASE + X * 8
+# endif
#endif
/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
@@ -58,6 +62,7 @@
C(ffi_call_unix64):
L(UW0):
+ _CET_ENDBR
movq (%rsp), %r10 /* Load return address. */
leaq (%rdi, %rsi), %rax /* Find local stack base. */
movq %rdx, (%rax) /* Save flags. */
@@ -116,6 +121,11 @@ L(UW2):
movzbl %cl, %r10d
leaq L(store_table)(%rip), %r11
ja L(sa)
+#ifdef __CET__
+ /* NB: Originally, each slot is 8 byte. 4 bytes of ENDBR64 +
+ 4 bytes NOP padding double slot size to 16 bytes. */
+ addl %r10d, %r10d
+#endif
leaq (%r11, %r10, 8), %r10
/* Prep for the structure cases: scratch area in redzone. */
@@ -125,57 +135,73 @@ L(UW2):
.balign 8
L(store_table):
E(L(store_table), UNIX64_RET_VOID)
+ _CET_ENDBR
ret
E(L(store_table), UNIX64_RET_UINT8)
+ _CET_ENDBR
movzbl %al, %eax
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_UINT16)
+ _CET_ENDBR
movzwl %ax, %eax
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_UINT32)
+ _CET_ENDBR
movl %eax, %eax
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_SINT8)
+ _CET_ENDBR
movsbq %al, %rax
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_SINT16)
+ _CET_ENDBR
movswq %ax, %rax
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_SINT32)
+ _CET_ENDBR
cltq
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_INT64)
+ _CET_ENDBR
movq %rax, (%rdi)
ret
E(L(store_table), UNIX64_RET_XMM32)
+ _CET_ENDBR
movd %xmm0, (%rdi)
ret
E(L(store_table), UNIX64_RET_XMM64)
+ _CET_ENDBR
movq %xmm0, (%rdi)
ret
E(L(store_table), UNIX64_RET_X87)
+ _CET_ENDBR
fstpt (%rdi)
ret
E(L(store_table), UNIX64_RET_X87_2)
+ _CET_ENDBR
fstpt (%rdi)
fstpt 16(%rdi)
ret
E(L(store_table), UNIX64_RET_ST_XMM0_RAX)
+ _CET_ENDBR
movq %rax, 8(%rsi)
jmp L(s3)
E(L(store_table), UNIX64_RET_ST_RAX_XMM0)
+ _CET_ENDBR
movq %xmm0, 8(%rsi)
jmp L(s2)
E(L(store_table), UNIX64_RET_ST_XMM0_XMM1)
+ _CET_ENDBR
movq %xmm1, 8(%rsi)
jmp L(s3)
E(L(store_table), UNIX64_RET_ST_RAX_RDX)
+ _CET_ENDBR
movq %rdx, 8(%rsi)
L(s2):
movq %rax, (%rsi)
@@ -227,6 +253,7 @@ ENDF(C(ffi_call_unix64))
C(ffi_closure_unix64_sse):
L(UW5):
+ _CET_ENDBR
subq $ffi_closure_FS, %rsp
L(UW6):
/* cfi_adjust_cfa_offset(ffi_closure_FS) */
@@ -250,6 +277,7 @@ ENDF(C(ffi_closure_unix64_sse))
C(ffi_closure_unix64):
L(UW8):
+ _CET_ENDBR
subq $ffi_closure_FS, %rsp
L(UW9):
/* cfi_adjust_cfa_offset(ffi_closure_FS) */
@@ -286,6 +314,11 @@ L(UW10):
movzbl %al, %r10d
leaq L(load_table)(%rip), %r11
ja L(la)
+#ifdef __CET__
+ /* NB: Originally, each slot is 8 byte. 4 bytes of ENDBR64 +
+ 4 bytes NOP padding double slot size to 16 bytes. */
+ addl %r10d, %r10d
+#endif
leaq (%r11, %r10, 8), %r10
leaq ffi_closure_RED_RVALUE(%rsp), %rsi
jmp *%r10
@@ -293,51 +326,67 @@ L(UW10):
.balign 8
L(load_table):
E(L(load_table), UNIX64_RET_VOID)
+ _CET_ENDBR
ret
E(L(load_table), UNIX64_RET_UINT8)
+ _CET_ENDBR
movzbl (%rsi), %eax
ret
E(L(load_table), UNIX64_RET_UINT16)
+ _CET_ENDBR
movzwl (%rsi), %eax
ret
E(L(load_table), UNIX64_RET_UINT32)
+ _CET_ENDBR
movl (%rsi), %eax
ret
E(L(load_table), UNIX64_RET_SINT8)
+ _CET_ENDBR
movsbl (%rsi), %eax
ret
E(L(load_table), UNIX64_RET_SINT16)
+ _CET_ENDBR
movswl (%rsi), %eax
ret
E(L(load_table), UNIX64_RET_SINT32)
+ _CET_ENDBR
movl (%rsi), %eax
ret
E(L(load_table), UNIX64_RET_INT64)
+ _CET_ENDBR
movq (%rsi), %rax
ret
E(L(load_table), UNIX64_RET_XMM32)
+ _CET_ENDBR
movd (%rsi), %xmm0
ret
E(L(load_table), UNIX64_RET_XMM64)
+ _CET_ENDBR
movq (%rsi), %xmm0
ret
E(L(load_table), UNIX64_RET_X87)
+ _CET_ENDBR
fldt (%rsi)
ret
E(L(load_table), UNIX64_RET_X87_2)
+ _CET_ENDBR
fldt 16(%rsi)
fldt (%rsi)
ret
E(L(load_table), UNIX64_RET_ST_XMM0_RAX)
+ _CET_ENDBR
movq 8(%rsi), %rax
jmp L(l3)
E(L(load_table), UNIX64_RET_ST_RAX_XMM0)
+ _CET_ENDBR
movq 8(%rsi), %xmm0
jmp L(l2)
E(L(load_table), UNIX64_RET_ST_XMM0_XMM1)
+ _CET_ENDBR
movq 8(%rsi), %xmm1
jmp L(l3)
E(L(load_table), UNIX64_RET_ST_RAX_RDX)
+ _CET_ENDBR
movq 8(%rsi), %rdx
L(l2):
movq (%rsi), %rax
@@ -358,6 +407,7 @@ ENDF(C(ffi_closure_unix64))
C(ffi_go_closure_unix64_sse):
L(UW12):
+ _CET_ENDBR
subq $ffi_closure_FS, %rsp
L(UW13):
/* cfi_adjust_cfa_offset(ffi_closure_FS) */
@@ -381,6 +431,7 @@ ENDF(C(ffi_go_closure_unix64_sse))
C(ffi_go_closure_unix64):
L(UW15):
+ _CET_ENDBR
subq $ffi_closure_FS, %rsp
L(UW16):
/* cfi_adjust_cfa_offset(ffi_closure_FS) */
@@ -424,7 +475,12 @@ EHFrame0:
#endif
/* Simplify advancing between labels. Assume DW_CFA_advance_loc1 fits. */
-#define ADV(N, P) .byte 2, L(N)-L(P)
+#ifdef __CET__
+/* Use DW_CFA_advance_loc2 when IBT is enabled. */
+# define ADV(N, P) .byte 3; .2byte L(N)-L(P)
+#else
+# define ADV(N, P) .byte 2, L(N)-L(P)
+#endif
.balign 8
L(CIE):
diff --git a/src/x86/win64.S b/src/x86/win64.S
index 2c334c8..57c0e65 100644
--- a/src/x86/win64.S
+++ b/src/x86/win64.S
@@ -48,6 +48,7 @@
SEH(.seh_proc ffi_call_win64)
C(ffi_call_win64):
cfi_startproc
+ _CET_ENDBR
/* Set up the local stack frame and install it in rbp/rsp. */
movq (%rsp), %rax
movq %rbp, (arg1)
@@ -80,7 +81,7 @@ C(ffi_call_win64):
cmpl $FFI_TYPE_SMALL_STRUCT_4B, %ecx
leaq (%r10, %rcx, 8), %r10
ja 99f
- jmp *%r10
+ _CET_NOTRACK jmp *%r10
/* Below, we're space constrained most of the time. Thus we eschew the
modern "mov, pop, ret" sequence (5 bytes) for "leave, ret" (2 bytes). */
@@ -176,6 +177,7 @@ E(0b, FFI_TYPE_SMALL_STRUCT_4B)
SEH(.seh_proc ffi_go_closure_win64)
C(ffi_go_closure_win64):
cfi_startproc
+ _CET_ENDBR
/* Save all integer arguments into the incoming reg stack space. */
movq %rcx, 8(%rsp)
movq %rdx, 16(%rsp)
@@ -196,6 +198,7 @@ C(ffi_go_closure_win64):
SEH(.seh_proc ffi_closure_win64)
C(ffi_closure_win64):
cfi_startproc
+ _CET_ENDBR
/* Save all integer arguments into the incoming reg stack space. */
movq %rcx, 8(%rsp)
movq %rdx, 16(%rsp)