diff options
author | Moxie Bot <bot@moxielogic.com> | 2020-02-21 22:13:14 -0500 |
---|---|---|
committer | Moxie Bot <bot@moxielogic.com> | 2020-02-21 22:13:14 -0500 |
commit | 624c7a35f7e1d12f917453d6c657cd5947ac57f7 (patch) | |
tree | 238839705a70c7667ebd5a68a8b57b86e50e591f | |
parent | 4c775d7cd6e914c6a2f66465497106cff360aeb5 (diff) | |
parent | 7855656148b96c7070ec362d2a73af840025a2b7 (diff) | |
download | libffi-624c7a35f7e1d12f917453d6c657cd5947ac57f7.tar.gz |
Merge branch 'master' of github.com:/libffi/libffi
-rw-r--r-- | Makefile.am | 3 | ||||
-rw-r--r-- | libtool-version | 2 | ||||
-rw-r--r-- | src/powerpc/sysv.S | 12 | ||||
-rw-r--r-- | src/x86/ffi.c | 11 | ||||
-rw-r--r-- | src/x86/ffi64.c | 18 | ||||
-rw-r--r-- | src/x86/ffitarget.h | 17 | ||||
-rw-r--r-- | src/x86/ffiw64.c | 18 | ||||
-rw-r--r-- | src/x86/sysv.S | 17 | ||||
-rw-r--r-- | src/x86/unix64.S | 60 | ||||
-rw-r--r-- | src/x86/win64.S | 5 |
10 files changed, 125 insertions, 38 deletions
diff --git a/Makefile.am b/Makefile.am index 4fd6193..563e9f2 100644 --- a/Makefile.am +++ b/Makefile.am @@ -144,7 +144,8 @@ endif libffi_version_info = -version-info `grep -v '^\#' $(srcdir)/libtool-version` libffi.map: $(top_srcdir)/libffi.map.in - $(COMPILE) -D$(TARGET) -E -x assembler-with-cpp -o $@ $< + $(COMPILE) -D$(TARGET) -DGENERATE_LIBFFI_MAP \ + -E -x assembler-with-cpp -o $@ $< libffi_la_LDFLAGS = -no-undefined $(libffi_version_info) $(libffi_version_script) $(LTLDFLAGS) $(AM_LTLDFLAGS) libffi_la_DEPENDENCIES = $(libffi_la_LIBADD) $(libffi_version_dep) diff --git a/libtool-version b/libtool-version index e4f5aa2..607fee5 100644 --- a/libtool-version +++ b/libtool-version @@ -26,4 +26,4 @@ # release, then set age to 0. # # CURRENT:REVISION:AGE -8:0:1 +9:0:1 diff --git a/src/powerpc/sysv.S b/src/powerpc/sysv.S index 1474ce7..df97734 100644 --- a/src/powerpc/sysv.S +++ b/src/powerpc/sysv.S @@ -104,17 +104,16 @@ ENTRY(ffi_call_SYSV) bctrl /* Now, deal with the return value. */ - mtcrf 0x01,%r31 /* cr7 */ + mtcrf 0x03,%r31 /* cr6-cr7 */ bt- 31,L(small_struct_return_value) bt- 30,L(done_return_value) #ifndef __NO_FPRS__ bt- 29,L(fp_return_value) #endif stw %r3,0(%r30) - bf+ 28,L(done_return_value) + bf+ 27,L(done_return_value) stw %r4,4(%r30) - mtcrf 0x02,%r31 /* cr6 */ - bf 27,L(done_return_value) + bf 26,L(done_return_value) stw %r5,8(%r30) stw %r6,12(%r30) /* Fall through... */ @@ -145,10 +144,9 @@ L(done_return_value): #ifndef __NO_FPRS__ L(fp_return_value): .cfi_restore_state - bf 28,L(float_return_value) + bf 27,L(float_return_value) stfd %f1,0(%r30) - mtcrf 0x02,%r31 /* cr6 */ - bf 27,L(done_return_value) + bf 26,L(done_return_value) stfd %f2,8(%r30) b L(done_return_value) L(float_return_value): diff --git a/src/x86/ffi.c b/src/x86/ffi.c index 9a59218..e247322 100644 --- a/src/x86/ffi.c +++ b/src/x86/ffi.c @@ -557,13 +557,16 @@ ffi_prep_closure_loc (ffi_closure* closure, return FFI_BAD_ABI; } + /* endbr32. */ + *(UINT32 *) tramp = 0xfb1e0ff3; + /* movl or pushl immediate. */ - tramp[0] = op; - *(void **)(tramp + 1) = codeloc; + tramp[4] = op; + *(void **)(tramp + 5) = codeloc; /* jmp dest */ - tramp[5] = 0xe9; - *(unsigned *)(tramp + 6) = (unsigned)dest - ((unsigned)codeloc + 10); + tramp[9] = 0xe9; + *(unsigned *)(tramp + 10) = (unsigned)dest - ((unsigned)codeloc + 10); closure->cif = cif; closure->fun = fun; diff --git a/src/x86/ffi64.c b/src/x86/ffi64.c index dec331c..ed82e23 100644 --- a/src/x86/ffi64.c +++ b/src/x86/ffi64.c @@ -728,13 +728,15 @@ ffi_prep_closure_loc (ffi_closure* closure, void *user_data, void *codeloc) { - static const unsigned char trampoline[16] = { - /* leaq -0x7(%rip),%r10 # 0x0 */ - 0x4c, 0x8d, 0x15, 0xf9, 0xff, 0xff, 0xff, - /* jmpq *0x3(%rip) # 0x10 */ - 0xff, 0x25, 0x03, 0x00, 0x00, 0x00, - /* nopl (%rax) */ - 0x0f, 0x1f, 0x00 + static const unsigned char trampoline[24] = { + /* endbr64 */ + 0xf3, 0x0f, 0x1e, 0xfa, + /* leaq -0xb(%rip),%r10 # 0x0 */ + 0x4c, 0x8d, 0x15, 0xf5, 0xff, 0xff, 0xff, + /* jmpq *0x7(%rip) # 0x18 */ + 0xff, 0x25, 0x07, 0x00, 0x00, 0x00, + /* nopl 0(%rax) */ + 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00 }; void (*dest)(void); char *tramp = closure->tramp; @@ -752,7 +754,7 @@ ffi_prep_closure_loc (ffi_closure* closure, dest = ffi_closure_unix64; memcpy (tramp, trampoline, sizeof(trampoline)); - *(UINT64 *)(tramp + 16) = (uintptr_t)dest; + *(UINT64 *)(tramp + sizeof (trampoline)) = (uintptr_t)dest; closure->cif = cif; closure->fun = fun; diff --git a/src/x86/ffitarget.h b/src/x86/ffitarget.h index 85ccedf..a34f3e5 100644 --- a/src/x86/ffitarget.h +++ b/src/x86/ffitarget.h @@ -136,12 +136,25 @@ typedef enum ffi_abi { #if defined (X86_64) || defined(X86_WIN64) \ || (defined (__x86_64__) && defined (X86_DARWIN)) -# define FFI_TRAMPOLINE_SIZE 24 +/* 4 bytes of ENDBR64 + 7 bytes of LEA + 6 bytes of JMP + 7 bytes of NOP + + 8 bytes of pointer. */ +# define FFI_TRAMPOLINE_SIZE 32 # define FFI_NATIVE_RAW_API 0 #else -# define FFI_TRAMPOLINE_SIZE 12 +/* 4 bytes of ENDBR32 + 5 bytes of MOV + 5 bytes of JMP + 2 unused + bytes. */ +# define FFI_TRAMPOLINE_SIZE 16 # define FFI_NATIVE_RAW_API 1 /* x86 has native raw api support */ #endif +#if !defined(GENERATE_LIBFFI_MAP) && defined(__ASSEMBLER__) \ + && defined(__CET__) +# include <cet.h> +# define _CET_NOTRACK notrack +#else +# define _CET_ENDBR +# define _CET_NOTRACK +#endif + #endif diff --git a/src/x86/ffiw64.c b/src/x86/ffiw64.c index b68f69c..034dffd 100644 --- a/src/x86/ffiw64.c +++ b/src/x86/ffiw64.c @@ -196,13 +196,15 @@ EFI64(ffi_prep_closure_loc)(ffi_closure* closure, void *user_data, void *codeloc) { - static const unsigned char trampoline[16] = { - /* leaq -0x7(%rip),%r10 # 0x0 */ - 0x4c, 0x8d, 0x15, 0xf9, 0xff, 0xff, 0xff, - /* jmpq *0x3(%rip) # 0x10 */ - 0xff, 0x25, 0x03, 0x00, 0x00, 0x00, - /* nopl (%rax) */ - 0x0f, 0x1f, 0x00 + static const unsigned char trampoline[FFI_TRAMPOLINE_SIZE - 8] = { + /* endbr64 */ + 0xf3, 0x0f, 0x1e, 0xfa, + /* leaq -0xb(%rip),%r10 # 0x0 */ + 0x4c, 0x8d, 0x15, 0xf5, 0xff, 0xff, 0xff, + /* jmpq *0x7(%rip) # 0x18 */ + 0xff, 0x25, 0x07, 0x00, 0x00, 0x00, + /* nopl 0(%rax) */ + 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00 }; char *tramp = closure->tramp; @@ -216,7 +218,7 @@ EFI64(ffi_prep_closure_loc)(ffi_closure* closure, } memcpy (tramp, trampoline, sizeof(trampoline)); - *(UINT64 *)(tramp + 16) = (uintptr_t)ffi_closure_win64; + *(UINT64 *)(tramp + sizeof (trampoline)) = (uintptr_t)ffi_closure_win64; closure->cif = cif; closure->fun = fun; diff --git a/src/x86/sysv.S b/src/x86/sysv.S index 7c9598c..6d56483 100644 --- a/src/x86/sysv.S +++ b/src/x86/sysv.S @@ -92,6 +92,7 @@ ffi_call_i386: L(UW0): # cfi_startproc + _CET_ENDBR #if !HAVE_FASTCALL movl 4(%esp), %ecx movl 8(%esp), %edx @@ -133,7 +134,7 @@ L(pc1): leal L(store_table)(,%ecx, 8), %ebx #endif movl 16(%ebp), %ecx /* load result address */ - jmp *%ebx + _CET_NOTRACK jmp *%ebx .balign 8 L(store_table): @@ -256,7 +257,7 @@ ENDF(ffi_call_i386) andl $X86_RET_TYPE_MASK, %eax; \ leal L(C1(load_table,N))(, %eax, 8), %edx; \ movl closure_CF(%esp), %eax; /* optimiztic load */ \ - jmp *%edx + _CET_NOTRACK jmp *%edx #ifdef __PIC__ # if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE @@ -267,7 +268,7 @@ ENDF(ffi_call_i386) L(C1(pc,N)): \ leal L(C1(load_table,N))-L(C1(pc,N))(%edx, %eax, 8), %edx; \ movl closure_CF(%esp), %eax; /* optimiztic load */ \ - jmp *%edx + _CET_NOTRACK jmp *%edx # else # define FFI_CLOSURE_CALL_INNER_SAVE_EBX # undef FFI_CLOSURE_CALL_INNER @@ -286,7 +287,7 @@ L(C1(UW,UWN)): \ L(C1(UW,UWN)): \ /* cfi_restore(%ebx); */ \ movl closure_CF(%esp), %eax; /* optimiztic load */ \ - jmp *%edx + _CET_NOTRACK jmp *%edx # endif /* DARWIN || HIDDEN */ #endif /* __PIC__ */ @@ -296,6 +297,7 @@ L(C1(UW,UWN)): \ C(ffi_go_closure_EAX): L(UW6): # cfi_startproc + _CET_ENDBR subl $closure_FS, %esp L(UW7): # cfi_def_cfa_offset(closure_FS + 4) @@ -316,6 +318,7 @@ ENDF(C(ffi_go_closure_EAX)) C(ffi_go_closure_ECX): L(UW9): # cfi_startproc + _CET_ENDBR subl $closure_FS, %esp L(UW10): # cfi_def_cfa_offset(closure_FS + 4) @@ -340,6 +343,7 @@ ENDF(C(ffi_go_closure_ECX)) C(ffi_closure_i386): L(UW12): # cfi_startproc + _CET_ENDBR subl $closure_FS, %esp L(UW13): # cfi_def_cfa_offset(closure_FS + 4) @@ -423,6 +427,7 @@ ENDF(C(ffi_closure_i386)) C(ffi_go_closure_STDCALL): L(UW21): # cfi_startproc + _CET_ENDBR subl $closure_FS, %esp L(UW22): # cfi_def_cfa_offset(closure_FS + 4) @@ -448,6 +453,7 @@ L(UW24): # cfi_startproc # cfi_def_cfa(%esp, 8) # cfi_offset(%eip, -8) + _CET_ENDBR subl $closure_FS-4, %esp L(UW25): # cfi_def_cfa_offset(closure_FS + 4) @@ -470,6 +476,7 @@ ENDF(C(ffi_closure_REGISTER)) C(ffi_closure_STDCALL): L(UW27): # cfi_startproc + _CET_ENDBR subl $closure_FS, %esp L(UW28): # cfi_def_cfa_offset(closure_FS + 4) @@ -576,6 +583,7 @@ ENDF(C(ffi_closure_STDCALL)) C(ffi_closure_raw_SYSV): L(UW32): # cfi_startproc + _CET_ENDBR subl $raw_closure_S_FS, %esp L(UW33): # cfi_def_cfa_offset(raw_closure_S_FS + 4) @@ -679,6 +687,7 @@ ENDF(C(ffi_closure_raw_SYSV)) C(ffi_closure_raw_THISCALL): L(UW41): # cfi_startproc + _CET_ENDBR /* Rearrange the stack such that %ecx is the first argument. This means moving the return address. */ popl %edx diff --git a/src/x86/unix64.S b/src/x86/unix64.S index 41563f5..ee3c04f 100644 --- a/src/x86/unix64.S +++ b/src/x86/unix64.S @@ -42,7 +42,11 @@ #if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__)) # define E(BASE, X) .balign 8 #else -# define E(BASE, X) .balign 8; .org BASE + X * 8 +# ifdef __CET__ +# define E(BASE, X) .balign 8; .org BASE + X * 16 +# else +# define E(BASE, X) .balign 8; .org BASE + X * 8 +# endif #endif /* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags, @@ -58,6 +62,7 @@ C(ffi_call_unix64): L(UW0): + _CET_ENDBR movq (%rsp), %r10 /* Load return address. */ leaq (%rdi, %rsi), %rax /* Find local stack base. */ movq %rdx, (%rax) /* Save flags. */ @@ -116,6 +121,11 @@ L(UW2): movzbl %cl, %r10d leaq L(store_table)(%rip), %r11 ja L(sa) +#ifdef __CET__ + /* NB: Originally, each slot is 8 byte. 4 bytes of ENDBR64 + + 4 bytes NOP padding double slot size to 16 bytes. */ + addl %r10d, %r10d +#endif leaq (%r11, %r10, 8), %r10 /* Prep for the structure cases: scratch area in redzone. */ @@ -125,57 +135,73 @@ L(UW2): .balign 8 L(store_table): E(L(store_table), UNIX64_RET_VOID) + _CET_ENDBR ret E(L(store_table), UNIX64_RET_UINT8) + _CET_ENDBR movzbl %al, %eax movq %rax, (%rdi) ret E(L(store_table), UNIX64_RET_UINT16) + _CET_ENDBR movzwl %ax, %eax movq %rax, (%rdi) ret E(L(store_table), UNIX64_RET_UINT32) + _CET_ENDBR movl %eax, %eax movq %rax, (%rdi) ret E(L(store_table), UNIX64_RET_SINT8) + _CET_ENDBR movsbq %al, %rax movq %rax, (%rdi) ret E(L(store_table), UNIX64_RET_SINT16) + _CET_ENDBR movswq %ax, %rax movq %rax, (%rdi) ret E(L(store_table), UNIX64_RET_SINT32) + _CET_ENDBR cltq movq %rax, (%rdi) ret E(L(store_table), UNIX64_RET_INT64) + _CET_ENDBR movq %rax, (%rdi) ret E(L(store_table), UNIX64_RET_XMM32) + _CET_ENDBR movd %xmm0, (%rdi) ret E(L(store_table), UNIX64_RET_XMM64) + _CET_ENDBR movq %xmm0, (%rdi) ret E(L(store_table), UNIX64_RET_X87) + _CET_ENDBR fstpt (%rdi) ret E(L(store_table), UNIX64_RET_X87_2) + _CET_ENDBR fstpt (%rdi) fstpt 16(%rdi) ret E(L(store_table), UNIX64_RET_ST_XMM0_RAX) + _CET_ENDBR movq %rax, 8(%rsi) jmp L(s3) E(L(store_table), UNIX64_RET_ST_RAX_XMM0) + _CET_ENDBR movq %xmm0, 8(%rsi) jmp L(s2) E(L(store_table), UNIX64_RET_ST_XMM0_XMM1) + _CET_ENDBR movq %xmm1, 8(%rsi) jmp L(s3) E(L(store_table), UNIX64_RET_ST_RAX_RDX) + _CET_ENDBR movq %rdx, 8(%rsi) L(s2): movq %rax, (%rsi) @@ -227,6 +253,7 @@ ENDF(C(ffi_call_unix64)) C(ffi_closure_unix64_sse): L(UW5): + _CET_ENDBR subq $ffi_closure_FS, %rsp L(UW6): /* cfi_adjust_cfa_offset(ffi_closure_FS) */ @@ -250,6 +277,7 @@ ENDF(C(ffi_closure_unix64_sse)) C(ffi_closure_unix64): L(UW8): + _CET_ENDBR subq $ffi_closure_FS, %rsp L(UW9): /* cfi_adjust_cfa_offset(ffi_closure_FS) */ @@ -286,6 +314,11 @@ L(UW10): movzbl %al, %r10d leaq L(load_table)(%rip), %r11 ja L(la) +#ifdef __CET__ + /* NB: Originally, each slot is 8 byte. 4 bytes of ENDBR64 + + 4 bytes NOP padding double slot size to 16 bytes. */ + addl %r10d, %r10d +#endif leaq (%r11, %r10, 8), %r10 leaq ffi_closure_RED_RVALUE(%rsp), %rsi jmp *%r10 @@ -293,51 +326,67 @@ L(UW10): .balign 8 L(load_table): E(L(load_table), UNIX64_RET_VOID) + _CET_ENDBR ret E(L(load_table), UNIX64_RET_UINT8) + _CET_ENDBR movzbl (%rsi), %eax ret E(L(load_table), UNIX64_RET_UINT16) + _CET_ENDBR movzwl (%rsi), %eax ret E(L(load_table), UNIX64_RET_UINT32) + _CET_ENDBR movl (%rsi), %eax ret E(L(load_table), UNIX64_RET_SINT8) + _CET_ENDBR movsbl (%rsi), %eax ret E(L(load_table), UNIX64_RET_SINT16) + _CET_ENDBR movswl (%rsi), %eax ret E(L(load_table), UNIX64_RET_SINT32) + _CET_ENDBR movl (%rsi), %eax ret E(L(load_table), UNIX64_RET_INT64) + _CET_ENDBR movq (%rsi), %rax ret E(L(load_table), UNIX64_RET_XMM32) + _CET_ENDBR movd (%rsi), %xmm0 ret E(L(load_table), UNIX64_RET_XMM64) + _CET_ENDBR movq (%rsi), %xmm0 ret E(L(load_table), UNIX64_RET_X87) + _CET_ENDBR fldt (%rsi) ret E(L(load_table), UNIX64_RET_X87_2) + _CET_ENDBR fldt 16(%rsi) fldt (%rsi) ret E(L(load_table), UNIX64_RET_ST_XMM0_RAX) + _CET_ENDBR movq 8(%rsi), %rax jmp L(l3) E(L(load_table), UNIX64_RET_ST_RAX_XMM0) + _CET_ENDBR movq 8(%rsi), %xmm0 jmp L(l2) E(L(load_table), UNIX64_RET_ST_XMM0_XMM1) + _CET_ENDBR movq 8(%rsi), %xmm1 jmp L(l3) E(L(load_table), UNIX64_RET_ST_RAX_RDX) + _CET_ENDBR movq 8(%rsi), %rdx L(l2): movq (%rsi), %rax @@ -358,6 +407,7 @@ ENDF(C(ffi_closure_unix64)) C(ffi_go_closure_unix64_sse): L(UW12): + _CET_ENDBR subq $ffi_closure_FS, %rsp L(UW13): /* cfi_adjust_cfa_offset(ffi_closure_FS) */ @@ -381,6 +431,7 @@ ENDF(C(ffi_go_closure_unix64_sse)) C(ffi_go_closure_unix64): L(UW15): + _CET_ENDBR subq $ffi_closure_FS, %rsp L(UW16): /* cfi_adjust_cfa_offset(ffi_closure_FS) */ @@ -424,7 +475,12 @@ EHFrame0: #endif /* Simplify advancing between labels. Assume DW_CFA_advance_loc1 fits. */ -#define ADV(N, P) .byte 2, L(N)-L(P) +#ifdef __CET__ +/* Use DW_CFA_advance_loc2 when IBT is enabled. */ +# define ADV(N, P) .byte 3; .2byte L(N)-L(P) +#else +# define ADV(N, P) .byte 2, L(N)-L(P) +#endif .balign 8 L(CIE): diff --git a/src/x86/win64.S b/src/x86/win64.S index 2c334c8..57c0e65 100644 --- a/src/x86/win64.S +++ b/src/x86/win64.S @@ -48,6 +48,7 @@ SEH(.seh_proc ffi_call_win64) C(ffi_call_win64): cfi_startproc + _CET_ENDBR /* Set up the local stack frame and install it in rbp/rsp. */ movq (%rsp), %rax movq %rbp, (arg1) @@ -80,7 +81,7 @@ C(ffi_call_win64): cmpl $FFI_TYPE_SMALL_STRUCT_4B, %ecx leaq (%r10, %rcx, 8), %r10 ja 99f - jmp *%r10 + _CET_NOTRACK jmp *%r10 /* Below, we're space constrained most of the time. Thus we eschew the modern "mov, pop, ret" sequence (5 bytes) for "leave, ret" (2 bytes). */ @@ -176,6 +177,7 @@ E(0b, FFI_TYPE_SMALL_STRUCT_4B) SEH(.seh_proc ffi_go_closure_win64) C(ffi_go_closure_win64): cfi_startproc + _CET_ENDBR /* Save all integer arguments into the incoming reg stack space. */ movq %rcx, 8(%rsp) movq %rdx, 16(%rsp) @@ -196,6 +198,7 @@ C(ffi_go_closure_win64): SEH(.seh_proc ffi_closure_win64) C(ffi_closure_win64): cfi_startproc + _CET_ENDBR /* Save all integer arguments into the incoming reg stack space. */ movq %rcx, 8(%rsp) movq %rdx, 16(%rsp) |