diff options
Diffstat (limited to 'lib/xray/xray_trampoline_x86_64.S')
-rw-r--r-- | lib/xray/xray_trampoline_x86_64.S | 163 |
1 files changed, 102 insertions, 61 deletions
diff --git a/lib/xray/xray_trampoline_x86_64.S b/lib/xray/xray_trampoline_x86_64.S index b9fef6dad..ffbfb5c7e 100644 --- a/lib/xray/xray_trampoline_x86_64.S +++ b/lib/xray/xray_trampoline_x86_64.S @@ -13,42 +13,51 @@ // //===----------------------------------------------------------------------===// +#include "../builtins/assembly.h" + .macro SAVE_REGISTERS - subq $200, %rsp - movupd %xmm0, 184(%rsp) - movupd %xmm1, 168(%rsp) - movupd %xmm2, 152(%rsp) - movupd %xmm3, 136(%rsp) - movupd %xmm4, 120(%rsp) - movupd %xmm5, 104(%rsp) - movupd %xmm6, 88(%rsp) - movupd %xmm7, 72(%rsp) - movq %rdi, 64(%rsp) - movq %rax, 56(%rsp) - movq %rdx, 48(%rsp) - movq %rsi, 40(%rsp) - movq %rcx, 32(%rsp) - movq %r8, 24(%rsp) - movq %r9, 16(%rsp) + subq $192, %rsp + .cfi_def_cfa_offset 200 + // At this point, the stack pointer should be aligned to an 8-byte boundary, + // because any call instructions that come after this will add another 8 + // bytes and therefore align it to 16-bytes. + movq %rbp, 184(%rsp) + movupd %xmm0, 168(%rsp) + movupd %xmm1, 152(%rsp) + movupd %xmm2, 136(%rsp) + movupd %xmm3, 120(%rsp) + movupd %xmm4, 104(%rsp) + movupd %xmm5, 88(%rsp) + movupd %xmm6, 72(%rsp) + movupd %xmm7, 56(%rsp) + movq %rdi, 48(%rsp) + movq %rax, 40(%rsp) + movq %rdx, 32(%rsp) + movq %rsi, 24(%rsp) + movq %rcx, 16(%rsp) + movq %r8, 8(%rsp) + movq %r9, 0(%rsp) .endm .macro RESTORE_REGISTERS - movupd 184(%rsp), %xmm0 - movupd 168(%rsp), %xmm1 - movupd 152(%rsp), %xmm2 - movupd 136(%rsp), %xmm3 - movupd 120(%rsp), %xmm4 - movupd 104(%rsp), %xmm5 - movupd 88(%rsp) , %xmm6 - movupd 72(%rsp) , %xmm7 - movq 64(%rsp), %rdi - movq 56(%rsp), %rax - movq 48(%rsp), %rdx - movq 40(%rsp), %rsi - movq 32(%rsp), %rcx - movq 24(%rsp), %r8 - movq 16(%rsp), %r9 - addq $200, %rsp + movq 184(%rsp), %rbp + movupd 168(%rsp), %xmm0 + movupd 152(%rsp), %xmm1 + movupd 136(%rsp), %xmm2 + movupd 120(%rsp), %xmm3 + movupd 104(%rsp), %xmm4 + movupd 88(%rsp), %xmm5 + movupd 72(%rsp) , %xmm6 + movupd 56(%rsp) , %xmm7 + 
movq 48(%rsp), %rdi + movq 40(%rsp), %rax + movq 32(%rsp), %rdx + movq 24(%rsp), %rsi + movq 16(%rsp), %rcx + movq 8(%rsp), %r8 + movq 0(%rsp), %r9 + addq $192, %rsp + .cfi_def_cfa_offset 8 .endm .text @@ -62,8 +71,6 @@ __xray_FunctionEntry: .cfi_startproc - pushq %rbp - .cfi_def_cfa_offset 16 SAVE_REGISTERS // This load has to be atomic, it's concurrent with __xray_patch(). @@ -78,7 +85,6 @@ __xray_FunctionEntry: callq *%rax .Ltmp0: RESTORE_REGISTERS - popq %rbp retq .Ltmp1: .size __xray_FunctionEntry, .Ltmp1-__xray_FunctionEntry @@ -94,14 +100,13 @@ __xray_FunctionExit: // Save the important registers first. Since we're assuming that this // function is only jumped into, we only preserve the registers for // returning. - pushq %rbp - .cfi_def_cfa_offset 16 subq $56, %rsp - .cfi_def_cfa_offset 32 - movupd %xmm0, 40(%rsp) - movupd %xmm1, 24(%rsp) - movq %rax, 16(%rsp) - movq %rdx, 8(%rsp) + .cfi_def_cfa_offset 64 + movq %rbp, 48(%rsp) + movupd %xmm0, 32(%rsp) + movupd %xmm1, 16(%rsp) + movq %rax, 8(%rsp) + movq %rdx, 0(%rsp) movq _ZN6__xray19XRayPatchedFunctionE(%rip), %rax testq %rax,%rax je .Ltmp2 @@ -111,12 +116,13 @@ __xray_FunctionExit: callq *%rax .Ltmp2: // Restore the important registers. - movupd 40(%rsp), %xmm0 - movupd 24(%rsp), %xmm1 - movq 16(%rsp), %rax - movq 8(%rsp), %rdx + movq 48(%rsp), %rbp + movupd 32(%rsp), %xmm0 + movupd 16(%rsp), %xmm1 + movq 8(%rsp), %rax + movq 0(%rsp), %rdx addq $56, %rsp - popq %rbp + .cfi_def_cfa_offset 8 retq .Ltmp3: .size __xray_FunctionExit, .Ltmp3-__xray_FunctionExit @@ -129,12 +135,6 @@ __xray_FunctionExit: .type __xray_FunctionTailExit,@function __xray_FunctionTailExit: .cfi_startproc - // Save the important registers as in the entry trampoline, but indicate that - // this is an exit. In the future, we will introduce a new entry type that - // differentiates between a normal exit and a tail exit, but we'd have to do - // this and increment the version number for the header. 
- pushq %rbp - .cfi_def_cfa_offset 16 SAVE_REGISTERS movq _ZN6__xray19XRayPatchedFunctionE(%rip), %rax @@ -142,12 +142,11 @@ __xray_FunctionTailExit: je .Ltmp4 movl %r10d, %edi - movl $1, %esi + movl $2, %esi callq *%rax .Ltmp4: RESTORE_REGISTERS - popq %rbp retq .Ltmp5: .size __xray_FunctionTailExit, .Ltmp5-__xray_FunctionTailExit @@ -160,8 +159,6 @@ __xray_FunctionTailExit: .type __xray_ArgLoggerEntry,@function __xray_ArgLoggerEntry: .cfi_startproc - pushq %rbp - .cfi_def_cfa_offset 16 SAVE_REGISTERS // Again, these function pointer loads must be atomic; MOV is fine. @@ -175,16 +172,60 @@ __xray_ArgLoggerEntry: je .Larg1entryFail .Larg1entryLog: - movq %rdi, %rdx // first argument will become the third - xorq %rsi, %rsi // XRayEntryType::ENTRY into the second - movl %r10d, %edi // 32-bit function ID becomes the first + + // First argument will become the third + movq %rdi, %rdx + + // XRayEntryType::LOG_ARGS_ENTRY into the second + mov $0x3, %esi + + // 32-bit function ID becomes the first + movl %r10d, %edi callq *%rax .Larg1entryFail: RESTORE_REGISTERS - popq %rbp retq .Larg1entryEnd: .size __xray_ArgLoggerEntry, .Larg1entryEnd-__xray_ArgLoggerEntry .cfi_endproc + +//===----------------------------------------------------------------------===// + + .global __xray_CustomEvent + .align 16, 0x90 + .type __xray_CustomEvent,@function +__xray_CustomEvent: + .cfi_startproc + SAVE_REGISTERS + + // We take two arguments to this trampoline, which should be in rdi and rsi + // already. We also make sure that we stash %rax because we use that register + // to call the logging handler. + movq _ZN6__xray22XRayPatchedCustomEventE(%rip), %rax + testq %rax,%rax + je .LcustomEventCleanup + + // At this point we know that rdi and rsi already have the data, so we just + // call the logging handler, after aligning the stack to a 16-byte boundary. 
+ // The approach we're taking here uses additional stack space to stash the + // stack pointer twice before aligning the pointer to 16-bytes. If the stack + // was 8-byte aligned, it will become 16-byte aligned -- when restoring the + // pointer, we can always look 8 bytes above the current position to get + // either of the values we've stashed in the first place. + pushq %rsp + pushq (%rsp) + andq $-0x10, %rsp + callq *%rax + movq 8(%rsp), %rsp + +.LcustomEventCleanup: + RESTORE_REGISTERS + retq + +.Ltmp8: + .size __xray_CustomEvent, .Ltmp8-__xray_CustomEvent + .cfi_endproc + +NO_EXEC_STACK_DIRECTIVE |