summaryrefslogtreecommitdiff
path: root/REORG.TODO/sysdeps/x86_64/dl-trampoline.S
diff options
context:
space:
mode:
Diffstat (limited to 'REORG.TODO/sysdeps/x86_64/dl-trampoline.S')
-rw-r--r--REORG.TODO/sysdeps/x86_64/dl-trampoline.S147
1 files changed, 147 insertions, 0 deletions
diff --git a/REORG.TODO/sysdeps/x86_64/dl-trampoline.S b/REORG.TODO/sysdeps/x86_64/dl-trampoline.S
new file mode 100644
index 0000000000..c14c61aa58
--- /dev/null
+++ b/REORG.TODO/sysdeps/x86_64/dl-trampoline.S
@@ -0,0 +1,147 @@
+/* PLT trampolines. x86-64 version.
+ Copyright (C) 2004-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+#include <sysdep.h>
+#include <cpu-features.h>
+#include <link-defines.h>
+
+#ifndef DL_STACK_ALIGNMENT
+/* Due to GCC bug:
+
+ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066
+
+ __tls_get_addr may be called with 8-byte stack alignment. Although
+ this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume
+ that stack will be always aligned at 16 bytes. We use unaligned
+ 16-byte move to load and store SSE registers, which has no penalty
+ on modern processors if stack is 16-byte aligned. */
+# define DL_STACK_ALIGNMENT 8
+#endif
+
+#ifndef DL_RUNTIME_UNALIGNED_VEC_SIZE
+/* The maximum size in bytes of unaligned vector load and store in the
+ dynamic linker. Since SSE optimized memory/string functions with
+ aligned SSE register load and store are used in the dynamic linker,
+ we must set this to 8 so that _dl_runtime_resolve_sse will align the
+ stack before calling _dl_fixup. */
+# define DL_RUNTIME_UNALIGNED_VEC_SIZE 8
+#endif
+
+/* True if _dl_runtime_resolve should align stack to VEC_SIZE bytes. */
+#define DL_RUNTIME_RESOLVE_REALIGN_STACK \
+ (VEC_SIZE > DL_STACK_ALIGNMENT \
+ && VEC_SIZE > DL_RUNTIME_UNALIGNED_VEC_SIZE)
+
+/* Align vector register save area to 16 bytes. */
+#define REGISTER_SAVE_VEC_OFF 0
+
+/* Area on stack to save and restore registers used for parameter
+ passing when calling _dl_fixup. */
+#ifdef __ILP32__
+# define REGISTER_SAVE_RAX (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 8)
+# define PRESERVE_BND_REGS_PREFIX
+#else
+/* Align bound register save area to 16 bytes. */
+# define REGISTER_SAVE_BND0 (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 8)
+# define REGISTER_SAVE_BND1 (REGISTER_SAVE_BND0 + 16)
+# define REGISTER_SAVE_BND2 (REGISTER_SAVE_BND1 + 16)
+# define REGISTER_SAVE_BND3 (REGISTER_SAVE_BND2 + 16)
+# define REGISTER_SAVE_RAX (REGISTER_SAVE_BND3 + 16)
+# ifdef HAVE_MPX_SUPPORT
+# define PRESERVE_BND_REGS_PREFIX bnd
+# else
+# define PRESERVE_BND_REGS_PREFIX .byte 0xf2
+# endif
+#endif
+#define REGISTER_SAVE_RCX (REGISTER_SAVE_RAX + 8)
+#define REGISTER_SAVE_RDX (REGISTER_SAVE_RCX + 8)
+#define REGISTER_SAVE_RSI (REGISTER_SAVE_RDX + 8)
+#define REGISTER_SAVE_RDI (REGISTER_SAVE_RSI + 8)
+#define REGISTER_SAVE_R8 (REGISTER_SAVE_RDI + 8)
+#define REGISTER_SAVE_R9 (REGISTER_SAVE_R8 + 8)
+
+#define RESTORE_AVX
+
+#define VEC_SIZE 64
+#define VMOVA vmovdqa64
+#if DL_RUNTIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT
+# define VMOV vmovdqa64
+#else
+# define VMOV vmovdqu64
+#endif
+#define VEC(i) zmm##i
+#define _dl_runtime_resolve _dl_runtime_resolve_avx512
+#define _dl_runtime_profile _dl_runtime_profile_avx512
+#include "dl-trampoline.h"
+#undef _dl_runtime_resolve
+#undef _dl_runtime_profile
+#undef VEC
+#undef VMOV
+#undef VMOVA
+#undef VEC_SIZE
+
+#define VEC_SIZE 32
+#define VMOVA vmovdqa
+#if DL_RUNTIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT
+# define VMOV vmovdqa
+#else
+# define VMOV vmovdqu
+#endif
+#define VEC(i) ymm##i
+#define _dl_runtime_resolve _dl_runtime_resolve_avx
+#define _dl_runtime_resolve_opt _dl_runtime_resolve_avx_opt
+#define _dl_runtime_profile _dl_runtime_profile_avx
+#include "dl-trampoline.h"
+#undef _dl_runtime_resolve
+#undef _dl_runtime_resolve_opt
+#undef _dl_runtime_profile
+#undef VEC
+#undef VMOV
+#undef VMOVA
+#undef VEC_SIZE
+
+/* movaps/movups is 1-byte shorter. */
+#define VEC_SIZE 16
+#define VMOVA movaps
+#if DL_RUNTIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT
+# define VMOV movaps
+#else
+# define VMOV movups
+#endif
+#define VEC(i) xmm##i
+#define _dl_runtime_resolve _dl_runtime_resolve_sse
+#define _dl_runtime_profile _dl_runtime_profile_sse
+#undef RESTORE_AVX
+#include "dl-trampoline.h"
+#undef _dl_runtime_resolve
+#undef _dl_runtime_profile
+#undef VMOV
+#undef VMOVA
+
+/* Used by _dl_runtime_resolve_avx_opt/_dl_runtime_resolve_avx512_opt
+ to preserve the full vector registers with zero upper bits. */
+#define VMOVA vmovdqa
+#if DL_RUNTIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT
+# define VMOV vmovdqa
+#else
+# define VMOV vmovdqu
+#endif
+#define _dl_runtime_resolve _dl_runtime_resolve_sse_vex
+#define _dl_runtime_resolve_opt _dl_runtime_resolve_avx512_opt
+#include "dl-trampoline.h"