summary | refs | log | tree | commit | diff
path: root/sysdeps/arm
diff options
context:
space:
mode:
author Roland McGrath <roland@hack.frob.com> 2014-10-22 14:20:35 -0700
committer Roland McGrath <roland@hack.frob.com> 2014-10-22 14:20:35 -0700
commit 8c2b1ed8bbd20d35314c2a602b903159fa567ffb (patch)
tree 4a9e06786fdb32c5558cdae218af3b4ff0b78af6 /sysdeps/arm
parent b5af9297d51a43f96c5be1bafab032184690dd6f (diff)
download glibc-8c2b1ed8bbd20d35314c2a602b903159fa567ffb.tar.gz
ARM: Use movw/movt more when available
Diffstat (limited to 'sysdeps/arm')
-rw-r--r-- sysdeps/arm/__longjmp.S 35
-rw-r--r-- sysdeps/arm/configure 52
-rw-r--r-- sysdeps/arm/configure.ac 44
-rw-r--r-- sysdeps/arm/setjmp.S 35
-rw-r--r-- sysdeps/arm/sysdep.h 78
5 files changed, 176 insertions, 68 deletions
diff --git a/sysdeps/arm/__longjmp.S b/sysdeps/arm/__longjmp.S
index 27d1b713de..a98395797a 100644
--- a/sysdeps/arm/__longjmp.S
+++ b/sysdeps/arm/__longjmp.S
@@ -77,21 +77,15 @@ ENTRY (__longjmp)
#ifdef NEED_HWCAP
# ifdef IS_IN_rtld
- ldr a4, 1f
- ldr a3, .Lrtld_local_ro
-0: add a4, pc, a4
- add a4, a4, a3
- ldr a4, [a4, #RTLD_GLOBAL_RO_DL_HWCAP_OFFSET]
+ LDST_PCREL (ldr, a4, a3, \
+ C_SYMBOL_NAME(_rtld_local_ro) \
+ + RTLD_GLOBAL_RO_DL_HWCAP_OFFSET)
# else
# ifdef PIC
- ldr a4, 1f
- ldr a3, .Lrtld_global_ro
-0: add a4, pc, a4
- ldr a4, [a4, a3]
- ldr a4, [a4, #RTLD_GLOBAL_RO_DL_HWCAP_OFFSET]
+ LDR_GLOBAL (a4, a3, C_SYMBOL_NAME(_rtld_global_ro), \
+ RTLD_GLOBAL_RO_DL_HWCAP_OFFSET)
# else
- ldr a4, .Lhwcap
- ldr a4, [a4, #0]
+ LDR_GLOBAL (a4, a3, C_SYMBOL_NAME(_dl_hwcap), 0)
# endif
# endif
#endif
@@ -138,21 +132,4 @@ ENTRY (__longjmp)
DO_RET(lr)
-#ifdef NEED_HWCAP
-# ifdef IS_IN_rtld
-1: .long _GLOBAL_OFFSET_TABLE_ - 0b - PC_OFS
-.Lrtld_local_ro:
- .long C_SYMBOL_NAME(_rtld_local_ro)(GOTOFF)
-# else
-# ifdef PIC
-1: .long _GLOBAL_OFFSET_TABLE_ - 0b - PC_OFS
-.Lrtld_global_ro:
- .long C_SYMBOL_NAME(_rtld_global_ro)(GOT)
-# else
-.Lhwcap:
- .long C_SYMBOL_NAME(_dl_hwcap)
-# endif
-# endif
-#endif
-
END (__longjmp)
diff --git a/sysdeps/arm/configure b/sysdeps/arm/configure
index 238b335f7d..45667cc522 100644
--- a/sysdeps/arm/configure
+++ b/sysdeps/arm/configure
@@ -150,8 +150,8 @@ else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#ifdef __ARM_PCS_VFP
- yes
- #endif
+ yes
+ #endif
_ACEOF
if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
@@ -211,6 +211,54 @@ else
have-arm-tls-desc = no"
fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether PC-relative relocs in movw/movt work properly" >&5
+$as_echo_n "checking whether PC-relative relocs in movw/movt work properly... " >&6; }
+if ${libc_cv_arm_pcrel_movw+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+
+cat > conftest.s <<\EOF
+ .syntax unified
+ .arm
+ .arch armv7-a
+
+ .text
+ .globl foo
+ .type foo,%function
+foo: movw r0, #:lower16:symbol - 1f - 8
+ movt r0, #:upper16:symbol - 1f - 8
+1: add r0, pc
+ @ And now a case with a local symbol.
+ movw r0, #:lower16:3f - 2f - 8
+ movt r0, #:upper16:3f - 2f - 8
+2: add r0, pc
+ bx lr
+
+.data
+ .globl symbol
+ .hidden symbol
+symbol: .long 23
+3: .long 17
+EOF
+libc_cv_arm_pcrel_movw=no
+${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS \
+ -nostartfiles -nostdlib -shared \
+ -o conftest.so conftest.s 1>&5 2>&5 &&
+LC_ALL=C $READELF -dr conftest.so > conftest.dr 2>&5 &&
+{
+ cat conftest.dr 1>&5
+ fgrep 'TEXTREL
+R_ARM_NONE' conftest.dr > /dev/null || libc_cv_arm_pcrel_movw=yes
+}
+rm -f conftest*
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_arm_pcrel_movw" >&5
+$as_echo "$libc_cv_arm_pcrel_movw" >&6; }
+if test $libc_cv_arm_pcrel_movw = yes; then
+ $as_echo "#define ARM_PCREL_MOVW_OK 1" >>confdefs.h
+
+fi
+
libc_cv_gcc_unwind_find_fde=no
# Remove -fno-unwind-tables that was added in sysdeps/arm/preconfigure.ac.
diff --git a/sysdeps/arm/configure.ac b/sysdeps/arm/configure.ac
index 86c0c08317..002b8ef365 100644
--- a/sysdeps/arm/configure.ac
+++ b/sysdeps/arm/configure.ac
@@ -17,8 +17,8 @@ dnl it. Until we do, don't define it.
AC_CACHE_CHECK([whether the compiler is using the ARM hard-float ABI],
[libc_cv_arm_pcs_vfp],
[AC_EGREP_CPP(yes,[#ifdef __ARM_PCS_VFP
- yes
- #endif
+ yes
+ #endif
], libc_cv_arm_pcs_vfp=yes, libc_cv_arm_pcs_vfp=no)])
if test $libc_cv_arm_pcs_vfp = yes; then
AC_DEFINE(HAVE_ARM_PCS_VFP)
@@ -40,6 +40,46 @@ else
LIBC_CONFIG_VAR([have-arm-tls-desc], [no])
fi
+AC_CACHE_CHECK([whether PC-relative relocs in movw/movt work properly],
+ libc_cv_arm_pcrel_movw, [
+cat > conftest.s <<\EOF
+ .syntax unified
+ .arm
+ .arch armv7-a
+
+ .text
+ .globl foo
+ .type foo,%function
+foo: movw r0, #:lower16:symbol - 1f - 8
+ movt r0, #:upper16:symbol - 1f - 8
+1: add r0, pc
+ @ And now a case with a local symbol.
+ movw r0, #:lower16:3f - 2f - 8
+ movt r0, #:upper16:3f - 2f - 8
+2: add r0, pc
+ bx lr
+
+.data
+ .globl symbol
+ .hidden symbol
+symbol: .long 23
+3: .long 17
+EOF
+libc_cv_arm_pcrel_movw=no
+${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS \
+ -nostartfiles -nostdlib -shared \
+ -o conftest.so conftest.s 1>&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD &&
+LC_ALL=C $READELF -dr conftest.so > conftest.dr 2>&AS_MESSAGE_LOG_FD &&
+{
+ cat conftest.dr 1>&AS_MESSAGE_LOG_FD
+ fgrep 'TEXTREL
+R_ARM_NONE' conftest.dr > /dev/null || libc_cv_arm_pcrel_movw=yes
+}
+rm -f conftest*])
+if test $libc_cv_arm_pcrel_movw = yes; then
+ AC_DEFINE([ARM_PCREL_MOVW_OK])
+fi
+
libc_cv_gcc_unwind_find_fde=no
# Remove -fno-unwind-tables that was added in sysdeps/arm/preconfigure.ac.
diff --git a/sysdeps/arm/setjmp.S b/sysdeps/arm/setjmp.S
index 17a16c9b5d..6f54ab3b70 100644
--- a/sysdeps/arm/setjmp.S
+++ b/sysdeps/arm/setjmp.S
@@ -58,21 +58,15 @@ ENTRY (__sigsetjmp)
#ifdef NEED_HWCAP
/* Check if we have a VFP unit. */
# ifdef IS_IN_rtld
- ldr a3, 1f
- ldr a4, .Lrtld_local_ro
-0: add a3, pc, a3
- add a3, a3, a4
- ldr a3, [a3, #RTLD_GLOBAL_RO_DL_HWCAP_OFFSET]
+ LDST_PCREL (ldr, a3, a4, \
+ C_SYMBOL_NAME(_rtld_local_ro) \
+ + RTLD_GLOBAL_RO_DL_HWCAP_OFFSET)
# else
# ifdef PIC
- ldr a3, 1f
- ldr a4, .Lrtld_global_ro
-0: add a3, pc, a3
- ldr a3, [a3, a4]
- ldr a3, [a3, #RTLD_GLOBAL_RO_DL_HWCAP_OFFSET]
+ LDR_GLOBAL (a3, a4, C_SYMBOL_NAME(_rtld_global_ro), \
+ RTLD_GLOBAL_RO_DL_HWCAP_OFFSET)
# else
- ldr a3, .Lhwcap
- ldr a3, [a3, #0]
+ LDR_GLOBAL (a3, a4, C_SYMBOL_NAME(_dl_hwcap), 0)
# endif
# endif
#endif
@@ -114,23 +108,6 @@ ENTRY (__sigsetjmp)
/* Make a tail call to __sigjmp_save; it takes the same args. */
B PLTJMP(C_SYMBOL_NAME(__sigjmp_save))
-#ifdef NEED_HWCAP
-# ifdef IS_IN_rtld
-1: .long _GLOBAL_OFFSET_TABLE_ - 0b - PC_OFS
-.Lrtld_local_ro:
- .long C_SYMBOL_NAME(_rtld_local_ro)(GOTOFF)
-# else
-# ifdef PIC
-1: .long _GLOBAL_OFFSET_TABLE_ - 0b - PC_OFS
-.Lrtld_global_ro:
- .long C_SYMBOL_NAME(_rtld_global_ro)(GOT)
-# else
-.Lhwcap:
- .long C_SYMBOL_NAME(_dl_hwcap)
-# endif
-# endif
-#endif
-
END (__sigsetjmp)
hidden_def (__sigsetjmp)
diff --git a/sysdeps/arm/sysdep.h b/sysdeps/arm/sysdep.h
index 4c41213c37..8614b4a058 100644
--- a/sysdeps/arm/sysdep.h
+++ b/sysdeps/arm/sysdep.h
@@ -21,6 +21,8 @@
#ifndef __ASSEMBLER__
# include <stdint.h>
+#else
+# include <arm-features.h>
#endif
/* The __ARM_ARCH define is provided by gcc 4.8. Construct it otherwise. */
@@ -157,6 +159,32 @@
.arm
# endif
+/* Load or store to/from address X + Y into/from R, (maybe) using T.
+ X or Y can use T freely; T can be R if OP is a load. The first
+ version eschews the two-register addressing mode, while the
+ second version uses it. */
+# define LDST_INDEXED_NOINDEX(OP, R, T, X, Y) \
+ add T, X, Y; \
+ sfi_breg T, \
+ OP R, [T]
+# define LDST_INDEXED_INDEX(OP, R, X, Y) \
+ OP R, [X, Y]
+
+# ifdef ARM_NO_INDEX_REGISTER
+/* We're never using the two-register addressing mode, so this
+ always uses an intermediate add. */
+# define LDST_INDEXED(OP, R, T, X, Y) LDST_INDEXED_NOINDEX (OP, R, T, X, Y)
+# define LDST_PC_INDEXED(OP, R, T, X) LDST_INDEXED_NOINDEX (OP, R, T, pc, X)
+# else
+/* The two-register addressing mode is OK, except on Thumb with pc. */
+# define LDST_INDEXED(OP, R, T, X, Y) LDST_INDEXED_INDEX (OP, R, X, Y)
+# ifdef __thumb2__
+# define LDST_PC_INDEXED(OP, R, T, X) LDST_INDEXED_NOINDEX (OP, R, T, pc, X)
+# else
+# define LDST_PC_INDEXED(OP, R, T, X) LDST_INDEXED_INDEX (OP, R, pc, X)
+# endif
+# endif
+
/* Load or store to/from a pc-relative EXPR into/from R, using T. */
# ifdef __thumb2__
# define LDST_PCREL(OP, R, T, EXPR) \
@@ -166,6 +194,11 @@
.previous; \
99: add T, T, pc; \
OP R, [T]
+# elif defined (ARCH_HAS_T2) && ARM_PCREL_MOVW_OK
+# define LDST_PCREL(OP, R, T, EXPR) \
+ movw T, #:lower16:EXPR - 99f - PC_OFS; \
+ movt T, #:upper16:EXPR - 99f - PC_OFS; \
+99: LDST_PC_INDEXED (OP, R, T, T)
# else
# define LDST_PCREL(OP, R, T, EXPR) \
ldr T, 98f; \
@@ -175,17 +208,50 @@
99: OP R, [pc, T]
# endif
-/* Load or store to/from a global EXPR into/from R, using T. */
-# define LDST_GLOBAL(OP, R, T, EXPR) \
+/* Load from a global SYMBOL + CONSTANT into R, using T. */
+# if defined (ARCH_HAS_T2) && !defined (PIC)
+# define LDR_GLOBAL(R, T, SYMBOL, CONSTANT) \
+ movw T, #:lower16:SYMBOL; \
+ movt T, #:upper16:SYMBOL; \
+ ldr R, [T, $CONSTANT]
+# elif defined (ARCH_HAS_T2) && defined (PIC) && ARM_PCREL_MOVW_OK
+# define LDR_GLOBAL(R, T, SYMBOL, CONSTANT) \
+ movw R, #:lower16:_GLOBAL_OFFSET_TABLE_ - 97f - PC_OFS; \
+ movw T, #:lower16:99f - 98f - PC_OFS; \
+ movt R, #:upper16:_GLOBAL_OFFSET_TABLE_ - 97f - PC_OFS; \
+ movt T, #:upper16:99f - 98f - PC_OFS; \
+ .pushsection .rodata.cst4, "aM", %progbits, 4; \
+ .balign 4; \
+99: .word SYMBOL##(GOT); \
+ .popsection; \
+97: add R, R, pc; \
+98: LDST_PC_INDEXED (ldr, T, T, T); \
+ LDST_INDEXED (ldr, R, T, R, T); \
+ ldr R, [R, $CONSTANT]
+# else
+# define LDR_GLOBAL(R, T, SYMBOL, CONSTANT) \
ldr T, 99f; \
ldr R, 100f; \
98: add T, T, pc; \
ldr T, [T, R]; \
.subsection 2; \
99: .word _GLOBAL_OFFSET_TABLE_ - 98b - PC_OFS; \
-100: .word EXPR##(GOT); \
+100: .word SYMBOL##(GOT); \
.previous; \
- OP R, [T]
+ ldr R, [T, $CONSTANT]
+# endif
+
+/* This is the same as LDR_GLOBAL, but for a SYMBOL that is known to
+ be in the same linked object (as for one with hidden visibility).
+ We can avoid the GOT indirection in the PIC case. For the pure
+ static case, LDR_GLOBAL is already optimal. */
+# ifdef PIC
+# define LDR_HIDDEN(R, T, SYMBOL, CONSTANT) \
+ LDST_PCREL (ldr, R, T, SYMBOL + CONSTANT)
+# else
+# define LDR_HIDDEN(R, T, SYMBOL, CONSTANT) \
+ LDR_GLOBAL (R, T, SYMBOL, CONSTANT)
+# endif
/* Cope with negative memory offsets, which thumb can't encode.
Use NEGOFF_ADJ_BASE to (conditionally) alter the base register,
@@ -296,7 +362,7 @@
(!defined SHARED && (!defined NOT_IN_libc || defined IS_IN_libpthread)))
# ifdef __ASSEMBLER__
# define PTR_MANGLE_LOAD(guard, tmp) \
- LDST_PCREL(ldr, guard, tmp, C_SYMBOL_NAME(__pointer_chk_guard_local));
+ LDR_HIDDEN (guard, tmp, C_SYMBOL_NAME(__pointer_chk_guard_local), 0)
# define PTR_MANGLE(dst, src, guard, tmp) \
PTR_MANGLE_LOAD(guard, tmp); \
PTR_MANGLE2(dst, src, guard)
@@ -316,7 +382,7 @@ extern uintptr_t __pointer_chk_guard_local attribute_relro attribute_hidden;
#else
# ifdef __ASSEMBLER__
# define PTR_MANGLE_LOAD(guard, tmp) \
- LDST_GLOBAL(ldr, guard, tmp, C_SYMBOL_NAME(__pointer_chk_guard));
+ LDR_GLOBAL (guard, tmp, C_SYMBOL_NAME(__pointer_chk_guard), 0);
# define PTR_MANGLE(dst, src, guard, tmp) \
PTR_MANGLE_LOAD(guard, tmp); \
PTR_MANGLE2(dst, src, guard)