diff options
author | Roland McGrath <roland@hack.frob.com> | 2014-10-22 14:20:35 -0700 |
---|---|---|
committer | Roland McGrath <roland@hack.frob.com> | 2014-10-22 14:20:35 -0700 |
commit | 8c2b1ed8bbd20d35314c2a602b903159fa567ffb (patch) | |
tree | 4a9e06786fdb32c5558cdae218af3b4ff0b78af6 /sysdeps/arm/sysdep.h | |
parent | b5af9297d51a43f96c5be1bafab032184690dd6f (diff) | |
download | glibc-8c2b1ed8bbd20d35314c2a602b903159fa567ffb.tar.gz |
ARM: Use movw/movt more when available
Diffstat (limited to 'sysdeps/arm/sysdep.h')
-rw-r--r-- | sysdeps/arm/sysdep.h | 78 |
1 files changed, 72 insertions, 6 deletions
diff --git a/sysdeps/arm/sysdep.h b/sysdeps/arm/sysdep.h index 4c41213c37..8614b4a058 100644 --- a/sysdeps/arm/sysdep.h +++ b/sysdeps/arm/sysdep.h @@ -21,6 +21,8 @@ #ifndef __ASSEMBLER__ # include <stdint.h> +#else +# include <arm-features.h> #endif /* The __ARM_ARCH define is provided by gcc 4.8. Construct it otherwise. */ @@ -157,6 +159,32 @@ .arm # endif +/* Load or store to/from address X + Y into/from R, (maybe) using T. + X or Y can use T freely; T can be R if OP is a load. The first + version eschews the two-register addressing mode, while the + second version uses it. */ +# define LDST_INDEXED_NOINDEX(OP, R, T, X, Y) \ + add T, X, Y; \ + sfi_breg T, \ + OP R, [T] +# define LDST_INDEXED_INDEX(OP, R, X, Y) \ + OP R, [X, Y] + +# ifdef ARM_NO_INDEX_REGISTER +/* We're never using the two-register addressing mode, so this + always uses an intermediate add. */ +# define LDST_INDEXED(OP, R, T, X, Y) LDST_INDEXED_NOINDEX (OP, R, T, X, Y) +# define LDST_PC_INDEXED(OP, R, T, X) LDST_INDEXED_NOINDEX (OP, R, T, pc, X) +# else +/* The two-register addressing mode is OK, except on Thumb with pc. */ +# define LDST_INDEXED(OP, R, T, X, Y) LDST_INDEXED_INDEX (OP, R, X, Y) +# ifdef __thumb2__ +# define LDST_PC_INDEXED(OP, R, T, X) LDST_INDEXED_NOINDEX (OP, R, T, pc, X) +# else +# define LDST_PC_INDEXED(OP, R, T, X) LDST_INDEXED_INDEX (OP, R, pc, X) +# endif +# endif + /* Load or store to/from a pc-relative EXPR into/from R, using T. */ # ifdef __thumb2__ # define LDST_PCREL(OP, R, T, EXPR) \ @@ -166,6 +194,11 @@ .previous; \ 99: add T, T, pc; \ OP R, [T] +# elif defined (ARCH_HAS_T2) && ARM_PCREL_MOVW_OK +# define LDST_PCREL(OP, R, T, EXPR) \ + movw T, #:lower16:EXPR - 99f - PC_OFS; \ + movt T, #:upper16:EXPR - 99f - PC_OFS; \ +99: LDST_PC_INDEXED (OP, R, T, T) # else # define LDST_PCREL(OP, R, T, EXPR) \ ldr T, 98f; \ @@ -175,17 +208,50 @@ 99: OP R, [pc, T] # endif -/* Load or store to/from a global EXPR into/from R, using T. */ -# define LDST_GLOBAL(OP, R, T, EXPR) \ +/* Load from a global SYMBOL + CONSTANT into R, using T. */ +# if defined (ARCH_HAS_T2) && !defined (PIC) +# define LDR_GLOBAL(R, T, SYMBOL, CONSTANT) \ + movw T, #:lower16:SYMBOL; \ + movt T, #:upper16:SYMBOL; \ + ldr R, [T, $CONSTANT] +# elif defined (ARCH_HAS_T2) && defined (PIC) && ARM_PCREL_MOVW_OK +# define LDR_GLOBAL(R, T, SYMBOL, CONSTANT) \ + movw R, #:lower16:_GLOBAL_OFFSET_TABLE_ - 97f - PC_OFS; \ + movw T, #:lower16:99f - 98f - PC_OFS; \ + movt R, #:upper16:_GLOBAL_OFFSET_TABLE_ - 97f - PC_OFS; \ + movt T, #:upper16:99f - 98f - PC_OFS; \ + .pushsection .rodata.cst4, "aM", %progbits, 4; \ + .balign 4; \ +99: .word SYMBOL##(GOT); \ + .popsection; \ +97: add R, R, pc; \ +98: LDST_PC_INDEXED (ldr, T, T, T); \ + LDST_INDEXED (ldr, R, T, R, T); \ + ldr R, [R, $CONSTANT] +# else +# define LDR_GLOBAL(R, T, SYMBOL, CONSTANT) \ ldr T, 99f; \ ldr R, 100f; \ 98: add T, T, pc; \ ldr T, [T, R]; \ .subsection 2; \ 99: .word _GLOBAL_OFFSET_TABLE_ - 98b - PC_OFS; \ -100: .word EXPR##(GOT); \ +100: .word SYMBOL##(GOT); \ .previous; \ - OP R, [T] + ldr R, [T, $CONSTANT] +# endif + +/* This is the same as LDR_GLOBAL, but for a SYMBOL that is known to + be in the same linked object (as for one with hidden visibility). + We can avoid the GOT indirection in the PIC case. For the pure + static case, LDR_GLOBAL is already optimal. */ +# ifdef PIC +# define LDR_HIDDEN(R, T, SYMBOL, CONSTANT) \ + LDST_PCREL (ldr, R, T, SYMBOL + CONSTANT) +# else +# define LDR_HIDDEN(R, T, SYMBOL, CONSTANT) \ + LDR_GLOBAL (R, T, SYMBOL, CONSTANT) +# endif /* Cope with negative memory offsets, which thumb can't encode. Use NEGOFF_ADJ_BASE to (conditionally) alter the base register, @@ -296,7 +362,7 @@ (!defined SHARED && (!defined NOT_IN_libc || defined IS_IN_libpthread))) # ifdef __ASSEMBLER__ # define PTR_MANGLE_LOAD(guard, tmp) \ - LDST_PCREL(ldr, guard, tmp, C_SYMBOL_NAME(__pointer_chk_guard_local)); + LDR_HIDDEN (guard, tmp, C_SYMBOL_NAME(__pointer_chk_guard_local), 0) # define PTR_MANGLE(dst, src, guard, tmp) \ PTR_MANGLE_LOAD(guard, tmp); \ PTR_MANGLE2(dst, src, guard) @@ -316,7 +382,7 @@ extern uintptr_t __pointer_chk_guard_local attribute_relro attribute_hidden; #else # ifdef __ASSEMBLER__ # define PTR_MANGLE_LOAD(guard, tmp) \ - LDST_GLOBAL(ldr, guard, tmp, C_SYMBOL_NAME(__pointer_chk_guard)); + LDR_GLOBAL (guard, tmp, C_SYMBOL_NAME(__pointer_chk_guard), 0); # define PTR_MANGLE(dst, src, guard, tmp) \ PTR_MANGLE_LOAD(guard, tmp); \ PTR_MANGLE2(dst, src, guard) |