summary | refs | log | tree | commit | diff
path: root/bfd/elf32-arm.c
diff options
context:
space:
mode:
author: Tamar Christina <tamar.christina@arm.com> 2019-08-20 16:34:26 +0100
committer: Tamar Christina <tamar.christina@arm.com> 2019-08-20 16:35:28 +0100
commit: b4e87f2c1eee99dee805e3828514f8afa162f9ba (patch)
tree: 213986cc59e1260fff20c20126bd5e4ffd3d0835 /bfd/elf32-arm.c
parent: d7a11d1383a2d0cdffc8c08e46b0cf5a66b3ce28 (diff)
download: binutils-gdb-b4e87f2c1eee99dee805e3828514f8afa162f9ba.tar.gz
Arm: Fix performance issue with thumb-2 tailcalls
We currently use a padding NOP after a Thumb to Arm interworking veneer
(BX pc).  The NOP is never executed but may result in a performance penalty
on some cores.

For this reason this patch changes the NOPs after Thumb to Arm veneers into
B .-2 and adds a note about this in the source code for future reference.

bfd/ChangeLog:

    * elf32-arm.c (elf32_thumb2_plt_entry, elf32_arm_plt_thumb_stub,
    elf32_arm_stub_long_branch_v4t_thumb_thumb,
    elf32_arm_stub_long_branch_v4t_thumb_arm,
    elf32_arm_stub_short_branch_v4t_thumb_arm,
    elf32_arm_stub_long_branch_v4t_thumb_arm_pic,
    elf32_arm_stub_long_branch_v4t_thumb_thumb_pic,
    elf32_arm_stub_long_branch_v4t_thumb_tls_pic): Change nop to branch to
    previous instruction.

ld/ChangeLog:

    * testsuite/ld-arm/cortex-a8-fix-b-plt.d: Update Testcase.
    * testsuite/ld-arm/cortex-a8-fix-b-rel-arm.d: Likewise.
    * testsuite/ld-arm/cortex-a8-fix-bcc-plt.d: Likewise.
    * testsuite/ld-arm/farcall-cond-thumb-arm.d: Likewise.
    * testsuite/ld-arm/farcall-mixed-app.d: Likewise.
    * testsuite/ld-arm/farcall-mixed-app2.d: Likewise.
    * testsuite/ld-arm/farcall-mixed-lib-v4t.d: Likewise.
    * testsuite/ld-arm/farcall-thumb-arm-pic-veneer.d: Likewise.
    * testsuite/ld-arm/farcall-thumb-arm-short.d: Likewise.
    * testsuite/ld-arm/farcall-thumb-arm.d: Likewise.
    * testsuite/ld-arm/farcall-thumb-thumb-pic-veneer.d: Likewise.
    * testsuite/ld-arm/farcall-thumb-thumb.d: Likewise.
    * testsuite/ld-arm/fix-arm1176-on.d: Likewise.
    * testsuite/ld-arm/ifunc-10.dd: Likewise.
    * testsuite/ld-arm/ifunc-2.dd: Likewise.
    * testsuite/ld-arm/ifunc-4.dd: Likewise.
    * testsuite/ld-arm/ifunc-6.dd: Likewise.
    * testsuite/ld-arm/ifunc-8.dd: Likewise.
    * testsuite/ld-arm/jump-reloc-veneers-long.d: Likewise.
    * testsuite/ld-arm/mixed-app.d: Likewise.
    * testsuite/ld-arm/thumb2-b-interwork.d: Likewise.
    * testsuite/ld-arm/tls-longplt.d: Likewise.
    * testsuite/ld-arm/tls-thumb1.d: Likewise.
Diffstat (limited to 'bfd/elf32-arm.c')
-rw-r--r-- bfd/elf32-arm.c 25
1 files changed, 16 insertions, 9 deletions
diff --git a/bfd/elf32-arm.c b/bfd/elf32-arm.c
index f1895df2788..d1548d6db33 100644
--- a/bfd/elf32-arm.c
+++ b/bfd/elf32-arm.c
@@ -2329,6 +2329,11 @@ static const unsigned long dl_tlsdesc_lazy_trampoline [] =
0x00000018, /* 4: .word _GLOBAL_OFFSET_TABLE_ - 2b - 8 */
};
+/* NOTE: [Thumb nop sequence]
+ When adding code that transitions from Thumb to Arm the instruction that
+ should be used for the alignment padding should be 0xe7fd (b .-2) instead of
+ a nop for performance reasons. */
+
/* ARM FDPIC PLT entry. */
/* The last 5 words contain PLT lazy fragment code and data. */
static const bfd_vma elf32_arm_fdpic_plt_entry [] =
@@ -2446,8 +2451,8 @@ static const bfd_vma elf32_thumb2_plt_entry [] =
0x0c00f240, /* movw ip, #0xNNNN */
0x0c00f2c0, /* movt ip, #0xNNNN */
0xf8dc44fc, /* add ip, pc */
- 0xbf00f000 /* ldr.w pc, [ip] */
- /* nop */
+ 0xe7fdf000 /* ldr.w pc, [ip] */
+ /* b .-2 */
};
/* The format of the first entry in the procedure linkage table
@@ -2487,7 +2492,7 @@ static const bfd_vma elf32_arm_vxworks_shared_plt_entry[] =
static const bfd_vma elf32_arm_plt_thumb_stub [] =
{
0x4778, /* bx pc */
- 0x46c0 /* nop */
+ 0xe7fd /* b .-2 */
};
/* The entries in a PLT when using a DLL-based target with multiple
@@ -2574,6 +2579,8 @@ typedef struct
int reloc_addend;
} insn_sequence;
+/* See note [Thumb nop sequence] when adding a veneer. */
+
/* Arm/Thumb -> Arm/Thumb long branch stub. On V5T and above, use blx
to reach the stub if necessary. */
static const insn_sequence elf32_arm_stub_long_branch_any_any[] =
@@ -2624,7 +2631,7 @@ static const insn_sequence elf32_arm_stub_long_branch_thumb2_only_pure[] =
static const insn_sequence elf32_arm_stub_long_branch_v4t_thumb_thumb[] =
{
THUMB16_INSN (0x4778), /* bx pc */
- THUMB16_INSN (0x46c0), /* nop */
+ THUMB16_INSN (0xe7fd), /* b .-2 */
ARM_INSN (0xe59fc000), /* ldr ip, [pc, #0] */
ARM_INSN (0xe12fff1c), /* bx ip */
DATA_WORD (0, R_ARM_ABS32, 0), /* dcd R_ARM_ABS32(X) */
@@ -2635,7 +2642,7 @@ static const insn_sequence elf32_arm_stub_long_branch_v4t_thumb_thumb[] =
static const insn_sequence elf32_arm_stub_long_branch_v4t_thumb_arm[] =
{
THUMB16_INSN (0x4778), /* bx pc */
- THUMB16_INSN (0x46c0), /* nop */
+ THUMB16_INSN (0xe7fd), /* b .-2 */
ARM_INSN (0xe51ff004), /* ldr pc, [pc, #-4] */
DATA_WORD (0, R_ARM_ABS32, 0), /* dcd R_ARM_ABS32(X) */
};
@@ -2645,7 +2652,7 @@ static const insn_sequence elf32_arm_stub_long_branch_v4t_thumb_arm[] =
static const insn_sequence elf32_arm_stub_short_branch_v4t_thumb_arm[] =
{
THUMB16_INSN (0x4778), /* bx pc */
- THUMB16_INSN (0x46c0), /* nop */
+ THUMB16_INSN (0xe7fd), /* b .-2 */
ARM_REL_INSN (0xea000000, -8), /* b (X-8) */
};
@@ -2683,7 +2690,7 @@ static const insn_sequence elf32_arm_stub_long_branch_v4t_arm_thumb_pic[] =
static const insn_sequence elf32_arm_stub_long_branch_v4t_thumb_arm_pic[] =
{
THUMB16_INSN (0x4778), /* bx pc */
- THUMB16_INSN (0x46c0), /* nop */
+ THUMB16_INSN (0xe7fd), /* b .-2 */
ARM_INSN (0xe59fc000), /* ldr ip, [pc, #0] */
ARM_INSN (0xe08cf00f), /* add pc, ip, pc */
DATA_WORD (0, R_ARM_REL32, -4), /* dcd R_ARM_REL32(X) */
@@ -2707,7 +2714,7 @@ static const insn_sequence elf32_arm_stub_long_branch_thumb_only_pic[] =
static const insn_sequence elf32_arm_stub_long_branch_v4t_thumb_thumb_pic[] =
{
THUMB16_INSN (0x4778), /* bx pc */
- THUMB16_INSN (0x46c0), /* nop */
+ THUMB16_INSN (0xe7fd), /* b .-2 */
ARM_INSN (0xe59fc004), /* ldr ip, [pc, #4] */
ARM_INSN (0xe08fc00c), /* add ip, pc, ip */
ARM_INSN (0xe12fff1c), /* bx ip */
@@ -2728,7 +2735,7 @@ static const insn_sequence elf32_arm_stub_long_branch_any_tls_pic[] =
static const insn_sequence elf32_arm_stub_long_branch_v4t_thumb_tls_pic[] =
{
THUMB16_INSN (0x4778), /* bx pc */
- THUMB16_INSN (0x46c0), /* nop */
+ THUMB16_INSN (0xe7fd), /* b .-2 */
ARM_INSN (0xe59f1000), /* ldr r1, [pc, #0] */
ARM_INSN (0xe081f00f), /* add pc, r1, pc */
DATA_WORD (0, R_ARM_REL32, -4), /* dcd R_ARM_REL32(X) */