summaryrefslogtreecommitdiff
path: root/sysdeps/sparc
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2010-03-03 02:10:22 -0800
committerDavid S. Miller <davem@davemloft.net>2010-03-03 02:14:02 -0800
commit7ec1221ff7a5e3faa4e58cdfeb3722b2958499e2 (patch)
treedc64c2d2b15eab3aa62dc7314b535b6997f54b18 /sysdeps/sparc
parent42488a4d317ffdc9274b8e2b430fc930db4da8b8 (diff)
downloadglibc-7ec1221ff7a5e3faa4e58cdfeb3722b2958499e2.tar.gz
sparc: Use ba,a,pt in PLTs and fix bugs in R_SPARC_JMP_IREL handling.
2010-03-03 David S. Miller <davem@davemloft.net> * sysdeps/sparc/sparc32/dl-machine.h (elf_machine_lazy_rel): Must pass '1' for 't' argument to sparc_fixup_plt. * sysdeps/sparc/sparc64/dl-machine.h (elf_machine_lazy_rel): Likewise. * sysdeps/sparc/sparc32/dl-plt.h (OPCODE_BA_PT): Define. (sparc_fixup_plt): Document 't' argument. Enable branch optimization and use v9 branches when possible. Explain why we cannot unconditionally patch the branch into the first PLT instruction. * sysdeps/sparc/sparc64/dl-plt.h (sparc64_fixup_plt): Document 't' argument. Use v9 branches when possible. Explain why we can in fact unconditionally use a branch in the first PLT instruction here.
Diffstat (limited to 'sysdeps/sparc')
-rw-r--r--sysdeps/sparc/sparc32/dl-machine.h2
-rw-r--r--sysdeps/sparc/sparc32/dl-plt.h47
-rw-r--r--sysdeps/sparc/sparc64/dl-machine.h2
-rw-r--r--sysdeps/sparc/sparc64/dl-plt.h29
4 files changed, 67 insertions, 13 deletions
diff --git a/sysdeps/sparc/sparc32/dl-machine.h b/sysdeps/sparc/sparc32/dl-machine.h
index 53257104a6..9631db32e1 100644
--- a/sysdeps/sparc/sparc32/dl-machine.h
+++ b/sysdeps/sparc/sparc32/dl-machine.h
@@ -563,7 +563,7 @@ elf_machine_lazy_rel (struct link_map *map,
{
Elf32_Addr value = map->l_addr + reloc->r_addend;
value = ((Elf32_Addr (*) (void)) value) ();
- sparc_fixup_plt (reloc, reloc_addr, value, 0, 1);
+ sparc_fixup_plt (reloc, reloc_addr, value, 1, 1);
}
else if (r_type == R_SPARC_NONE)
;
diff --git a/sysdeps/sparc/sparc32/dl-plt.h b/sysdeps/sparc/sparc32/dl-plt.h
index edcc5c1374..bfb891fe69 100644
--- a/sysdeps/sparc/sparc32/dl-plt.h
+++ b/sysdeps/sparc/sparc32/dl-plt.h
@@ -25,19 +25,55 @@
#define OPCODE_JMP_G1 0x81c06000 /* jmp %g1+?; add lo 10 bits of value */
#define OPCODE_SAVE_SP 0x9de3bfa8 /* save %sp, -(16+6)*4, %sp */
#define OPCODE_BA 0x30800000 /* b,a ?; add PC-rel word address */
+#define OPCODE_BA_PT 0x30480000 /* ba,a,pt %icc, ?; add PC-rel word address */
static inline __attribute__ ((always_inline)) Elf32_Addr
sparc_fixup_plt (const Elf32_Rela *reloc, Elf32_Addr *reloc_addr,
Elf32_Addr value, int t, int do_flush)
{
- Elf32_Sword disp = value - (Elf32_Addr) reloc_addr;
+ Elf32_Sword disp;
- if (0 && disp >= -0x800000 && disp < 0x800000)
+ /* 't' is '0' if we are resolving this PLT entry for RTLD bootstrap,
+ in which case we'll be resolving all PLT entries and thus can
+ optimize by overwriting instructions starting at the first PLT entry
+ instruction and we need not be mindful of thread safety.
+
+ Otherwise, 't' is '1'. */
+ reloc_addr += t;
+ disp = value - (Elf32_Addr) reloc_addr;
+
+ if (disp >= -0x800000 && disp < 0x800000)
{
- /* Don't need to worry about thread safety. We're writing just one
- instruction. */
+ unsigned int insn = OPCODE_BA | ((disp >> 2) & 0x3fffff);
+
+#ifdef __sparc_v9__
+ /* On V9 we can do even better by using a branch with
+ prediction if we fit into the even smaller 19-bit
+ displacement field. */
+ if (disp >= -0x100000 && disp < 0x100000)
+ insn = OPCODE_BA_PT | ((disp >> 2) & 0x07ffff);
+#endif
+
+ /* Even if we are writing just a single branch, we must not
+ ignore the 't' offset. Consider a case where we have some
+ PLT slots which can be optimized into a single branch and
+ some which cannot. Then we can end up with a PLT which looks
+ like:
+
+ PLT4.0: sethi %(PLT_4_INDEX), %g1
+ sethi %(fully_resolved_sym_4), %g1
+ jmp %g1 + %lo(fully_resolved_sym_4)
+ PLT5.0: ba,a fully_resolved_sym_5
+ ba,a PLT0.0
+ ...
+
+ The delay slot of that jmp must always be either a sethi to
+ %g1 or a nop. But if we try to place this displacement
+ branch there, PLT4.0 will jump to fully_resolved_sym_4 for 1
+ instruction and then go immediately to
+ fully_resolved_sym_5. */
- reloc_addr[0] = OPCODE_BA | ((disp >> 2) & 0x3fffff);
+ reloc_addr[0] = insn;
if (do_flush)
__asm __volatile ("flush %0" : : "r"(reloc_addr));
}
@@ -48,7 +84,6 @@ sparc_fixup_plt (const Elf32_Rela *reloc, Elf32_Addr *reloc_addr,
need not be done during bootstrapping, since there are no threads.
But we also can't tell if we _can_ use flush, so don't. */
- reloc_addr += t;
reloc_addr[1] = OPCODE_JMP_G1 | (value & 0x3ff);
if (do_flush)
__asm __volatile ("flush %0+4" : : "r"(reloc_addr));
diff --git a/sysdeps/sparc/sparc64/dl-machine.h b/sysdeps/sparc/sparc64/dl-machine.h
index 4c915eb586..fcfbb06ac2 100644
--- a/sysdeps/sparc/sparc64/dl-machine.h
+++ b/sysdeps/sparc/sparc64/dl-machine.h
@@ -661,7 +661,7 @@ elf_machine_lazy_rel (struct link_map *map,
{
/* 'high' is always zero, for large PLT entries the linker
emits an R_SPARC_IRELATIVE. */
- sparc64_fixup_plt (map, reloc, reloc_addr, value, 0, 0);
+ sparc64_fixup_plt (map, reloc, reloc_addr, value, 0, 1);
}
else
*reloc_addr = value;
diff --git a/sysdeps/sparc/sparc64/dl-plt.h b/sysdeps/sparc/sparc64/dl-plt.h
index e06be43a0a..ca2fe3bbd8 100644
--- a/sysdeps/sparc/sparc64/dl-plt.h
+++ b/sysdeps/sparc/sparc64/dl-plt.h
@@ -28,7 +28,14 @@ sparc64_fixup_plt (struct link_map *map, const Elf64_Rela *reloc,
Elf64_Addr plt_vaddr = (Elf64_Addr) reloc_addr;
Elf64_Sxword disp = value - plt_vaddr;
- /* Now move plt_vaddr up to the call instruction. */
+ /* 't' is '0' if we are resolving this PLT entry for RTLD bootstrap,
+ in which case we'll be resolving all PLT entries and thus can
+ optimize by overwriting instructions starting at the first PLT entry
+ instruction and we need not be mindful of thread safety.
+
+ Otherwise, 't' is '1'.
+
+ Now move plt_vaddr up to the call instruction. */
plt_vaddr += ((t + 1) * 4);
/* PLT entries .PLT32768 and above look always the same. */
@@ -39,10 +46,22 @@ sparc64_fixup_plt (struct link_map *map, const Elf64_Rela *reloc,
/* Near destination. */
else if (disp >= -0x800000 && disp < 0x800000)
{
- /* As this is just one instruction, it is thread safe and so
- we can avoid the unnecessary sethi FOO, %g1.
- b,a target */
- insns[0] = 0x30800000 | ((disp >> 2) & 0x3fffff);
+ unsigned int insn;
+
+ /* ba,a */
+ insn = 0x30800000 | ((disp >> 2) & 0x3fffff);
+
+ if (disp >= -0x100000 && disp < 0x100000)
+ {
+ /* ba,a,pt %icc */
+ insn = 0x30480000 | ((disp >> 2) & 0x07ffff);
+ }
+
+ /* As this is just one instruction, it is thread safe and so we
+ can avoid the unnecessary sethi FOO, %g1. Each 64-bit PLT
+ entry is 8 instructions long, so we can't run into the 'jmp'
+ delay slot problems 32-bit PLTs can. */
+ insns[0] = insn;
__asm __volatile ("flush %0" : : "r" (insns));
}
/* 32-bit Sparc style, the target is in the lower 32-bits of