summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorH. Peter Anvin <hpa@linux.intel.com>2016-02-16 22:15:03 -0800
committerH. Peter Anvin <hpa@linux.intel.com>2016-02-16 22:15:03 -0800
commitdde34c51b35733299b3e3d160c873fa81e64827a (patch)
tree2f4b8b17334676f43aa6b2b01d306beab7dcd718
parent085a4a9f98861ed904b37dee1e4c7906a1bc0929 (diff)
downloadnasm-dde34c51b35733299b3e3d160c873fa81e64827a.tar.gz
outmacho: correct handling of GOT relocation, add TLVP relocations
Correct the handling of GOT relocations, as they need a symbol reference. Add handling of TLVP relocations; it is unclear to me if non-local relocations in TLVP space is permitted. Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
-rw-r--r--output/outmacho.c284
1 files changed, 183 insertions, 101 deletions
diff --git a/output/outmacho.c b/output/outmacho.c
index d2952b2c..b3df4302 100644
--- a/output/outmacho.c
+++ b/output/outmacho.c
@@ -51,6 +51,7 @@
#include "nasmlib.h"
#include "saa.h"
#include "raa.h"
+#include "rbtree.h"
#include "output/outform.h"
#include "output/outlib.h"
@@ -77,10 +78,29 @@
#define CPU_SUBTYPE_I386_ALL 3 /* all-x86 compatible */
#define MH_OBJECT 0x1 /* object file */
+/* Mach-O load commands */
#define LC_SEGMENT 0x1 /* 32-bit segment load cmd */
#define LC_SEGMENT_64 0x19 /* 64-bit segment load cmd */
#define LC_SYMTAB 0x2 /* symbol table load command */
+/* Mach-O relocations numbers */
+
+/* Generic relocs, used by i386 Mach-O */
+#define GENERIC_RELOC_VANILLA 0 /* Generic relocation */
+#define GENERIC_RELOC_TLV 5 /* Thread local */
+
+#define X86_64_RELOC_UNSIGNED 0 /* Absolute address */
+#define X86_64_RELOC_SIGNED 1 /* Signed 32-bit disp */
+#define X86_64_RELOC_BRANCH 2 /* CALL/JMP with 32-bit disp */
+#define X86_64_RELOC_GOT_LOAD 3 /* MOVQ of GOT entry */
+#define X86_64_RELOC_GOT 4 /* Different GOT entry */
+#define X86_64_RELOC_SUBTRACTOR 5 /* Subtracting two symbols */
+#define X86_64_RELOC_SIGNED_1 6 /* SIGNED with -1 addend */
+#define X86_64_RELOC_SIGNED_2 7 /* SIGNED with -2 addend */
+#define X86_64_RELOC_SIGNED_4 8 /* SIGNED with -4 addend */
+#define X86_64_RELOC_TLV 9 /* Thread local */
+
+/* Mach-O VM permission constants */
#define VM_PROT_NONE (0x00)
#define VM_PROT_READ (0x01)
#define VM_PROT_WRITE (0x02)
@@ -89,6 +109,18 @@
#define VM_PROT_DEFAULT (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE)
#define VM_PROT_ALL (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE)
+/* Our internal relocation types */
+enum reltype {
+ RL_ABS, /* Absolute relocation */
+ RL_REL, /* Relative relocation */
+ RL_TLV, /* Thread local */
+ RL_SUB, /* X86_64_RELOC_SUBTRACT */
+ RL_GOT, /* X86_64_RELOC_GOT */
+ RL_GOTLOAD, /* X86_64_RELOC_GOT_LOAD */
+};
+#define RL_MAX_32 RL_TLV
+#define RL_MAX_64 RL_GOTLOAD
+
struct macho_fmt {
uint32_t ptrsize; /* Pointer size in bytes */
uint32_t mh_magic; /* Which magic number to use */
@@ -98,6 +130,10 @@ struct macho_fmt {
uint32_t segcmd_size; /* Segment command size */
uint32_t sectcmd_size; /* Section command size */
uint32_t nlist_size; /* Nlist (symbol) size */
+ enum reltype maxreltype; /* Maximum entry in enum reltype permitted */
+ uint32_t reloc_abs; /* Absolute relocation type */
+ uint32_t reloc_rel; /* Relative relocation type */
+ uint32_t reloc_tlv; /* Thread local relocation type */
};
static const struct macho_fmt *fmt;
@@ -114,6 +150,7 @@ struct section {
int32_t index;
int32_t fileindex;
struct reloc *relocs;
+ struct rbtree *gsyms; /* Global symbols in section */
int align;
bool by_name; /* This section was specified by full MachO name */
@@ -144,7 +181,7 @@ struct section {
#define S_ATTR_PURE_INSTRUCTIONS 0x80000000 /* section uses pure
machine instructions */
-static struct sectmap {
+static const struct sectmap {
const char *nasmsect;
const char *segname;
const char *sectname;
@@ -178,18 +215,18 @@ struct reloc {
struct symbol {
/* nasm internal data */
+ struct rbtree symv; /* Global symbol rbtree; "key" contains the
+ symbol offset. */
struct symbol *next; /* next symbol in the list */
char *name; /* name of this symbol */
- int32_t initial_snum; /* symbol number used above in
- reloc */
- int32_t snum; /* true snum for reloc */
+ int32_t initial_snum; /* symbol number used above in reloc */
+ int32_t snum; /* true snum for reloc */
/* data that goes into the file */
- uint32_t strx; /* string table index */
- uint8_t type; /* symbol type */
- uint8_t sect; /* NO_SECT or section number */
- uint16_t desc; /* for stab debugging, 0 for us */
- uint64_t value; /* offset of symbol in section */
+ uint32_t strx; /* string table index */
+ uint8_t type; /* symbol type */
+ uint8_t sect; /* NO_SECT or section number */
+ uint16_t desc; /* for stab debugging, 0 for us */
};
/* symbol type bits */
@@ -290,18 +327,12 @@ static struct section *get_section_by_index(const int32_t index)
return s;
}
-static uint8_t get_section_fileindex_by_index(const int32_t index)
-{
- struct section *s = get_section_by_index(index);
-
- return s ? s->fileindex : NO_SECT;
-}
-
/*
* Special section numbers which are used to define Mach-O special
* symbols, which can be used with WRT to provide PIC relocation
* types.
*/
+static int32_t macho_tlvp_sect;
static int32_t macho_gotpcrel_sect;
static void macho_init(void)
@@ -322,6 +353,11 @@ static void macho_init(void)
/* string table starts with a zero byte so index 0 is an empty string */
saa_wbytes(strs, zero_buffer, 1);
strslen = 1;
+
+ /* add special symbol for TLVP */
+ macho_tlvp_sect = seg_alloc() + 1;
+ define_label("..tlvp", macho_tlvp_sect, 0L, NULL, false, false);
+
}
static void sect_write(struct section *sect,
@@ -331,32 +367,42 @@ static void sect_write(struct section *sect,
sect->size += len;
}
-enum reltype {
- RL_ABS, /* Absolute relocation */
- RL_REL, /* Relative relocation */
- RL_SUB, /* X86_64_RELOC_SUBTRACT */
- RL_GOT, /* X86_64_RELOC_GOT */
- RL_GOTLOAD, /* X86_64_RELOC_GOT_LOAD */
-};
+/*
+ * Find a suitable global symbol for a ..gotpcrel or ..tlvp reference
+ */
+static struct symbol *macho_find_gsym(struct section *s,
+ uint64_t offset, bool exact)
+{
+ struct rbtree *srb;
+
+ srb = rb_search(s->gsyms, offset);
+
+ if (!srb || (exact && srb->key != offset)) {
+ nasm_error(ERR_NONFATAL, "unable to find a suitable global symbol"
+ " for this reference");
+ return NULL;
+ }
+
+ return container_of(srb, struct symbol, symv);
+}
static int64_t add_reloc(struct section *sect, int32_t section,
+ int64_t offset,
enum reltype reltype, int bytes)
{
struct reloc *r;
+ struct section *s;
int32_t fi;
int64_t adjust;
- /* NeXT as puts relocs in reversed order (address-wise) into the
- ** files, so we do the same, doesn't seem to make much of a
- ** difference either way */
- r = nasm_malloc(sizeof(struct reloc));
- r->next = sect->relocs;
- sect->relocs = r;
+ /* Double check this is a valid relocation type for this platform */
+ nasm_assert(reltype <= fmt->maxreltype);
/* the current end of the section will be the symbol's address for
** now, might have to be fixed by macho_fixup_relocs() later on. make
** sure we don't make the symbol scattered by setting the highest
** bit by accident */
+ r = nasm_malloc(sizeof(struct reloc));
r->addr = sect->size & ~R_SCATTERED;
r->ext = 1;
adjust = bytes;
@@ -365,10 +411,15 @@ static int64_t add_reloc(struct section *sect, int32_t section,
r->length = ilog2_32(bytes);
/* set default relocation values */
- r->type = 0;
+ r->type = fmt->reloc_abs;
r->pcrel = 0;
r->snum = R_ABS;
+ s = NULL;
+ if (section != NO_SEG)
+ s = get_section_by_index(section);
+ fi = s ? s->fileindex : NO_SECT;
+
/* absolute relocation */
switch (reltype) {
case RL_ABS:
@@ -376,64 +427,82 @@ static int64_t add_reloc(struct section *sect, int32_t section,
/* absolute (can this even happen?) */
r->ext = 0;
r->snum = R_ABS;
+ } else if (fi == NO_SECT) {
+ /* external */
+ r->snum = raa_read(extsyms, section);
} else {
- /* inter-section */
- fi = get_section_fileindex_by_index(section);
- if (fi == NO_SECT) {
- /* external */
- r->snum = raa_read(extsyms, section);
- } else {
- /* local */
- r->ext = 0;
- r->snum = fi;
- adjust = -sect->size;
- }
+ /* local */
+ r->ext = 0;
+ r->snum = fi;
+ adjust = -sect->size;
}
break;
case RL_REL:
+ r->type = fmt->reloc_rel;
r->pcrel = 1;
if (section == NO_SEG) {
- /* intra-section */
- r->type = 1; // X86_64_RELOC_SIGNED
+ goto bail; /* No relocation needed */
+ } else if (fi == NO_SECT) {
+ /* external */
+ sect->extreloc = 1;
+ r->snum = raa_read(extsyms, section);
} else {
- /* inter-section */
- r->type = 1; // X86_64_RELOC_SIGNED
- fi = get_section_fileindex_by_index(section);
-
- if (fi == NO_SECT) {
- /* external */
- sect->extreloc = 1;
- r->snum = raa_read(extsyms, section);
- } else {
- /* local */
- r->ext = 0;
- r->snum = fi;
- adjust = -sect->size;
- }
+ /* local */
+ r->ext = 0;
+ r->snum = fi;
+ adjust = -sect->size;
}
break;
case RL_SUB:
r->pcrel = 0;
- r->type = 5; // X86_64_RELOC_SUBTRACTOR
+ r->type = X86_64_RELOC_SUBTRACTOR;
break;
case RL_GOT:
- r->pcrel = 1;
- r->type = 4; // X86_64_RELOC_GOT
- r->snum = macho_gotpcrel_sect; /* WTF? */
- break;
+ r->type = X86_64_RELOC_GOT;
+ goto needsym;
case RL_GOTLOAD:
+ r->type = X86_64_RELOC_GOT_LOAD;
+ goto needsym;
+
+ case RL_TLV:
+ r->type = fmt->reloc_tlv;
+ goto needsym;
+
+ needsym:
r->pcrel = 1;
- r->type = 3; // X86_64_RELOC_GOT_LOAD
- r->snum = macho_gotpcrel_sect; /* WTF? */
+ if (section == NO_SEG) {
+ nasm_error(ERR_NONFATAL, "Unsupported use of use of WRT");
+ } else if (fi == NO_SECT) {
+ /* external */
+ r->snum = raa_read(extsyms, section);
+ } else {
+ /* internal */
+ struct symbol *sym = macho_find_gsym(s, offset, reltype != RL_TLV);
+ if (!sym)
+ goto bail;
+ r->snum = sym->initial_snum;
+ }
break;
}
+ /* NeXT as puts relocs in reversed order (address-wise) into the
+ ** files, so we do the same, doesn't seem to make much of a
+ ** difference either way */
+ r->next = sect->relocs;
+ sect->relocs = r;
+ if (r->ext)
+ sect->extreloc = 1;
++sect->nreloc;
+
return adjust;
+
+ bail:
+ nasm_free(r);
+ return 0;
}
static void macho_output(int32_t secto, const void *data,
@@ -441,7 +510,7 @@ static void macho_output(int32_t secto, const void *data,
int32_t section, int32_t wrt)
{
struct section *s;
- int64_t addr;
+ int64_t addr, offset;
uint8_t mydata[16], *p, gotload;
bool is_bss;
@@ -449,7 +518,6 @@ static void macho_output(int32_t secto, const void *data,
if (type != OUT_RESERVE)
nasm_error(ERR_NONFATAL, "attempt to assemble code in "
"[ABSOLUTE] space");
-
return;
}
@@ -510,7 +578,7 @@ static void macho_output(int32_t secto, const void *data,
"Mach-O 64-bit format does not support"
" 32-bit absolute addresses");
} else {
- add_reloc(s, section, RL_ABS, asize);
+ add_reloc(s, section, addr, RL_ABS, asize);
}
} else {
nasm_error(ERR_NONFATAL, "Mach-O format does not support"
@@ -528,7 +596,8 @@ static void macho_output(int32_t secto, const void *data,
nasm_assert(section != secto);
p = mydata;
- addr = *(int64_t *)data - size;
+ offset = *(int64_t *)data;
+ addr = offset - size;
if (section != NO_SEG && section % 2) {
nasm_error(ERR_NONFATAL, "Mach-O format does not support"
@@ -541,7 +610,7 @@ static void macho_output(int32_t secto, const void *data,
" this use of WRT");
wrt = NO_SEG; /* we can at least _try_ to continue */
} else {
- addr += add_reloc(s, section, RL_REL, 2);
+ addr += add_reloc(s, section, addr+size, RL_REL, 2);
}
WRITESHORT(p, addr);
@@ -552,14 +621,15 @@ static void macho_output(int32_t secto, const void *data,
nasm_assert(section != secto);
p = mydata;
- addr = *(int64_t *)data - size;
+ offset = *(int64_t *)data;
+ addr = offset - size;
if (section != NO_SEG && section % 2) {
nasm_error(ERR_NONFATAL, "Mach-O format does not support"
" section base references");
} else if (wrt == NO_SEG) {
/* Plain relative relocation */
- addr += add_reloc(s, section, RL_REL, 4);
+ addr += add_reloc(s, section, offset, RL_REL, 4);
} else if (wrt == macho_gotpcrel_sect) {
if (s->data->datalen > 1) {
/* Retrieve instruction opcode */
@@ -569,11 +639,13 @@ static void macho_output(int32_t secto, const void *data,
}
if (gotload == 0x8B) {
/* Check for MOVQ Opcode -> X86_64_RELOC_GOT_LOAD */
- addr += add_reloc(s, section, RL_GOTLOAD, 4);
+ addr += add_reloc(s, section, offset, RL_GOTLOAD, 4);
} else {
/* X86_64_RELOC_GOT */
- addr += add_reloc(s, section, RL_GOT, 4);
+ addr += add_reloc(s, section, offset, RL_GOT, 4);
}
+ } else if (wrt == macho_tlvp_sect) {
+ addr += add_reloc(s, section, offset, RL_TLV, 4);
} else {
nasm_error(ERR_NONFATAL, "Mach-O format does not support"
" this use of WRT");
@@ -593,7 +665,7 @@ static void macho_output(int32_t secto, const void *data,
static int32_t macho_section(char *name, int pass, int *bits)
{
char *sectionAttributes;
- struct sectmap *sm;
+ const struct sectmap *sm;
struct section *s;
const char *section, *segment;
uint32_t flags;
@@ -668,14 +740,12 @@ static int32_t macho_section(char *name, int pass, int *bits)
if (!s) {
new_seg = true;
- s = *sectstail = nasm_malloc(sizeof(struct section));
- s->next = NULL;
+ s = *sectstail = nasm_zalloc(sizeof(struct section));
sectstail = &s->next;
s->data = saa_init(1L);
s->index = seg_alloc();
s->fileindex = ++seg_nsects;
- s->relocs = NULL;
s->align = -1;
s->pad = -1;
s->offset = -1;
@@ -771,18 +841,18 @@ static void macho_symdef(char *name, int32_t section, int64_t offset,
return;
}
- if (name[0] == '.' && name[1] == '.' && name[2] != '@') {
- /*
- * This is a NASM special symbol. We never allow it into
- * the Macho-O symbol table, even if it's a valid one. If it
- * _isn't_ a valid one, we should barf immediately.
- */
- if (strcmp(name, "..gotpcrel"))
+ if (name[0] == '.' && name[1] == '.' && name[2] != '@') {
+ /*
+ * This is a NASM special symbol. We never allow it into
+ * the Macho-O symbol table, even if it's a valid one. If it
+ * _isn't_ a valid one, we should barf immediately.
+ */
+ if (strcmp(name, "..gotpcrel") && strcmp(name, "..tlvp"))
nasm_error(ERR_NONFATAL, "unrecognized special symbol `%s'", name);
- return;
+ return;
}
- sym = *symstail = nasm_malloc(sizeof(struct symbol));
+ sym = *symstail = nasm_zalloc(sizeof(struct symbol));
sym->next = NULL;
symstail = &sym->next;
@@ -790,7 +860,7 @@ static void macho_symdef(char *name, int32_t section, int64_t offset,
sym->strx = strslen;
sym->type = 0;
sym->desc = 0;
- sym->value = offset;
+ sym->symv.key = offset;
sym->initial_snum = -1;
/* external and common symbols get N_EXT */
@@ -803,15 +873,17 @@ static void macho_symdef(char *name, int32_t section, int64_t offset,
sym->type |= N_ABS;
sym->sect = NO_SECT;
} else {
+ struct section *s = get_section_by_index(section);
+
sym->type |= N_SECT;
/* get the in-file index of the section the symbol was defined in */
- sym->sect = get_section_fileindex_by_index(section);
+ sym->sect = s ? s->fileindex : NO_SECT;
/* track the initially allocated symbol number for use in future fix-ups */
sym->initial_snum = nsyms;
- if (sym->sect == NO_SECT) {
+ if (!s) {
/* remember symbol number of references to external
** symbols, this works because every external symbol gets
** its own section number allocated internally by nasm and
@@ -823,7 +895,7 @@ static void macho_symdef(char *name, int32_t section, int64_t offset,
case 2:
/* there isn't actually a difference between global
** and common symbols, both even have their size in
- ** sym->value */
+ ** sym->symv.key */
sym->type = N_EXT;
break;
@@ -832,8 +904,11 @@ static void macho_symdef(char *name, int32_t section, int64_t offset,
** external or common symbol (assemble_file() does a
** seg_alloc() on every call for them) */
nasm_panic(0, "in-file index for section %d not found, is_global = %d", section, is_global);
+ break;
}
- }
+ } else if (is_global) {
+ s->gsyms = rb_insert(s->gsyms, &sym->symv);
+ }
}
++nsyms;
}
@@ -1242,10 +1317,10 @@ static void macho_write_symtab (void)
sizes. */
if (((sym->type & N_TYPE) == N_SECT) && (sym->sect != NO_SECT)) {
nasm_assert(sym->sect <= seg_nsects);
- sym->value += sectstab[sym->sect]->addr;
+ sym->symv.key += sectstab[sym->sect]->addr;
}
- fwriteptr(sym->value, ofile); /* value (i.e. offset) */
+ fwriteptr(sym->symv.key, ofile); /* value (i.e. offset) */
}
}
@@ -1260,10 +1335,10 @@ static void macho_write_symtab (void)
sizes. */
if (((sym->type & N_TYPE) == N_SECT) && (sym->sect != NO_SECT)) {
nasm_assert(sym->sect <= seg_nsects);
- sym->value += sectstab[sym->sect]->addr;
+ sym->symv.key += sectstab[sym->sect]->addr;
}
- fwriteptr(sym->value, ofile); /* value (i.e. offset) */
+ fwriteptr(sym->symv.key, ofile); /* value (i.e. offset) */
}
for (i = 0; i < nundefsym; i++) {
@@ -1277,10 +1352,10 @@ static void macho_write_symtab (void)
sizes. */
if (((sym->type & N_TYPE) == N_SECT) && (sym->sect != NO_SECT)) {
nasm_assert(sym->sect <= seg_nsects);
- sym->value += sectstab[sym->sect]->addr;
+ sym->symv.key += sectstab[sym->sect]->addr;
}
- fwriteptr(sym->value, ofile); /* value (i.e. offset) */
+ fwriteptr(sym->symv.key, ofile); /* value (i.e. offset) */
}
}
@@ -1505,7 +1580,11 @@ static const struct macho_fmt macho32_fmt = {
MACHO_HEADER_SIZE,
MACHO_SEGCMD_SIZE,
MACHO_SECTCMD_SIZE,
- MACHO_NLIST_SIZE
+ MACHO_NLIST_SIZE,
+ RL_MAX_32,
+ GENERIC_RELOC_VANILLA,
+ GENERIC_RELOC_VANILLA,
+ GENERIC_RELOC_TLV
};
static void macho32_init(void)
@@ -1546,7 +1625,11 @@ static const struct macho_fmt macho64_fmt = {
MACHO_HEADER64_SIZE,
MACHO_SEGCMD64_SIZE,
MACHO_SECTCMD64_SIZE,
- MACHO_NLIST64_SIZE
+ MACHO_NLIST64_SIZE,
+ RL_MAX_64,
+ X86_64_RELOC_UNSIGNED,
+ X86_64_RELOC_SIGNED,
+ X86_64_RELOC_TLV
};
static void macho64_init(void)
@@ -1555,8 +1638,7 @@ static void macho64_init(void)
macho_init();
/* add special symbol for ..gotpcrel */
- macho_gotpcrel_sect = seg_alloc();
- macho_gotpcrel_sect++;
+ macho_gotpcrel_sect = seg_alloc() + 1;
define_label("..gotpcrel", macho_gotpcrel_sect, 0L, NULL, false, false);
}