diff options
-rw-r--r-- | assemble.c | 238 | ||||
-rw-r--r-- | insns.dat | 6 | ||||
-rw-r--r-- | nasm.h | 41 | ||||
-rw-r--r-- | parser.c | 255 | ||||
-rw-r--r-- | test/movimm.asm | 26 | ||||
-rw-r--r-- | tokens.dat | 4 |
6 files changed, 345 insertions, 225 deletions
@@ -22,8 +22,7 @@ * assembly mode or the operand-size override on the operand * \40..\43 - a long immediate operand, from operand 0..3 * \44..\47 - select between \3[0-3], \4[0-3] and \5[4-7] - * depending on assembly mode or the address-size override - * on the operand. + * depending on the address size of the instruction. * \50..\53 - a byte relative operand, from operand 0..3 * \54..\57 - a qword immediate operand, from operand 0..3 * \60..\63 - a word relative operand, from operand 0..3 @@ -115,25 +114,50 @@ static int32_t regflag(const operand *); static int32_t regval(const operand *); static int rexflags(int, int32_t, int); static int op_rexflags(const operand *, int); -static ea *process_ea(operand *, ea *, int, int, int32_t, int); +static ea *process_ea(operand *, ea *, int, int, int, int32_t, int); static void add_asp(insn *, int); -static int has_prefix(insn * ins, enum prefixes prefix) +static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix) { - int j; - for (j = 0; j < ins->nprefix; j++) { - if (ins->prefixes[j] == prefix) - return 1; - } - return 0; + return ins->prefixes[pos] == prefix; +} + +static void assert_no_prefix(insn * ins, enum prefix_pos pos) +{ + if (ins->prefixes[pos]) + errfunc(ERR_NONFATAL, "invalid %s prefix", + prefix_name(ins->prefixes[pos])); } -static void assert_no_prefix(insn * ins, enum prefixes prefix) +static const char *size_name(int size) { - if (has_prefix(ins, prefix)) - errfunc(ERR_NONFATAL, "invalid %s prefix", prefix_name(prefix)); + switch (size) { + case 1: + return "byte"; + case 2: + return "word"; + case 4: + return "dword"; + case 8: + return "qword"; + case 10: + return "tword"; + case 16: + return "oword"; + default: + return "???"; + } } +static void warn_overflow(int size, int64_t data) +{ + if (size < 8) { + int64_t lim = (1 << (size*8))-1; + + if (data < ~lim || data > lim) + errfunc(ERR_WARNING, "%s data exceeds bounds", size_name(size)); + } +} /* * This routine wrappers the real output format's output routine, * in order to pass a copy of the data off to the listing file @@ -433,7 +457,7 @@ int32_t assemble(int32_t segment, int32_t offset, int bits, uint32_t cp, error(ERR_PANIC, "errors made it through from pass one"); else while (itimes--) { - for (j = 0; j < instruction->nprefix; j++) { + for (j = 0; j < MAXPREFIX; j++) { uint8_t c = 0; switch (instruction->prefixes[j]) { case P_LOCK: @@ -492,15 +516,23 @@ int32_t assemble(int32_t segment, int32_t offset, int bits, uint32_t cp, error(ERR_NONFATAL, "16-bit addressing is not supported " "in 64-bit mode"); - break; - } - if (bits != 16) + } else if (bits != 16) c = 0x67; break; case P_A32: if (bits != 32) c = 0x67; break; + case P_A64: + if (bits != 64) { + error(ERR_NONFATAL, + "64-bit addressing is only supported " + "in 64-bit mode"); + } + break; + case P_ASP: + c = 0x67; + break; case P_O16: if (bits != 16) c = 0x66; @@ -509,6 +541,14 @@ int32_t assemble(int32_t segment, int32_t offset, int bits, uint32_t cp, if (bits == 16) c = 0x66; break; + case P_O64: + /* REX.W */ + break; + case P_OSP: + c = 0x66; + break; + case P_none: + break; default: error(ERR_PANIC, "invalid instruction prefix"); } @@ -634,7 +674,8 @@ int32_t insn_size(int32_t segment, int32_t offset, int bits, uint32_t cp, strncpy(fname, instruction->eops->stringval, len); fname[len] = '\0'; - while (1) { /* added by alexfru: 'incbin' uses include paths */ + /* added by alexfru: 'incbin' uses include paths */ + while (1) { combine = nasm_malloc(strlen(prefix) + len + 1); strcpy(combine, prefix); strcat(combine, fname); @@ -689,7 +730,7 @@ int32_t insn_size(int32_t segment, int32_t offset, int bits, uint32_t cp, isize = calcsize(segment, offset, bits, instruction, codes); if (isize < 0) return -1; - for (j = 0; j < instruction->nprefix; j++) { + for (j = 0; j < MAXPREFIX; j++) { switch (instruction->prefixes[j]) { case P_A16: if (bits != 16) @@ -707,6 +748,10 @@ int32_t insn_size(int32_t segment, int32_t offset, int bits, uint32_t cp, if (bits == 16) isize++; break; + case P_A64: + case P_O64: + case P_none: + break; default: isize++; break; @@ -745,6 +790,9 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits, int rex_mask = ~0; ins->rex = 0; /* Ensure REX is reset */ + if (ins->prefixes[PPS_OSIZE] == P_O64) + ins->rex |= REX_W; + (void)segment; /* Don't warn that this parameter is unused */ (void)offset; /* Don't warn that this parameter is unused */ @@ -812,8 +860,7 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits, case 045: case 046: case 047: - length += ((ins->oprs[c - 044].addr_size ? - ins->oprs[c - 044].addr_size : bits) >> 3); + length += ins->addr_size >> 3; break; case 050: case 051: @@ -909,15 +956,16 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits, case 0310: if (bits == 64) return -1; - length += (bits != 16) && !has_prefix(ins,P_A16); + length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16); break; case 0311: - length += (bits != 32) && !has_prefix(ins,P_A32); + length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32); break; case 0312: break; case 0313: - if (bits != 64 || has_prefix(ins,P_A16) || has_prefix(ins,P_A32)) + if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) || + has_prefix(ins, PPS_ASIZE, P_A32)) return -1; break; case 0320: @@ -944,7 +992,6 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits, length++; break; case 0334: - assert_no_prefix(ins, P_LOCK); ins->rex |= REX_L; break; case 0335: @@ -990,7 +1037,7 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits, if (!process_ea (&ins->oprs[(c >> 3) & 7], &ea_data, bits, - rfield, rflags, ins->forw_ref)) { + ins->addr_size, rfield, rflags, ins->forw_ref)) { errfunc(ERR_NONFATAL, "invalid effective address"); return -1; } else { @@ -1019,10 +1066,13 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits, if (ins->rex & REX_H) { errfunc(ERR_NONFATAL, "cannot use high register in rex instruction"); return -1; - } else if (bits == 64 || - ((ins->rex & REX_L) && - !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) && - cpu >= IF_X86_64)) { + } else if (bits == 64) { + length++; + } else if ((ins->rex & REX_L) && + !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) && + cpu >= IF_X86_64) { + /* LOCK-as-REX.R */ + assert_no_prefix(ins, PPS_LREP); length++; } else { errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode"); @@ -1179,13 +1229,10 @@ static void gencode(int32_t segment, int32_t offset, int bits, case 031: case 032: case 033: - if (ins->oprs[c - 030].segment == NO_SEG && - ins->oprs[c - 030].wrt == NO_SEG && - (ins->oprs[c - 030].offset < -65536L || - ins->oprs[c - 030].offset > 65535L)) { - errfunc(ERR_WARNING, "word value exceeds bounds"); - } data = ins->oprs[c - 030].offset; + if (ins->oprs[c - 030].segment == NO_SEG && + ins->oprs[c - 030].wrt == NO_SEG) + warn_overflow(2, data); out(offset, segment, &data, OUT_ADDRESS + 2, ins->oprs[c - 030].segment, ins->oprs[c - 030].wrt); offset += 2; @@ -1200,8 +1247,7 @@ static void gencode(int32_t segment, int32_t offset, int bits, else size = (bits == 16) ? 2 : 4; data = ins->oprs[c - 034].offset; - if (size == 2 && (data < -65536L || data > 65535L)) - errfunc(ERR_WARNING, "word value exceeds bounds"); + warn_overflow(size, data); out(offset, segment, &data, OUT_ADDRESS + size, ins->oprs[c - 034].segment, ins->oprs[c - 034].wrt); offset += size; @@ -1222,10 +1268,8 @@ static void gencode(int32_t segment, int32_t offset, int bits, case 046: case 047: data = ins->oprs[c - 044].offset; - size = ((ins->oprs[c - 044].addr_size ? - ins->oprs[c - 044].addr_size : bits) >> 3); - if (size == 2 && (data < -65536L || data > 65535L)) - errfunc(ERR_WARNING, "word value exceeds bounds"); + size = ins->addr_size >> 3; + warn_overflow(size, data); out(offset, segment, &data, OUT_ADDRESS + size, ins->oprs[c - 044].segment, ins->oprs[c - 044].wrt); offset += size; @@ -1337,10 +1381,8 @@ static void gencode(int32_t segment, int32_t offset, int bits, offset++; } else { if (ins->oprs[c - 0140].segment == NO_SEG && - ins->oprs[c - 0140].wrt == NO_SEG && - (data < -65536L || data > 65535L)) { - errfunc(ERR_WARNING, "word value exceeds bounds"); - } + ins->oprs[c - 0140].wrt == NO_SEG) + warn_overflow(2, data); out(offset, segment, &data, OUT_ADDRESS + 2, ins->oprs[c - 0140].segment, ins->oprs[c - 0140].wrt); offset += 2; @@ -1424,7 +1466,7 @@ static void gencode(int32_t segment, int32_t offset, int bits, break; case 0310: - if (bits == 32 && !has_prefix(ins,P_A16)) { + if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) { *bytes = 0x67; out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG); @@ -1434,7 +1476,7 @@ static void gencode(int32_t segment, int32_t offset, int bits, break; case 0311: - if (bits != 32 && !has_prefix(ins,P_A32)) { + if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) { *bytes = 0x67; out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG); @@ -1562,7 +1604,7 @@ static void gencode(int32_t segment, int32_t offset, int bits, if (!process_ea (&ins->oprs[(c >> 3) & 7], &ea_data, bits, - rfield, rflags, ins->forw_ref)) { + ins->addr_size, rfield, rflags, ins->forw_ref)) { errfunc(ERR_NONFATAL, "invalid effective address"); } @@ -1700,7 +1742,7 @@ static int matches(const struct itemplate *itemp, insn * instruction, int bits) if (instruction->oprs[i].type != instruction->oprs[j].type || instruction->oprs[i].basereg != instruction->oprs[j].basereg) return 0; - } else if (itemp->opd[i] & ~instruction->oprs[i].type || + } else if (itemp->opd[i] & ~instruction->oprs[i].type || ((itemp->opd[i] & SIZE_MASK) && ((itemp->opd[i] ^ instruction->oprs[i].type) & SIZE_MASK))) { if ((itemp->opd[i] & ~instruction->oprs[i].type & ~SIZE_MASK) || @@ -1824,8 +1866,8 @@ static int matches(const struct itemplate *itemp, insn * instruction, int bits) return ret; } -static ea *process_ea(operand * input, ea * output, int addrbits, - int rfield, int32_t rflags, int forw_ref) +static ea *process_ea(operand * input, ea * output, int bits, + int addrbits, int rfield, int32_t rflags, int forw_ref) { output->rip = false; @@ -1854,10 +1896,7 @@ static ea *process_ea(operand * input, ea * output, int addrbits, if (input->basereg == -1 && (input->indexreg == -1 || input->scale == 0)) { /* it's a pure offset */ - if (input->addr_size) - addrbits = input->addr_size; - - if (globalbits == 64 && (~input->type & IP_REL)) { + if (bits == 64 && (~input->type & IP_REL)) { int scale, index, base; output->sib_present = true; scale = 0; @@ -1871,7 +1910,7 @@ static ea *process_ea(operand * input, ea * output, int addrbits, output->sib_present = false; output->bytes = (addrbits != 16 ? 4 : 2); output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3); - output->rip = globalbits == 64; + output->rip = bits == 64; } } else { /* it's an indirection */ int i = input->indexreg, b = input->basereg, s = input->scale; @@ -1921,11 +1960,15 @@ static ea *process_ea(operand * input, ea * output, int addrbits, sok &= ~bx; } - /* While we're here, ensure the user didn't specify WORD. */ - if (input->addr_size == 16 || - (input->addr_size == 32 && !(sok & BITS32)) || - (input->addr_size == 64 && !(sok & BITS64))) - return NULL; + /* While we're here, ensure the user didn't specify + WORD or QWORD. */ + if (input->disp_size == 16 || input->disp_size == 64) + return NULL; + + if (addrbits == 16 || + (addrbits == 32 && !(sok & BITS32)) || + (addrbits == 64 && !(sok & BITS64))) + return NULL; /* now reorganize base/index */ if (s == 1 && bt != it && bt != -1 && it != -1 && @@ -2051,7 +2094,7 @@ static ea *process_ea(operand * input, ea * output, int addrbits, return NULL; /* ensure the user didn't specify DWORD/QWORD */ - if (input->addr_size == 32 || input->addr_size == 64) + if (input->disp_size == 32 || input->disp_size == 64) return NULL; if (s != 1 && i != -1) @@ -2130,36 +2173,56 @@ static ea *process_ea(operand * input, ea * output, int addrbits, return output; } -static void add_asp(insn *instruction, int addrbits) +static void add_asp(insn *ins, int addrbits) { int j, valid; + int defdisp; valid = (addrbits == 64) ? 64|32 : 32|16; - for (j = 0; j < instruction->operands; j++) { - if (!(MEMORY & ~instruction->oprs[j].type)) { + switch (ins->prefixes[PPS_ASIZE]) { + case P_A16: + valid &= 16; + break; + case P_A32: + valid &= 32; + break; + case P_A64: + valid &= 64; + break; + case P_ASP: + valid &= (addrbits == 32) ? 16 : 32; + break; + default: + break; + } + + for (j = 0; j < ins->operands; j++) { + if (!(MEMORY & ~ins->oprs[j].type)) { int32_t i, b; /* Verify as Register */ - if (instruction->oprs[j].indexreg < EXPR_REG_START - || instruction->oprs[j].indexreg >= REG_ENUM_LIMIT) + if (ins->oprs[j].indexreg < EXPR_REG_START + || ins->oprs[j].indexreg >= REG_ENUM_LIMIT) i = 0; else - i = reg_flags[instruction->oprs[j].indexreg]; + i = reg_flags[ins->oprs[j].indexreg]; /* Verify as Register */ - if (instruction->oprs[j].basereg < EXPR_REG_START - || instruction->oprs[j].basereg >= REG_ENUM_LIMIT) + if (ins->oprs[j].basereg < EXPR_REG_START + || ins->oprs[j].basereg >= REG_ENUM_LIMIT) b = 0; else - b = reg_flags[instruction->oprs[j].basereg]; + b = reg_flags[ins->oprs[j].basereg]; - if (instruction->oprs[j].scale == 0) + if (ins->oprs[j].scale == 0) i = 0; if (!i && !b) { - if (instruction->oprs[j].addr_size) - valid &= instruction->oprs[j].addr_size; + int ds = ins->oprs[j].disp_size; + if ((addrbits != 64 && ds > 8) || + (addrbits == 64 && ds == 16)) + valid &= ds; } else { if (!(REG16 & ~b)) valid &= 16; @@ -2179,18 +2242,27 @@ static void add_asp(insn *instruction, int addrbits) } if (valid & addrbits) { - /* Don't do anything */ + ins->addr_size = addrbits; } else if (valid & ((addrbits == 32) ? 16 : 32)) { - /* Add an instruction size prefix */ + /* Add an address size prefix */ enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32; - for (j = 0; j < instruction->nprefix; j++) { - if (instruction->prefixes[j] == pref) - return; /* Already there */ - } - instruction->prefixes[j] = pref; - instruction->nprefix++; + ins->prefixes[PPS_ASIZE] = pref; + ins->addr_size = (addrbits == 32) ? 16 : 32; } else { /* Impossible... */ errfunc(ERR_NONFATAL, "impossible combination of address sizes"); + ins->addr_size = addrbits; /* Error recovery */ + } + + defdisp = ins->addr_size == 16 ? 16 : 32; + + for (j = 0; j < ins->operands; j++) { + if (!(MEM_OFFS & ~ins->oprs[j].type) && + (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp) + != ins->addr_size) { + /* mem_offs sizes must match the address size; if not, + strip the MEM_OFFS bit and match only EA instructions */ + ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY); + } } } @@ -722,15 +722,15 @@ MOV reg16,reg16 \320\1\x8B\110 8086 MOV reg32,mem \321\1\x8B\110 386,SM MOV reg32,reg32 \321\1\x8B\110 386 MOV reg64,mem \324\1\x8B\110 X64,SM -MOV reg64,reg64 \324\1\x8B\110 X64,SM +MOV reg64,reg64 \324\1\x8B\110 X64 MOV reg8,imm \10\xB0\21 8086,SM MOV reg16,imm \320\10\xB8\31 8086,SM MOV reg32,imm \321\10\xB8\41 386,SM -MOV reg64,imm \324\10\xB8\55 X64,SQ +MOV reg64,imm \324\10\xB8\55 X64,SM MOV rm8,imm \1\xC6\200\21 8086,SM MOV rm16,imm \320\1\xC7\200\31 8086,SM MOV rm32,imm \321\1\xC7\200\41 386,SM -MOV rm64,imm \324\1\xC7\200\41 X64,SD +MOV rm64,imm \324\1\xC7\200\41 X64,SM MOV mem,imm8 \1\xC6\200\21 8086,SM MOV mem,imm16 \320\1\xC7\200\31 8086,SM MOV mem,imm32 \321\1\xC7\200\41 386,SM @@ -541,9 +541,12 @@ enum ccode { /* condition code names */ * register names do not overlap. */ enum prefixes { /* instruction prefixes */ + P_none = 0, PREFIX_ENUM_START = REG_ENUM_LIMIT, - P_A16 = PREFIX_ENUM_START, P_A32, P_LOCK, P_O16, P_O32, - P_REP, P_REPE, P_REPNE, P_REPNZ, P_REPZ, P_TIMES + P_A16 = PREFIX_ENUM_START, P_A32, P_A64, P_ASP, + P_LOCK, P_O16, P_O32, P_O64, P_OSP, + P_REP, P_REPE, P_REPNE, P_REPNZ, P_REPZ, P_TIMES, + PREFIX_ENUM_LIMIT }; enum { /* extended operand types */ @@ -565,9 +568,9 @@ enum eval_hint { /* values for `hinttype' */ EAH_NOTBASE = 2 /* try _not_ to make reg the base */ }; -typedef struct { /* operand to an instruction */ +typedef struct operand { /* operand to an instruction */ int32_t type; /* type of operand */ - int addr_size; /* 0 means default; 16; 32; 64 */ + int disp_size; /* 0 means default; 16; 32; 64 */ enum reg_enum basereg, indexreg; /* address registers */ int scale; /* index scale */ int hintbase; @@ -592,17 +595,32 @@ typedef struct extop { /* extended operand */ int32_t wrt; /* ... and here */ } extop; -#define MAXPREFIX 4 +/* Prefix positions: each type of prefix goes in a specific slot. + This affects the final ordering of the assembled output, which + shouldn't matter to the processor, but if you have stylistic + preferences, you can change this. REX prefixes are handled + differently for the time being. + + Note that LOCK and REP are in the same slot. This is + an x86 architectural constraint. */ +enum prefix_pos { + PPS_LREP, /* Lock or REP prefix */ + PPS_SEG, /* Segment override prefix */ + PPS_OSIZE, /* Operand size prefix */ + PPS_ASIZE, /* Address size prefix */ + MAXPREFIX /* Total number of prefix slots */ +}; + #define MAX_OPERANDS 4 -typedef struct { /* an instruction itself */ - char *label; /* the label defined, or NULL */ +typedef struct insn { /* an instruction itself */ + char *label; /* the label defined, or NULL */ enum prefixes prefixes[MAXPREFIX]; /* instruction prefixes, if any */ - int nprefix; /* number of entries in above */ enum opcode opcode; /* the opcode - not just the string */ enum ccode condition; /* the condition code, if Jcc/SETcc */ int operands; /* how many operands? 0-3 * (more if db et al) */ + int addr_size; /* address size */ operand oprs[MAX_OPERANDS]; /* the operands, defined as above */ extop *eops; /* extended operands */ int eops_float; /* true if DD and floating */ @@ -934,8 +952,11 @@ struct dfmt { */ enum special_tokens { - S_ABS, S_BYTE, S_DWORD, S_FAR, S_LONG, S_NEAR, S_NOSPLIT, - S_OWORD, S_QWORD, S_REL, S_SHORT, S_STRICT, S_TO, S_TWORD, S_WORD + SPECIAL_ENUM_START = PREFIX_ENUM_LIMIT, + S_ABS = SPECIAL_ENUM_START, + S_BYTE, S_DWORD, S_FAR, S_LONG, S_NEAR, S_NOSPLIT, + S_OWORD, S_QWORD, S_REL, S_SHORT, S_STRICT, S_TO, S_TWORD, S_WORD, + SPECIAL_ENUM_LIMIT }; /* @@ -44,12 +44,133 @@ void parser_global_info(struct ofmt *output, struct location * locp) location = locp; } +static int prefix_slot(enum prefixes prefix) +{ + switch (prefix) { + case R_CS: + case R_DS: + case R_SS: + case R_ES: + case R_FS: + case R_GS: + return PPS_SEG; + case P_LOCK: + case P_REP: + case P_REPE: + case P_REPZ: + case P_REPNE: + case P_REPNZ: + return PPS_LREP; + case P_O16: + case P_O32: + case P_O64: + case P_OSP: + return PPS_OSIZE; + case P_A16: + case P_A32: + case P_A64: + case P_ASP: + return PPS_ASIZE; + default: + error(ERR_PANIC, "Invalid value %d passed to prefix_slot()", prefix); + return -1; + } +} + +static void process_size_override(insn * result, int operand) +{ + if (tasm_compatible_mode) { + switch ((int)tokval.t_integer) { + /* For TASM compatibility a size override inside the + * brackets changes the size of the operand, not the + * address type of the operand as it does in standard + * NASM syntax. Hence: + * + * mov eax,[DWORD val] + * + * is valid syntax in TASM compatibility mode. Note that + * you lose the ability to override the default address + * type for the instruction, but we never use anything + * but 32-bit flat model addressing in our code. + */ + case S_BYTE: + result->oprs[operand].type |= BITS8; + break; + case S_WORD: + result->oprs[operand].type |= BITS16; + break; + case S_DWORD: + case S_LONG: + result->oprs[operand].type |= BITS32; + break; + case S_QWORD: + result->oprs[operand].type |= BITS64; + break; + case S_TWORD: + result->oprs[operand].type |= BITS80; + break; + case S_OWORD: + result->oprs[operand].type |= BITS128; + break; + default: + error(ERR_NONFATAL, + "invalid operand size specification"); + break; + } + } else { + /* Standard NASM compatible syntax */ + switch ((int)tokval.t_integer) { + case S_NOSPLIT: + result->oprs[operand].eaflags |= EAF_TIMESTWO; + break; + case S_REL: + result->oprs[operand].eaflags |= EAF_REL; + break; + case S_ABS: + result->oprs[operand].eaflags |= EAF_ABS; + break; + case S_BYTE: + result->oprs[operand].disp_size = 8; + result->oprs[operand].eaflags |= EAF_BYTEOFFS; + break; + case P_A16: + case P_A32: + case P_A64: + if (result->prefixes[PPS_ASIZE] && + result->prefixes[PPS_ASIZE] != tokval.t_integer) + error(ERR_NONFATAL, + "conflicting address size specifications"); + else + result->prefixes[PPS_ASIZE] = tokval.t_integer; + break; + case S_WORD: + result->oprs[operand].disp_size = 16; + result->oprs[operand].eaflags |= EAF_WORDOFFS; + break; + case S_DWORD: + case S_LONG: + result->oprs[operand].disp_size = 32; + result->oprs[operand].eaflags |= EAF_WORDOFFS; + break; + case S_QWORD: + result->oprs[operand].disp_size = 64; + result->oprs[operand].eaflags |= EAF_WORDOFFS; + break; + default: + error(ERR_NONFATAL, "invalid size specification in" + " effective address"); + break; + } + } +} + insn *parse_line(int pass, char *buffer, insn * result, efunc errfunc, evalfunc evaluate, ldfunc ldef) { int operand; int critical; struct eval_hints hints; + int j; result->forw_ref = false; error = errfunc; @@ -101,7 +222,8 @@ insn *parse_line(int pass, char *buffer, insn * result, return result; } - result->nprefix = 0; + for (j = 0; j < MAXPREFIX; j++) + result->prefixes[j] = P_none; result->times = 1L; while (i == TOKEN_PREFIX || @@ -134,17 +256,25 @@ insn *parse_line(int pass, char *buffer, insn * result, } } } else { - if (result->nprefix == MAXPREFIX) - error(ERR_NONFATAL, - "instruction has more than %d prefixes", MAXPREFIX); - else - result->prefixes[result->nprefix++] = tokval.t_integer; + int slot = prefix_slot(tokval.t_integer); + if (result->prefixes[slot]) { + error(ERR_NONFATAL, + "instruction has conflicting prefixes"); + } + result->prefixes[slot] = tokval.t_integer; i = stdscan(NULL, &tokval); } } if (i != TOKEN_INSN) { - if (result->nprefix > 0 && i == 0) { + int j; + enum prefixes pfx; + + for (j = 0; j < MAXPREFIX; j++) + if ((pfx = result->prefixes[j]) != P_none) + break; + + if (i == 0 && pfx != P_none) { /* * Instruction prefixes are present, but no actual * instruction. This is allowed: at this point we @@ -358,12 +488,12 @@ insn *parse_line(int pass, char *buffer, insn * result, * of these, separated by commas, and terminated by a zero token. */ for (operand = 0; operand < MAX_OPERANDS; operand++) { - expr *value; /* used most of the time */ + expr *value; /* used most of the time */ int mref; /* is this going to be a memory ref? */ int bracket; /* is it a [] mref, or a & mref? */ int setsize = 0; - result->oprs[operand].addr_size = 0; /* have to zero this whatever */ + result->oprs[operand].disp_size = 0; /* have to zero this whatever */ result->oprs[operand].eaflags = 0; /* and this */ result->oprs[operand].opflags = 0; @@ -428,78 +558,10 @@ insn *parse_line(int pass, char *buffer, insn * result, if (i == '[' || i == '&') { /* memory reference */ mref = true; bracket = (i == '['); - while ((i = stdscan(NULL, &tokval)) == TOKEN_SPECIAL) { - /* check for address directives */ - if (tasm_compatible_mode) { - switch ((int)tokval.t_integer) { - /* For TASM compatibility a size override inside the - * brackets changes the size of the operand, not the - * address type of the operand as it does in standard - * NASM syntax. Hence: - * - * mov eax,[DWORD val] - * - * is valid syntax in TASM compatibility mode. Note that - * you lose the ability to override the default address - * type for the instruction, but we never use anything - * but 32-bit flat model addressing in our code. - */ - case S_BYTE: - result->oprs[operand].type |= BITS8; - break; - case S_WORD: - result->oprs[operand].type |= BITS16; - break; - case S_DWORD: - case S_LONG: - result->oprs[operand].type |= BITS32; - break; - case S_QWORD: - result->oprs[operand].type |= BITS64; - break; - case S_TWORD: - result->oprs[operand].type |= BITS80; - break; - case S_OWORD: - result->oprs[operand].type |= BITS128; - break; - default: - error(ERR_NONFATAL, - "invalid operand size specification"); - } - } else { - /* Standard NASM compatible syntax */ - switch ((int)tokval.t_integer) { - case S_NOSPLIT: - result->oprs[operand].eaflags |= EAF_TIMESTWO; - break; - case S_REL: - result->oprs[operand].eaflags |= EAF_REL; - break; - case S_ABS: - result->oprs[operand].eaflags |= EAF_ABS; - break; - case S_BYTE: - result->oprs[operand].eaflags |= EAF_BYTEOFFS; - break; - case S_WORD: - result->oprs[operand].addr_size = 16; - result->oprs[operand].eaflags |= EAF_WORDOFFS; - break; - case S_DWORD: - case S_LONG: - result->oprs[operand].addr_size = 32; - result->oprs[operand].eaflags |= EAF_WORDOFFS; - break; - case S_QWORD: - result->oprs[operand].addr_size = 64; - result->oprs[operand].eaflags |= EAF_WORDOFFS; - break; - default: - error(ERR_NONFATAL, "invalid size specification in" - " effective address"); - } - } + i = stdscan(NULL, &tokval); /* then skip the colon */ + while (i == TOKEN_SPECIAL || i == TOKEN_PREFIX) { + process_size_override(result, operand); + i = stdscan(NULL, &tokval); } } else { /* immediate operand, or register */ mref = false; @@ -529,32 +591,18 @@ insn *parse_line(int pass, char *buffer, insn * result, if (value[1].type != 0 || value->value != 1 || REG_SREG & ~reg_flags[value->type]) error(ERR_NONFATAL, "invalid segment override"); - else if (result->nprefix == MAXPREFIX) + else if (result->prefixes[PPS_SEG]) error(ERR_NONFATAL, - "instruction has more than %d prefixes", MAXPREFIX); + "instruction has conflicting segment overrides"); else { - result->prefixes[result->nprefix++] = value->type; + result->prefixes[PPS_SEG] = value->type; if (!(REG_FSGS & ~reg_flags[value->type])) result->oprs[operand].eaflags |= EAF_FSGS; } i = stdscan(NULL, &tokval); /* then skip the colon */ - if (i == TOKEN_SPECIAL) { /* another check for size override */ - switch ((int)tokval.t_integer) { - case S_WORD: - result->oprs[operand].addr_size = 16; - break; - case S_DWORD: - case S_LONG: - result->oprs[operand].addr_size = 32; - break; - case S_QWORD: - result->oprs[operand].addr_size = 64; - break; - default: - error(ERR_NONFATAL, "invalid size specification in" - " effective address"); - } + while (i == TOKEN_SPECIAL || i == TOKEN_PREFIX) { + process_size_override(result, operand); i = stdscan(NULL, &tokval); } value = evaluate(stdscan, NULL, &tokval, @@ -700,7 +748,7 @@ insn *parse_line(int pass, char *buffer, insn * result, !(result->oprs[operand].eaflags & EAF_FSGS)) || (result->oprs[operand].eaflags & EAF_REL)); - result->oprs[operand].type |= is_rel ? IP_REL : MEM_OFFS; + result->oprs[operand].type |= is_rel ? IP_REL : MEM_OFFS; } result->oprs[operand].basereg = b; result->oprs[operand].indexreg = i; @@ -770,8 +818,9 @@ insn *parse_line(int pass, char *buffer, insn * result, result->operands = operand; /* set operand count */ - while (operand < 3) /* clear remaining operands */ - result->oprs[operand++].type = 0; +/* clear remaining operands */ +while (operand < MAX_OPERANDS) + result->oprs[operand++].type = 0; /* * Transform RESW, RESD, RESQ, REST, RESO into RESB. diff --git a/test/movimm.asm b/test/movimm.asm deleted file mode 100644 index b5a7a274..00000000 --- a/test/movimm.asm +++ /dev/null @@ -1,26 +0,0 @@ - bits 64 - - mov rax,1234567890abcdefh - mov eax,1234567890abcdefh - mov rax,dword 1234567890abcdefh - mov rax,qword 1234567890abcdefh - mov dword [rsi],1234567890abcdefh - mov qword [rsi],1234567890abcdefh - mov dword [rsi],dword 1234567890abcdefh - mov qword [rsi],dword 1234567890abcdefh -; mov qword [rsi],qword 1234567890abcdefh ; Error -; mov [rsi],qword 1234567890abcdefh ; Error - mov [rsi],dword 1234567890abcdefh - - ; The optimizer probably should compact these forms, doesn't yet? - mov rax,12345678h - mov eax,12345678h - mov rax,dword 12345678h - mov rax,qword 12345678h - mov dword [rsi],12345678h - mov qword [rsi],12345678h - mov dword [rsi],dword 12345678h - mov qword [rsi],dword 12345678h -; mov qword [rsi],qword 12345678h ; Error -; mov [rsi],qword 12345678h ; Error - mov [rsi],dword 12345678h @@ -5,9 +5,13 @@ % TOKEN_PREFIX, 0, P_* a16 a32 +a64 +asp lock o16 o32 +o64 +osp rep repe repne |