summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--assemble.c238
-rw-r--r--insns.dat6
-rw-r--r--nasm.h41
-rw-r--r--parser.c255
-rw-r--r--test/movimm.asm26
-rw-r--r--tokens.dat4
6 files changed, 345 insertions, 225 deletions
diff --git a/assemble.c b/assemble.c
index 2fd9f973..83a9a8a0 100644
--- a/assemble.c
+++ b/assemble.c
@@ -22,8 +22,7 @@
* assembly mode or the operand-size override on the operand
* \40..\43 - a long immediate operand, from operand 0..3
* \44..\47 - select between \3[0-3], \4[0-3] and \5[4-7]
- * depending on assembly mode or the address-size override
- * on the operand.
+ * depending on the address size of the instruction.
* \50..\53 - a byte relative operand, from operand 0..3
* \54..\57 - a qword immediate operand, from operand 0..3
* \60..\63 - a word relative operand, from operand 0..3
@@ -115,25 +114,50 @@ static int32_t regflag(const operand *);
static int32_t regval(const operand *);
static int rexflags(int, int32_t, int);
static int op_rexflags(const operand *, int);
-static ea *process_ea(operand *, ea *, int, int, int32_t, int);
+static ea *process_ea(operand *, ea *, int, int, int, int32_t, int);
static void add_asp(insn *, int);
-static int has_prefix(insn * ins, enum prefixes prefix)
+static int has_prefix(insn * ins, enum prefix_pos pos, enum prefixes prefix)
{
- int j;
- for (j = 0; j < ins->nprefix; j++) {
- if (ins->prefixes[j] == prefix)
- return 1;
- }
- return 0;
+ return ins->prefixes[pos] == prefix;
+}
+
+static void assert_no_prefix(insn * ins, enum prefix_pos pos)
+{
+ if (ins->prefixes[pos])
+ errfunc(ERR_NONFATAL, "invalid %s prefix",
+ prefix_name(ins->prefixes[pos]));
}
-static void assert_no_prefix(insn * ins, enum prefixes prefix)
+static const char *size_name(int size)
{
- if (has_prefix(ins, prefix))
- errfunc(ERR_NONFATAL, "invalid %s prefix", prefix_name(prefix));
+ switch (size) {
+ case 1:
+ return "byte";
+ case 2:
+ return "word";
+ case 4:
+ return "dword";
+ case 8:
+ return "qword";
+ case 10:
+ return "tword";
+ case 16:
+ return "oword";
+ default:
+ return "???";
+ }
}
+/*
+ * Issue a warning when `data' does not fit an operand of `size' bytes.
+ *
+ * A value is accepted when it lies in [-2^(8*size), 2^(8*size) - 1];
+ * anything outside that range triggers an ERR_WARNING naming the
+ * operand size.  Sizes of 8 bytes or more are not checked.
+ */
+static void warn_overflow(int size, int64_t data)
+{
+    if (size < 8) {
+        /*
+         * Do the shift in 64 bits: with a plain int, `1 << 32'
+         * (size == 4) is undefined behaviour.
+         */
+        int64_t lim = ((int64_t)1 << (size * 8)) - 1;
+
+        if (data < ~lim || data > lim)
+            errfunc(ERR_WARNING, "%s data exceeds bounds", size_name(size));
+    }
+}
/*
* This routine wrappers the real output format's output routine,
* in order to pass a copy of the data off to the listing file
@@ -433,7 +457,7 @@ int32_t assemble(int32_t segment, int32_t offset, int bits, uint32_t cp,
error(ERR_PANIC, "errors made it through from pass one");
else
while (itimes--) {
- for (j = 0; j < instruction->nprefix; j++) {
+ for (j = 0; j < MAXPREFIX; j++) {
uint8_t c = 0;
switch (instruction->prefixes[j]) {
case P_LOCK:
@@ -492,15 +516,23 @@ int32_t assemble(int32_t segment, int32_t offset, int bits, uint32_t cp,
error(ERR_NONFATAL,
"16-bit addressing is not supported "
"in 64-bit mode");
- break;
- }
- if (bits != 16)
+ } else if (bits != 16)
c = 0x67;
break;
case P_A32:
if (bits != 32)
c = 0x67;
break;
+ case P_A64:
+ if (bits != 64) {
+ error(ERR_NONFATAL,
+ "64-bit addressing is only supported "
+ "in 64-bit mode");
+ }
+ break;
+ case P_ASP:
+ c = 0x67;
+ break;
case P_O16:
if (bits != 16)
c = 0x66;
@@ -509,6 +541,14 @@ int32_t assemble(int32_t segment, int32_t offset, int bits, uint32_t cp,
if (bits == 16)
c = 0x66;
break;
+ case P_O64:
+ /* REX.W */
+ break;
+ case P_OSP:
+ c = 0x66;
+ break;
+ case P_none:
+ break;
default:
error(ERR_PANIC, "invalid instruction prefix");
}
@@ -634,7 +674,8 @@ int32_t insn_size(int32_t segment, int32_t offset, int bits, uint32_t cp,
strncpy(fname, instruction->eops->stringval, len);
fname[len] = '\0';
- while (1) { /* added by alexfru: 'incbin' uses include paths */
+ /* added by alexfru: 'incbin' uses include paths */
+ while (1) {
combine = nasm_malloc(strlen(prefix) + len + 1);
strcpy(combine, prefix);
strcat(combine, fname);
@@ -689,7 +730,7 @@ int32_t insn_size(int32_t segment, int32_t offset, int bits, uint32_t cp,
isize = calcsize(segment, offset, bits, instruction, codes);
if (isize < 0)
return -1;
- for (j = 0; j < instruction->nprefix; j++) {
+ for (j = 0; j < MAXPREFIX; j++) {
switch (instruction->prefixes[j]) {
case P_A16:
if (bits != 16)
@@ -707,6 +748,10 @@ int32_t insn_size(int32_t segment, int32_t offset, int bits, uint32_t cp,
if (bits == 16)
isize++;
break;
+ case P_A64:
+ case P_O64:
+ case P_none:
+ break;
default:
isize++;
break;
@@ -745,6 +790,9 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits,
int rex_mask = ~0;
ins->rex = 0; /* Ensure REX is reset */
+ if (ins->prefixes[PPS_OSIZE] == P_O64)
+ ins->rex |= REX_W;
+
(void)segment; /* Don't warn that this parameter is unused */
(void)offset; /* Don't warn that this parameter is unused */
@@ -812,8 +860,7 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits,
case 045:
case 046:
case 047:
- length += ((ins->oprs[c - 044].addr_size ?
- ins->oprs[c - 044].addr_size : bits) >> 3);
+ length += ins->addr_size >> 3;
break;
case 050:
case 051:
@@ -909,15 +956,16 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits,
case 0310:
if (bits == 64)
return -1;
- length += (bits != 16) && !has_prefix(ins,P_A16);
+ length += (bits != 16) && !has_prefix(ins, PPS_ASIZE, P_A16);
break;
case 0311:
- length += (bits != 32) && !has_prefix(ins,P_A32);
+ length += (bits != 32) && !has_prefix(ins, PPS_ASIZE, P_A32);
break;
case 0312:
break;
case 0313:
- if (bits != 64 || has_prefix(ins,P_A16) || has_prefix(ins,P_A32))
+ if (bits != 64 || has_prefix(ins, PPS_ASIZE, P_A16) ||
+ has_prefix(ins, PPS_ASIZE, P_A32))
return -1;
break;
case 0320:
@@ -944,7 +992,6 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits,
length++;
break;
case 0334:
- assert_no_prefix(ins, P_LOCK);
ins->rex |= REX_L;
break;
case 0335:
@@ -990,7 +1037,7 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits,
if (!process_ea
(&ins->oprs[(c >> 3) & 7], &ea_data, bits,
- rfield, rflags, ins->forw_ref)) {
+ ins->addr_size, rfield, rflags, ins->forw_ref)) {
errfunc(ERR_NONFATAL, "invalid effective address");
return -1;
} else {
@@ -1019,10 +1066,13 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits,
if (ins->rex & REX_H) {
errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
return -1;
- } else if (bits == 64 ||
- ((ins->rex & REX_L) &&
- !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
- cpu >= IF_X86_64)) {
+ } else if (bits == 64) {
+ length++;
+ } else if ((ins->rex & REX_L) &&
+ !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
+ cpu >= IF_X86_64) {
+ /* LOCK-as-REX.R */
+ assert_no_prefix(ins, PPS_LREP);
length++;
} else {
errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
@@ -1179,13 +1229,10 @@ static void gencode(int32_t segment, int32_t offset, int bits,
case 031:
case 032:
case 033:
- if (ins->oprs[c - 030].segment == NO_SEG &&
- ins->oprs[c - 030].wrt == NO_SEG &&
- (ins->oprs[c - 030].offset < -65536L ||
- ins->oprs[c - 030].offset > 65535L)) {
- errfunc(ERR_WARNING, "word value exceeds bounds");
- }
data = ins->oprs[c - 030].offset;
+ if (ins->oprs[c - 030].segment == NO_SEG &&
+ ins->oprs[c - 030].wrt == NO_SEG)
+ warn_overflow(2, data);
out(offset, segment, &data, OUT_ADDRESS + 2,
ins->oprs[c - 030].segment, ins->oprs[c - 030].wrt);
offset += 2;
@@ -1200,8 +1247,7 @@ static void gencode(int32_t segment, int32_t offset, int bits,
else
size = (bits == 16) ? 2 : 4;
data = ins->oprs[c - 034].offset;
- if (size == 2 && (data < -65536L || data > 65535L))
- errfunc(ERR_WARNING, "word value exceeds bounds");
+ warn_overflow(size, data);
out(offset, segment, &data, OUT_ADDRESS + size,
ins->oprs[c - 034].segment, ins->oprs[c - 034].wrt);
offset += size;
@@ -1222,10 +1268,8 @@ static void gencode(int32_t segment, int32_t offset, int bits,
case 046:
case 047:
data = ins->oprs[c - 044].offset;
- size = ((ins->oprs[c - 044].addr_size ?
- ins->oprs[c - 044].addr_size : bits) >> 3);
- if (size == 2 && (data < -65536L || data > 65535L))
- errfunc(ERR_WARNING, "word value exceeds bounds");
+ size = ins->addr_size >> 3;
+ warn_overflow(size, data);
out(offset, segment, &data, OUT_ADDRESS + size,
ins->oprs[c - 044].segment, ins->oprs[c - 044].wrt);
offset += size;
@@ -1337,10 +1381,8 @@ static void gencode(int32_t segment, int32_t offset, int bits,
offset++;
} else {
if (ins->oprs[c - 0140].segment == NO_SEG &&
- ins->oprs[c - 0140].wrt == NO_SEG &&
- (data < -65536L || data > 65535L)) {
- errfunc(ERR_WARNING, "word value exceeds bounds");
- }
+ ins->oprs[c - 0140].wrt == NO_SEG)
+ warn_overflow(2, data);
out(offset, segment, &data, OUT_ADDRESS + 2,
ins->oprs[c - 0140].segment, ins->oprs[c - 0140].wrt);
offset += 2;
@@ -1424,7 +1466,7 @@ static void gencode(int32_t segment, int32_t offset, int bits,
break;
case 0310:
- if (bits == 32 && !has_prefix(ins,P_A16)) {
+ if (bits == 32 && !has_prefix(ins, PPS_ASIZE, P_A16)) {
*bytes = 0x67;
out(offset, segment, bytes,
OUT_RAWDATA + 1, NO_SEG, NO_SEG);
@@ -1434,7 +1476,7 @@ static void gencode(int32_t segment, int32_t offset, int bits,
break;
case 0311:
- if (bits != 32 && !has_prefix(ins,P_A32)) {
+ if (bits != 32 && !has_prefix(ins, PPS_ASIZE, P_A32)) {
*bytes = 0x67;
out(offset, segment, bytes,
OUT_RAWDATA + 1, NO_SEG, NO_SEG);
@@ -1562,7 +1604,7 @@ static void gencode(int32_t segment, int32_t offset, int bits,
if (!process_ea
(&ins->oprs[(c >> 3) & 7], &ea_data, bits,
- rfield, rflags, ins->forw_ref)) {
+ ins->addr_size, rfield, rflags, ins->forw_ref)) {
errfunc(ERR_NONFATAL, "invalid effective address");
}
@@ -1700,7 +1742,7 @@ static int matches(const struct itemplate *itemp, insn * instruction, int bits)
if (instruction->oprs[i].type != instruction->oprs[j].type ||
instruction->oprs[i].basereg != instruction->oprs[j].basereg)
return 0;
- } else if (itemp->opd[i] & ~instruction->oprs[i].type ||
+ } else if (itemp->opd[i] & ~instruction->oprs[i].type ||
((itemp->opd[i] & SIZE_MASK) &&
((itemp->opd[i] ^ instruction->oprs[i].type) & SIZE_MASK))) {
if ((itemp->opd[i] & ~instruction->oprs[i].type & ~SIZE_MASK) ||
@@ -1824,8 +1866,8 @@ static int matches(const struct itemplate *itemp, insn * instruction, int bits)
return ret;
}
-static ea *process_ea(operand * input, ea * output, int addrbits,
- int rfield, int32_t rflags, int forw_ref)
+static ea *process_ea(operand * input, ea * output, int bits,
+ int addrbits, int rfield, int32_t rflags, int forw_ref)
{
output->rip = false;
@@ -1854,10 +1896,7 @@ static ea *process_ea(operand * input, ea * output, int addrbits,
if (input->basereg == -1
&& (input->indexreg == -1 || input->scale == 0)) {
/* it's a pure offset */
- if (input->addr_size)
- addrbits = input->addr_size;
-
- if (globalbits == 64 && (~input->type & IP_REL)) {
+ if (bits == 64 && (~input->type & IP_REL)) {
int scale, index, base;
output->sib_present = true;
scale = 0;
@@ -1871,7 +1910,7 @@ static ea *process_ea(operand * input, ea * output, int addrbits,
output->sib_present = false;
output->bytes = (addrbits != 16 ? 4 : 2);
output->modrm = (addrbits != 16 ? 5 : 6) | ((rfield & 7) << 3);
- output->rip = globalbits == 64;
+ output->rip = bits == 64;
}
} else { /* it's an indirection */
int i = input->indexreg, b = input->basereg, s = input->scale;
@@ -1921,11 +1960,15 @@ static ea *process_ea(operand * input, ea * output, int addrbits,
sok &= ~bx;
}
- /* While we're here, ensure the user didn't specify WORD. */
- if (input->addr_size == 16 ||
- (input->addr_size == 32 && !(sok & BITS32)) ||
- (input->addr_size == 64 && !(sok & BITS64)))
- return NULL;
+ /* While we're here, ensure the user didn't specify
+ WORD or QWORD. */
+ if (input->disp_size == 16 || input->disp_size == 64)
+ return NULL;
+
+ if (addrbits == 16 ||
+ (addrbits == 32 && !(sok & BITS32)) ||
+ (addrbits == 64 && !(sok & BITS64)))
+ return NULL;
/* now reorganize base/index */
if (s == 1 && bt != it && bt != -1 && it != -1 &&
@@ -2051,7 +2094,7 @@ static ea *process_ea(operand * input, ea * output, int addrbits,
return NULL;
/* ensure the user didn't specify DWORD/QWORD */
- if (input->addr_size == 32 || input->addr_size == 64)
+ if (input->disp_size == 32 || input->disp_size == 64)
return NULL;
if (s != 1 && i != -1)
@@ -2130,36 +2173,56 @@ static ea *process_ea(operand * input, ea * output, int addrbits,
return output;
}
-static void add_asp(insn *instruction, int addrbits)
+static void add_asp(insn *ins, int addrbits)
{
int j, valid;
+ int defdisp;
valid = (addrbits == 64) ? 64|32 : 32|16;
- for (j = 0; j < instruction->operands; j++) {
- if (!(MEMORY & ~instruction->oprs[j].type)) {
+ switch (ins->prefixes[PPS_ASIZE]) {
+ case P_A16:
+ valid &= 16;
+ break;
+ case P_A32:
+ valid &= 32;
+ break;
+ case P_A64:
+ valid &= 64;
+ break;
+ case P_ASP:
+ valid &= (addrbits == 32) ? 16 : 32;
+ break;
+ default:
+ break;
+ }
+
+ for (j = 0; j < ins->operands; j++) {
+ if (!(MEMORY & ~ins->oprs[j].type)) {
int32_t i, b;
/* Verify as Register */
- if (instruction->oprs[j].indexreg < EXPR_REG_START
- || instruction->oprs[j].indexreg >= REG_ENUM_LIMIT)
+ if (ins->oprs[j].indexreg < EXPR_REG_START
+ || ins->oprs[j].indexreg >= REG_ENUM_LIMIT)
i = 0;
else
- i = reg_flags[instruction->oprs[j].indexreg];
+ i = reg_flags[ins->oprs[j].indexreg];
/* Verify as Register */
- if (instruction->oprs[j].basereg < EXPR_REG_START
- || instruction->oprs[j].basereg >= REG_ENUM_LIMIT)
+ if (ins->oprs[j].basereg < EXPR_REG_START
+ || ins->oprs[j].basereg >= REG_ENUM_LIMIT)
b = 0;
else
- b = reg_flags[instruction->oprs[j].basereg];
+ b = reg_flags[ins->oprs[j].basereg];
- if (instruction->oprs[j].scale == 0)
+ if (ins->oprs[j].scale == 0)
i = 0;
if (!i && !b) {
- if (instruction->oprs[j].addr_size)
- valid &= instruction->oprs[j].addr_size;
+ int ds = ins->oprs[j].disp_size;
+ if ((addrbits != 64 && ds > 8) ||
+ (addrbits == 64 && ds == 16))
+ valid &= ds;
} else {
if (!(REG16 & ~b))
valid &= 16;
@@ -2179,18 +2242,27 @@ static void add_asp(insn *instruction, int addrbits)
}
if (valid & addrbits) {
- /* Don't do anything */
+ ins->addr_size = addrbits;
} else if (valid & ((addrbits == 32) ? 16 : 32)) {
- /* Add an instruction size prefix */
+ /* Add an address size prefix */
enum prefixes pref = (addrbits == 32) ? P_A16 : P_A32;
- for (j = 0; j < instruction->nprefix; j++) {
- if (instruction->prefixes[j] == pref)
- return; /* Already there */
- }
- instruction->prefixes[j] = pref;
- instruction->nprefix++;
+ ins->prefixes[PPS_ASIZE] = pref;
+ ins->addr_size = (addrbits == 32) ? 16 : 32;
} else {
/* Impossible... */
errfunc(ERR_NONFATAL, "impossible combination of address sizes");
+ ins->addr_size = addrbits; /* Error recovery */
+ }
+
+ defdisp = ins->addr_size == 16 ? 16 : 32;
+
+ for (j = 0; j < ins->operands; j++) {
+ if (!(MEM_OFFS & ~ins->oprs[j].type) &&
+ (ins->oprs[j].disp_size ? ins->oprs[j].disp_size : defdisp)
+ != ins->addr_size) {
+ /* mem_offs sizes must match the address size; if not,
+ strip the MEM_OFFS bit and match only EA instructions */
+ ins->oprs[j].type &= ~(MEM_OFFS & ~MEMORY);
+ }
}
}
diff --git a/insns.dat b/insns.dat
index 1a3cecd9..a26f3e37 100644
--- a/insns.dat
+++ b/insns.dat
@@ -722,15 +722,15 @@ MOV reg16,reg16 \320\1\x8B\110 8086
MOV reg32,mem \321\1\x8B\110 386,SM
MOV reg32,reg32 \321\1\x8B\110 386
MOV reg64,mem \324\1\x8B\110 X64,SM
-MOV reg64,reg64 \324\1\x8B\110 X64,SM
+MOV reg64,reg64 \324\1\x8B\110 X64
MOV reg8,imm \10\xB0\21 8086,SM
MOV reg16,imm \320\10\xB8\31 8086,SM
MOV reg32,imm \321\10\xB8\41 386,SM
-MOV reg64,imm \324\10\xB8\55 X64,SQ
+MOV reg64,imm \324\10\xB8\55 X64,SM
MOV rm8,imm \1\xC6\200\21 8086,SM
MOV rm16,imm \320\1\xC7\200\31 8086,SM
MOV rm32,imm \321\1\xC7\200\41 386,SM
-MOV rm64,imm \324\1\xC7\200\41 X64,SD
+MOV rm64,imm \324\1\xC7\200\41 X64,SM
MOV mem,imm8 \1\xC6\200\21 8086,SM
MOV mem,imm16 \320\1\xC7\200\31 8086,SM
MOV mem,imm32 \321\1\xC7\200\41 386,SM
diff --git a/nasm.h b/nasm.h
index 71583f5e..6b2e33e5 100644
--- a/nasm.h
+++ b/nasm.h
@@ -541,9 +541,12 @@ enum ccode { /* condition code names */
* register names do not overlap.
*/
enum prefixes { /* instruction prefixes */
+ P_none = 0,
PREFIX_ENUM_START = REG_ENUM_LIMIT,
- P_A16 = PREFIX_ENUM_START, P_A32, P_LOCK, P_O16, P_O32,
- P_REP, P_REPE, P_REPNE, P_REPNZ, P_REPZ, P_TIMES
+ P_A16 = PREFIX_ENUM_START, P_A32, P_A64, P_ASP,
+ P_LOCK, P_O16, P_O32, P_O64, P_OSP,
+ P_REP, P_REPE, P_REPNE, P_REPNZ, P_REPZ, P_TIMES,
+ PREFIX_ENUM_LIMIT
};
enum { /* extended operand types */
@@ -565,9 +568,9 @@ enum eval_hint { /* values for `hinttype' */
EAH_NOTBASE = 2 /* try _not_ to make reg the base */
};
-typedef struct { /* operand to an instruction */
+typedef struct operand { /* operand to an instruction */
int32_t type; /* type of operand */
- int addr_size; /* 0 means default; 16; 32; 64 */
+ int disp_size; /* 0 means default; 16; 32; 64 */
enum reg_enum basereg, indexreg; /* address registers */
int scale; /* index scale */
int hintbase;
@@ -592,17 +595,32 @@ typedef struct extop { /* extended operand */
int32_t wrt; /* ... and here */
} extop;
-#define MAXPREFIX 4
+/* Prefix positions: each type of prefix goes in a specific slot.
+ This affects the final ordering of the assembled output, which
+ shouldn't matter to the processor, but if you have stylistic
+ preferences, you can change this. REX prefixes are handled
+ differently for the time being.
+
+ Note that LOCK and REP are in the same slot. This is
+ an x86 architectural constraint.
+
+ Each value is an index into the prefixes[] array of an insn,
+ which is declared with MAXPREFIX entries. A slot that holds
+ P_none (zero) carries no prefix. */
+enum prefix_pos {
+ PPS_LREP, /* Lock or REP prefix */
+ PPS_SEG, /* Segment override prefix */
+ PPS_OSIZE, /* Operand size prefix */
+ PPS_ASIZE, /* Address size prefix */
+ MAXPREFIX /* Total number of prefix slots */
+};
+
#define MAX_OPERANDS 4
-typedef struct { /* an instruction itself */
- char *label; /* the label defined, or NULL */
+typedef struct insn { /* an instruction itself */
+ char *label; /* the label defined, or NULL */
enum prefixes prefixes[MAXPREFIX]; /* instruction prefixes, if any */
- int nprefix; /* number of entries in above */
enum opcode opcode; /* the opcode - not just the string */
enum ccode condition; /* the condition code, if Jcc/SETcc */
int operands; /* how many operands? 0-3
* (more if db et al) */
+ int addr_size; /* address size */
operand oprs[MAX_OPERANDS]; /* the operands, defined as above */
extop *eops; /* extended operands */
int eops_float; /* true if DD and floating */
@@ -934,8 +952,11 @@ struct dfmt {
*/
enum special_tokens {
- S_ABS, S_BYTE, S_DWORD, S_FAR, S_LONG, S_NEAR, S_NOSPLIT,
- S_OWORD, S_QWORD, S_REL, S_SHORT, S_STRICT, S_TO, S_TWORD, S_WORD
+ SPECIAL_ENUM_START = PREFIX_ENUM_LIMIT,
+ S_ABS = SPECIAL_ENUM_START,
+ S_BYTE, S_DWORD, S_FAR, S_LONG, S_NEAR, S_NOSPLIT,
+ S_OWORD, S_QWORD, S_REL, S_SHORT, S_STRICT, S_TO, S_TWORD, S_WORD,
+ SPECIAL_ENUM_LIMIT
};
/*
diff --git a/parser.c b/parser.c
index f2b7cfe3..fad755e8 100644
--- a/parser.c
+++ b/parser.c
@@ -44,12 +44,133 @@ void parser_global_info(struct ofmt *output, struct location * locp)
location = locp;
}
+/*
+ * Return the prefix_pos slot in which `prefix' is stored.
+ *
+ * Segment registers map to PPS_SEG, LOCK and the REP family to
+ * PPS_LREP, operand-size prefixes to PPS_OSIZE and address-size
+ * prefixes to PPS_ASIZE.  Any other value is reported through
+ * error(ERR_PANIC, ...); should that call return, the result is -1.
+ */
+static int prefix_slot(enum prefixes prefix)
+{
+    int slot = -1;
+
+    switch (prefix) {
+    case P_A16:
+    case P_A32:
+    case P_A64:
+    case P_ASP:
+        slot = PPS_ASIZE;
+        break;
+
+    case P_O16:
+    case P_O32:
+    case P_O64:
+    case P_OSP:
+        slot = PPS_OSIZE;
+        break;
+
+    case P_LOCK:
+    case P_REP:
+    case P_REPE:
+    case P_REPZ:
+    case P_REPNE:
+    case P_REPNZ:
+        slot = PPS_LREP;
+        break;
+
+    case R_CS:
+    case R_DS:
+    case R_SS:
+    case R_ES:
+    case R_FS:
+    case R_GS:
+        slot = PPS_SEG;
+        break;
+
+    default:
+        error(ERR_PANIC, "Invalid value %d passed to prefix_slot()", prefix);
+        break;
+    }
+
+    return slot;
+}
+/*
+ * Apply the keyword held in the current token (tokval.t_integer) to
+ * operand number `operand' of `result'.  This function does not call
+ * the scanner itself.
+ *
+ * In TASM-compatible mode a size keyword ORs the matching BITSn flag
+ * into the operand type.  Otherwise NOSPLIT/REL/ABS set the matching
+ * EAF_* flag, BYTE/WORD/DWORD/LONG/QWORD record disp_size together
+ * with EAF_BYTEOFFS or EAF_WORDOFFS, and A16/A32/A64 fill the
+ * PPS_ASIZE prefix slot unless a different address-size prefix is
+ * already stored there, which is reported as a conflict.  Any other
+ * token yields a non-fatal error.
+ */
+static void process_size_override(insn * result, int operand)
+{
+ if (tasm_compatible_mode) {
+ switch ((int)tokval.t_integer) {
+ /* For TASM compatibility a size override inside the
+ * brackets changes the size of the operand, not the
+ * address type of the operand as it does in standard
+ * NASM syntax. Hence:
+ *
+ * mov eax,[DWORD val]
+ *
+ * is valid syntax in TASM compatibility mode. Note that
+ * you lose the ability to override the default address
+ * type for the instruction, but we never use anything
+ * but 32-bit flat model addressing in our code.
+ */
+ case S_BYTE:
+ result->oprs[operand].type |= BITS8;
+ break;
+ case S_WORD:
+ result->oprs[operand].type |= BITS16;
+ break;
+ case S_DWORD:
+ case S_LONG:
+ result->oprs[operand].type |= BITS32;
+ break;
+ case S_QWORD:
+ result->oprs[operand].type |= BITS64;
+ break;
+ case S_TWORD:
+ result->oprs[operand].type |= BITS80;
+ break;
+ case S_OWORD:
+ result->oprs[operand].type |= BITS128;
+ break;
+ default:
+ error(ERR_NONFATAL,
+ "invalid operand size specification");
+ break;
+ }
+ } else {
+ /* Standard NASM compatible syntax */
+ switch ((int)tokval.t_integer) {
+ case S_NOSPLIT:
+ result->oprs[operand].eaflags |= EAF_TIMESTWO;
+ break;
+ case S_REL:
+ result->oprs[operand].eaflags |= EAF_REL;
+ break;
+ case S_ABS:
+ result->oprs[operand].eaflags |= EAF_ABS;
+ break;
+ case S_BYTE:
+ result->oprs[operand].disp_size = 8;
+ result->oprs[operand].eaflags |= EAF_BYTEOFFS;
+ break;
+ case P_A16:
+ case P_A32:
+ case P_A64:
+ if (result->prefixes[PPS_ASIZE] &&
+ result->prefixes[PPS_ASIZE] != tokval.t_integer)
+ error(ERR_NONFATAL,
+ "conflicting address size specifications");
+ else
+ result->prefixes[PPS_ASIZE] = tokval.t_integer;
+ break;
+ case S_WORD:
+ result->oprs[operand].disp_size = 16;
+ result->oprs[operand].eaflags |= EAF_WORDOFFS;
+ break;
+ case S_DWORD:
+ case S_LONG:
+ result->oprs[operand].disp_size = 32;
+ result->oprs[operand].eaflags |= EAF_WORDOFFS;
+ break;
+ case S_QWORD:
+ result->oprs[operand].disp_size = 64;
+ result->oprs[operand].eaflags |= EAF_WORDOFFS;
+ break;
+ default:
+ error(ERR_NONFATAL, "invalid size specification in"
+ " effective address");
+ break;
+ }
+ }
+}
+
insn *parse_line(int pass, char *buffer, insn * result,
efunc errfunc, evalfunc evaluate, ldfunc ldef)
{
int operand;
int critical;
struct eval_hints hints;
+ int j;
result->forw_ref = false;
error = errfunc;
@@ -101,7 +222,8 @@ insn *parse_line(int pass, char *buffer, insn * result,
return result;
}
- result->nprefix = 0;
+ for (j = 0; j < MAXPREFIX; j++)
+ result->prefixes[j] = P_none;
result->times = 1L;
while (i == TOKEN_PREFIX ||
@@ -134,17 +256,25 @@ insn *parse_line(int pass, char *buffer, insn * result,
}
}
} else {
- if (result->nprefix == MAXPREFIX)
- error(ERR_NONFATAL,
- "instruction has more than %d prefixes", MAXPREFIX);
- else
- result->prefixes[result->nprefix++] = tokval.t_integer;
+ int slot = prefix_slot(tokval.t_integer);
+ if (result->prefixes[slot]) {
+ error(ERR_NONFATAL,
+ "instruction has conflicting prefixes");
+ }
+ result->prefixes[slot] = tokval.t_integer;
i = stdscan(NULL, &tokval);
}
}
if (i != TOKEN_INSN) {
- if (result->nprefix > 0 && i == 0) {
+ int j;
+ enum prefixes pfx;
+
+ for (j = 0; j < MAXPREFIX; j++)
+ if ((pfx = result->prefixes[j]) != P_none)
+ break;
+
+ if (i == 0 && pfx != P_none) {
/*
* Instruction prefixes are present, but no actual
* instruction. This is allowed: at this point we
@@ -358,12 +488,12 @@ insn *parse_line(int pass, char *buffer, insn * result,
* of these, separated by commas, and terminated by a zero token. */
for (operand = 0; operand < MAX_OPERANDS; operand++) {
- expr *value; /* used most of the time */
+ expr *value; /* used most of the time */
int mref; /* is this going to be a memory ref? */
int bracket; /* is it a [] mref, or a & mref? */
int setsize = 0;
- result->oprs[operand].addr_size = 0; /* have to zero this whatever */
+ result->oprs[operand].disp_size = 0; /* have to zero this whatever */
result->oprs[operand].eaflags = 0; /* and this */
result->oprs[operand].opflags = 0;
@@ -428,78 +558,10 @@ insn *parse_line(int pass, char *buffer, insn * result,
if (i == '[' || i == '&') { /* memory reference */
mref = true;
bracket = (i == '[');
- while ((i = stdscan(NULL, &tokval)) == TOKEN_SPECIAL) {
- /* check for address directives */
- if (tasm_compatible_mode) {
- switch ((int)tokval.t_integer) {
- /* For TASM compatibility a size override inside the
- * brackets changes the size of the operand, not the
- * address type of the operand as it does in standard
- * NASM syntax. Hence:
- *
- * mov eax,[DWORD val]
- *
- * is valid syntax in TASM compatibility mode. Note that
- * you lose the ability to override the default address
- * type for the instruction, but we never use anything
- * but 32-bit flat model addressing in our code.
- */
- case S_BYTE:
- result->oprs[operand].type |= BITS8;
- break;
- case S_WORD:
- result->oprs[operand].type |= BITS16;
- break;
- case S_DWORD:
- case S_LONG:
- result->oprs[operand].type |= BITS32;
- break;
- case S_QWORD:
- result->oprs[operand].type |= BITS64;
- break;
- case S_TWORD:
- result->oprs[operand].type |= BITS80;
- break;
- case S_OWORD:
- result->oprs[operand].type |= BITS128;
- break;
- default:
- error(ERR_NONFATAL,
- "invalid operand size specification");
- }
- } else {
- /* Standard NASM compatible syntax */
- switch ((int)tokval.t_integer) {
- case S_NOSPLIT:
- result->oprs[operand].eaflags |= EAF_TIMESTWO;
- break;
- case S_REL:
- result->oprs[operand].eaflags |= EAF_REL;
- break;
- case S_ABS:
- result->oprs[operand].eaflags |= EAF_ABS;
- break;
- case S_BYTE:
- result->oprs[operand].eaflags |= EAF_BYTEOFFS;
- break;
- case S_WORD:
- result->oprs[operand].addr_size = 16;
- result->oprs[operand].eaflags |= EAF_WORDOFFS;
- break;
- case S_DWORD:
- case S_LONG:
- result->oprs[operand].addr_size = 32;
- result->oprs[operand].eaflags |= EAF_WORDOFFS;
- break;
- case S_QWORD:
- result->oprs[operand].addr_size = 64;
- result->oprs[operand].eaflags |= EAF_WORDOFFS;
- break;
- default:
- error(ERR_NONFATAL, "invalid size specification in"
- " effective address");
- }
- }
+ i = stdscan(NULL, &tokval); /* step past the '[' or '&' */
+ while (i == TOKEN_SPECIAL || i == TOKEN_PREFIX) {
+ process_size_override(result, operand);
+ i = stdscan(NULL, &tokval);
}
} else { /* immediate operand, or register */
mref = false;
@@ -529,32 +591,18 @@ insn *parse_line(int pass, char *buffer, insn * result,
if (value[1].type != 0 || value->value != 1 ||
REG_SREG & ~reg_flags[value->type])
error(ERR_NONFATAL, "invalid segment override");
- else if (result->nprefix == MAXPREFIX)
+ else if (result->prefixes[PPS_SEG])
error(ERR_NONFATAL,
- "instruction has more than %d prefixes", MAXPREFIX);
+ "instruction has conflicting segment overrides");
else {
- result->prefixes[result->nprefix++] = value->type;
+ result->prefixes[PPS_SEG] = value->type;
if (!(REG_FSGS & ~reg_flags[value->type]))
result->oprs[operand].eaflags |= EAF_FSGS;
}
i = stdscan(NULL, &tokval); /* then skip the colon */
- if (i == TOKEN_SPECIAL) { /* another check for size override */
- switch ((int)tokval.t_integer) {
- case S_WORD:
- result->oprs[operand].addr_size = 16;
- break;
- case S_DWORD:
- case S_LONG:
- result->oprs[operand].addr_size = 32;
- break;
- case S_QWORD:
- result->oprs[operand].addr_size = 64;
- break;
- default:
- error(ERR_NONFATAL, "invalid size specification in"
- " effective address");
- }
+ while (i == TOKEN_SPECIAL || i == TOKEN_PREFIX) {
+ process_size_override(result, operand);
i = stdscan(NULL, &tokval);
}
value = evaluate(stdscan, NULL, &tokval,
@@ -700,7 +748,7 @@ insn *parse_line(int pass, char *buffer, insn * result,
!(result->oprs[operand].eaflags & EAF_FSGS)) ||
(result->oprs[operand].eaflags & EAF_REL));
- result->oprs[operand].type |= is_rel ? IP_REL : MEM_OFFS;
+ result->oprs[operand].type |= is_rel ? IP_REL : MEM_OFFS;
}
result->oprs[operand].basereg = b;
result->oprs[operand].indexreg = i;
@@ -770,8 +818,9 @@ insn *parse_line(int pass, char *buffer, insn * result,
result->operands = operand; /* set operand count */
- while (operand < 3) /* clear remaining operands */
- result->oprs[operand++].type = 0;
+/* clear remaining operands */
+while (operand < MAX_OPERANDS)
+ result->oprs[operand++].type = 0;
/*
* Transform RESW, RESD, RESQ, REST, RESO into RESB.
diff --git a/test/movimm.asm b/test/movimm.asm
deleted file mode 100644
index b5a7a274..00000000
--- a/test/movimm.asm
+++ /dev/null
@@ -1,26 +0,0 @@
- bits 64
-
- mov rax,1234567890abcdefh
- mov eax,1234567890abcdefh
- mov rax,dword 1234567890abcdefh
- mov rax,qword 1234567890abcdefh
- mov dword [rsi],1234567890abcdefh
- mov qword [rsi],1234567890abcdefh
- mov dword [rsi],dword 1234567890abcdefh
- mov qword [rsi],dword 1234567890abcdefh
-; mov qword [rsi],qword 1234567890abcdefh ; Error
-; mov [rsi],qword 1234567890abcdefh ; Error
- mov [rsi],dword 1234567890abcdefh
-
- ; The optimizer probably should compact these forms, doesn't yet?
- mov rax,12345678h
- mov eax,12345678h
- mov rax,dword 12345678h
- mov rax,qword 12345678h
- mov dword [rsi],12345678h
- mov qword [rsi],12345678h
- mov dword [rsi],dword 12345678h
- mov qword [rsi],dword 12345678h
-; mov qword [rsi],qword 12345678h ; Error
-; mov [rsi],qword 12345678h ; Error
- mov [rsi],dword 12345678h
diff --git a/tokens.dat b/tokens.dat
index eb7fa115..d4c046a4 100644
--- a/tokens.dat
+++ b/tokens.dat
@@ -5,9 +5,13 @@
% TOKEN_PREFIX, 0, P_*
a16
a32
+a64
+asp
lock
o16
o32
+o64
+osp
rep
repe
repne