summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author H. Peter Anvin (Intel) <hpa@zytor.com> 2019-08-14 15:24:56 -0700
committer H. Peter Anvin (Intel) <hpa@zytor.com> 2019-08-14 15:44:50 -0700
commit 8981724f176ad18aaeac570faa5a26cc28bfef08 (patch)
tree eb4c77810945791e9ee1cd3fa37b6ddd87c31a0e
parent 02b60ddd1c86ba6d932301f4ab205027beafc688 (diff)
download nasm-8981724f176ad18aaeac570faa5a26cc28bfef08.tar.gz
masm.mac, parser: VERY limited MASM emulation package
Very limited MASM emulation. The parser has been extended to emulate the PTR keyword if the corresponding macro is enabled, and the syntax displacement[index] for memory operations is now recognized. Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
-rw-r--r-- asm/parser.c 312
-rw-r--r-- asm/tokens.dat 4
-rw-r--r-- include/nasm.h 2
-rw-r--r-- macros/masm.mac 80
4 files changed, 256 insertions, 142 deletions
diff --git a/asm/parser.c b/asm/parser.c
index 072e8842..012364ac 100644
--- a/asm/parser.c
+++ b/asm/parser.c
@@ -234,96 +234,91 @@ static bool parse_braces(decoflags_t *decoflags)
}
}
-static int parse_mref(operand *op, const expr *e)
+static inline const expr *next_expr(const expr *e, const expr **next_list)
{
- int b, i, s; /* basereg, indexreg, scale */
- int64_t o; /* offset */
-
- b = i = -1;
- o = s = 0;
- op->segment = op->wrt = NO_SEG;
-
- if (e->type && e->type <= EXPR_REG_END) { /* this bit's a register */
- bool is_gpr = is_class(REG_GPR,nasm_reg_flags[e->type]);
-
- if (is_gpr && e->value == 1)
- b = e->type; /* It can be basereg */
- else /* No, it has to be indexreg */
- i = e->type, s = e->value;
- e++;
+ e++;
+ if (!e->type) {
+ if (next_list) {
+ e = *next_list;
+ *next_list = NULL;
+ } else {
+ e = NULL;
+ }
}
- if (e->type && e->type <= EXPR_REG_END) { /* it's a 2nd register */
- bool is_gpr = is_class(REG_GPR,nasm_reg_flags[e->type]);
+ return e;
+}
- if (b != -1) /* If the first was the base, ... */
- i = e->type, s = e->value; /* second has to be indexreg */
+static inline void init_operand(operand *op)
+{
+ memset(op, 0, sizeof *op);
- else if (!is_gpr || e->value != 1) {
- /* If both want to be index */
- nasm_nonfatal("invalid effective address: two index registers");
- return -1;
- } else
- b = e->type;
- e++;
- }
+ op->basereg = -1;
+ op->indexreg = -1;
+ op->segment = NO_SEG;
+ op->wrt = NO_SEG;
+}
- if (e->type) { /* is there an offset? */
- if (e->type <= EXPR_REG_END) { /* in fact, is there an error? */
- nasm_nonfatal("invalid effective address: impossible register");
- return -1;
- } else {
- if (e->type == EXPR_UNKNOWN) {
- op->opflags |= OPFLAG_UNKNOWN;
- o = 0; /* doesn't matter what */
- while (e->type)
- e++; /* go to the end of the line */
- } else {
- if (e->type == EXPR_SIMPLE) {
- o = e->value;
- e++;
- }
- if (e->type == EXPR_WRT) {
- op->wrt = e->value;
- e++;
- }
- /*
- * Look for a segment base type.
- */
- for (; e->type; e++) {
- if (!e->value)
- continue;
+static int parse_mref(operand *op, const expr *e)
+{
+ int b, i, s; /* basereg, indexreg, scale */
+ int64_t o; /* offset */
- if (e->type <= EXPR_REG_END) {
- nasm_nonfatal("invalid effective address: too many registers");
- return -1;
- } else if (e->type < EXPR_SEGBASE) {
- nasm_nonfatal("invalid effective address: bad subexpression type");
- return -1;
- } else if (e->value == 1) {
- if (op->segment != NO_SEG) {
- nasm_nonfatal("invalid effective address: multiple base segments");
- return -1;
- }
- op->segment = e->type - EXPR_SEGBASE;
- } else if (e->value == -1 &&
- e->type == location.segment + EXPR_SEGBASE &&
- !(op->opflags & OPFLAG_RELATIVE)) {
- op->opflags |= OPFLAG_RELATIVE;
- } else {
- nasm_nonfatal("invalid effective address: impossible segment base multiplier");
- return -1;
- }
+ b = op->basereg;
+ i = op->indexreg;
+ s = op->scale;
+ o = op->offset;
+
+ for (; e->type; e++) {
+ if (e->type <= EXPR_REG_END) {
+ bool is_gpr = is_class(REG_GPR,nasm_reg_flags[e->type]);
+
+ if (is_gpr && e->value == 1 && b == -1) {
+ /* It can be basereg */
+ b = e->type;
+ } else if (i == -1) {
+ /* Must be index register */
+ i = e->type;
+ s = e->value;
+ } else {
+ if (b == -1)
+ nasm_nonfatal("invalid effective address: two index registers");
+ else if (!is_gpr)
+ nasm_nonfatal("invalid effective address: impossible register");
+ else
+ nasm_nonfatal("invalid effective address: too many registers");
+ return -1;
+ }
+ } else if (e->type == EXPR_UNKNOWN) {
+ op->opflags |= OPFLAG_UNKNOWN;
+ } else if (e->type == EXPR_SIMPLE) {
+ o += e->value;
+ } else if (e->type == EXPR_WRT) {
+ op->wrt = e->value;
+ } else if (e->type >= EXPR_SEGBASE) {
+ if (e->value == 1) {
+ if (op->segment != NO_SEG) {
+ nasm_nonfatal("invalid effective address: multiple base segments");
+ return -1;
}
+ op->segment = e->type - EXPR_SEGBASE;
+ } else if (e->value == -1 &&
+ e->type == location.segment + EXPR_SEGBASE &&
+ !(op->opflags & OPFLAG_RELATIVE)) {
+ op->opflags |= OPFLAG_RELATIVE;
+ } else {
+ nasm_nonfatal("invalid effective address: impossible segment base multiplier");
+ return -1;
}
+ } else {
+ nasm_nonfatal("invalid effective address: bad subexpression type");
+ return -1;
}
- }
-
- nasm_assert(!e->type); /* We should be at the end */
+ }
- op->basereg = b;
+ op->basereg = b;
op->indexreg = i;
- op->scale = s;
- op->offset = o;
+ op->scale = s;
+ op->offset = o;
return 0;
}
@@ -419,6 +414,7 @@ insn *parse_line(char *buffer, insn *result)
bool critical;
bool first;
bool recover;
+ bool far_jmp_ok;
int i;
nasm_static_assert(P_none == 0);
@@ -740,20 +736,18 @@ is_expression:
* Now we begin to parse the operands. There may be up to four
* of these, separated by commas, and terminated by a zero token.
*/
+ far_jmp_ok = result->opcode == I_JMP || result->opcode == I_CALL;
for (opnum = 0; opnum < MAX_OPERANDS; opnum++) {
operand *op = &result->oprs[opnum];
expr *value; /* used most of the time */
- bool mref; /* is this going to be a memory ref? */
- bool bracket; /* is it a [] mref, or a & mref? */
+ bool mref = false; /* is this going to be a memory ref? */
+ int bracket = 0; /* is it a [] mref, or a "naked" mref? */
bool mib; /* compound (mib) mref? */
int setsize = 0;
decoflags_t brace_flags = 0; /* flags for decorators in braces */
- op->disp_size = 0; /* have to zero this whatever */
- op->eaflags = 0; /* and this */
- op->opflags = 0;
- op->decoflags = 0;
+ init_operand(op);
i = stdscan(NULL, &tokval);
if (i == TOKEN_EOS)
@@ -829,30 +823,55 @@ is_expression:
i = stdscan(NULL, &tokval);
}
- if (i == '[' || i == '&') { /* memory reference */
+ if (i == '[' || i == TOKEN_MASM_PTR || i == '&') {
+ /* memory reference */
mref = true;
- bracket = (i == '[');
- i = stdscan(NULL, &tokval); /* then skip the colon */
- while (i == TOKEN_SPECIAL || i == TOKEN_SIZE ||
- i == TOKEN_PREFIX) {
- process_size_override(result, op);
- i = stdscan(NULL, &tokval);
- }
- /* when a comma follows an opening bracket - [ , eax*4] */
- if (i == ',') {
- /* treat as if there is a zero displacement virtually */
- tokval.t_type = TOKEN_NUM;
- tokval.t_integer = 0;
- stdscan_set(stdscan_get() - 1); /* rewind the comma */
- }
- } else { /* immediate operand, or register */
- mref = false;
- bracket = false; /* placate optimisers */
+ bracket += (i == '[');
+ i = stdscan(NULL, &tokval);
}
- if ((op->type & FAR) && !mref &&
- result->opcode != I_JMP && result->opcode != I_CALL)
- nasm_nonfatal("invalid use of FAR operand specifier");
+ mref_more:
+ if (mref) {
+ bool done = false;
+ bool nofw = false;
+
+ while (!done) {
+ switch (i) {
+ case TOKEN_SPECIAL:
+ case TOKEN_SIZE:
+ case TOKEN_PREFIX:
+ process_size_override(result, op);
+ break;
+
+ case '[':
+ bracket++;
+ break;
+
+ case ',':
+ tokval.t_type = TOKEN_NUM;
+ tokval.t_integer = 0;
+ stdscan_set(stdscan_get() - 1); /* rewind the comma */
+ done = nofw = true;
+ break;
+
+ case TOKEN_MASM_FLAT:
+ i = stdscan(NULL, &tokval);
+ if (i != ':') {
+ nasm_nonfatal("unknown use of FLAT in MASM emulation");
+ nofw = true;
+ }
+ done = true;
+ break;
+
+ default:
+ done = nofw = true;
+ break;
+ }
+
+ if (!nofw)
+ i = stdscan(NULL, &tokval);
+ }
+ }
value = evaluate(stdscan, NULL, &tokval,
&op->opflags, critical, &hints);
@@ -862,7 +881,18 @@ is_expression:
}
if (!value) /* Error in evaluator */
goto fail;
- if (i == ':' && mref) { /* it was seg:offset */
+
+ if (i == '[' && !bracket) {
+ /* displacement[regs] syntax */
+ mref = true;
+ parse_mref(op, value); /* Process what we have so far */
+ goto mref_more;
+ }
+
+ if (i == ':' && (mref || !far_jmp_ok)) {
+ /* segment override? */
+ mref = true;
+
/*
* Process the segment override.
*/
@@ -879,29 +909,15 @@ is_expression:
}
i = stdscan(NULL, &tokval); /* then skip the colon */
- while (i == TOKEN_SPECIAL || i == TOKEN_SIZE ||
- i == TOKEN_PREFIX) {
- process_size_override(result, op);
- i = stdscan(NULL, &tokval);
- }
- value = evaluate(stdscan, NULL, &tokval,
- &op->opflags, critical, &hints);
- i = tokval.t_type;
- if (op->opflags & OPFLAG_FORWARD) {
- result->forw_ref = true;
- }
- /* and get the offset */
- if (!value) /* Error in evaluator */
- goto fail;
+ goto mref_more;
}
mib = false;
if (mref && bracket && i == ',') {
/* [seg:base+offset,index*scale] syntax (mib) */
+ operand o2; /* Index operand */
- operand o1, o2; /* Partial operands */
-
- if (parse_mref(&o1, value))
+ if (parse_mref(op, value))
goto fail;
i = stdscan(NULL, &tokval); /* Eat comma */
@@ -911,6 +927,7 @@ is_expression:
if (!value)
goto fail;
+ init_operand(&o2);
if (parse_mref(&o2, value))
goto fail;
@@ -920,18 +937,14 @@ is_expression:
o2.basereg = -1;
}
- if (o1.indexreg != -1 || o2.basereg != -1 || o2.offset != 0 ||
+ if (op->indexreg != -1 || o2.basereg != -1 || o2.offset != 0 ||
o2.segment != NO_SEG || o2.wrt != NO_SEG) {
nasm_nonfatal("invalid mib expression");
goto fail;
}
- op->basereg = o1.basereg;
op->indexreg = o2.indexreg;
op->scale = o2.scale;
- op->offset = o1.offset;
- op->segment = o1.segment;
- op->wrt = o1.wrt;
if (op->basereg != -1) {
op->hintbase = op->basereg;
@@ -948,21 +961,33 @@ is_expression:
}
recover = false;
- if (mref && bracket) { /* find ] at the end */
- if (i != ']') {
- nasm_nonfatal("parser: expecting ]");
- recover = true;
- } else { /* we got the required ] */
- i = stdscan(NULL, &tokval);
- if (i == TOKEN_DECORATOR || i == TOKEN_OPMASK) {
- /* parse opmask (and zeroing) after an operand */
- recover = parse_braces(&brace_flags);
- i = tokval.t_type;
- }
- if (i != 0 && i != ',') {
- nasm_nonfatal("comma or end of line expected");
+ if (mref) {
+ if (bracket == 1) {
+ if (i == ']') {
+ bracket--;
+ i = stdscan(NULL, &tokval);
+ } else {
+ nasm_nonfatal("expecting ] at end of memory operand");
recover = true;
}
+ } else if (bracket == 0) {
+ /* Do nothing */
+ } else if (bracket > 0) {
+ nasm_nonfatal("excess brackets in memory operand");
+ recover = true;
+ } else if (bracket < 0) {
+ nasm_nonfatal("unmatched ] in memory operand");
+ recover = true;
+ }
+
+ if (i == TOKEN_DECORATOR || i == TOKEN_OPMASK) {
+ /* parse opmask (and zeroing) after an operand */
+ recover = parse_braces(&brace_flags);
+ i = tokval.t_type;
+ }
+ if (!recover && i != 0 && i != ',') {
+ nasm_nonfatal("comma, decorator or end of line expected, got %d", i);
+ recover = true;
}
} else { /* immediate operand */
if (i != 0 && i != ',' && i != ':' &&
@@ -998,6 +1023,9 @@ is_expression:
op->hinttype = hints.type;
}
mref_set_optype(op);
+ } else if ((op->type & FAR) && !far_jmp_ok) {
+ nasm_nonfatal("invalid use of FAR operand specifier");
+ recover = true;
} else { /* it's not a memory reference */
if (is_just_unknown(value)) { /* it's immediate but unknown */
op->type |= IMMEDIATE;
diff --git a/asm/tokens.dat b/asm/tokens.dat
index 9f1513c4..d75640cf 100644
--- a/asm/tokens.dat
+++ b/asm/tokens.dat
@@ -125,6 +125,10 @@ __ilog2c__
seg
wrt
+% TOKEN_{__*__}, 0, 0, 0
+__masm_ptr__
+__masm_flat__
+
% TOKEN_DECORATOR, 0, TFLAG_BRC | TFLAG_BRDCAST , BRC_1TO{1to*}
1to2
1to4
diff --git a/include/nasm.h b/include/nasm.h
index f108bdfa..860d1fc0 100644
--- a/include/nasm.h
+++ b/include/nasm.h
@@ -190,6 +190,8 @@ enum token_type { /* token types, other than chars */
TOKEN_STRFUNC, /* __utf16*__, __utf32*__ */
TOKEN_IFUNC, /* __ilog2*__ */
TOKEN_DECORATOR, /* decorators such as {...} */
+ TOKEN_MASM_PTR, /* __masm_ptr__ for the masm package */
+ TOKEN_MASM_FLAT, /* __masm_flat__ for the masm package */
TOKEN_OPMASK /* translated token for opmask registers */
};
diff --git a/macros/masm.mac b/macros/masm.mac
new file mode 100644
index 00000000..3e64f70a
--- /dev/null
+++ b/macros/masm.mac
@@ -0,0 +1,80 @@
+;; --------------------------------------------------------------------------
+;;
+;; Copyright 2019 The NASM Authors - All Rights Reserved
+;; See the file AUTHORS included with the NASM distribution for
+;; the specific copyright holders.
+;;
+;; Redistribution and use in source and binary forms, with or without
+;; modification, are permitted provided that the following
+;; conditions are met:
+;;
+;; * Redistributions of source code must retain the above copyright
+;; notice, this list of conditions and the following disclaimer.
+;; * Redistributions in binary form must reproduce the above
+;; copyright notice, this list of conditions and the following
+;; disclaimer in the documentation and/or other materials provided
+;; with the distribution.
+;;
+;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+;; CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+;; INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+;; MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+;; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+;; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+;; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+;; HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+;; CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+;; OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+;; EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;
+;; --------------------------------------------------------------------------
+
+;;
+;; masm.mac
+;;
+;; Very limited MASM compatibility package; intended to be used
+;; primarily with machine-generated code. It does not include any
+;; "programmer friendly" shortcuts, nor does it in any way support
+;; ASSUME, symbol typing, or MASM-style structures.
+;;
+
+USE: masm
+
+%unimacro segment 1+
+
+%imacro segment 0-1+.nolist
+ %define __SECT__ [segment %00 %1]
+ __SECT__
+%endmacro
+
+%imacro ends 0+.nolist
+ %pragma ignore ends %00
+%endmacro
+
+%imacro proc 0-*.nolist
+ %rep %0
+ %ifidni %1,far
+ %idefine ret retf
+ %else
+ %idefine ret retn
+ %endif
+ %rotate 1
+ %endrep
+%endmacro
+
+%imacro endp 0.nolist
+ %pragma ignore endp %00
+ %undef ret
+%endmacro
+
+%idefine ptr __masm_ptr__
+%idefine flat __masm_flat__ ; is %idefine really correct here?
+%idefine offset
+
+%imacro end 0+.nolist
+ ; Nothing
+%endmacro
+
+ default rel