masm.mac, parser: VERY limited MASM emulation package

Very limited MASM emulation. The parser has been extended to emulate the PTR keyword if the corresponding macro is enabled, and the syntax displacement[index] for memory operations is now recognized.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
author: H. Peter Anvin (Intel) <hpa@zytor.com> 2019-08-14 15:24:56 -0700
committer: H. Peter Anvin (Intel) <hpa@zytor.com> 2019-08-14 15:44:50 -0700
commit: 8981724f176ad18aaeac570faa5a26cc28bfef08 (patch)
tree: eb4c77810945791e9ee1cd3fa37b6ddd87c31a0e
parent: 02b60ddd1c86ba6d932301f4ab205027beafc688 (diff)
download: nasm-8981724f176ad18aaeac570faa5a26cc28bfef08.tar.gz
4 files changed, 256 insertions, 142 deletions
diff --git a/asm/parser.c b/asm/parser.c
index 072e8842..012364ac 100644
--- a/asm/parser.c
+++ b/asm/parser.c
@@ -234,96 +234,91 @@ static bool parse_braces(decoflags_t *decoflags)
     }
 }
 
-static int parse_mref(operand *op, const expr *e)
+static inline const expr *next_expr(const expr *e, const expr **next_list)
 {
-    int b, i, s;        /* basereg, indexreg, scale */
-    int64_t o;          /* offset */
-
-    b = i = -1;
-    o = s = 0;
-    op->segment = op->wrt = NO_SEG;
-
-    if (e->type && e->type <= EXPR_REG_END) {   /* this bit's a register */
-        bool is_gpr = is_class(REG_GPR,nasm_reg_flags[e->type]);
-
-        if (is_gpr && e->value == 1)
-            b = e->type;	/* It can be basereg */
-        else			/* No, it has to be indexreg */
-            i = e->type, s = e->value;
-        e++;
+    e++;
+    if (!e->type) {
+        if (next_list) {
+            e = *next_list;
+            *next_list = NULL;
+        } else {
+            e = NULL;
+        }
     }
-    if (e->type && e->type <= EXPR_REG_END) {   /* it's a 2nd register */
-        bool is_gpr = is_class(REG_GPR,nasm_reg_flags[e->type]);
+    return e;
+}
 
-        if (b != -1)    /* If the first was the base, ... */
-            i = e->type, s = e->value;  /* second has to be indexreg */
+static inline void init_operand(operand *op)
+{
+    memset(op, 0, sizeof *op);
 
-        else if (!is_gpr || e->value != 1) {
-            /* If both want to be index */
-            nasm_nonfatal("invalid effective address: two index registers");
-            return -1;
-        } else
-            b = e->type;
-        e++;
-    }
+    op->basereg  = -1;
+    op->indexreg = -1;
+    op->segment  = NO_SEG;
+    op->wrt      = NO_SEG;
+}
 
-    if (e->type) {                     /* is there an offset? */
-        if (e->type <= EXPR_REG_END) {  /* in fact, is there an error? */
-            nasm_nonfatal("invalid effective address: impossible register");
-            return -1;
-        } else {
-            if (e->type == EXPR_UNKNOWN) {
-                op->opflags |= OPFLAG_UNKNOWN;
-                o = 0;  /* doesn't matter what */
-                while (e->type)
-                    e++;        /* go to the end of the line */
-            } else {
-                if (e->type == EXPR_SIMPLE) {
-                    o = e->value;
-                    e++;
-                }
-                if (e->type == EXPR_WRT) {
-                    op->wrt = e->value;
-                    e++;
-                }
-                /*
-                 * Look for a segment base type.
-                 */
-                for (; e->type; e++) {
-                    if (!e->value)
-                        continue;
+static int parse_mref(operand *op, const expr *e)
+{
+    int b, i, s;        /* basereg, indexreg, scale */
+    int64_t o;          /* offset */
 
-                    if (e->type <= EXPR_REG_END) {
-                        nasm_nonfatal("invalid effective address: too many registers");
-                        return -1;
-                    } else if (e->type < EXPR_SEGBASE) {
-                        nasm_nonfatal("invalid effective address: bad subexpression type");
-                        return -1;
-                    } else if (e->value == 1) {
-                        if (op->segment != NO_SEG) {
-                            nasm_nonfatal("invalid effective address: multiple base segments");
-                            return -1;
-                        }
-                        op->segment = e->type - EXPR_SEGBASE;
-                    } else if (e->value == -1 &&
-                               e->type == location.segment + EXPR_SEGBASE &&
-                               !(op->opflags & OPFLAG_RELATIVE)) {
-                        op->opflags |= OPFLAG_RELATIVE;
-                    } else {
-                        nasm_nonfatal("invalid effective address: impossible segment base multiplier");
-                        return -1;
-                    }
+    b = op->basereg;
+    i = op->indexreg;
+    s = op->scale;
+    o = op->offset;
+
+    for (; e->type; e++) {
+        if (e->type <= EXPR_REG_END) {
+            bool is_gpr = is_class(REG_GPR,nasm_reg_flags[e->type]);
+
+            if (is_gpr && e->value == 1 && b == -1) {
+                /* It can be basereg */
+                b = e->type;
+            } else if (i == -1) {
+                /* Must be index register */
+                i = e->type;
+                s = e->value;
+            } else {
+                if (b == -1)
+                    nasm_nonfatal("invalid effective address: two index registers");
+                else if (!is_gpr)
+                    nasm_nonfatal("invalid effective address: impossible register");
+                else
+                    nasm_nonfatal("invalid effective address: too many registers");
+                return -1;
+            }
+        } else if (e->type == EXPR_UNKNOWN) {
+            op->opflags |= OPFLAG_UNKNOWN;
+        } else if (e->type == EXPR_SIMPLE) {
+            o += e->value;
+        } else if  (e->type == EXPR_WRT) {
+            op->wrt = e->value;
+        } else if (e->type >= EXPR_SEGBASE) {
+            if (e->value == 1) {
+                if (op->segment != NO_SEG) {
+                    nasm_nonfatal("invalid effective address: multiple base segments");
+                    return -1;
                 }
+                op->segment = e->type - EXPR_SEGBASE;
+            } else if (e->value == -1 &&
+                       e->type == location.segment + EXPR_SEGBASE &&
+                       !(op->opflags & OPFLAG_RELATIVE)) {
+                op->opflags |= OPFLAG_RELATIVE;
+            } else {
+                nasm_nonfatal("invalid effective address: impossible segment base multiplier");
+                return -1;
             }
+        } else {
+            nasm_nonfatal("invalid effective address: bad subexpression type");
+            return -1;
         }
-    }
-
-    nasm_assert(!e->type);      /* We should be at the end */
+   }
 
-    op->basereg = b;
+    op->basereg  = b;
     op->indexreg = i;
-    op->scale = s;
-    op->offset = o;
+    op->scale    = s;
+    op->offset   = o;
     return 0;
 }
 
@@ -419,6 +414,7 @@ insn *parse_line(char *buffer, insn *result)
     bool critical;
     bool first;
     bool recover;
+    bool far_jmp_ok;
     int i;
 
     nasm_static_assert(P_none == 0);
@@ -740,20 +736,18 @@ is_expression:
      * Now we begin to parse the operands. There may be up to four
      * of these, separated by commas, and terminated by a zero token.
      */
+    far_jmp_ok = result->opcode == I_JMP || result->opcode == I_CALL;
 
     for (opnum = 0; opnum < MAX_OPERANDS; opnum++) {
         operand *op = &result->oprs[opnum];
         expr *value;            /* used most of the time */
-        bool mref;              /* is this going to be a memory ref? */
-        bool bracket;           /* is it a [] mref, or a & mref? */
+        bool mref = false;      /* is this going to be a memory ref? */
+        int bracket = 0;        /* is it a [] mref, or a "naked" mref? */
         bool mib;               /* compound (mib) mref? */
         int setsize = 0;
         decoflags_t brace_flags = 0;    /* flags for decorators in braces */
 
-        op->disp_size = 0;    /* have to zero this whatever */
-        op->eaflags   = 0;    /* and this */
-        op->opflags   = 0;
-        op->decoflags = 0;
+        init_operand(op);
 
         i = stdscan(NULL, &tokval);
         if (i == TOKEN_EOS)
@@ -829,30 +823,55 @@ is_expression:
             i = stdscan(NULL, &tokval);
         }
 
-        if (i == '[' || i == '&') {     /* memory reference */
+        if (i == '[' || i == TOKEN_MASM_PTR || i == '&') {
+            /* memory reference */
             mref = true;
-            bracket = (i == '[');
-            i = stdscan(NULL, &tokval); /* then skip the colon */
-            while (i == TOKEN_SPECIAL || i == TOKEN_SIZE ||
-                   i == TOKEN_PREFIX) {
-                process_size_override(result, op);
-                i = stdscan(NULL, &tokval);
-            }
-            /* when a comma follows an opening bracket - [ , eax*4] */
-            if (i == ',') {
-                /* treat as if there is a zero displacement virtually */
-                tokval.t_type = TOKEN_NUM;
-                tokval.t_integer = 0;
-                stdscan_set(stdscan_get() - 1);     /* rewind the comma */
-            }
-        } else {                /* immediate operand, or register */
-            mref = false;
-            bracket = false;    /* placate optimisers */
+            bracket += (i == '[');
+            i = stdscan(NULL, &tokval);
         }
 
-        if ((op->type & FAR) && !mref &&
-            result->opcode != I_JMP && result->opcode != I_CALL)
-            nasm_nonfatal("invalid use of FAR operand specifier");
+    mref_more:
+        if (mref) {
+            bool done = false;
+            bool nofw = false;
+
+            while (!done) {
+                switch (i) {
+                case TOKEN_SPECIAL:
+                case TOKEN_SIZE:
+                case TOKEN_PREFIX:
+                    process_size_override(result, op);
+                    break;
+
+                case '[':
+                    bracket++;
+                    break;
+
+                case ',':
+                    tokval.t_type = TOKEN_NUM;
+                    tokval.t_integer = 0;
+                    stdscan_set(stdscan_get() - 1);     /* rewind the comma */
+                    done = nofw = true;
+                    break;
+
+                case TOKEN_MASM_FLAT:
+                    i = stdscan(NULL, &tokval);
+                    if (i != ':') {
+                        nasm_nonfatal("unknown use of FLAT in MASM emulation");
+                        nofw = true;
+                    }
+                    done = true;
+                    break;
+
+                default:
+                    done = nofw = true;
+                    break;
+                }
+
+                if (!nofw)
+                    i = stdscan(NULL, &tokval);
+            }
+        }
 
         value = evaluate(stdscan, NULL, &tokval,
                          &op->opflags, critical, &hints);
@@ -862,7 +881,18 @@ is_expression:
         }
         if (!value)                  /* Error in evaluator */
             goto fail;
-        if (i == ':' && mref) { /* it was seg:offset */
+
+        if (i == '[' && !bracket) {
+            /* displacement[regs] syntax */
+            mref = true;
+            parse_mref(op, value); /* Process what we have so far */
+            goto mref_more;
+        }
+
+        if (i == ':' && (mref || !far_jmp_ok)) {
+            /* segment override? */
+            mref = true;
+
             /*
              * Process the segment override.
              */
@@ -879,29 +909,15 @@ is_expression:
             }
 
             i = stdscan(NULL, &tokval); /* then skip the colon */
-            while (i == TOKEN_SPECIAL || i == TOKEN_SIZE ||
-                   i == TOKEN_PREFIX) {
-                process_size_override(result, op);
-                i = stdscan(NULL, &tokval);
-            }
-            value = evaluate(stdscan, NULL, &tokval,
-                             &op->opflags, critical, &hints);
-            i = tokval.t_type;
-            if (op->opflags & OPFLAG_FORWARD) {
-                result->forw_ref = true;
-            }
-            /* and get the offset */
-            if (!value)                  /* Error in evaluator */
-                goto fail;
+            goto mref_more;
         }
 
         mib = false;
         if (mref && bracket && i == ',') {
             /* [seg:base+offset,index*scale] syntax (mib) */
+            operand o2;         /* Index operand */
 
-            operand o1, o2;     /* Partial operands */
-
-            if (parse_mref(&o1, value))
+            if (parse_mref(op, value))
                 goto fail;
 
             i = stdscan(NULL, &tokval); /* Eat comma */
@@ -911,6 +927,7 @@ is_expression:
             if (!value)
                 goto fail;
 
+            init_operand(&o2);
             if (parse_mref(&o2, value))
                 goto fail;
 
@@ -920,18 +937,14 @@ is_expression:
                 o2.basereg = -1;
             }
 
-            if (o1.indexreg != -1 || o2.basereg != -1 || o2.offset != 0 ||
+            if (op->indexreg != -1 || o2.basereg != -1 || o2.offset != 0 ||
                 o2.segment != NO_SEG || o2.wrt != NO_SEG) {
                 nasm_nonfatal("invalid mib expression");
                 goto fail;
             }
 
-            op->basereg = o1.basereg;
             op->indexreg = o2.indexreg;
             op->scale = o2.scale;
-            op->offset = o1.offset;
-            op->segment = o1.segment;
-            op->wrt = o1.wrt;
 
             if (op->basereg != -1) {
                 op->hintbase = op->basereg;
@@ -948,21 +961,33 @@ is_expression:
         }
 
         recover = false;
-        if (mref && bracket) {  /* find ] at the end */
-            if (i != ']') {
-                nasm_nonfatal("parser: expecting ]");
-                recover = true;
-            } else {            /* we got the required ] */
-                i = stdscan(NULL, &tokval);
-                if (i == TOKEN_DECORATOR || i == TOKEN_OPMASK) {
-                    /* parse opmask (and zeroing) after an operand */
-                    recover = parse_braces(&brace_flags);
-                    i = tokval.t_type;
-                }
-                if (i != 0 && i != ',') {
-                    nasm_nonfatal("comma or end of line expected");
+        if (mref) {
+            if (bracket == 1) {
+                if (i == ']') {
+                    bracket--;
+                    i = stdscan(NULL, &tokval);
+                } else {
+                    nasm_nonfatal("expecting ] at end of memory operand");
                     recover = true;
                 }
+            } else if (bracket == 0) {
+                /* Do nothing */
+            } else if (bracket > 0) {
+                nasm_nonfatal("excess brackets in memory operand");
+                recover = true;
+            } else if (bracket < 0) {
+                nasm_nonfatal("unmatched ] in memory operand");
+                recover = true;
+            }
+
+            if (i == TOKEN_DECORATOR || i == TOKEN_OPMASK) {
+                /* parse opmask (and zeroing) after an operand */
+                recover = parse_braces(&brace_flags);
+                i = tokval.t_type;
+            }
+            if (!recover && i != 0 && i != ',') {
+                nasm_nonfatal("comma, decorator or end of line expected, got %d", i);
+                recover = true;
             }
         } else {                /* immediate operand */
             if (i != 0 && i != ',' && i != ':' &&
@@ -998,6 +1023,9 @@ is_expression:
                 op->hinttype = hints.type;
             }
             mref_set_optype(op);
+        } else if ((op->type & FAR) && !far_jmp_ok) {
+                nasm_nonfatal("invalid use of FAR operand specifier");
+                recover = true;
         } else {                /* it's not a memory reference */
             if (is_just_unknown(value)) {       /* it's immediate but unknown */
                 op->type      |= IMMEDIATE;
diff --git a/asm/tokens.dat b/asm/tokens.dat
index 9f1513c4..d75640cf 100644
--- a/asm/tokens.dat
+++ b/asm/tokens.dat
@@ -125,6 +125,10 @@ __ilog2c__
 seg
 wrt
 
+% TOKEN_{__*__}, 0, 0, 0
+__masm_ptr__
+__masm_flat__
+
 % TOKEN_DECORATOR, 0, TFLAG_BRC | TFLAG_BRDCAST , BRC_1TO{1to*}
 1to2
 1to4
diff --git a/include/nasm.h b/include/nasm.h
index f108bdfa..860d1fc0 100644
--- a/include/nasm.h
+++ b/include/nasm.h
@@ -190,6 +190,8 @@ enum token_type { /* token types, other than chars */
     TOKEN_STRFUNC,      /* __utf16*__, __utf32*__ */
     TOKEN_IFUNC,        /* __ilog2*__ */
     TOKEN_DECORATOR,    /* decorators such as {...} */
+    TOKEN_MASM_PTR,     /* __masm_ptr__ for the masm package */
+    TOKEN_MASM_FLAT,    /* __masm_flat__ for the masm package */
     TOKEN_OPMASK        /* translated token for opmask registers */
 };
 
diff --git a/macros/masm.mac b/macros/masm.mac
new file mode 100644
index 00000000..3e64f70a
--- /dev/null
+++ b/macros/masm.mac
@@ -0,0 +1,80 @@
+;; --------------------------------------------------------------------------
+;;
+;;   Copyright 2019 The NASM Authors - All Rights Reserved
+;;   See the file AUTHORS included with the NASM distribution for
+;;   the specific copyright holders.
+;;
+;;   Redistribution and use in source and binary forms, with or without
+;;   modification, are permitted provided that the following
+;;   conditions are met:
+;;
+;;   * Redistributions of source code must retain the above copyright
+;;     notice, this list of conditions and the following disclaimer.
+;;   * Redistributions in binary form must reproduce the above
+;;     copyright notice, this list of conditions and the following
+;;     disclaimer in the documentation and/or other materials provided
+;;     with the distribution.
+;;
+;;     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+;;     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+;;     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+;;     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+;;     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+;;     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+;;     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+;;     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+;;     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+;;     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+;;     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+;;     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+;;     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;
+;; --------------------------------------------------------------------------
+
+;;
+;; masm.mac
+;;
+;; Very limited MASM compatibility package; intended to be used
+;; primarily with machine-generated code. It does not include any
+;; "programmer friendly" shortcuts, nor does it in any way support
+;; ASSUME, symbol typing, or MASM-style structures.
+;;
+
+USE: masm
+
+%unimacro segment 1+
+
+%imacro segment 0-1+.nolist
+  %define __SECT__ [segment %00 %1]
+	__SECT__
+%endmacro
+
+%imacro ends 0+.nolist
+  %pragma ignore ends %00
+%endmacro
+
+%imacro proc 0-*.nolist
+  %rep %0
+    %ifidni %1,far
+      %idefine ret retf
+    %else
+      %idefine ret retn
+    %endif
+    %rotate 1
+  %endrep
+%endmacro
+
+%imacro endp 0.nolist
+  %pragma ignore endp %00
+  %undef ret
+%endmacro
+
+%idefine ptr __masm_ptr__
+%idefine flat __masm_flat__	; is %idefine really correct here?
+%idefine offset
+
+%imacro end 0+.nolist
+	; Nothing
+%endmacro
+
+	default rel
author	H. Peter Anvin (Intel) <hpa@zytor.com>	2019-08-14 15:24:56 -0700
committer	H. Peter Anvin (Intel) <hpa@zytor.com>	2019-08-14 15:44:50 -0700
commit	8981724f176ad18aaeac570faa5a26cc28bfef08 (patch)
tree	eb4c77810945791e9ee1cd3fa37b6ddd87c31a0e
parent	02b60ddd1c86ba6d932301f4ab205027beafc688 (diff)
download	nasm-8981724f176ad18aaeac570faa5a26cc28bfef08.tar.gz