Implement an enhanced version of MASM's dup() and "db ?" syntax.

Add support for complex data (Dx) statement expressions involving both initialized and uninitialized data. In addition, the size of each element can now be overridden on an individual item and/or list basis.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
author: H. Peter Anvin (Intel) <hpa@zytor.com> 2019-10-16 14:29:16 -0700
committer: H. Peter Anvin (Intel) <hpa@zytor.com> 2019-10-16 14:29:16 -0700
commit: 84b852bff0ee7085245defbeb38d6218052579ae (patch)
tree: 78f08be964725c1d9ad619fe811575015b7f6214
parent: d03a6c8ffedd1496221eca5f02ba0215794cfaf9 (diff)
download: nasm-84b852bff0ee7085245defbeb38d6218052579ae.tar.gz
6 files changed, 517 insertions, 287 deletions
diff --git a/asm/assemble.c b/asm/assemble.c
index 875a0d32..7a0830ca 100644
--- a/asm/assemble.c
+++ b/asm/assemble.c
@@ -601,6 +601,83 @@ static inline int64_t merge_resb(insn *ins, int64_t isize)
     return isize;
 }
 
+/*
+ * Return the number of padding bytes needed to round "len" up to the
+ * next multiple of "align".
+ *
+ * This uses a modulo rather than a bit mask, so it handles
+ * non-power-of-2 alignment values (element sizes such as 10 bytes).
+ * An alignment of zero is treated as "no padding required" instead of
+ * dividing by zero.
+ */
+static inline size_t pad_bytes(size_t len, size_t align)
+{
+    size_t partial = align ? len % align : 0;
+    return partial ? align - partial : 0;
+}
+
+/*
+ * Emit a chain of extended (Dx) operands through the output routines.
+ *
+ * The chain starting at "e" is walked in order; every operand is
+ * written e->dup times.  Sub-lists (EOT_EXTOP) are emitted by
+ * recursion, once per repetition.  "data" is the output descriptor
+ * that is filled in and handed to out()/out_rawdata()/out_reserve()
+ * for each item.
+ */
+static void out_eops(struct out_data *data, const extop *e)
+{
+    for (; e; e = e->next) {
+        size_t reps = e->dup;   /* Repeat count for this operand */
+
+        switch (e->type) {
+        case EOT_NOTHING:
+            break;
+
+        case EOT_EXTOP:
+            /* Replay the whole sub-list for each repetition */
+            for (; reps; reps--)
+                out_eops(data, e->val.subexpr);
+            break;
+
+        case EOT_DB_NUMBER:
+            /* Integers are only emitted for elements of up to 8 bytes */
+            if (e->elem > 8) {
+                nasm_nonfatal("integer supplied as %d-bit data",
+                              e->elem << 3);
+                break;
+            }
+
+            for (; reps; reps--) {
+                data->insoffs  = 0;
+                data->size     = e->elem;
+                data->inslen   = data->size;
+                data->tsegment = e->val.num.segment;
+                data->toffset  = e->val.num.offset;
+                data->twrt     = e->val.num.wrt;
+                data->relbase  = 0;
+
+                if (e->val.num.segment == NO_SEG ||
+                    !(e->val.num.segment & 1)) {
+                    /* Plain value or address, possibly self-relative */
+                    if (e->val.num.relative)
+                        data->type = OUT_RELADDR;
+                    else
+                        data->type = OUT_ADDRESS;
+                    data->sign = OUT_WRAP;
+                } else {
+                    /* Odd segment value other than NO_SEG */
+                    data->type = OUT_SEGMENT;
+                    data->sign = OUT_UNSIGNED;
+                }
+                out(data);
+            }
+            break;
+
+        case EOT_DB_FLOAT:
+        case EOT_DB_STRING:
+        case EOT_DB_STRING_FREE:
+        {
+            /* Byte data, zero-padded up to a whole number of elements */
+            const size_t rawlen = e->val.string.len;
+            const size_t fill   = pad_bytes(rawlen, e->elem);
+
+            for (; reps; reps--) {
+                data->insoffs = 0;
+                data->inslen  = rawlen + fill;
+                out_rawdata(data, e->val.string.data, rawlen);
+                if (fill)
+                    out_rawdata(data, zero_buffer, fill);
+            }
+            break;
+        }
+
+        case EOT_DB_RESERVE:
+            /* Uninitialized storage: all repetitions in one reservation */
+            data->insoffs = 0;
+            data->inslen  = reps * e->elem;
+            out_reserve(data, data->inslen);
+            break;
+        }
+    }
+}
+
 /* This is totally just a wild guess what is reasonable... */
 #define INCBIN_MAX_BUF (ZERO_BUF_SIZE * 16)
 
@@ -609,7 +686,9 @@ int64_t assemble(int32_t segment, int64_t start, int bits, insn *instruction)
     struct out_data data;
     const struct itemplate *temp;
     enum match_result m;
-    int64_t wsize;              /* size for DB etc. */
+
+    if (instruction->opcode == I_none)
+        return 0;
 
     nasm_zero(data);
     data.offset = start;
@@ -617,49 +696,10 @@ int64_t assemble(int32_t segment, int64_t start, int bits, insn *instruction)
     data.itemp = NULL;
     data.bits = bits;
 
-    wsize = db_bytes(instruction->opcode);
-    if (wsize == -1)
-        return 0;
-
-    if (wsize) {
-        extop *e;
-
-        list_for_each(e, instruction->eops) {
-            if (e->type == EOT_DB_NUMBER) {
-                if (wsize > 8) {
-                    nasm_nonfatal("integer supplied to a DT,DO,DY or DZ");
-                } else {
-                    data.insoffs = 0;
-                    data.inslen = data.size = wsize;
-                    data.toffset = e->offset;
-                    data.twrt = e->wrt;
-                    data.relbase = 0;
-                    if (e->segment != NO_SEG && (e->segment & 1)) {
-                        data.tsegment = e->segment;
-                        data.type = OUT_SEGMENT;
-                        data.sign = OUT_UNSIGNED;
-                    } else {
-                        data.tsegment = e->segment;
-                        data.type = e->relative ? OUT_RELADDR : OUT_ADDRESS;
-                        data.sign = OUT_WRAP;
-                    }
-                    out(&data);
-                }
-            } else if (e->type == EOT_DB_STRING ||
-                       e->type == EOT_DB_STRING_FREE) {
-                int align = e->stringlen % wsize;
-                if (align)
-                    align = wsize - align;
-
-                data.insoffs = 0;
-                data.inslen = e->stringlen + align;
-
-                out_rawdata(&data, e->stringval, e->stringlen);
-                out_rawdata(&data, zero_buffer, align);
-            }
-        }
+    if (opcode_is_db(instruction->opcode)) {
+        out_eops(&data, instruction->eops);
     } else if (instruction->opcode == I_INCBIN) {
-        const char *fname = instruction->eops->stringval;
+        const char *fname = instruction->eops->val.string.data;
         FILE *fp;
         size_t t = instruction->times; /* INCBIN handles TIMES by itself */
         off_t base = 0;
@@ -688,14 +728,14 @@ int64_t assemble(int32_t segment, int64_t start, int bits, insn *instruction)
         }
 
         if (instruction->eops->next) {
-            base = instruction->eops->next->offset;
+            base = instruction->eops->next->val.num.offset;
             if (base >= len) {
                 len = 0;
             } else {
                 len -= base;
                 if (instruction->eops->next->next &&
-                    len > (off_t)instruction->eops->next->next->offset)
-                    len = (off_t)instruction->eops->next->next->offset;
+                    len > (off_t)instruction->eops->next->next->val.num.offset)
+                    len = (off_t)instruction->eops->next->next->val.num.offset;
             }
         }
 
@@ -897,45 +937,57 @@ int64_t assemble(int32_t segment, int64_t start, int bits, insn *instruction)
     return data.offset - start;
 }
 
-static void debug_set_db_type(insn *instruction)
+/*
+ * Gather the debug type bits for a chain of extended (Dx) operands.
+ *
+ * Each operand contributes a TY_* value chosen from its element size
+ * in bytes; sub-lists are visited recursively and EOT_NOTHING entries
+ * contribute nothing.  The sizes follow the Dx opcodes: 1 = byte,
+ * 2 = word, 4 = dword (TY_FLOAT for floating-point data), 8 = qword,
+ * 10 = tbyte, 16 = oword, 32 = yword, 64 = zword.  Any other element
+ * size is ignored.
+ *
+ * Returns the OR of all contributions, 0 for an empty chain.
+ */
+static int32_t eops_typeinfo(const extop *e)
 {
-    /* Is this really correct? .operands doesn't mean much for Dx */
-    int32_t typeinfo = TYS_ELEMENTS(instruction->operands);
+    int32_t typeinfo = 0;
 
-    switch (instruction->opcode) {
-    case I_DB:
-        typeinfo |= TY_BYTE;
-        break;
-    case I_DW:
-        typeinfo |= TY_WORD;
-        break;
-    case I_DD:
-        if (instruction->eops_float)
-            typeinfo |= TY_FLOAT;
-        else
-            typeinfo |= TY_DWORD;
-        break;
-    case I_DQ:
-        /* What about double? */
-        typeinfo |= TY_QWORD;
-        break;
-    case I_DT:
-        /* What about long double? */
-        typeinfo |= TY_TBYTE;
-        break;
-    case I_DO:
-        typeinfo |= TY_OWORD;
-        break;
-    case I_DY:
-        typeinfo |= TY_YWORD;
-        break;
-    case I_DZ:
-        typeinfo |= TY_ZWORD;
-        break;
-    default:
-        panic();
+    while (e) {
+        switch (e->type) {
+        case EOT_NOTHING:
+            break;
+
+        case EOT_EXTOP:
+            typeinfo |= eops_typeinfo(e->val.subexpr);
+            break;
+
+        case EOT_DB_FLOAT:
+            switch (e->elem) {
+            case  1: typeinfo |= TY_BYTE;  break;
+            case  2: typeinfo |= TY_WORD;  break;
+            case  4: typeinfo |= TY_FLOAT; break;
+            case  8: typeinfo |= TY_QWORD; break; /* double? */
+            case 10: typeinfo |= TY_TBYTE; break; /* long double? */
+            case 16: typeinfo |= TY_OWORD; break;
+            case 32: typeinfo |= TY_YWORD; break;
+            case 64: typeinfo |= TY_ZWORD; break;
+            default: break;
+            }
+            break;
+
+        default:
+            /* Integers, strings and reserved space share one mapping */
+            switch (e->elem) {
+            case  1: typeinfo |= TY_BYTE;  break;
+            case  2: typeinfo |= TY_WORD;  break;
+            case  4: typeinfo |= TY_DWORD; break;
+            case  8: typeinfo |= TY_QWORD; break;
+            case 10: typeinfo |= TY_TBYTE; break;
+            case 16: typeinfo |= TY_OWORD; break;
+            case 32: typeinfo |= TY_YWORD; break;
+            case 64: typeinfo |= TY_ZWORD; break;
+            default: break;
+            }
+            break;
+        }
+        e = e->next;
     }
 
+    return typeinfo;
+}
+
+/*
+ * Report the data type of a Dx statement to the debug format backend:
+ * the element count derived from instruction->operands, combined with
+ * the type bits gathered from the statement's operand list.
+ */
+static inline void debug_set_db_type(insn *instruction)
+{
+    const int32_t elements = TYS_ELEMENTS(instruction->operands);
+    const int32_t typeinfo = elements | eops_typeinfo(instruction->eops);
+
     dfmt->debug_typevalue(typeinfo);
 }
 
@@ -1009,6 +1061,42 @@ static void define_equ(insn * instruction)
     }
 }
 
+/*
+ * Compute the number of bytes a chain of extended (Dx) operands
+ * occupies, so that the size reported for the statement matches what
+ * out_eops() emits for the same chain.
+ *
+ * Every operand counts e->dup times.  Strings and floating-point data
+ * are rounded up to a whole number of elements; integers and reserved
+ * (`?') items take exactly one element each; sub-lists are measured
+ * recursively.  Integer operands are also checked for overflow of
+ * their element size via warn_overflow_const().
+ *
+ * Returns the total size in bytes, 0 for an empty chain.
+ */
+static int64_t len_extops(const extop *e)
+{
+    int64_t isize = 0;
+    size_t pad;
+
+    while (e) {
+        switch (e->type) {
+        case EOT_NOTHING:
+            break;
+
+        case EOT_EXTOP:
+            isize += e->dup * len_extops(e->val.subexpr);
+            break;
+
+        case EOT_DB_STRING:
+        case EOT_DB_STRING_FREE:
+        case EOT_DB_FLOAT:
+            pad = pad_bytes(e->val.string.len, e->elem);
+            isize += e->dup * (e->val.string.len + pad);
+            break;
+
+        case EOT_DB_NUMBER:
+            warn_overflow_const(e->val.num.offset, e->elem);
+            isize += e->dup * e->elem;
+            break;
+
+        case EOT_DB_RESERVE:
+            /* dup elements of elem bytes each, as reserved by out_eops() */
+            isize += e->dup * e->elem;
+            break;
+        }
+
+        e = e->next;
+    }
+
+    return isize;
+}
 
 int64_t insn_size(int32_t segment, int64_t offset, int bits, insn *instruction)
 {
@@ -1022,33 +1110,12 @@ int64_t insn_size(int32_t segment, int64_t offset, int bits, insn *instruction)
         define_equ(instruction);
         return 0;
     } else if (opcode_is_db(instruction->opcode)) {
-        extop *e;
-        int32_t osize, wsize;
-
-        wsize = db_bytes(instruction->opcode);
-        nasm_assert(wsize > 0);
-
-        list_for_each(e, instruction->eops) {
-            int32_t align;
-
-            osize = 0;
-            if (e->type == EOT_DB_NUMBER) {
-                osize = 1;
-                warn_overflow_const(e->offset, wsize);
-            } else if (e->type == EOT_DB_STRING ||
-                       e->type == EOT_DB_STRING_FREE)
-                osize = e->stringlen;
-
-            align = (-osize) % wsize;
-            if (align < 0)
-                align += wsize;
-            isize += osize + align;
-        }
-
+        isize = len_extops(instruction->eops);
         debug_set_db_type(instruction);
         return isize;
     } else if (instruction->opcode == I_INCBIN) {
-        const char *fname = instruction->eops->stringval;
+        const extop *e = instruction->eops;
+        const char *fname = e->val.string.data;
         off_t len;
 
         len = nasm_file_size_by_path(fname);
@@ -1058,14 +1125,15 @@ int64_t insn_size(int32_t segment, int64_t offset, int bits, insn *instruction)
             return 0;
         }
 
-        if (instruction->eops->next) {
-            if (len <= (off_t)instruction->eops->next->offset) {
+        e = e->next;
+        if (e) {
+            if (len <= (off_t)e->val.num.offset) {
                 len = 0;
             } else {
-                len -= instruction->eops->next->offset;
-                if (instruction->eops->next->next &&
-                    len > (off_t)instruction->eops->next->next->offset) {
-                    len = (off_t)instruction->eops->next->next->offset;
+                len -= e->val.num.offset;
+                e = e->next;
+                if (e && len > (off_t)e->val.num.offset) {
+                    len = (off_t)e->val.num.offset;
                 }
             }
         }
diff --git a/asm/parser.c b/asm/parser.c
index 15cfcdfa..a59acb19 100644
--- a/asm/parser.c
+++ b/asm/parser.c
@@ -51,7 +51,7 @@
 #include "tables.h"
 
 
-static int is_comma_next(void);
+static int end_expression_next(void);
 
 static struct tokenval tokval;
 
@@ -355,14 +355,15 @@ static void mref_set_optype(operand *op)
 
 /*
  * Convert an expression vector returned from evaluate() into an
- * extop structure.  Return zero on success.
+ * extop structure.  Return zero on success.  Note that the eop
+ * already has dup and elem set, so we can't clear it here.
  */
-static int value_to_extop(expr * vect, extop *eop, int32_t myseg)
+static int value_to_extop(expr *vect, extop *eop, int32_t myseg)
 {
     eop->type = EOT_DB_NUMBER;
-    eop->offset = 0;
-    eop->segment = eop->wrt = NO_SEG;
-    eop->relative = false;
+    eop->val.num.offset = 0;
+    eop->val.num.segment = eop->val.num.wrt = NO_SEG;
+    eop->val.num.relative = false;
 
     for (; vect->type; vect++) {
         if (!vect->value)       /* zero term, safe to ignore */
@@ -376,25 +377,26 @@ static int value_to_extop(expr * vect, extop *eop, int32_t myseg)
 
         if (vect->type == EXPR_SIMPLE) {
             /* Simple number expression */
-            eop->offset += vect->value;
+            eop->val.num.offset += vect->value;
             continue;
         }
-        if (eop->wrt == NO_SEG && !eop->relative && vect->type == EXPR_WRT) {
+        if (eop->val.num.wrt == NO_SEG && !eop->val.num.relative &&
+            vect->type == EXPR_WRT) {
             /* WRT term */
-            eop->wrt = vect->value;
+            eop->val.num.wrt = vect->value;
             continue;
         }
 
-        if (!eop->relative &&
+        if (!eop->val.num.relative &&
             vect->type == EXPR_SEGBASE + myseg && vect->value == -1) {
             /* Expression of the form: foo - $ */
-            eop->relative = true;
+            eop->val.num.relative = true;
             continue;
         }
 
-        if (eop->segment == NO_SEG && vect->type >= EXPR_SEGBASE &&
-            vect->value == 1) {
-            eop->segment = vect->type - EXPR_SEGBASE;
+        if (eop->val.num.segment == NO_SEG &&
+            vect->type >= EXPR_SEGBASE && vect->value == 1) {
+            eop->val.num.segment = vect->type - EXPR_SEGBASE;
             continue;
         }
 
@@ -406,6 +408,228 @@ static int value_to_extop(expr * vect, extop *eop, int32_t myseg)
     return 0;
 }
 
+/*
+ * Parse an extended expression list, as used by db et al.
+ *
+ * On entry the current token (tokval) is the first token of the list.
+ * Parsing stops at end of string, or at a `)' that closes an enclosing
+ * sub-expression list (e.g. the argument of DUP).
+ *
+ * result:   receives the head of the newly built extop chain, or NULL
+ *           if no operand was kept.
+ * critical: passed through to evaluate().
+ * elem:     element size in bytes; initially comes from the specific
+ *           opcode (e.g. db == 1) but can be overridden for a single
+ *           item or a sub-list by a size keyword.
+ *
+ * Returns the number of operands parsed at this level, or -1 on error.
+ */
+static int parse_eops(extop **result, bool critical, int elem)
+{
+    extop *eop = NULL, *prev = NULL;
+    extop **tail = result;
+    int sign;
+    int i;
+    int oper_num = 0;
+    bool do_subexpr = false;
+
+    *tail = NULL;
+
+    /* End of string is obvious; ) ends a sub-expression list e.g. DUP */
+    for (i = tokval.t_type; i != TOKEN_EOS; i = stdscan(NULL, &tokval)) {
+        char endparen = ')';   /* Is a right paren the end of list? */
+
+        if (i == ')')
+            break;
+
+        /*
+         * A prefix (`%', size override, DUP count) "continue"s with
+         * eop still pending, so only start a new operand when the
+         * previous one has been completed.
+         */
+        if (!eop) {
+            nasm_new(eop);
+            eop->dup  = 1;
+            eop->elem = elem;
+            do_subexpr = false;
+        }
+        sign = +1;
+
+        /*
+         * end_expression_next() here is to distinguish this from
+         * a string used as part of an expression...
+         */
+        if (i == TOKEN_QMARK) {
+            eop->type = EOT_DB_RESERVE;
+        } else if (do_subexpr && i == '(') {
+            extop *subexpr;
+
+            stdscan(NULL, &tokval); /* Skip paren */
+            if (parse_eops(&eop->val.subexpr, critical, eop->elem) < 0)
+                goto fail;
+
+            subexpr = eop->val.subexpr;
+            if (!subexpr) {
+                /* Subexpression is empty */
+                eop->type = EOT_NOTHING;
+            } else if (!subexpr->next) {
+                /*
+                 * Subexpression is a single element, flatten.
+                 * The element may carry data stored in its own
+                 * allocation (floating-point constants) and its own
+                 * element size, so move it up in place of the wrapper
+                 * rather than copying its value and freeing it.
+                 */
+                subexpr->dup *= eop->dup;
+                nasm_free(eop);
+                eop = subexpr;
+            } else {
+                eop->type = EOT_EXTOP;
+            }
+
+            /* We should have ended on a closing paren */
+            if (tokval.t_type != ')') {
+                /* "i" is still the opening paren; test the live token */
+                nasm_nonfatal("expected `)' after subexpression, got `%s'",
+                              tokval.t_type == TOKEN_EOS ?
+                              "end of line" : tokval.t_charptr);
+                goto fail;
+            }
+            endparen = 0;       /* This time the paren is not the end */
+        } else if (i == '%') {
+            /* %(expression_list) */
+            do_subexpr = true;
+            continue;
+        } else if (i == TOKEN_SIZE) {
+            /* Element size override */
+            eop->elem = tokval.t_inttwo;
+            do_subexpr = true;
+            continue;
+        } else if (i == TOKEN_STR && end_expression_next()) {
+            eop->type            = EOT_DB_STRING;
+            eop->val.string.data = tokval.t_charptr;
+            eop->val.string.len  = tokval.t_inttwo;
+        } else if (i == TOKEN_STRFUNC) {
+            bool parens = false;
+            const char *funcname = tokval.t_charptr;
+            enum strfunc func = tokval.t_integer;
+
+            i = stdscan(NULL, &tokval);
+            if (i == '(') {
+                parens = true;
+                endparen = 0;
+                i = stdscan(NULL, &tokval);
+            }
+            if (i != TOKEN_STR) {
+                nasm_nonfatal("%s must be followed by a string constant",
+                              funcname);
+                eop->type = EOT_NOTHING;
+            } else {
+                eop->type = EOT_DB_STRING_FREE;
+                eop->val.string.len =
+                    string_transform(tokval.t_charptr, tokval.t_inttwo,
+                                     &eop->val.string.data, func);
+                if (eop->val.string.len == (size_t)-1) {
+                    nasm_nonfatal("invalid input string to %s", funcname);
+                    eop->type = EOT_NOTHING;
+                }
+            }
+            if (parens && i && i != ')') {
+                i = stdscan(NULL, &tokval);
+                if (i != ')')
+                    nasm_nonfatal("unterminated %s function", funcname);
+            }
+        } else if (i == '-' || i == '+') {
+            /* A sign is part of a float constant only if one follows */
+            char *save = stdscan_get();
+            struct tokenval tmptok;
+
+            sign = (i == '-') ? -1 : 1;
+            if (stdscan(NULL, &tmptok) != TOKEN_FLOAT) {
+                stdscan_set(save);
+                goto is_expression;
+            } else {
+                tokval = tmptok;
+                goto is_float;
+            }
+        } else if (i == TOKEN_FLOAT) {
+        is_float:
+            eop->type = EOT_DB_FLOAT;
+
+            if (eop->elem > 16) {
+                nasm_nonfatal("no %d-bit floating-point format supported",
+                              eop->elem << 3);
+                eop->val.string.len = 0;
+            } else if (eop->elem < 1) {
+                nasm_nonfatal("floating-point constant"
+                              " encountered in unknown instruction");
+                /*
+                 * fix suggested by Pedro Gimeno... original line was:
+                 * eop->type = EOT_NOTHING;
+                 */
+                eop->val.string.len = 0;
+            } else {
+                eop->val.string.len = eop->elem;
+
+                /* The converted bytes live directly after the extop */
+                eop = nasm_realloc(eop, sizeof(extop) + eop->val.string.len);
+                eop->val.string.data = (char *)eop + sizeof(extop);
+                if (!float_const(tokval.t_charptr, sign,
+                                 (uint8_t *)eop->val.string.data,
+                                 eop->val.string.len))
+                    eop->val.string.len = 0;
+            }
+            if (!eop->val.string.len)
+                eop->type = EOT_NOTHING;
+        } else {
+            /* anything else, assume it is an expression */
+            expr *value;
+
+        is_expression:
+            value = evaluate(stdscan, NULL, &tokval, NULL,
+                             critical, NULL);
+            i = tokval.t_type;
+            if (!value)                  /* Error in evaluator */
+                goto fail;
+            if (tokval.t_flag & TFLAG_DUP) {
+                /* Expression followed by DUP */
+                if (!is_simple(value)) {
+                    nasm_nonfatal("non-constant argument supplied to DUP");
+                    goto fail;
+                } else if (value->value < 0) {
+                    nasm_nonfatal("negative argument supplied to DUP");
+                    goto fail;
+                }
+                eop->dup *= (size_t)value->value;
+                do_subexpr = true;
+                continue;
+            }
+            if (value_to_extop(value, eop, location.segment)) {
+                nasm_nonfatal("expression is not simple or relocatable");
+            }
+        }
+
+        if (eop->dup == 0 || eop->type == EOT_NOTHING) {
+            /*
+             * Nothing to emit; drop the operand.
+             * NOTE(review): only the node itself is released here; a
+             * sub-list or transformed string it may own is not.
+             */
+            nasm_free(eop);
+        } else if (eop->type == EOT_DB_RESERVE &&
+                   prev && prev->type == EOT_DB_RESERVE &&
+                   prev->elem == eop->elem) {
+            /* Coalesce multiple EOT_DB_RESERVE */
+            prev->dup += eop->dup;
+            nasm_free(eop);
+        } else {
+            /* Add this eop to the end of the chain */
+            prev = eop;
+            *tail = eop;
+            tail = &eop->next;
+        }
+
+        oper_num++;
+        eop = NULL;             /* Done with this operand */
+
+        /*
+         * We're about to call stdscan(), which will eat the
+         * comma that we're currently sitting on between
+         * arguments. However, we'd better check first that it
+         * _is_ a comma.
+         */
+        if (i == TOKEN_EOS || i == endparen)	/* Already at end? */
+            break;
+        if (i != ',') {
+            i = stdscan(NULL, &tokval);		/* eat the comma or final paren */
+            if (i == TOKEN_EOS || i == ')')	/* got end of expression */
+                break;
+            if (i != ',') {
+                nasm_nonfatal("comma expected after operand");
+                goto fail;
+            }
+        }
+    }
+
+    /*
+     * The list can end right after a `%', a size override or a DUP
+     * count, each of which leaves eop pending.  That half-built
+     * operand was never linked into the chain, so release it here
+     * instead of leaking it.
+     */
+    if (eop)
+        nasm_free(eop);
+
+    return oper_num;
+
+fail:
+    if (eop)
+        nasm_free(eop);
+    return -1;
+}
+
 insn *parse_line(char *buffer, insn *result)
 {
     bool insn_is_label = false;
@@ -562,141 +786,19 @@ restart_parse:
     critical = pass_final() || (result->opcode == I_INCBIN);
 
     if (opcode_is_db(result->opcode) || result->opcode == I_INCBIN) {
-        extop *eop, **tail = &result->eops, **fixptr;
-        int oper_num = 0;
-        int32_t sign;
-
-        result->eops_float = false;
+        int oper_num;
 
-        /*
-         * Begin to read the DB/DW/DD/DQ/DT/DO/DY/DZ/INCBIN operands.
-         */
-        while (1) {
-            i = stdscan(NULL, &tokval);
-            if (i == TOKEN_EOS)
-                break;
-            else if (first && i == ':') {
-                insn_is_label = true;
-                goto restart_parse;
-            }
-            first = false;
-            fixptr = tail;
-            eop = *tail = nasm_malloc(sizeof(extop));
-            tail = &eop->next;
-            eop->next = NULL;
-            eop->type = EOT_NOTHING;
-            oper_num++;
-            sign = +1;
-
-            /*
-             * is_comma_next() here is to distinguish this from
-             * a string used as part of an expression...
-             */
-            if (i == TOKEN_STR && is_comma_next()) {
-                eop->type       = EOT_DB_STRING;
-                eop->stringval  = tokval.t_charptr;
-                eop->stringlen  = tokval.t_inttwo;
-                i = stdscan(NULL, &tokval);     /* eat the comma */
-            } else if (i == TOKEN_STRFUNC) {
-                bool parens = false;
-                const char *funcname = tokval.t_charptr;
-                enum strfunc func = tokval.t_integer;
-                i = stdscan(NULL, &tokval);
-                if (i == '(') {
-                    parens = true;
-                    i = stdscan(NULL, &tokval);
-                }
-                if (i != TOKEN_STR) {
-                    nasm_nonfatal("%s must be followed by a string constant",
-                                  funcname);
-                    eop->type = EOT_NOTHING;
-                } else {
-                    eop->type = EOT_DB_STRING_FREE;
-                    eop->stringlen =
-                        string_transform(tokval.t_charptr, tokval.t_inttwo,
-                                         &eop->stringval, func);
-                    if (eop->stringlen == (size_t)-1) {
-                        nasm_nonfatal("invalid input string to %s", funcname);
-                        eop->type = EOT_NOTHING;
-                    }
-                }
-                if (parens && i && i != ')') {
-                    i = stdscan(NULL, &tokval);
-                    if (i != ')')
-                        nasm_nonfatal("unterminated %s function", funcname);
-                }
-                if (i && i != ',')
-                    i = stdscan(NULL, &tokval);
-            } else if (i == '-' || i == '+') {
-                char *save = stdscan_get();
-                int token = i;
-                sign = (i == '-') ? -1 : 1;
-                i = stdscan(NULL, &tokval);
-                if (i != TOKEN_FLOAT) {
-                    stdscan_set(save);
-                    i = tokval.t_type = token;
-                    goto is_expression;
-                } else {
-                    goto is_float;
-                }
-            } else if (i == TOKEN_FLOAT) {
-is_float:
-                eop->type = EOT_DB_STRING;
-                result->eops_float = true;
-
-                eop->stringlen = db_bytes(result->opcode);
-                if (eop->stringlen > 16) {
-                    nasm_nonfatal("floating-point constant"
-                                  " encountered in DY or DZ instruction");
-                    eop->stringlen = 0;
-                } else if (eop->stringlen < 1) {
-                    nasm_nonfatal("floating-point constant"
-                                  " encountered in unknown instruction");
-                    /*
-                     * fix suggested by Pedro Gimeno... original line was:
-                     * eop->type = EOT_NOTHING;
-                     */
-                    eop->stringlen = 0;
-                }
-
-                eop = nasm_realloc(eop, sizeof(extop) + eop->stringlen);
-                tail = &eop->next;
-                *fixptr = eop;
-                eop->stringval = (char *)eop + sizeof(extop);
-                if (!eop->stringlen ||
-                    !float_const(tokval.t_charptr, sign,
-                                 (uint8_t *)eop->stringval, eop->stringlen))
-                    eop->type = EOT_NOTHING;
-                i = stdscan(NULL, &tokval); /* eat the comma */
-            } else {
-                /* anything else, assume it is an expression */
-                expr *value;
-
-is_expression:
-                value = evaluate(stdscan, NULL, &tokval, NULL,
-                                 critical, NULL);
-                i = tokval.t_type;
-                if (!value)                  /* Error in evaluator */
-                    goto fail;
-                if (value_to_extop(value, eop, location.segment)) {
-                    nasm_nonfatal("operand %d: expression is not simple or relocatable",
-                                  oper_num);
-                }
-            }
+        i = stdscan(NULL, &tokval);
 
-            /*
-             * We're about to call stdscan(), which will eat the
-             * comma that we're currently sitting on between
-             * arguments. However, we'd better check first that it
-             * _is_ a comma.
-             */
-            if (i == TOKEN_EOS) /* also could be EOL */
-                break;
-            if (i != ',') {
-                nasm_nonfatal("comma expected after operand %d", oper_num);
-                goto fail;
-            }
+        if (first && i == ':') {
+            /* Really a label */
+            insn_is_label = true;
+            goto restart_parse;
         }
+        first = false;
+        oper_num = parse_eops(&result->eops, critical, db_bytes(result->opcode));
+        if (oper_num < 0)
+            goto fail;
 
         if (result->opcode == I_INCBIN) {
             /*
@@ -724,11 +826,12 @@ is_expression:
              * Throw the instruction away.
              */
             goto fail;
-        } else /* DB ... */ if (oper_num == 0)
-            nasm_warn(WARN_OTHER, "no operand for data declaration");
-        else
+        } else {
+            /* DB et al */
             result->operands = oper_num;
-
+            if (oper_num == 0)
+                nasm_warn(WARN_OTHER, "no operand for data declaration");
+        }
         return result;
     }
 
@@ -1197,7 +1300,7 @@ fail:
     return result;
 }
 
-static int is_comma_next(void)
+static int end_expression_next(void)
 {
     struct tokenval tv;
     char *p;
@@ -1207,17 +1310,34 @@ static int is_comma_next(void)
     i = stdscan(NULL, &tv);
     stdscan_set(p);
 
-    return (i == ',' || i == ';' || !i);
+    return (i == ',' || i == ';' || i == ')' || !i);
 }
 
-void cleanup_insn(insn * i)
+/*
+ * Release a chain of extended operands.
+ *
+ * Every node is freed; a sub-list (EOT_EXTOP) is released recursively
+ * first, and a string marked EOT_DB_STRING_FREE has its separately
+ * allocated buffer freed as well.  No other operand type has anything
+ * released besides the node itself.
+ */
+static void free_eops(extop *e)
 {
-    extop *e;
+    while (e) {
+        extop *doomed = e;
 
-    while ((e = i->eops)) {
-        i->eops = e->next;
-        if (e->type == EOT_DB_STRING_FREE)
-            nasm_free(e->stringval);
-        nasm_free(e);
+        /* Step past the node first; it is about to be released */
+        e = doomed->next;
+
+        if (doomed->type == EOT_EXTOP)
+            free_eops(doomed->val.subexpr);
+        else if (doomed->type == EOT_DB_STRING_FREE)
+            nasm_free(doomed->val.string.data);
+
+        nasm_free(doomed);
     }
 }
+
+/*
+ * Release the extended operand list attached to an instruction.
+ * The list pointer is cleared afterwards so that the instruction does
+ * not keep referring to freed operands and a repeated cleanup is
+ * harmless.
+ */
+void cleanup_insn(insn * i)
+{
+    free_eops(i->eops);
+    i->eops = NULL;
+}
diff --git a/asm/stdscan.c b/asm/stdscan.c
index 4491430d..c1c38f42 100644
--- a/asm/stdscan.c
+++ b/asm/stdscan.c
@@ -128,6 +128,8 @@ int stdscan(void *private_data, struct tokenval *tv)
 
     (void)private_data;         /* Don't warn that this parameter is unused */
 
+    nasm_zero(*tv);
+
     stdscan_bufptr = nasm_skip_spaces(stdscan_bufptr);
     if (!*stdscan_bufptr)
         return tv->t_type = TOKEN_EOS;
diff --git a/asm/tokens.dat b/asm/tokens.dat
index 81875f0e..ab37dcc1 100644
--- a/asm/tokens.dat
+++ b/asm/tokens.dat
@@ -42,6 +42,7 @@
 # Tokens other than instructions and registers
 #
 
+# The ? operator is a keyword, because ? is a legitimate symbol character
 % TOKEN_QMARK, 0, 0, 0
 ?
 
@@ -88,9 +89,14 @@ short
 strict
 to
 
+# PTR is a legitimate symbol, but has an optional warning
 % TOKEN_ID, 0, TFLAG_WARN, 0
 ptr
 
+# DUP is a legitimate symbol, but also has context-specific use in extops
+% TOKEN_ID, 0, TFLAG_DUP, 0
+dup
+
 % TOKEN_FLOAT, 0, 0, 0
 __?infinity?__
 __?nan?__
diff --git a/include/nasm.h b/include/nasm.h
index 2a207a03..bb9dbf6b 100644
--- a/include/nasm.h
+++ b/include/nasm.h
@@ -462,6 +462,7 @@ enum ccode { /* condition code names */
 #define TFLAG_BRC_ANY   (TFLAG_BRC | TFLAG_BRC_OPT)
 #define TFLAG_BRDCAST   (1 << 2)    /* broadcasting decorator */
 #define TFLAG_WARN	(1 << 3)    /* warning only, treat as ID */
+#define TFLAG_DUP	(1 << 4)    /* valid ID but also has context-specific use */
 
 static inline uint8_t get_cond_opcode(enum ccode c)
 {
@@ -548,13 +549,6 @@ enum prefixes { /* instruction prefixes */
     PREFIX_ENUM_LIMIT
 };
 
-enum extop_type { /* extended operand types */
-    EOT_NOTHING,
-    EOT_DB_STRING,      /* Byte string */
-    EOT_DB_STRING_FREE, /* Byte string which should be nasm_free'd*/
-    EOT_DB_NUMBER       /* Integer */
-};
-
 enum ea_flags { /* special EA flags */
     EAF_BYTEOFFS    =  1,   /* force offset part to byte size */
     EAF_WORDOFFS    =  2,   /* force offset part to [d]word size */
@@ -595,15 +589,34 @@ typedef struct operand { /* operand to an instruction */
 #define OPFLAG_RELATIVE     8   /* operand is self-relative, e.g. [foo - $]
                                    where foo is not in the current segment */
 
+enum extop_type { /* extended operand types */
+    EOT_NOTHING = 0,
+    EOT_EXTOP,          /* Subexpression */
+    EOT_DB_STRING,      /* Byte string */
+    EOT_DB_FLOAT,       /* Floating-point number (special byte string) */
+    EOT_DB_STRING_FREE, /* Byte string which should be nasm_free'd */
+    EOT_DB_NUMBER,      /* Integer */
+    EOT_DB_RESERVE      /* Uninitialized storage (`?') */
+};
+
 typedef struct extop { /* extended operand */
-    struct extop    *next;      /* linked list */
-    char            *stringval; /* if it's a string, then here it is */
-    size_t          stringlen;  /* ... and here's how long it is */
-    int64_t         offset;     /* ... it's given here ... */
-    int32_t         segment;    /* if it's a number/address, then... */
-    int32_t         wrt;        /* ... and here */
-    bool            relative;   /* self-relative expression */
-    enum extop_type type;       /* defined above */
+    struct extop    *next;       /* linked list */
+    /* Payload; which member is meaningful depends on "type" below */
+    union {
+        struct {                 /* text or byte string (also floats) */
+            char    *data;
+            size_t   len;
+        } string;
+        struct {                 /* numeric expression */
+            int64_t  offset;     /* numeric value or address offset */
+            int32_t  segment;    /* address segment */
+            int32_t  wrt;        /* address wrt */
+            bool     relative;   /* self-relative expression */
+        } num;
+        struct extop *subexpr;   /* nested operand list (EOT_EXTOP) */
+    } val;
+    size_t dup;                  /* repeat count (DUP); 1 when not repeated */
+    enum extop_type type;        /* defined above */
+    int elem;                    /* element size in bytes (opcode default or override) */
 } extop;
 
 enum ea_type {
@@ -827,7 +840,7 @@ struct ofmt {
      * This procedure is called at the start of each pass.
      */
     void (*reset)(void);
-    
+
     /*
      * This is the modern output function, which gets passed
      * a struct out_data with much more information.  See the
@@ -1252,8 +1265,8 @@ enum decorator_tokens {
  * Global modes
  */
 
-/* 
- * flag to disable optimizations selectively 
+/*
+ * flag to disable optimizations selectively
  * this is useful to turn-off certain optimizations
  */
 enum optimization_disable_flag {
diff --git a/test/dup.asm b/test/dup.asm
new file mode 100644
index 00000000..2e939119
--- /dev/null
+++ b/test/dup.asm
@@ -0,0 +1,21 @@
+	bits 32
+
+	db 33
+	db (44)
+;	db (44,55)	-- error
+	db %(44,55)
+	db %('XX','YY')
+	db ('AA')
+	db %('BB')
+	db ?
+	db 6 dup (33)
+	db 6 dup (33, 34)
+	db 6 dup (33, 34), 35
+	db 7 dup (99)
+	db 7 dup (?,?)
+	dw byte (?,44)
+
+	dw 0xcc, 4 dup byte ('PQR'), ?, 0xabcd
+
+	dd 16 dup (0xaaaa, ?, 0xbbbbbb)
+	dd 64 dup (?)
author	H. Peter Anvin (Intel) <hpa@zytor.com>	2019-10-16 14:29:16 -0700
committer	H. Peter Anvin (Intel) <hpa@zytor.com>	2019-10-16 14:29:16 -0700
commit	84b852bff0ee7085245defbeb38d6218052579ae (patch)
tree	78f08be964725c1d9ad619fe811575015b7f6214
parent	d03a6c8ffedd1496221eca5f02ba0215794cfaf9 (diff)
download	nasm-84b852bff0ee7085245defbeb38d6218052579ae.tar.gz