summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--asm/assemble.c290
-rw-r--r--asm/parser.c436
-rw-r--r--asm/stdscan.c2
-rw-r--r--asm/tokens.dat6
-rw-r--r--include/nasm.h49
-rw-r--r--test/dup.asm21
6 files changed, 517 insertions, 287 deletions
diff --git a/asm/assemble.c b/asm/assemble.c
index 875a0d32..7a0830ca 100644
--- a/asm/assemble.c
+++ b/asm/assemble.c
@@ -601,6 +601,83 @@ static inline int64_t merge_resb(insn *ins, int64_t isize)
return isize;
}
+/*
+ * Return the number of zero bytes needed to round len up to the next
+ * multiple of align (0 if len is already a multiple). This uses %
+ * rather than a bit mask, so it must handle non-power-of-2 alignment
+ * values as well.
+ * NOTE(review): align == 0 would divide by zero; the callers in this
+ * file pass e->elem -- confirm it can never be 0 on those paths.
+ */
+static inline size_t pad_bytes(size_t len, size_t align)
+{
+ size_t partial = len % align;
+ return partial ? align - partial : 0;
+}
+
+/*
+ * Emit the data described by an extended-operand list.
+ *
+ * Walks the list starting at e, recursing into EOT_EXTOP sub-lists,
+ * and emits every element e->dup times through out(), out_rawdata()
+ * or out_reserve(). data is the output descriptor; its per-item
+ * fields (insoffs, inslen, size, type, ...) are overwritten here.
+ */
+static void out_eops(struct out_data *data, const extop *e)
+{
+ while (e) {
+ size_t dup = e->dup;
+
+ switch (e->type) {
+ case EOT_NOTHING:
+ break;
+
+ case EOT_EXTOP:
+ /* Repeat the whole sub-list dup times */
+ while (dup--)
+ out_eops(data, e->val.subexpr);
+ break;
+
+ case EOT_DB_NUMBER:
+ if (e->elem > 8) {
+ /*
+ * Integers wider than 8 bytes are rejected; nothing is
+ * emitted for this element (len_extops() still counts it).
+ */
+ nasm_nonfatal("integer supplied as %d-bit data",
+ e->elem << 3);
+ } else {
+ while (dup--) {
+ data->insoffs = 0;
+ data->inslen = data->size = e->elem;
+ data->tsegment = e->val.num.segment;
+ data->toffset = e->val.num.offset;
+ data->twrt = e->val.num.wrt;
+ data->relbase = 0;
+ /*
+ * NOTE(review): an odd segment value presumably denotes a
+ * segment-base reference -- confirm against the segment
+ * numbering convention.
+ */
+ if (e->val.num.segment != NO_SEG &&
+ (e->val.num.segment & 1)) {
+ data->type = OUT_SEGMENT;
+ data->sign = OUT_UNSIGNED;
+ } else {
+ data->type = e->val.num.relative
+ ? OUT_RELADDR : OUT_ADDRESS;
+ data->sign = OUT_WRAP;
+ }
+ out(data);
+ }
+ }
+ break;
+
+ case EOT_DB_FLOAT:
+ case EOT_DB_STRING:
+ case EOT_DB_STRING_FREE:
+ {
+ size_t pad, len;
+
+ /* Strings are zero-padded up to a multiple of the element size */
+ pad = pad_bytes(e->val.string.len, e->elem);
+ len = e->val.string.len + pad;
+
+ while (dup--) {
+ data->insoffs = 0;
+ data->inslen = len;
+ out_rawdata(data, e->val.string.data, e->val.string.len);
+ if (pad)
+ out_rawdata(data, zero_buffer, pad);
+ }
+ break;
+ }
+
+ case EOT_DB_RESERVE:
+ /* All dup copies are reserved with a single call */
+ data->insoffs = 0;
+ data->inslen = dup * e->elem;
+ out_reserve(data, data->inslen);
+ break;
+ }
+
+ e = e->next;
+ }
+}
+
/* This is totally just a wild guess what is reasonable... */
#define INCBIN_MAX_BUF (ZERO_BUF_SIZE * 16)
@@ -609,7 +686,9 @@ int64_t assemble(int32_t segment, int64_t start, int bits, insn *instruction)
struct out_data data;
const struct itemplate *temp;
enum match_result m;
- int64_t wsize; /* size for DB etc. */
+
+ if (instruction->opcode == I_none)
+ return 0;
nasm_zero(data);
data.offset = start;
@@ -617,49 +696,10 @@ int64_t assemble(int32_t segment, int64_t start, int bits, insn *instruction)
data.itemp = NULL;
data.bits = bits;
- wsize = db_bytes(instruction->opcode);
- if (wsize == -1)
- return 0;
-
- if (wsize) {
- extop *e;
-
- list_for_each(e, instruction->eops) {
- if (e->type == EOT_DB_NUMBER) {
- if (wsize > 8) {
- nasm_nonfatal("integer supplied to a DT,DO,DY or DZ");
- } else {
- data.insoffs = 0;
- data.inslen = data.size = wsize;
- data.toffset = e->offset;
- data.twrt = e->wrt;
- data.relbase = 0;
- if (e->segment != NO_SEG && (e->segment & 1)) {
- data.tsegment = e->segment;
- data.type = OUT_SEGMENT;
- data.sign = OUT_UNSIGNED;
- } else {
- data.tsegment = e->segment;
- data.type = e->relative ? OUT_RELADDR : OUT_ADDRESS;
- data.sign = OUT_WRAP;
- }
- out(&data);
- }
- } else if (e->type == EOT_DB_STRING ||
- e->type == EOT_DB_STRING_FREE) {
- int align = e->stringlen % wsize;
- if (align)
- align = wsize - align;
-
- data.insoffs = 0;
- data.inslen = e->stringlen + align;
-
- out_rawdata(&data, e->stringval, e->stringlen);
- out_rawdata(&data, zero_buffer, align);
- }
- }
+ if (opcode_is_db(instruction->opcode)) {
+ out_eops(&data, instruction->eops);
} else if (instruction->opcode == I_INCBIN) {
- const char *fname = instruction->eops->stringval;
+ const char *fname = instruction->eops->val.string.data;
FILE *fp;
size_t t = instruction->times; /* INCBIN handles TIMES by itself */
off_t base = 0;
@@ -688,14 +728,14 @@ int64_t assemble(int32_t segment, int64_t start, int bits, insn *instruction)
}
if (instruction->eops->next) {
- base = instruction->eops->next->offset;
+ base = instruction->eops->next->val.num.offset;
if (base >= len) {
len = 0;
} else {
len -= base;
if (instruction->eops->next->next &&
- len > (off_t)instruction->eops->next->next->offset)
- len = (off_t)instruction->eops->next->next->offset;
+ len > (off_t)instruction->eops->next->next->val.num.offset)
+ len = (off_t)instruction->eops->next->next->val.num.offset;
}
}
@@ -897,45 +937,57 @@ int64_t assemble(int32_t segment, int64_t start, int bits, insn *instruction)
return data.offset - start;
}
-static void debug_set_db_type(insn *instruction)
+/*
+ * Compute the debug type flags for an extended-operand list.
+ *
+ * Walks the list starting at e (recursing into EOT_EXTOP sub-lists)
+ * and ORs together one TY_* flag per element, chosen from the element
+ * size in bytes. A 4-byte EOT_DB_FLOAT maps to TY_FLOAT; every other
+ * size maps to the same integer-width flag in both tables:
+ * 16 bytes is an OWORD, 32 a YWORD and 64 a ZWORD, matching what the
+ * per-opcode switch this replaces used for DO, DY and DZ.
+ * Unknown sizes contribute nothing. Returns the combined flags.
+ */
+static int32_t eops_typeinfo(const extop *e)
{
- /* Is this really correct? .operands doesn't mean much for Dx */
- int32_t typeinfo = TYS_ELEMENTS(instruction->operands);
+ int32_t typeinfo = 0;
- switch (instruction->opcode) {
- case I_DB:
- typeinfo |= TY_BYTE;
- break;
- case I_DW:
- typeinfo |= TY_WORD;
- break;
- case I_DD:
- if (instruction->eops_float)
- typeinfo |= TY_FLOAT;
- else
- typeinfo |= TY_DWORD;
- break;
- case I_DQ:
- /* What about double? */
- typeinfo |= TY_QWORD;
- break;
- case I_DT:
- /* What about long double? */
- typeinfo |= TY_TBYTE;
- break;
- case I_DO:
- typeinfo |= TY_OWORD;
- break;
- case I_DY:
- typeinfo |= TY_YWORD;
- break;
- case I_DZ:
- typeinfo |= TY_ZWORD;
- break;
- default:
- panic();
+ while (e) {
+ switch (e->type) {
+ case EOT_NOTHING:
+ break;
+
+ case EOT_EXTOP:
+ typeinfo |= eops_typeinfo(e->val.subexpr);
+ break;
+
+ case EOT_DB_FLOAT:
+ switch (e->elem) {
+ case 1: typeinfo |= TY_BYTE; break;
+ case 2: typeinfo |= TY_WORD; break;
+ case 4: typeinfo |= TY_FLOAT; break;
+ case 8: typeinfo |= TY_QWORD; break; /* double? */
+ case 10: typeinfo |= TY_TBYTE; break; /* long double? */
+ case 16: typeinfo |= TY_OWORD; break;
+ case 32: typeinfo |= TY_YWORD; break;
+ case 64: typeinfo |= TY_ZWORD; break;
+ default: break;
+ }
+ break;
+
+ default:
+ switch (e->elem) {
+ case 1: typeinfo |= TY_BYTE; break;
+ case 2: typeinfo |= TY_WORD; break;
+ case 4: typeinfo |= TY_DWORD; break;
+ case 8: typeinfo |= TY_QWORD; break;
+ case 10: typeinfo |= TY_TBYTE; break;
+ case 16: typeinfo |= TY_OWORD; break;
+ case 32: typeinfo |= TY_YWORD; break;
+ case 64: typeinfo |= TY_ZWORD; break;
+ default: break;
+ }
+ break;
+ }
+ e = e->next;
}
+ return typeinfo;
+}
+
+/*
+ * Report the data type of a Dx instruction to the debug format.
+ * The type word starts from TYS_ELEMENTS(instruction->operands), has
+ * the element type flags of the operand list ORed in, and is passed
+ * to dfmt->debug_typevalue().
+ */
+static inline void debug_set_db_type(insn *instruction)
+{
+
+ int32_t typeinfo = TYS_ELEMENTS(instruction->operands);
+
+ typeinfo |= eops_typeinfo(instruction->eops);
dfmt->debug_typevalue(typeinfo);
}
@@ -1009,6 +1061,42 @@ static void define_equ(insn * instruction)
}
}
+/*
+ * Compute the number of bytes an extended-operand list will occupy.
+ *
+ * Walks the list starting at e, recursing into EOT_EXTOP sub-lists,
+ * and sums the size of each element multiplied by its dup count.
+ * Strings and floats are padded to a multiple of the element size.
+ * For numbers, warn_overflow_const() is called on the value as a
+ * side effect. The result must agree with what out_eops() emits.
+ */
+static int64_t len_extops(const extop *e)
+{
+ int64_t isize = 0;
+ size_t pad;
+
+ while (e) {
+ switch (e->type) {
+ case EOT_NOTHING:
+ break;
+
+ case EOT_EXTOP:
+ isize += e->dup * len_extops(e->val.subexpr);
+ break;
+
+ case EOT_DB_STRING:
+ case EOT_DB_STRING_FREE:
+ case EOT_DB_FLOAT:
+ pad = pad_bytes(e->val.string.len, e->elem);
+ isize += e->dup * (e->val.string.len + pad);
+ break;
+
+ case EOT_DB_NUMBER:
+ warn_overflow_const(e->val.num.offset, e->elem);
+ isize += e->dup * e->elem;
+ break;
+
+ case EOT_DB_RESERVE:
+ /* out_eops() reserves dup * elem bytes, so count the same */
+ isize += e->dup * e->elem;
+ break;
+ }
+
+ e = e->next;
+ }
+
+ return isize;
+}
int64_t insn_size(int32_t segment, int64_t offset, int bits, insn *instruction)
{
@@ -1022,33 +1110,12 @@ int64_t insn_size(int32_t segment, int64_t offset, int bits, insn *instruction)
define_equ(instruction);
return 0;
} else if (opcode_is_db(instruction->opcode)) {
- extop *e;
- int32_t osize, wsize;
-
- wsize = db_bytes(instruction->opcode);
- nasm_assert(wsize > 0);
-
- list_for_each(e, instruction->eops) {
- int32_t align;
-
- osize = 0;
- if (e->type == EOT_DB_NUMBER) {
- osize = 1;
- warn_overflow_const(e->offset, wsize);
- } else if (e->type == EOT_DB_STRING ||
- e->type == EOT_DB_STRING_FREE)
- osize = e->stringlen;
-
- align = (-osize) % wsize;
- if (align < 0)
- align += wsize;
- isize += osize + align;
- }
-
+ isize = len_extops(instruction->eops);
debug_set_db_type(instruction);
return isize;
} else if (instruction->opcode == I_INCBIN) {
- const char *fname = instruction->eops->stringval;
+ const extop *e = instruction->eops;
+ const char *fname = e->val.string.data;
off_t len;
len = nasm_file_size_by_path(fname);
@@ -1058,14 +1125,15 @@ int64_t insn_size(int32_t segment, int64_t offset, int bits, insn *instruction)
return 0;
}
- if (instruction->eops->next) {
- if (len <= (off_t)instruction->eops->next->offset) {
+ e = e->next;
+ if (e) {
+ if (len <= (off_t)e->val.num.offset) {
len = 0;
} else {
- len -= instruction->eops->next->offset;
- if (instruction->eops->next->next &&
- len > (off_t)instruction->eops->next->next->offset) {
- len = (off_t)instruction->eops->next->next->offset;
+ len -= e->val.num.offset;
+ e = e->next;
+ if (e && len > (off_t)e->val.num.offset) {
+ len = (off_t)e->val.num.offset;
}
}
}
diff --git a/asm/parser.c b/asm/parser.c
index 15cfcdfa..a59acb19 100644
--- a/asm/parser.c
+++ b/asm/parser.c
@@ -51,7 +51,7 @@
#include "tables.h"
-static int is_comma_next(void);
+static int end_expression_next(void);
static struct tokenval tokval;
@@ -355,14 +355,15 @@ static void mref_set_optype(operand *op)
/*
* Convert an expression vector returned from evaluate() into an
- * extop structure. Return zero on success.
+ * extop structure. Return zero on success. Note that the eop
+ * already has dup and elem set, so we can't clear it here.
*/
-static int value_to_extop(expr * vect, extop *eop, int32_t myseg)
+static int value_to_extop(expr *vect, extop *eop, int32_t myseg)
{
eop->type = EOT_DB_NUMBER;
- eop->offset = 0;
- eop->segment = eop->wrt = NO_SEG;
- eop->relative = false;
+ eop->val.num.offset = 0;
+ eop->val.num.segment = eop->val.num.wrt = NO_SEG;
+ eop->val.num.relative = false;
for (; vect->type; vect++) {
if (!vect->value) /* zero term, safe to ignore */
@@ -376,25 +377,26 @@ static int value_to_extop(expr * vect, extop *eop, int32_t myseg)
if (vect->type == EXPR_SIMPLE) {
/* Simple number expression */
- eop->offset += vect->value;
+ eop->val.num.offset += vect->value;
continue;
}
- if (eop->wrt == NO_SEG && !eop->relative && vect->type == EXPR_WRT) {
+ if (eop->val.num.wrt == NO_SEG && !eop->val.num.relative &&
+ vect->type == EXPR_WRT) {
/* WRT term */
- eop->wrt = vect->value;
+ eop->val.num.wrt = vect->value;
continue;
}
- if (!eop->relative &&
+ if (!eop->val.num.relative &&
vect->type == EXPR_SEGBASE + myseg && vect->value == -1) {
/* Expression of the form: foo - $ */
- eop->relative = true;
+ eop->val.num.relative = true;
continue;
}
- if (eop->segment == NO_SEG && vect->type >= EXPR_SEGBASE &&
- vect->value == 1) {
- eop->segment = vect->type - EXPR_SEGBASE;
+ if (eop->val.num.segment == NO_SEG &&
+ vect->type >= EXPR_SEGBASE && vect->value == 1) {
+ eop->val.num.segment = vect->type - EXPR_SEGBASE;
continue;
}
@@ -406,6 +408,228 @@ static int value_to_extop(expr * vect, extop *eop, int32_t myseg)
return 0;
}
+/*
+ * Parse an extended expression, used by db et al. "elem" is the element
+ * size; initially comes from the specific opcode (e.g. db == 1) but
+ * can be overridden.
+ *
+ * The parsed operands are linked into a list whose head is stored in
+ * *result (NULL if nothing was kept). "critical" is passed through to
+ * evaluate(). Parsing starts at the current token in tokval and stops
+ * at end of string or at a closing paren that ends the list.
+ *
+ * Returns the number of operands processed at this nesting level
+ * (operands that were discarded are still counted), or -1 on error.
+ */
+static int parse_eops(extop **result, bool critical, int elem)
+{
+ extop *eop = NULL, *prev = NULL;
+ extop **tail = result;
+ int sign;
+ int i = tokval.t_type;
+ int oper_num = 0;
+ bool do_subexpr = false;
+
+ *tail = NULL;
+
+ /* End of string is obvious; ) ends a sub-expression list e.g. DUP */
+ for (i = tokval.t_type; i != TOKEN_EOS; i = stdscan(NULL, &tokval)) {
+ char endparen = ')'; /* Is a right paren the end of list? */
+
+ if (i == ')')
+ break;
+
+ if (!eop) {
+ nasm_new(eop);
+ eop->dup = 1;
+ eop->elem = elem;
+ do_subexpr = false;
+ }
+ sign = +1;
+
+ /*
+ * end_expression_next() here is to distinguish this from
+ * a string used as part of an expression...
+ */
+ if (i == TOKEN_QMARK) {
+ eop->type = EOT_DB_RESERVE;
+ } else if (do_subexpr && i == '(') {
+ extop *subexpr;
+
+ stdscan(NULL, &tokval); /* Skip paren */
+ if (parse_eops(&eop->val.subexpr, critical, eop->elem) < 0)
+ goto fail;
+
+ subexpr = eop->val.subexpr;
+ if (!subexpr) {
+ /* Subexpression is empty */
+ eop->type = EOT_NOTHING;
+ } else if (!subexpr->next) {
+ /*
+ * Subexpression is a single element, flatten. Promote
+ * the sub-element in place of eop instead of copying its
+ * value into eop and freeing it: a float keeps its data
+ * inside its own allocation, so freeing the sub-element
+ * would leave val.string.data dangling, and the copy
+ * would also lose the sub-element's element size.
+ */
+ subexpr->dup *= eop->dup;
+ nasm_free(eop);
+ eop = subexpr;
+ } else {
+ eop->type = EOT_EXTOP;
+ }
+
+ /*
+ * We should have ended on a closing paren. Test the current
+ * token, not i: i still holds the opening paren here.
+ */
+ if (tokval.t_type != ')') {
+ nasm_nonfatal("expected `)' after subexpression, got `%s'",
+ tokval.t_type == TOKEN_EOS ?
+ "end of line" : tokval.t_charptr);
+ goto fail;
+ }
+ endparen = 0; /* This time the paren is not the end */
+ } else if (i == '%') {
+ /* %(expression_list) */
+ do_subexpr = true;
+ continue;
+ } else if (i == TOKEN_SIZE) {
+ /* Element size override */
+ eop->elem = tokval.t_inttwo;
+ do_subexpr = true;
+ continue;
+ } else if (i == TOKEN_STR && end_expression_next()) {
+ eop->type = EOT_DB_STRING;
+ eop->val.string.data = tokval.t_charptr;
+ eop->val.string.len = tokval.t_inttwo;
+ } else if (i == TOKEN_STRFUNC) {
+ bool parens = false;
+ const char *funcname = tokval.t_charptr;
+ enum strfunc func = tokval.t_integer;
+
+ i = stdscan(NULL, &tokval);
+ if (i == '(') {
+ parens = true;
+ endparen = 0;
+ i = stdscan(NULL, &tokval);
+ }
+ if (i != TOKEN_STR) {
+ nasm_nonfatal("%s must be followed by a string constant",
+ funcname);
+ eop->type = EOT_NOTHING;
+ } else {
+ eop->type = EOT_DB_STRING_FREE;
+ eop->val.string.len =
+ string_transform(tokval.t_charptr, tokval.t_inttwo,
+ &eop->val.string.data, func);
+ if (eop->val.string.len == (size_t)-1) {
+ nasm_nonfatal("invalid input string to %s", funcname);
+ eop->type = EOT_NOTHING;
+ }
+ }
+ if (parens && i && i != ')') {
+ i = stdscan(NULL, &tokval);
+ if (i != ')')
+ nasm_nonfatal("unterminated %s function", funcname);
+ }
+ } else if (i == '-' || i == '+') {
+ /* Look ahead: a sign directly before a float belongs to it */
+ char *save = stdscan_get();
+ struct tokenval tmptok;
+
+ sign = (i == '-') ? -1 : 1;
+ if (stdscan(NULL, &tmptok) != TOKEN_FLOAT) {
+ stdscan_set(save);
+ goto is_expression;
+ } else {
+ tokval = tmptok;
+ goto is_float;
+ }
+ } else if (i == TOKEN_FLOAT) {
+ is_float:
+ eop->type = EOT_DB_FLOAT;
+
+ if (eop->elem > 16) {
+ nasm_nonfatal("no %d-bit floating-point format supported",
+ eop->elem << 3);
+ eop->val.string.len = 0;
+ } else if (eop->elem < 1) {
+ nasm_nonfatal("floating-point constant"
+ " encountered in unknown instruction");
+ /*
+ * fix suggested by Pedro Gimeno... original line was:
+ * eop->type = EOT_NOTHING;
+ */
+ eop->val.string.len = 0;
+ } else {
+ eop->val.string.len = eop->elem;
+
+ /* The float bytes are stored directly after the extop itself */
+ eop = nasm_realloc(eop, sizeof(extop) + eop->val.string.len);
+ eop->val.string.data = (char *)eop + sizeof(extop);
+ if (!float_const(tokval.t_charptr, sign,
+ (uint8_t *)eop->val.string.data,
+ eop->val.string.len))
+ eop->val.string.len = 0;
+ }
+ if (!eop->val.string.len)
+ eop->type = EOT_NOTHING;
+ } else {
+ /* anything else, assume it is an expression */
+ expr *value;
+
+ is_expression:
+ value = evaluate(stdscan, NULL, &tokval, NULL,
+ critical, NULL);
+ i = tokval.t_type;
+ if (!value) /* Error in evaluator */
+ goto fail;
+ if (tokval.t_flag & TFLAG_DUP) {
+ /* Expression followed by DUP */
+ if (!is_simple(value)) {
+ nasm_nonfatal("non-constant argument supplied to DUP");
+ goto fail;
+ } else if (value->value < 0) {
+ nasm_nonfatal("negative argument supplied to DUP");
+ goto fail;
+ }
+ eop->dup *= (size_t)value->value;
+ do_subexpr = true;
+ continue;
+ }
+ if (value_to_extop(value, eop, location.segment)) {
+ nasm_nonfatal("expression is not simple or relocatable");
+ }
+ }
+
+ /*
+ * NOTE(review): discarding an operand here frees only the extop
+ * itself; a sub-list (EOT_EXTOP) or a transformed string
+ * (EOT_DB_STRING_FREE) attached to it is not released.
+ */
+ if (eop->dup == 0 || eop->type == EOT_NOTHING) {
+ nasm_free(eop);
+ } else if (eop->type == EOT_DB_RESERVE &&
+ prev && prev->type == EOT_DB_RESERVE &&
+ prev->elem == eop->elem) {
+ /* Coalesce multiple EOT_DB_RESERVE */
+ prev->dup += eop->dup;
+ nasm_free(eop);
+ } else {
+ /* Add this eop to the end of the chain */
+ prev = eop;
+ *tail = eop;
+ tail = &eop->next;
+ }
+
+ oper_num++;
+ eop = NULL; /* Done with this operand */
+
+ /*
+ * We're about to call stdscan(), which will eat the
+ * comma that we're currently sitting on between
+ * arguments. However, we'd better check first that it
+ * _is_ a comma.
+ */
+ if (i == TOKEN_EOS || i == endparen) /* Already at end? */
+ break;
+ if (i != ',') {
+ i = stdscan(NULL, &tokval); /* eat the comma or final paren */
+ if (i == TOKEN_EOS || i == ')') /* got end of expression */
+ break;
+ if (i != ',') {
+ nasm_nonfatal("comma expected after operand");
+ goto fail;
+ }
+ }
+ }
+
+ return oper_num;
+
+fail:
+ /*
+ * Only the operand under construction is freed here; operands
+ * already linked into *result are left for the caller to release.
+ * NOTE(review): a sub-list attached to eop is not freed on this path.
+ */
+ if (eop)
+ nasm_free(eop);
+ return -1;
+}
+
+
insn *parse_line(char *buffer, insn *result)
{
bool insn_is_label = false;
@@ -562,141 +786,19 @@ restart_parse:
critical = pass_final() || (result->opcode == I_INCBIN);
if (opcode_is_db(result->opcode) || result->opcode == I_INCBIN) {
- extop *eop, **tail = &result->eops, **fixptr;
- int oper_num = 0;
- int32_t sign;
-
- result->eops_float = false;
+ int oper_num;
- /*
- * Begin to read the DB/DW/DD/DQ/DT/DO/DY/DZ/INCBIN operands.
- */
- while (1) {
- i = stdscan(NULL, &tokval);
- if (i == TOKEN_EOS)
- break;
- else if (first && i == ':') {
- insn_is_label = true;
- goto restart_parse;
- }
- first = false;
- fixptr = tail;
- eop = *tail = nasm_malloc(sizeof(extop));
- tail = &eop->next;
- eop->next = NULL;
- eop->type = EOT_NOTHING;
- oper_num++;
- sign = +1;
-
- /*
- * is_comma_next() here is to distinguish this from
- * a string used as part of an expression...
- */
- if (i == TOKEN_STR && is_comma_next()) {
- eop->type = EOT_DB_STRING;
- eop->stringval = tokval.t_charptr;
- eop->stringlen = tokval.t_inttwo;
- i = stdscan(NULL, &tokval); /* eat the comma */
- } else if (i == TOKEN_STRFUNC) {
- bool parens = false;
- const char *funcname = tokval.t_charptr;
- enum strfunc func = tokval.t_integer;
- i = stdscan(NULL, &tokval);
- if (i == '(') {
- parens = true;
- i = stdscan(NULL, &tokval);
- }
- if (i != TOKEN_STR) {
- nasm_nonfatal("%s must be followed by a string constant",
- funcname);
- eop->type = EOT_NOTHING;
- } else {
- eop->type = EOT_DB_STRING_FREE;
- eop->stringlen =
- string_transform(tokval.t_charptr, tokval.t_inttwo,
- &eop->stringval, func);
- if (eop->stringlen == (size_t)-1) {
- nasm_nonfatal("invalid input string to %s", funcname);
- eop->type = EOT_NOTHING;
- }
- }
- if (parens && i && i != ')') {
- i = stdscan(NULL, &tokval);
- if (i != ')')
- nasm_nonfatal("unterminated %s function", funcname);
- }
- if (i && i != ',')
- i = stdscan(NULL, &tokval);
- } else if (i == '-' || i == '+') {
- char *save = stdscan_get();
- int token = i;
- sign = (i == '-') ? -1 : 1;
- i = stdscan(NULL, &tokval);
- if (i != TOKEN_FLOAT) {
- stdscan_set(save);
- i = tokval.t_type = token;
- goto is_expression;
- } else {
- goto is_float;
- }
- } else if (i == TOKEN_FLOAT) {
-is_float:
- eop->type = EOT_DB_STRING;
- result->eops_float = true;
-
- eop->stringlen = db_bytes(result->opcode);
- if (eop->stringlen > 16) {
- nasm_nonfatal("floating-point constant"
- " encountered in DY or DZ instruction");
- eop->stringlen = 0;
- } else if (eop->stringlen < 1) {
- nasm_nonfatal("floating-point constant"
- " encountered in unknown instruction");
- /*
- * fix suggested by Pedro Gimeno... original line was:
- * eop->type = EOT_NOTHING;
- */
- eop->stringlen = 0;
- }
-
- eop = nasm_realloc(eop, sizeof(extop) + eop->stringlen);
- tail = &eop->next;
- *fixptr = eop;
- eop->stringval = (char *)eop + sizeof(extop);
- if (!eop->stringlen ||
- !float_const(tokval.t_charptr, sign,
- (uint8_t *)eop->stringval, eop->stringlen))
- eop->type = EOT_NOTHING;
- i = stdscan(NULL, &tokval); /* eat the comma */
- } else {
- /* anything else, assume it is an expression */
- expr *value;
-
-is_expression:
- value = evaluate(stdscan, NULL, &tokval, NULL,
- critical, NULL);
- i = tokval.t_type;
- if (!value) /* Error in evaluator */
- goto fail;
- if (value_to_extop(value, eop, location.segment)) {
- nasm_nonfatal("operand %d: expression is not simple or relocatable",
- oper_num);
- }
- }
+ i = stdscan(NULL, &tokval);
- /*
- * We're about to call stdscan(), which will eat the
- * comma that we're currently sitting on between
- * arguments. However, we'd better check first that it
- * _is_ a comma.
- */
- if (i == TOKEN_EOS) /* also could be EOL */
- break;
- if (i != ',') {
- nasm_nonfatal("comma expected after operand %d", oper_num);
- goto fail;
- }
+ if (first && i == ':') {
+ /* Really a label */
+ insn_is_label = true;
+ goto restart_parse;
}
+ first = false;
+ oper_num = parse_eops(&result->eops, critical, db_bytes(result->opcode));
+ if (oper_num < 0)
+ goto fail;
if (result->opcode == I_INCBIN) {
/*
@@ -724,11 +826,12 @@ is_expression:
* Throw the instruction away.
*/
goto fail;
- } else /* DB ... */ if (oper_num == 0)
- nasm_warn(WARN_OTHER, "no operand for data declaration");
- else
+ } else {
+ /* DB et al */
result->operands = oper_num;
-
+ if (oper_num == 0)
+ nasm_warn(WARN_OTHER, "no operand for data declaration");
+ }
return result;
}
@@ -1197,7 +1300,7 @@ fail:
return result;
}
-static int is_comma_next(void)
+static int end_expression_next(void)
{
struct tokenval tv;
char *p;
@@ -1207,17 +1310,34 @@ static int is_comma_next(void)
i = stdscan(NULL, &tv);
stdscan_set(p);
- return (i == ',' || i == ';' || !i);
+ return (i == ',' || i == ';' || i == ')' || !i);
}
-void cleanup_insn(insn * i)
+/*
+ * Free an extended-operand list, including nested sub-lists
+ * (EOT_EXTOP) and string buffers owned by the list
+ * (EOT_DB_STRING_FREE). Other element types own no separate
+ * allocation. Passing NULL is a no-op.
+ */
+static void free_eops(extop *e)
{
- extop *e;
+ extop *next;
+
+ while (e) {
+ /* Fetch the successor first: e itself is freed below */
+ next = e->next;
+ switch (e->type) {
+ case EOT_EXTOP:
+ free_eops(e->val.subexpr);
+ break;
+
+ case EOT_DB_STRING_FREE:
+ nasm_free(e->val.string.data);
+ break;
+
+ default:
+ break;
+ }
- while ((e = i->eops)) {
- i->eops = e->next;
- if (e->type == EOT_DB_STRING_FREE)
- nasm_free(e->stringval);
nasm_free(e);
+ e = next;
}
}
+
+/*
+ * Release the extended-operand list attached to an instruction.
+ * The list pointer is cleared afterwards, as the loop this replaces
+ * did, so the instruction never keeps a pointer to freed operands.
+ */
+void cleanup_insn(insn * i)
+{
+ free_eops(i->eops);
+ i->eops = NULL;
+}
diff --git a/asm/stdscan.c b/asm/stdscan.c
index 4491430d..c1c38f42 100644
--- a/asm/stdscan.c
+++ b/asm/stdscan.c
@@ -128,6 +128,8 @@ int stdscan(void *private_data, struct tokenval *tv)
(void)private_data; /* Don't warn that this parameter is unused */
+ nasm_zero(*tv);
+
stdscan_bufptr = nasm_skip_spaces(stdscan_bufptr);
if (!*stdscan_bufptr)
return tv->t_type = TOKEN_EOS;
diff --git a/asm/tokens.dat b/asm/tokens.dat
index 81875f0e..ab37dcc1 100644
--- a/asm/tokens.dat
+++ b/asm/tokens.dat
@@ -42,6 +42,7 @@
# Tokens other than instructions and registers
#
+# The ? operator is a keyword, because ? is a legitimate symbol character
% TOKEN_QMARK, 0, 0, 0
?
@@ -88,9 +89,14 @@ short
strict
to
+# PTR is a legitimate symbol, but has an optional warning
% TOKEN_ID, 0, TFLAG_WARN, 0
ptr
+# DUP is a legitimate symbol, but also has context-specific use in extops
+% TOKEN_ID, 0, TFLAG_DUP, 0
+dup
+
% TOKEN_FLOAT, 0, 0, 0
__?infinity?__
__?nan?__
diff --git a/include/nasm.h b/include/nasm.h
index 2a207a03..bb9dbf6b 100644
--- a/include/nasm.h
+++ b/include/nasm.h
@@ -462,6 +462,7 @@ enum ccode { /* condition code names */
#define TFLAG_BRC_ANY (TFLAG_BRC | TFLAG_BRC_OPT)
#define TFLAG_BRDCAST (1 << 2) /* broadcasting decorator */
#define TFLAG_WARN (1 << 3) /* warning only, treat as ID */
+#define TFLAG_DUP (1 << 4) /* valid ID but also has context-specific use */
static inline uint8_t get_cond_opcode(enum ccode c)
{
@@ -548,13 +549,6 @@ enum prefixes { /* instruction prefixes */
PREFIX_ENUM_LIMIT
};
-enum extop_type { /* extended operand types */
- EOT_NOTHING,
- EOT_DB_STRING, /* Byte string */
- EOT_DB_STRING_FREE, /* Byte string which should be nasm_free'd*/
- EOT_DB_NUMBER /* Integer */
-};
-
enum ea_flags { /* special EA flags */
EAF_BYTEOFFS = 1, /* force offset part to byte size */
EAF_WORDOFFS = 2, /* force offset part to [d]word size */
@@ -595,15 +589,34 @@ typedef struct operand { /* operand to an instruction */
#define OPFLAG_RELATIVE 8 /* operand is self-relative, e.g. [foo - $]
where foo is not in the current segment */
+enum extop_type { /* extended operand types */
+ EOT_NOTHING = 0,
+ EOT_EXTOP, /* Subexpression */
+ EOT_DB_STRING, /* Byte string */
+ EOT_DB_FLOAT, /* Floating-point number (special byte string) */
+ EOT_DB_STRING_FREE, /* Byte string which should be nasm_free'd */
+ EOT_DB_NUMBER, /* Integer */
+ EOT_DB_RESERVE /* ? */
+};
+
typedef struct extop { /* extended operand */
- struct extop *next; /* linked list */
- char *stringval; /* if it's a string, then here it is */
- size_t stringlen; /* ... and here's how long it is */
- int64_t offset; /* ... it's given here ... */
- int32_t segment; /* if it's a number/address, then... */
- int32_t wrt; /* ... and here */
- bool relative; /* self-relative expression */
- enum extop_type type; /* defined above */
+ struct extop *next; /* linked list */
+ union { /* which member is valid depends on type */
+ struct { /* text or byte string */
+ char *data; /* the bytes; for a float, stored right after this struct */
+ size_t len; /* number of bytes in data */
+ } string;
+ struct { /* numeric expression */
+ int64_t offset; /* numeric value or address offset */
+ int32_t segment; /* address segment */
+ int32_t wrt; /* address wrt */
+ bool relative; /* self-relative expression */
+ } num;
+ struct extop *subexpr; /* nested operand list (EOT_EXTOP) */
+ } val;
+ size_t dup; /* repeat count; 1 unless DUP is used */
+ enum extop_type type; /* defined above */
+ int elem; /* element size in bytes, from the opcode or an override */
} extop;
enum ea_type {
@@ -827,7 +840,7 @@ struct ofmt {
* This procedure is called at the start of each pass.
*/
void (*reset)(void);
-
+
/*
* This is the modern output function, which gets passed
* a struct out_data with much more information. See the
@@ -1252,8 +1265,8 @@ enum decorator_tokens {
* Global modes
*/
-/*
- * flag to disable optimizations selectively
+/*
+ * flag to disable optimizations selectively
* this is useful to turn-off certain optimizations
*/
enum optimization_disable_flag {
diff --git a/test/dup.asm b/test/dup.asm
new file mode 100644
index 00000000..2e939119
--- /dev/null
+++ b/test/dup.asm
@@ -0,0 +1,21 @@
+ bits 32
+
+ db 33
+ db (44)
+; db (44,55) -- error
+ db %(44,55)
+ db %('XX','YY')
+ db ('AA')
+ db %('BB')
+ db ?
+ db 6 dup (33)
+ db 6 dup (33, 34)
+ db 6 dup (33, 34), 35
+ db 7 dup (99)
+ db 7 dup (?,?)
+ dw byte (?,44)
+
+ dw 0xcc, 4 dup byte ('PQR'), ?, 0xabcd
+
+ dd 16 dup (0xaaaa, ?, 0xbbbbbb)
+ dd 64 dup (?)