diff options
-rw-r--r-- | asm/assemble.c | 290 | ||||
-rw-r--r-- | asm/parser.c | 436 | ||||
-rw-r--r-- | asm/stdscan.c | 2 | ||||
-rw-r--r-- | asm/tokens.dat | 6 | ||||
-rw-r--r-- | include/nasm.h | 49 | ||||
-rw-r--r-- | test/dup.asm | 21 |
6 files changed, 517 insertions, 287 deletions
diff --git a/asm/assemble.c b/asm/assemble.c index 875a0d32..7a0830ca 100644 --- a/asm/assemble.c +++ b/asm/assemble.c @@ -601,6 +601,83 @@ static inline int64_t merge_resb(insn *ins, int64_t isize) return isize; } +/* This must be handle non-power-of-2 alignment values */ +static inline size_t pad_bytes(size_t len, size_t align) +{ + size_t partial = len % align; + return partial ? align - partial : 0; +} + +static void out_eops(struct out_data *data, const extop *e) +{ + while (e) { + size_t dup = e->dup; + + switch (e->type) { + case EOT_NOTHING: + break; + + case EOT_EXTOP: + while (dup--) + out_eops(data, e->val.subexpr); + break; + + case EOT_DB_NUMBER: + if (e->elem > 8) { + nasm_nonfatal("integer supplied as %d-bit data", + e->elem << 3); + } else { + while (dup--) { + data->insoffs = 0; + data->inslen = data->size = e->elem; + data->tsegment = e->val.num.segment; + data->toffset = e->val.num.offset; + data->twrt = e->val.num.wrt; + data->relbase = 0; + if (e->val.num.segment != NO_SEG && + (e->val.num.segment & 1)) { + data->type = OUT_SEGMENT; + data->sign = OUT_UNSIGNED; + } else { + data->type = e->val.num.relative + ? OUT_RELADDR : OUT_ADDRESS; + data->sign = OUT_WRAP; + } + out(data); + } + } + break; + + case EOT_DB_FLOAT: + case EOT_DB_STRING: + case EOT_DB_STRING_FREE: + { + size_t pad, len; + + pad = pad_bytes(e->val.string.len, e->elem); + len = e->val.string.len + pad; + + while (dup--) { + data->insoffs = 0; + data->inslen = len; + out_rawdata(data, e->val.string.data, e->val.string.len); + if (pad) + out_rawdata(data, zero_buffer, pad); + } + break; + } + + case EOT_DB_RESERVE: + data->insoffs = 0; + data->inslen = dup * e->elem; + out_reserve(data, data->inslen); + break; + } + + e = e->next; + } +} + /* This is totally just a wild guess what is reasonable... */ #define INCBIN_MAX_BUF (ZERO_BUF_SIZE * 16) @@ -609,7 +686,9 @@ int64_t assemble(int32_t segment, int64_t start, int bits, insn *instruction) struct out_data data; const struct itemplate *temp; enum match_result m; - int64_t wsize; /* size for DB etc. */ + + if (instruction->opcode == I_none) + return 0; nasm_zero(data); data.offset = start; @@ -617,49 +696,10 @@ int64_t assemble(int32_t segment, int64_t start, int bits, insn *instruction) data.itemp = NULL; data.bits = bits; - wsize = db_bytes(instruction->opcode); - if (wsize == -1) - return 0; - - if (wsize) { - extop *e; - - list_for_each(e, instruction->eops) { - if (e->type == EOT_DB_NUMBER) { - if (wsize > 8) { - nasm_nonfatal("integer supplied to a DT,DO,DY or DZ"); - } else { - data.insoffs = 0; - data.inslen = data.size = wsize; - data.toffset = e->offset; - data.twrt = e->wrt; - data.relbase = 0; - if (e->segment != NO_SEG && (e->segment & 1)) { - data.tsegment = e->segment; - data.type = OUT_SEGMENT; - data.sign = OUT_UNSIGNED; - } else { - data.tsegment = e->segment; - data.type = e->relative ? OUT_RELADDR : OUT_ADDRESS; - data.sign = OUT_WRAP; - } - out(&data); - } - } else if (e->type == EOT_DB_STRING || - e->type == EOT_DB_STRING_FREE) { - int align = e->stringlen % wsize; - if (align) - align = wsize - align; - - data.insoffs = 0; - data.inslen = e->stringlen + align; - - out_rawdata(&data, e->stringval, e->stringlen); - out_rawdata(&data, zero_buffer, align); - } - } + if (opcode_is_db(instruction->opcode)) { + out_eops(&data, instruction->eops); } else if (instruction->opcode == I_INCBIN) { - const char *fname = instruction->eops->stringval; + const char *fname = instruction->eops->val.string.data; FILE *fp; size_t t = instruction->times; /* INCBIN handles TIMES by itself */ off_t base = 0; @@ -688,14 +728,14 @@ int64_t assemble(int32_t segment, int64_t start, int bits, insn *instruction) } if (instruction->eops->next) { - base = instruction->eops->next->offset; + base = instruction->eops->next->val.num.offset; if (base >= len) { len = 0; } else { len -= base; if (instruction->eops->next->next && - len > (off_t)instruction->eops->next->next->offset) - len = (off_t)instruction->eops->next->next->offset; + len > (off_t)instruction->eops->next->next->val.num.offset) + len = (off_t)instruction->eops->next->next->val.num.offset; } } @@ -897,45 +937,57 @@ int64_t assemble(int32_t segment, int64_t start, int bits, insn *instruction) return data.offset - start; } -static void debug_set_db_type(insn *instruction) +static int32_t eops_typeinfo(const extop *e) { - /* Is this really correct? .operands doesn't mean much for Dx */ - int32_t typeinfo = TYS_ELEMENTS(instruction->operands); + int32_t typeinfo = 0; - switch (instruction->opcode) { - case I_DB: - typeinfo |= TY_BYTE; - break; - case I_DW: - typeinfo |= TY_WORD; - break; - case I_DD: - if (instruction->eops_float) - typeinfo |= TY_FLOAT; - else - typeinfo |= TY_DWORD; - break; - case I_DQ: - /* What about double? */ - typeinfo |= TY_QWORD; - break; - case I_DT: - /* What about long double? */ - typeinfo |= TY_TBYTE; - break; - case I_DO: - typeinfo |= TY_OWORD; - break; - case I_DY: - typeinfo |= TY_YWORD; - break; - case I_DZ: - typeinfo |= TY_ZWORD; - break; - default: - panic(); + while (e) { + switch (e->type) { + case EOT_NOTHING: + break; + + case EOT_EXTOP: + typeinfo |= eops_typeinfo(e->val.subexpr); + break; + + case EOT_DB_FLOAT: + switch (e->elem) { + case 1: typeinfo |= TY_BYTE; break; + case 2: typeinfo |= TY_WORD; break; + case 4: typeinfo |= TY_FLOAT; break; + case 8: typeinfo |= TY_QWORD; break; /* double? */ + case 10: typeinfo |= TY_TBYTE; break; /* long double? */ + case 16: typeinfo |= TY_YWORD; break; + case 32: typeinfo |= TY_ZWORD; break; + default: break; + } + break; + + default: + switch (e->elem) { + case 1: typeinfo |= TY_BYTE; break; + case 2: typeinfo |= TY_WORD; break; + case 4: typeinfo |= TY_DWORD; break; + case 8: typeinfo |= TY_QWORD; break; + case 10: typeinfo |= TY_TBYTE; break; + case 16: typeinfo |= TY_YWORD; break; + case 32: typeinfo |= TY_ZWORD; break; + default: break; + } + break; + } + e = e->next; } + return typeinfo; +} + +static inline void debug_set_db_type(insn *instruction) +{ + + int32_t typeinfo = TYS_ELEMENTS(instruction->operands); + + typeinfo |= eops_typeinfo(instruction->eops); dfmt->debug_typevalue(typeinfo); } @@ -1009,6 +1061,42 @@ static void define_equ(insn * instruction) } } +static int64_t len_extops(const extop *e) +{ + int64_t isize = 0; + size_t pad; + + while (e) { + switch (e->type) { + case EOT_NOTHING: + break; + + case EOT_EXTOP: + isize += e->dup * len_extops(e->val.subexpr); + break; + + case EOT_DB_STRING: + case EOT_DB_STRING_FREE: + case EOT_DB_FLOAT: + pad = pad_bytes(e->val.string.len, e->elem); + isize += e->dup * (e->val.string.len + pad); + break; + + case EOT_DB_NUMBER: + warn_overflow_const(e->val.num.offset, e->elem); + isize += e->dup * e->elem; + break; + + case EOT_DB_RESERVE: + isize += e->dup; + break; + } + + e = e->next; + } + + return isize; +} int64_t insn_size(int32_t segment, int64_t offset, int bits, insn *instruction) { @@ -1022,33 +1110,12 @@ int64_t insn_size(int32_t segment, int64_t offset, int bits, insn *instruction) define_equ(instruction); return 0; } else if (opcode_is_db(instruction->opcode)) { - extop *e; - int32_t osize, wsize; - - wsize = db_bytes(instruction->opcode); - nasm_assert(wsize > 0); - - list_for_each(e, instruction->eops) { - int32_t align; - - osize = 0; - if (e->type == EOT_DB_NUMBER) { - osize = 1; - warn_overflow_const(e->offset, wsize); - } else if (e->type == EOT_DB_STRING || - e->type == EOT_DB_STRING_FREE) - osize = e->stringlen; - - align = (-osize) % wsize; - if (align < 0) - align += wsize; - isize += osize + align; - } - + isize = len_extops(instruction->eops); debug_set_db_type(instruction); return isize; } else if (instruction->opcode == I_INCBIN) { - const char *fname = instruction->eops->stringval; + const extop *e = instruction->eops; + const char *fname = e->val.string.data; off_t len; len = nasm_file_size_by_path(fname); @@ -1058,14 +1125,15 @@ int64_t insn_size(int32_t segment, int64_t offset, int bits, insn *instruction) return 0; } - if (instruction->eops->next) { - if (len <= (off_t)instruction->eops->next->offset) { + e = e->next; + if (e) { + if (len <= (off_t)e->val.num.offset) { len = 0; } else { - len -= instruction->eops->next->offset; - if (instruction->eops->next->next && - len > (off_t)instruction->eops->next->next->offset) { - len = (off_t)instruction->eops->next->next->offset; + len -= e->val.num.offset; + e = e->next; + if (e && len > (off_t)e->val.num.offset) { + len = (off_t)e->val.num.offset; } } } diff --git a/asm/parser.c b/asm/parser.c index 15cfcdfa..a59acb19 100644 --- a/asm/parser.c +++ b/asm/parser.c @@ -51,7 +51,7 @@ #include "tables.h" -static int is_comma_next(void); +static int end_expression_next(void); static struct tokenval tokval; @@ -355,14 +355,15 @@ static void mref_set_optype(operand *op) /* * Convert an expression vector returned from evaluate() into an - * extop structure. Return zero on success. + * extop structure. Return zero on success. Note that the eop + * already has dup and elem set, so we can't clear it here. */ -static int value_to_extop(expr * vect, extop *eop, int32_t myseg) +static int value_to_extop(expr *vect, extop *eop, int32_t myseg) { eop->type = EOT_DB_NUMBER; - eop->offset = 0; - eop->segment = eop->wrt = NO_SEG; - eop->relative = false; + eop->val.num.offset = 0; + eop->val.num.segment = eop->val.num.wrt = NO_SEG; + eop->val.num.relative = false; for (; vect->type; vect++) { if (!vect->value) /* zero term, safe to ignore */ @@ -376,25 +377,26 @@ static int value_to_extop(expr * vect, extop *eop, int32_t myseg) if (vect->type == EXPR_SIMPLE) { /* Simple number expression */ - eop->offset += vect->value; + eop->val.num.offset += vect->value; continue; } - if (eop->wrt == NO_SEG && !eop->relative && vect->type == EXPR_WRT) { + if (eop->val.num.wrt == NO_SEG && !eop->val.num.relative && + vect->type == EXPR_WRT) { /* WRT term */ - eop->wrt = vect->value; + eop->val.num.wrt = vect->value; continue; } - if (!eop->relative && + if (!eop->val.num.relative && vect->type == EXPR_SEGBASE + myseg && vect->value == -1) { /* Expression of the form: foo - $ */ - eop->relative = true; + eop->val.num.relative = true; continue; } - if (eop->segment == NO_SEG && vect->type >= EXPR_SEGBASE && - vect->value == 1) { - eop->segment = vect->type - EXPR_SEGBASE; + if (eop->val.num.segment == NO_SEG && + vect->type >= EXPR_SEGBASE && vect->value == 1) { + eop->val.num.segment = vect->type - EXPR_SEGBASE; continue; } @@ -406,6 +408,228 @@ static int value_to_extop(expr * vect, extop *eop, int32_t myseg) return 0; } +/* + * Parse an extended expression, used by db et al. "elem" is the element + * size; initially comes from the specific opcode (e.g. db == 1) but + * can be overridden. + */ +static int parse_eops(extop **result, bool critical, int elem) +{ + extop *eop = NULL, *prev = NULL; + extop **tail = result; + int sign; + int i = tokval.t_type; + int oper_num = 0; + bool do_subexpr = false; + + *tail = NULL; + + /* End of string is obvious; ) ends a sub-expression list e.g. DUP */ + for (i = tokval.t_type; i != TOKEN_EOS; i = stdscan(NULL, &tokval)) { + char endparen = ')'; /* Is a right paren the end of list? */ + + if (i == ')') + break; + + if (!eop) { + nasm_new(eop); + eop->dup = 1; + eop->elem = elem; + do_subexpr = false; + } + sign = +1; + + /* + * end_expression_next() here is to distinguish this from + * a string used as part of an expression... + */ + if (i == TOKEN_QMARK) { + eop->type = EOT_DB_RESERVE; + } else if (do_subexpr && i == '(') { + extop *subexpr; + + stdscan(NULL, &tokval); /* Skip paren */ + if (parse_eops(&eop->val.subexpr, critical, eop->elem) < 0) + goto fail; + + subexpr = eop->val.subexpr; + if (!subexpr) { + /* Subexpression is empty */ + eop->type = EOT_NOTHING; + } else if (!subexpr->next) { + /* Subexpression is a single element, flatten */ + eop->val = subexpr->val; + eop->type = subexpr->type; + eop->dup *= subexpr->dup; + nasm_free(subexpr); + } else { + eop->type = EOT_EXTOP; + } + + /* We should have ended on a closing paren */ + if (tokval.t_type != ')') { + nasm_nonfatal("expected `)' after subexpression, got `%s'", + i == TOKEN_EOS ? + "end of line" : tokval.t_charptr); + goto fail; + } + endparen = 0; /* This time the paren is not the end */ + } else if (i == '%') { + /* %(expression_list) */ + do_subexpr = true; + continue; + } else if (i == TOKEN_SIZE) { + /* Element size override */ + eop->elem = tokval.t_inttwo; + do_subexpr = true; + continue; + } else if (i == TOKEN_STR && end_expression_next()) { + eop->type = EOT_DB_STRING; + eop->val.string.data = tokval.t_charptr; + eop->val.string.len = tokval.t_inttwo; + } else if (i == TOKEN_STRFUNC) { + bool parens = false; + const char *funcname = tokval.t_charptr; + enum strfunc func = tokval.t_integer; + + i = stdscan(NULL, &tokval); + if (i == '(') { + parens = true; + endparen = 0; + i = stdscan(NULL, &tokval); + } + if (i != TOKEN_STR) { + nasm_nonfatal("%s must be followed by a string constant", + funcname); + eop->type = EOT_NOTHING; + } else { + eop->type = EOT_DB_STRING_FREE; + eop->val.string.len = + string_transform(tokval.t_charptr, tokval.t_inttwo, + &eop->val.string.data, func); + if (eop->val.string.len == (size_t)-1) { + nasm_nonfatal("invalid input string to %s", funcname); + eop->type = EOT_NOTHING; + } + } + if (parens && i && i != ')') { + i = stdscan(NULL, &tokval); + if (i != ')') + nasm_nonfatal("unterminated %s function", funcname); + } + } else if (i == '-' || i == '+') { + char *save = stdscan_get(); + struct tokenval tmptok; + + sign = (i == '-') ? -1 : 1; + if (stdscan(NULL, &tmptok) != TOKEN_FLOAT) { + stdscan_set(save); + goto is_expression; + } else { + tokval = tmptok; + goto is_float; + } + } else if (i == TOKEN_FLOAT) { + is_float: + eop->type = EOT_DB_FLOAT; + + if (eop->elem > 16) { + nasm_nonfatal("no %d-bit floating-point format supported", + eop->elem << 3); + eop->val.string.len = 0; + } else if (eop->elem < 1) { + nasm_nonfatal("floating-point constant" + " encountered in unknown instruction"); + /* + * fix suggested by Pedro Gimeno... original line was: + * eop->type = EOT_NOTHING; + */ + eop->val.string.len = 0; + } else { + eop->val.string.len = eop->elem; + + eop = nasm_realloc(eop, sizeof(extop) + eop->val.string.len); + eop->val.string.data = (char *)eop + sizeof(extop); + if (!float_const(tokval.t_charptr, sign, + (uint8_t *)eop->val.string.data, + eop->val.string.len)) + eop->val.string.len = 0; + } + if (!eop->val.string.len) + eop->type = EOT_NOTHING; + } else { + /* anything else, assume it is an expression */ + expr *value; + + is_expression: + value = evaluate(stdscan, NULL, &tokval, NULL, + critical, NULL); + i = tokval.t_type; + if (!value) /* Error in evaluator */ + goto fail; + if (tokval.t_flag & TFLAG_DUP) { + /* Expression followed by DUP */ + if (!is_simple(value)) { + nasm_nonfatal("non-constant argument supplied to DUP"); + goto fail; + } else if (value->value < 0) { + nasm_nonfatal("negative argument supplied to DUP"); + goto fail; + } + eop->dup *= (size_t)value->value; + do_subexpr = true; + continue; + } + if (value_to_extop(value, eop, location.segment)) { + nasm_nonfatal("expression is not simple or relocatable"); + } + } + + if (eop->dup == 0 || eop->type == EOT_NOTHING) { + nasm_free(eop); + } else if (eop->type == EOT_DB_RESERVE && + prev && prev->type == EOT_DB_RESERVE && + prev->elem == eop->elem) { + /* Coalesce multiple EOT_DB_RESERVE */ + prev->dup += eop->dup; + nasm_free(eop); + } else { + /* Add this eop to the end of the chain */ + prev = eop; + *tail = eop; + tail = &eop->next; + } + + oper_num++; + eop = NULL; /* Done with this operand */ + + /* + * We're about to call stdscan(), which will eat the + * comma that we're currently sitting on between + * arguments. However, we'd better check first that it + * _is_ a comma. + */ + if (i == TOKEN_EOS || i == endparen) /* Already at end? */ + break; + if (i != ',') { + i = stdscan(NULL, &tokval); /* eat the comma or final paren */ + if (i == TOKEN_EOS || i == ')') /* got end of expression */ + break; + if (i != ',') { + nasm_nonfatal("comma expected after operand"); + goto fail; + } + } + } + + return oper_num; + +fail: + if (eop) + nasm_free(eop); + return -1; +} + insn *parse_line(char *buffer, insn *result) { bool insn_is_label = false; @@ -562,141 +786,19 @@ restart_parse: critical = pass_final() || (result->opcode == I_INCBIN); if (opcode_is_db(result->opcode) || result->opcode == I_INCBIN) { - extop *eop, **tail = &result->eops, **fixptr; - int oper_num = 0; - int32_t sign; - - result->eops_float = false; + int oper_num; - /* - * Begin to read the DB/DW/DD/DQ/DT/DO/DY/DZ/INCBIN operands. - */ - while (1) { - i = stdscan(NULL, &tokval); - if (i == TOKEN_EOS) - break; - else if (first && i == ':') { - insn_is_label = true; - goto restart_parse; - } - first = false; - fixptr = tail; - eop = *tail = nasm_malloc(sizeof(extop)); - tail = &eop->next; - eop->next = NULL; - eop->type = EOT_NOTHING; - oper_num++; - sign = +1; - - /* - * is_comma_next() here is to distinguish this from - * a string used as part of an expression... - */ - if (i == TOKEN_STR && is_comma_next()) { - eop->type = EOT_DB_STRING; - eop->stringval = tokval.t_charptr; - eop->stringlen = tokval.t_inttwo; - i = stdscan(NULL, &tokval); /* eat the comma */ - } else if (i == TOKEN_STRFUNC) { - bool parens = false; - const char *funcname = tokval.t_charptr; - enum strfunc func = tokval.t_integer; - i = stdscan(NULL, &tokval); - if (i == '(') { - parens = true; - i = stdscan(NULL, &tokval); - } - if (i != TOKEN_STR) { - nasm_nonfatal("%s must be followed by a string constant", - funcname); - eop->type = EOT_NOTHING; - } else { - eop->type = EOT_DB_STRING_FREE; - eop->stringlen = - string_transform(tokval.t_charptr, tokval.t_inttwo, - &eop->stringval, func); - if (eop->stringlen == (size_t)-1) { - nasm_nonfatal("invalid input string to %s", funcname); - eop->type = EOT_NOTHING; - } - } - if (parens && i && i != ')') { - i = stdscan(NULL, &tokval); - if (i != ')') - nasm_nonfatal("unterminated %s function", funcname); - } - if (i && i != ',') - i = stdscan(NULL, &tokval); - } else if (i == '-' || i == '+') { - char *save = stdscan_get(); - int token = i; - sign = (i == '-') ? -1 : 1; - i = stdscan(NULL, &tokval); - if (i != TOKEN_FLOAT) { - stdscan_set(save); - i = tokval.t_type = token; - goto is_expression; - } else { - goto is_float; - } - } else if (i == TOKEN_FLOAT) { -is_float: - eop->type = EOT_DB_STRING; - result->eops_float = true; - - eop->stringlen = db_bytes(result->opcode); - if (eop->stringlen > 16) { - nasm_nonfatal("floating-point constant" - " encountered in DY or DZ instruction"); - eop->stringlen = 0; - } else if (eop->stringlen < 1) { - nasm_nonfatal("floating-point constant" - " encountered in unknown instruction"); - /* - * fix suggested by Pedro Gimeno... original line was: - * eop->type = EOT_NOTHING; - */ - eop->stringlen = 0; - } - - eop = nasm_realloc(eop, sizeof(extop) + eop->stringlen); - tail = &eop->next; - *fixptr = eop; - eop->stringval = (char *)eop + sizeof(extop); - if (!eop->stringlen || - !float_const(tokval.t_charptr, sign, - (uint8_t *)eop->stringval, eop->stringlen)) - eop->type = EOT_NOTHING; - i = stdscan(NULL, &tokval); /* eat the comma */ - } else { - /* anything else, assume it is an expression */ - expr *value; - -is_expression: - value = evaluate(stdscan, NULL, &tokval, NULL, - critical, NULL); - i = tokval.t_type; - if (!value) /* Error in evaluator */ - goto fail; - if (value_to_extop(value, eop, location.segment)) { - nasm_nonfatal("operand %d: expression is not simple or relocatable", - oper_num); - } - } + i = stdscan(NULL, &tokval); - /* - * We're about to call stdscan(), which will eat the - * comma that we're currently sitting on between - * arguments. However, we'd better check first that it - * _is_ a comma. - */ - if (i == TOKEN_EOS) /* also could be EOL */ - break; - if (i != ',') { - nasm_nonfatal("comma expected after operand %d", oper_num); - goto fail; - } + if (first && i == ':') { + /* Really a label */ + insn_is_label = true; + goto restart_parse; } + first = false; + oper_num = parse_eops(&result->eops, critical, db_bytes(result->opcode)); + if (oper_num < 0) + goto fail; if (result->opcode == I_INCBIN) { /* @@ -724,11 +826,12 @@ is_expression: * Throw the instruction away. */ goto fail; - } else /* DB ... */ if (oper_num == 0) - nasm_warn(WARN_OTHER, "no operand for data declaration"); - else + } else { + /* DB et al */ result->operands = oper_num; - + if (oper_num == 0) + nasm_warn(WARN_OTHER, "no operand for data declaration"); + } return result; } @@ -1197,7 +1300,7 @@ fail: return result; } -static int is_comma_next(void) +static int end_expression_next(void) { struct tokenval tv; char *p; @@ -1207,17 +1310,34 @@ static int is_comma_next(void) i = stdscan(NULL, &tv); stdscan_set(p); - return (i == ',' || i == ';' || !i); + return (i == ',' || i == ';' || i == ')' || !i); } -void cleanup_insn(insn * i) +static void free_eops(extop *e) { - extop *e; + extop *next; + + while (e) { + next = e->next; + switch (e->type) { + case EOT_EXTOP: + free_eops(e->val.subexpr); + break; + + case EOT_DB_STRING_FREE: + nasm_free(e->val.string.data); + break; + + default: + break; + } - while ((e = i->eops)) { - i->eops = e->next; - if (e->type == EOT_DB_STRING_FREE) - nasm_free(e->stringval); nasm_free(e); + e = next; } } + +void cleanup_insn(insn * i) +{ + free_eops(i->eops); +} diff --git a/asm/stdscan.c b/asm/stdscan.c index 4491430d..c1c38f42 100644 --- a/asm/stdscan.c +++ b/asm/stdscan.c @@ -128,6 +128,8 @@ int stdscan(void *private_data, struct tokenval *tv) (void)private_data; /* Don't warn that this parameter is unused */ + nasm_zero(*tv); + stdscan_bufptr = nasm_skip_spaces(stdscan_bufptr); if (!*stdscan_bufptr) return tv->t_type = TOKEN_EOS; diff --git a/asm/tokens.dat b/asm/tokens.dat index 81875f0e..ab37dcc1 100644 --- a/asm/tokens.dat +++ b/asm/tokens.dat @@ -42,6 +42,7 @@ # Tokens other than instructions and registers # +# The ? operator is a keyword, because ? is a legitimate symbol character % TOKEN_QMARK, 0, 0, 0 ? @@ -88,9 +89,14 @@ short strict to +# PTR is a legitimate symbol, but has an optional warning % TOKEN_ID, 0, TFLAG_WARN, 0 ptr +# DUP is a legitimate symbol, but also has context-specific use in extops +% TOKEN_ID, 0, TFLAG_DUP, 0 +dup + % TOKEN_FLOAT, 0, 0, 0 __?infinity?__ __?nan?__ diff --git a/include/nasm.h b/include/nasm.h index 2a207a03..bb9dbf6b 100644 --- a/include/nasm.h +++ b/include/nasm.h @@ -462,6 +462,7 @@ enum ccode { /* condition code names */ #define TFLAG_BRC_ANY (TFLAG_BRC | TFLAG_BRC_OPT) #define TFLAG_BRDCAST (1 << 2) /* broadcasting decorator */ #define TFLAG_WARN (1 << 3) /* warning only, treat as ID */ +#define TFLAG_DUP (1 << 4) /* valid ID but also has context-specific use */ static inline uint8_t get_cond_opcode(enum ccode c) { @@ -548,13 +549,6 @@ enum prefixes { /* instruction prefixes */ PREFIX_ENUM_LIMIT }; -enum extop_type { /* extended operand types */ - EOT_NOTHING, - EOT_DB_STRING, /* Byte string */ - EOT_DB_STRING_FREE, /* Byte string which should be nasm_free'd*/ - EOT_DB_NUMBER /* Integer */ -}; - enum ea_flags { /* special EA flags */ EAF_BYTEOFFS = 1, /* force offset part to byte size */ EAF_WORDOFFS = 2, /* force offset part to [d]word size */ @@ -595,15 +589,34 @@ typedef struct operand { /* operand to an instruction */ #define OPFLAG_RELATIVE 8 /* operand is self-relative, e.g. [foo - $] where foo is not in the current segment */ +enum extop_type { /* extended operand types */ + EOT_NOTHING = 0, + EOT_EXTOP, /* Subexpression */ + EOT_DB_STRING, /* Byte string */ + EOT_DB_FLOAT, /* Floating-pointer number (special byte string) */ + EOT_DB_STRING_FREE, /* Byte string which should be nasm_free'd*/ + EOT_DB_NUMBER, /* Integer */ + EOT_DB_RESERVE /* ? */ +}; + typedef struct extop { /* extended operand */ - struct extop *next; /* linked list */ - char *stringval; /* if it's a string, then here it is */ - size_t stringlen; /* ... and here's how long it is */ - int64_t offset; /* ... it's given here ... */ - int32_t segment; /* if it's a number/address, then... */ - int32_t wrt; /* ... and here */ - bool relative; /* self-relative expression */ - enum extop_type type; /* defined above */ + struct extop *next; /* linked list */ + union { + struct { /* text or byte string */ + char *data; + size_t len; + } string; + struct { /* numeric expression */ + int64_t offset; /* numeric value or address offset */ + int32_t segment; /* address segment */ + int32_t wrt; /* address wrt */ + bool relative; /* self-relative expression */ + } num; + struct extop *subexpr; /* actual expressions */ + } val; + size_t dup; /* duplicated? */ + enum extop_type type; /* defined above */ + int elem; /* element size override, if any (bytes) */ } extop; enum ea_type { @@ -827,7 +840,7 @@ struct ofmt { * This procedure is called at the start of each pass. */ void (*reset)(void); - + /* * This is the modern output function, which gets passed * a struct out_data with much more information. See the @@ -1252,8 +1265,8 @@ enum decorator_tokens { * Global modes */ -/* - * flag to disable optimizations selectively +/* + * flag to disable optimizations selectively * this is useful to turn-off certain optimizations */ enum optimization_disable_flag { diff --git a/test/dup.asm b/test/dup.asm new file mode 100644 index 00000000..2e939119 --- /dev/null +++ b/test/dup.asm @@ -0,0 +1,21 @@ + bits 32 + + db 33 + db (44) +; db (44,55) -- error + db %(44,55) + db %('XX','YY') + db ('AA') + db %('BB') + db ? + db 6 dup (33) + db 6 dup (33, 34) + db 6 dup (33, 34), 35 + db 7 dup (99) + db 7 dup (?,?) + dw byte (?,44) + + dw 0xcc, 4 dup byte ('PQR'), ?, 0xabcd + + dd 16 dup (0xaaaa, ?, 0xbbbbbb) + dd 64 dup (?) |