summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorH. Peter Anvin <hpa@zytor.com>2007-09-19 16:22:03 -0700
committerH. Peter Anvin <hpa@zytor.com>2007-09-19 16:22:03 -0700
commiteb49a4e1d402d5a1ce95e495787b900aa5303a47 (patch)
tree81fbe28b4d1faf6e8d68aa3d7af58b4443e948d2
parentb4b43178783e963e95fb290e82f1a0c6d6725520 (diff)
parentbf9a24f46471abad75fa3efba059646a6c4f5026 (diff)
downloadnasm-eb49a4e1d402d5a1ce95e495787b900aa5303a47.tar.gz
Merge commit 'origin/sse5'
-rw-r--r--assemble.c344
-rw-r--r--disasm.c274
-rw-r--r--doc/nasmdoc.src115
-rw-r--r--float.c388
-rw-r--r--insns.dat354
-rw-r--r--insns.h35
-rw-r--r--insns.pl214
-rw-r--r--nasm.h30
-rw-r--r--parser.c73
-rw-r--r--perllib/phash.ph4
-rwxr-xr-xpptok.pl4
-rw-r--r--stdscan.c10
-rw-r--r--test/float.asm133
-rw-r--r--test/floatx.asm125
-rw-r--r--test/fmsub.asm16
-rw-r--r--tokens.dat7
-rwxr-xr-xtokhash.pl8
17 files changed, 1547 insertions, 587 deletions
diff --git a/assemble.c b/assemble.c
index 54522712..efb02207 100644
--- a/assemble.c
+++ b/assemble.c
@@ -12,39 +12,43 @@
* (POP is never used for CS) depending on operand 0
* \5, \7 - the second byte of POP/PUSH codes for FS, GS, depending
* on operand 0
- * \10, \11, \12 - a literal byte follows in the code stream, to be added
- * to the register value of operand 0, 1 or 2
- * \17 - encodes the literal byte 0. (Some compilers don't take
- * kindly to a zero byte in the _middle_ of a compile time
- * string constant, so I had to put this hack in.)
- * \14, \15, \16 - a signed byte immediate operand, from operand 0, 1 or 2
- * \20, \21, \22 - a byte immediate operand, from operand 0, 1 or 2
- * \24, \25, \26 - an unsigned byte immediate operand, from operand 0, 1 or 2
- * \30, \31, \32 - a word immediate operand, from operand 0, 1 or 2
- * \34, \35, \36 - select between \3[012] and \4[012] depending on 16/32 bit
+ * \10..\13 - a literal byte follows in the code stream, to be added
+ * to the register value of operand 0..3
+ * \14..\17 - a signed byte immediate operand, from operand 0..3
+ * \20..\23 - a byte immediate operand, from operand 0..3
+ * \24..\27 - an unsigned byte immediate operand, from operand 0..3
+ * \30..\33 - a word immediate operand, from operand 0..3
+ * \34..\37 - select between \3[0-3] and \4[0-3] depending on 16/32 bit
* assembly mode or the operand-size override on the operand
- * \37 - a word constant, from the _segment_ part of operand 0
- * \40, \41, \42 - a long immediate operand, from operand 0, 1 or 2
- * \44, \45, \46 - select between \3[012], \4[012] and \5[456]
+ * \40..\43 - a long immediate operand, from operand 0..3
+ * \44..\47 - select between \3[0-3], \4[0-3] and \5[4-7]
* depending on assembly mode or the address-size override
* on the operand.
- * \50, \51, \52 - a byte relative operand, from operand 0, 1 or 2
- * \54, \55, \56 - a qword immediate operand, from operand 0, 1 or 2
- * \60, \61, \62 - a word relative operand, from operand 0, 1 or 2
- * \64, \65, \66 - select between \6[012] and \7[012] depending on 16/32 bit
+ * \50..\53 - a byte relative operand, from operand 0..3
+ * \54..\57 - a qword immediate operand, from operand 0..3
+ * \60..\63 - a word relative operand, from operand 0..3
+ * \64..\67 - select between \6[0-3] and \7[0-3] depending on 16/32 bit
* assembly mode or the operand-size override on the operand
- * \70, \71, \72 - a long relative operand, from operand 0, 1 or 2
+ * \70..\73 - a long relative operand, from operand 0..3
+ * \74..\77 - a word constant, from the _segment_ part of operand 0..3
* \1ab - a ModRM, calculated on EA in operand a, with the spare
* field the register value of operand b.
- * \130,\131,\132 - an immediate word or signed byte for operand 0, 1, or 2
- * \133,\134,\135 - or 2 (s-field) into next opcode byte if operand 0, 1, or 2
+ * \140..\143 - an immediate word or signed byte for operand 0..3
+ * \144..\147 - or 2 (s-field) into next opcode byte if operand 0..3
* is a signed byte rather than a word.
- * \140,\141,\142 - an immediate dword or signed byte for operand 0, 1, or 2
- * \143,\144,\145 - or 2 (s-field) into next opcode byte if operand 0, 1, or 2
+ * \150..\153 - an immediate dword or signed byte for operand 0..3
+ * \154..\157 - or 2 (s-field) into next opcode byte if operand 0..3
* is a signed byte rather than a dword.
- * \150,\151,\152 - an immediate qword or signed byte for operand 0, 1, or 2
- * \153,\154,\155 - or 2 (s-field) into next opcode byte if operand 0, 1, or 2
- * is a signed byte rather than a qword.
+ * \160..\163 - this instruction uses DREX rather than REX, with the
+ * OC0 field set to 0, and the dest field taken from
+ * operand 0..3.
+ * \164..\167 - this instruction uses DREX rather than REX, with the
+ * OC0 field set to 1, and the dest field taken from
+ * operand 0..3.
+ * \170 - encodes the literal byte 0. (Some compilers don't take
+ * kindly to a zero byte in the _middle_ of a compile time
+ * string constant, so I had to put this hack in.)
+ * \171 - placement of DREX suffix in the absence of an EA
* \2ab - a ModRM, calculated on EA in operand a, with the spare
* field equal to digit b.
* \30x - might be an 0x67 byte, depending on the address size of
@@ -246,6 +250,9 @@ int32_t assemble(int32_t segment, int32_t offset, int bits, uint32_t cp,
case I_DT:
wsize = 10;
break;
+ case I_DO:
+ wsize = 16;
+ break;
default:
break;
}
@@ -560,10 +567,9 @@ int32_t insn_size(int32_t segment, int32_t offset, int bits, uint32_t cp,
if (instruction->opcode == -1)
return 0;
- if (instruction->opcode == I_DB ||
- instruction->opcode == I_DW ||
- instruction->opcode == I_DD ||
- instruction->opcode == I_DQ || instruction->opcode == I_DT) {
+ if (instruction->opcode == I_DB || instruction->opcode == I_DW ||
+ instruction->opcode == I_DD || instruction->opcode == I_DQ ||
+ instruction->opcode == I_DT || instruction->opcode == I_DO) {
extop *e;
int32_t isize, osize, wsize = 0; /* placate gcc */
@@ -584,6 +590,9 @@ int32_t insn_size(int32_t segment, int32_t offset, int bits, uint32_t cp,
case I_DT:
wsize = 10;
break;
+ case I_DO:
+ wsize = 16;
+ break;
default:
break;
}
@@ -730,73 +739,79 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits,
case 010:
case 011:
case 012:
+ case 013:
ins->rex |=
op_rexflags(&ins->oprs[c - 010], REX_B|REX_H|REX_P|REX_W);
codes++, length++;
break;
- case 017:
- length++;
- break;
case 014:
case 015:
case 016:
+ case 017:
length++;
break;
case 020:
case 021:
case 022:
+ case 023:
length++;
break;
case 024:
case 025:
case 026:
+ case 027:
length++;
break;
case 030:
case 031:
case 032:
+ case 033:
length += 2;
break;
case 034:
case 035:
case 036:
+ case 037:
if (ins->oprs[c - 034].type & (BITS16 | BITS32 | BITS64))
length += (ins->oprs[c - 034].type & BITS16) ? 2 : 4;
else
length += (bits == 16) ? 2 : 4;
break;
- case 037:
- length += 2;
- break;
case 040:
case 041:
case 042:
+ case 043:
length += 4;
break;
case 044:
case 045:
case 046:
+ case 047:
length += ((ins->oprs[c - 044].addr_size ?
ins->oprs[c - 044].addr_size : bits) >> 3);
break;
case 050:
case 051:
case 052:
+ case 053:
length++;
break;
case 054:
case 055:
case 056:
+ case 057:
length += 8; /* MOV reg64/imm */
break;
case 060:
case 061:
case 062:
+ case 063:
length += 2;
break;
case 064:
case 065:
case 066:
+ case 067:
if (ins->oprs[c - 064].type & (BITS16 | BITS32 | BITS64))
length += (ins->oprs[c - 064].type & BITS16) ? 2 : 4;
else
@@ -805,33 +820,66 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits,
case 070:
case 071:
case 072:
+ case 073:
length += 4;
break;
- case 0130:
- case 0131:
- case 0132:
- length += is_sbyte(ins, c - 0130, 16) ? 1 : 2;
- break;
- case 0133:
- case 0134:
- case 0135:
- codes += 2;
- length++;
+ case 074:
+ case 075:
+ case 076:
+ case 077:
+ length += 2;
break;
case 0140:
case 0141:
case 0142:
- length += is_sbyte(ins, c - 0140, 32) ? 1 : 4;
+ case 0143:
+ length += is_sbyte(ins, c - 0140, 16) ? 1 : 2;
break;
- case 0143:
case 0144:
case 0145:
+ case 0146:
+ case 0147:
+ codes += 2;
+ length++;
+ break;
+ case 0150:
+ case 0151:
+ case 0152:
+ case 0153:
+ length += is_sbyte(ins, c - 0150, 32) ? 1 : 4;
+ break;
+ case 0154:
+ case 0155:
+ case 0156:
+ case 0157:
codes += 2;
length++;
break;
+ case 0160:
+ case 0161:
+ case 0162:
+ case 0163:
+ length++;
+ ins->rex |= REX_D;
+ ins->drexdst = regval(&ins->oprs[c & 3]);
+ break;
+ case 0164:
+ case 0165:
+ case 0166:
+ case 0167:
+ length++;
+ ins->rex |= REX_D|REX_OC;
+ ins->drexdst = regval(&ins->oprs[c & 3]);
+ break;
+ case 0170:
+ length++;
+ break;
+ case 0171:
+ break;
case 0300:
case 0301:
case 0302:
+ case 0303:
length += chsize(&ins->oprs[c - 0300], bits);
break;
case 0310:
@@ -927,7 +975,19 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits,
}
ins->rex &= rex_mask;
- if (ins->rex & REX_REAL) {
+
+ if (ins->rex & REX_D) {
+ if (ins->rex & REX_H) {
+ errfunc(ERR_NONFATAL, "cannot use high register in drex instruction");
+ return -1;
+ }
+ if (bits != 64 && ((ins->rex & (REX_W|REX_X|REX_B)) ||
+ ins->drexdst > 7)) {
+ errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
+ return -1;
+ }
+ length++;
+ } else if (ins->rex & REX_REAL) {
if (ins->rex & REX_H) {
errfunc(ERR_NONFATAL, "cannot use high register in rex instruction");
return -1;
@@ -937,8 +997,8 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits,
cpu >= IF_X86_64)) {
length++;
} else {
- errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
- return -1;
+ errfunc(ERR_NONFATAL, "invalid operands in non-64-bit mode");
+ return -1;
}
}
@@ -946,7 +1006,7 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits,
}
#define EMIT_REX() \
- if((ins->rex & REX_REAL) && (bits == 64)) { \
+ if (!(ins->rex & REX_D) && (ins->rex & REX_REAL) && (bits == 64)) { \
ins->rex = (ins->rex & REX_REAL)|REX_P; \
out(offset, segment, &ins->rex, OUT_RAWDATA+1, NO_SEG, NO_SEG); \
ins->rex = 0; \
@@ -1020,21 +1080,17 @@ static void gencode(int32_t segment, int32_t offset, int bits,
case 010:
case 011:
case 012:
+ case 013:
EMIT_REX();
bytes[0] = *codes++ + ((regval(&ins->oprs[c - 010])) & 7);
out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
offset += 1;
break;
- case 017:
- bytes[0] = 0;
- out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
- offset += 1;
- break;
-
case 014:
case 015:
case 016:
+ case 017:
if (ins->oprs[c - 014].offset < -128
|| ins->oprs[c - 014].offset > 127) {
errfunc(ERR_WARNING, "signed byte value exceeds bounds");
@@ -1055,6 +1111,7 @@ static void gencode(int32_t segment, int32_t offset, int bits,
case 020:
case 021:
case 022:
+ case 023:
if (ins->oprs[c - 020].offset < -256
|| ins->oprs[c - 020].offset > 255) {
errfunc(ERR_WARNING, "byte value exceeds bounds");
@@ -1074,6 +1131,7 @@ static void gencode(int32_t segment, int32_t offset, int bits,
case 024:
case 025:
case 026:
+ case 027:
if (ins->oprs[c - 024].offset < 0
|| ins->oprs[c - 024].offset > 255)
errfunc(ERR_WARNING, "unsigned byte value exceeds bounds");
@@ -1092,6 +1150,7 @@ static void gencode(int32_t segment, int32_t offset, int bits,
case 030:
case 031:
case 032:
+ case 033:
if (ins->oprs[c - 030].segment == NO_SEG &&
ins->oprs[c - 030].wrt == NO_SEG &&
(ins->oprs[c - 030].offset < -65536L ||
@@ -1107,6 +1166,7 @@ static void gencode(int32_t segment, int32_t offset, int bits,
case 034:
case 035:
case 036:
+ case 037:
if (ins->oprs[c - 034].type & (BITS16 | BITS32))
size = (ins->oprs[c - 034].type & BITS16) ? 2 : 4;
else
@@ -1119,20 +1179,10 @@ static void gencode(int32_t segment, int32_t offset, int bits,
offset += size;
break;
- case 037:
- if (ins->oprs[0].segment == NO_SEG)
- errfunc(ERR_NONFATAL, "value referenced by FAR is not"
- " relocatable");
- data = 0L;
- out(offset, segment, &data, OUT_ADDRESS + 2,
- outfmt->segbase(1 + ins->oprs[0].segment),
- ins->oprs[0].wrt);
- offset += 2;
- break;
-
case 040:
case 041:
case 042:
+ case 043:
data = ins->oprs[c - 040].offset;
out(offset, segment, &data, OUT_ADDRESS + 4,
ins->oprs[c - 040].segment, ins->oprs[c - 040].wrt);
@@ -1142,6 +1192,7 @@ static void gencode(int32_t segment, int32_t offset, int bits,
case 044:
case 045:
case 046:
+ case 047:
data = ins->oprs[c - 044].offset;
size = ((ins->oprs[c - 044].addr_size ?
ins->oprs[c - 044].addr_size : bits) >> 3);
@@ -1155,6 +1206,7 @@ static void gencode(int32_t segment, int32_t offset, int bits,
case 050:
case 051:
case 052:
+ case 053:
if (ins->oprs[c - 050].segment != segment)
errfunc(ERR_NONFATAL,
"short relative jump outside segment");
@@ -1169,6 +1221,7 @@ static void gencode(int32_t segment, int32_t offset, int bits,
case 054:
case 055:
case 056:
+ case 057:
data = (int64_t)ins->oprs[c - 054].offset;
out(offset, segment, &data, OUT_ADDRESS + 8,
ins->oprs[c - 054].segment, ins->oprs[c - 054].wrt);
@@ -1178,6 +1231,7 @@ static void gencode(int32_t segment, int32_t offset, int bits,
case 060:
case 061:
case 062:
+ case 063:
if (ins->oprs[c - 060].segment != segment) {
data = ins->oprs[c - 060].offset;
out(offset, segment, &data,
@@ -1194,6 +1248,7 @@ static void gencode(int32_t segment, int32_t offset, int bits,
case 064:
case 065:
case 066:
+ case 067:
if (ins->oprs[c - 064].type & (BITS16 | BITS32 | BITS64))
size = (ins->oprs[c - 064].type & BITS16) ? 2 : 4;
else
@@ -1214,6 +1269,7 @@ static void gencode(int32_t segment, int32_t offset, int bits,
case 070:
case 071:
case 072:
+ case 073:
if (ins->oprs[c - 070].segment != segment) {
data = ins->oprs[c - 070].offset;
out(offset, segment, &data,
@@ -1227,70 +1283,115 @@ static void gencode(int32_t segment, int32_t offset, int bits,
offset += 4;
break;
- case 0130:
- case 0131:
- case 0132:
- data = ins->oprs[c - 0130].offset;
- if (is_sbyte(ins, c - 0130, 16)) {
+ case 074:
+ case 075:
+ case 076:
+ case 077:
+ if (ins->oprs[c - 074].segment == NO_SEG)
+ errfunc(ERR_NONFATAL, "value referenced by FAR is not"
+ " relocatable");
+ data = 0L;
+ out(offset, segment, &data, OUT_ADDRESS + 2,
+ outfmt->segbase(1 + ins->oprs[c - 074].segment),
+ ins->oprs[c - 074].wrt);
+ offset += 2;
+ break;
+
+ case 0140:
+ case 0141:
+ case 0142:
+ case 0143:
+ data = ins->oprs[c - 0140].offset;
+ if (is_sbyte(ins, c - 0140, 16)) {
bytes[0] = data;
out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG,
NO_SEG);
offset++;
} else {
- if (ins->oprs[c - 0130].segment == NO_SEG &&
- ins->oprs[c - 0130].wrt == NO_SEG &&
+ if (ins->oprs[c - 0140].segment == NO_SEG &&
+ ins->oprs[c - 0140].wrt == NO_SEG &&
(data < -65536L || data > 65535L)) {
errfunc(ERR_WARNING, "word value exceeds bounds");
}
out(offset, segment, &data, OUT_ADDRESS + 2,
- ins->oprs[c - 0130].segment, ins->oprs[c - 0130].wrt);
+ ins->oprs[c - 0140].segment, ins->oprs[c - 0140].wrt);
offset += 2;
}
break;
- case 0133:
- case 0134:
- case 0135:
+ case 0144:
+ case 0145:
+ case 0146:
+ case 0147:
EMIT_REX();
codes++;
bytes[0] = *codes++;
- if (is_sbyte(ins, c - 0133, 16))
+ if (is_sbyte(ins, c - 0144, 16))
bytes[0] |= 2; /* s-bit */
out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
offset++;
break;
- case 0140:
- case 0141:
- case 0142:
- data = ins->oprs[c - 0140].offset;
- if (is_sbyte(ins, c - 0140, 32)) {
+ case 0150:
+ case 0151:
+ case 0152:
+ case 0153:
+ data = ins->oprs[c - 0150].offset;
+ if (is_sbyte(ins, c - 0150, 32)) {
bytes[0] = data;
out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG,
NO_SEG);
offset++;
} else {
out(offset, segment, &data, OUT_ADDRESS + 4,
- ins->oprs[c - 0140].segment, ins->oprs[c - 0140].wrt);
+ ins->oprs[c - 0150].segment, ins->oprs[c - 0150].wrt);
offset += 4;
}
break;
- case 0143:
- case 0144:
- case 0145:
+ case 0154:
+ case 0155:
+ case 0156:
+ case 0157:
EMIT_REX();
codes++;
bytes[0] = *codes++;
- if (is_sbyte(ins, c - 0143, 32))
+ if (is_sbyte(ins, c - 0154, 32))
bytes[0] |= 2; /* s-bit */
out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
offset++;
break;
+ case 0160:
+ case 0161:
+ case 0162:
+ case 0163:
+ case 0164:
+ case 0165:
+ case 0166:
+ case 0167:
+ break;
+
+ case 0170:
+ bytes[0] = 0;
+ out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
+ offset += 1;
+ break;
+
+ case 0171:
+ bytes[0] =
+ (ins->drexdst << 4) |
+ (ins->rex & REX_OC ? 0x08 : 0) |
+ (ins->rex & (REX_R|REX_X|REX_B));
+ ins->rex = 0;
+ out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
+ offset++;
+ break;
+
case 0300:
case 0301:
case 0302:
+ case 0303:
if (chsize(&ins->oprs[c - 0300], bits)) {
*bytes = 0x67;
out(offset, segment, bytes,
@@ -1448,6 +1549,15 @@ static void gencode(int32_t segment, int32_t offset, int bits,
if (ea_data.sib_present)
*p++ = ea_data.sib;
+ /* DREX suffixes come between the SIB and the displacement */
+ if (ins->rex & REX_D) {
+ *p++ =
+ (ins->drexdst << 4) |
+ (ins->rex & REX_OC ? 0x08 : 0) |
+ (ins->rex & (REX_R|REX_X|REX_B));
+ ins->rex = 0;
+ }
+
s = p - bytes;
out(offset, segment, bytes, OUT_RAWDATA + s,
NO_SEG, NO_SEG);
@@ -1537,7 +1647,7 @@ static int rexflags(int val, int32_t flags, int mask)
static int matches(const struct itemplate *itemp, insn * instruction, int bits)
{
- int i, size[3], asize, oprs, ret;
+ int i, size[MAX_OPERANDS], asize, oprs, ret;
ret = 100;
@@ -1564,7 +1674,12 @@ static int matches(const struct itemplate *itemp, insn * instruction, int bits)
* Check that the operand flags all match up
*/
for (i = 0; i < itemp->operands; i++) {
- if (itemp->opd[i] & ~instruction->oprs[i].type ||
+ if (itemp->opd[i] & SAME_AS) {
+ int j = itemp->opd[i] & ~SAME_AS;
+ if (instruction->oprs[i].type != instruction->oprs[j].type ||
+ instruction->oprs[i].basereg != instruction->oprs[j].basereg)
+ return 0;
+ } else if (itemp->opd[i] & ~instruction->oprs[i].type ||
((itemp->opd[i] & SIZE_MASK) &&
((itemp->opd[i] ^ instruction->oprs[i].type) & SIZE_MASK))) {
if ((itemp->opd[i] & ~instruction->oprs[i].type & ~SIZE_MASK) ||
@@ -1579,7 +1694,7 @@ static int matches(const struct itemplate *itemp, insn * instruction, int bits)
* Check operand sizes
*/
if (itemp->flags & IF_ARMASK) {
- size[0] = size[1] = size[2] = 0;
+ memset(size, 0, sizeof size);
switch (itemp->flags & IF_ARMASK) {
case IF_AR0:
@@ -1591,34 +1706,59 @@ static int matches(const struct itemplate *itemp, insn * instruction, int bits)
case IF_AR2:
i = 2;
break;
+ case IF_AR3:
+ i = 3;
+ break;
default:
break; /* Shouldn't happen */
}
- if (itemp->flags & IF_SB) {
+ switch (itemp->flags & IF_SMASK) {
+ case IF_SB:
size[i] = BITS8;
- } else if (itemp->flags & IF_SW) {
+ break;
+ case IF_SW:
size[i] = BITS16;
- } else if (itemp->flags & IF_SD) {
+ break;
+ case IF_SD:
size[i] = BITS32;
- } else if (itemp->flags & IF_SQ) {
+ break;
+ case IF_SQ:
size[i] = BITS64;
+ break;
+ case IF_SO:
+ size[i] = BITS128;
+ break;
+ default:
+ break;
}
} else {
asize = 0;
- if (itemp->flags & IF_SB) {
+ switch (itemp->flags & IF_SMASK) {
+ case IF_SB:
asize = BITS8;
oprs = itemp->operands;
- } else if (itemp->flags & IF_SW) {
+ break;
+ case IF_SW:
asize = BITS16;
oprs = itemp->operands;
- } else if (itemp->flags & IF_SD) {
+ break;
+ case IF_SD:
asize = BITS32;
oprs = itemp->operands;
- } else if (itemp->flags & IF_SQ) {
+ break;
+ case IF_SQ:
asize = BITS64;
oprs = itemp->operands;
+ break;
+ case IF_SO:
+ asize = BITS128;
+ oprs = itemp->operands;
+ break;
+ default:
+ break;
}
- size[0] = size[1] = size[2] = asize;
+ for (i = 0; i < MAX_OPERANDS; i++)
+ size[i] = asize;
}
if (itemp->flags & (IF_SM | IF_SM2)) {
diff --git a/disasm.c b/disasm.c
index 0452c295..a6c1c729 100644
--- a/disasm.c
+++ b/disasm.c
@@ -167,16 +167,46 @@ static const char *whichcond(int condval)
}
/*
+ * Process a DREX suffix
+ */
+static uint8_t *do_drex(uint8_t *data, insn *ins)
+{
+ uint8_t drex = *data++;
+ operand *dst = &ins->oprs[ins->drexdst];
+
+ if ((drex & 8) != ((ins->rex & REX_OC) ? 8 : 0))
+ return NULL; /* OC0 mismatch */
+ ins->rex = (ins->rex & ~7) | (drex & 7);
+
+ dst->segment = SEG_RMREG;
+ dst->basereg = drex >> 4;
+ return data;
+}
+
+
+/*
* Process an effective address (ModRM) specification.
*/
static uint8_t *do_ea(uint8_t *data, int modrm, int asize,
- int segsize, operand * op, int rex)
+ int segsize, operand * op, insn *ins)
{
int mod, rm, scale, index, base;
+ int rex;
+ uint8_t sib = 0;
mod = (modrm >> 6) & 03;
rm = modrm & 07;
+ if (mod != 3 && rm == 4 && asize != 16)
+ sib = *data++;
+
+ if (ins->rex & REX_D) {
+ data = do_drex(data, ins);
+ if (!data)
+ return NULL;
+ }
+ rex = ins->rex;
+
if (mod == 3) { /* pure register version */
op->basereg = rm+(rex & REX_B ? 8 : 0);
op->segment |= SEG_RMREG;
@@ -282,10 +312,9 @@ static uint8_t *do_ea(uint8_t *data, int modrm, int asize,
}
if (rm == 4) { /* process SIB */
- scale = (*data >> 6) & 03;
- index = (*data >> 3) & 07;
- base = *data & 07;
- data++;
+ scale = (sib >> 6) & 03;
+ index = (sib >> 3) & 07;
+ base = sib & 07;
op->scale = 1 << scale;
@@ -341,12 +370,12 @@ static int matches(const struct itemplate *t, uint8_t *data,
uint8_t lock = prefix->lock;
int osize = prefix->osize;
int asize = prefix->asize;
+ int i;
- ins->oprs[0].segment = ins->oprs[1].segment =
- ins->oprs[2].segment =
- ins->oprs[0].addr_size = ins->oprs[1].addr_size =
- ins->oprs[2].addr_size = (segsize == 64 ? SEG_64BIT :
- segsize == 32 ? SEG_32BIT : 0);
+ for (i = 0; i < MAX_OPERANDS; i++) {
+ ins->oprs[i].segment = ins->oprs[i].addr_size =
+ (segsize == 64 ? SEG_64BIT : segsize == 32 ? SEG_32BIT : 0);
+ }
ins->condition = -1;
ins->rex = prefix->rex;
@@ -419,7 +448,7 @@ static int matches(const struct itemplate *t, uint8_t *data,
default:
return FALSE;
}
- } else if (c >= 010 && c <= 012) {
+ } else if (c >= 010 && c <= 013) {
int t = *r++, d = *data++;
if (d < t || d > t + 7)
return FALSE;
@@ -428,20 +457,17 @@ static int matches(const struct itemplate *t, uint8_t *data,
(ins->rex & REX_B ? 8 : 0);
ins->oprs[c - 010].segment |= SEG_RMREG;
}
- } else if (c == 017) {
- if (*data++)
- return FALSE;
- } else if (c >= 014 && c <= 016) {
+ } else if (c >= 014 && c <= 017) {
ins->oprs[c - 014].offset = (int8_t)*data++;
ins->oprs[c - 014].segment |= SEG_SIGNED;
- } else if (c >= 020 && c <= 022) {
+ } else if (c >= 020 && c <= 023) {
ins->oprs[c - 020].offset = *data++;
- } else if (c >= 024 && c <= 026) {
+ } else if (c >= 024 && c <= 027) {
ins->oprs[c - 024].offset = *data++;
- } else if (c >= 030 && c <= 032) {
+ } else if (c >= 030 && c <= 033) {
ins->oprs[c - 030].offset = getu16(data);
data += 2;
- } else if (c >= 034 && c <= 036) {
+ } else if (c >= 034 && c <= 037) {
if (osize == 32) {
ins->oprs[c - 034].offset = getu32(data);
data += 4;
@@ -451,10 +477,10 @@ static int matches(const struct itemplate *t, uint8_t *data,
}
if (segsize != asize)
ins->oprs[c - 034].addr_size = asize;
- } else if (c >= 040 && c <= 042) {
+ } else if (c >= 040 && c <= 043) {
ins->oprs[c - 040].offset = getu32(data);
data += 4;
- } else if (c >= 044 && c <= 046) {
+ } else if (c >= 044 && c <= 047) {
switch (asize) {
case 16:
ins->oprs[c - 044].offset = getu16(data);
@@ -471,18 +497,18 @@ static int matches(const struct itemplate *t, uint8_t *data,
}
if (segsize != asize)
ins->oprs[c - 044].addr_size = asize;
- } else if (c >= 050 && c <= 052) {
+ } else if (c >= 050 && c <= 053) {
ins->oprs[c - 050].offset = gets8(data++);
ins->oprs[c - 050].segment |= SEG_RELATIVE;
- } else if (c >= 054 && c <= 056) {
+ } else if (c >= 054 && c <= 057) {
ins->oprs[c - 054].offset = getu64(data);
data += 8;
- } else if (c >= 060 && c <= 062) {
+ } else if (c >= 060 && c <= 063) {
ins->oprs[c - 060].offset = gets16(data);
data += 2;
ins->oprs[c - 060].segment |= SEG_RELATIVE;
ins->oprs[c - 060].segment &= ~SEG_32BIT;
- } else if (c >= 064 && c <= 066) {
+ } else if (c >= 064 && c <= 067) {
if (osize == 16) {
ins->oprs[c - 064].offset = getu16(data);
data += 2;
@@ -498,30 +524,44 @@ static int matches(const struct itemplate *t, uint8_t *data,
(ins->oprs[c - 064].type & ~SIZE_MASK)
| ((osize == 16) ? BITS16 : BITS32);
}
- } else if (c >= 070 && c <= 072) {
+ } else if (c >= 070 && c <= 073) {
ins->oprs[c - 070].offset = getu32(data);
data += 4;
ins->oprs[c - 070].segment |= SEG_32BIT | SEG_RELATIVE;
- } else if (c >= 0100 && c < 0130) {
+ } else if (c >= 0100 && c < 0140) {
int modrm = *data++;
- ins->oprs[c & 07].basereg = ((modrm >> 3)&7)+
- (ins->rex & REX_R ? 8 : 0);
ins->oprs[c & 07].segment |= SEG_RMREG;
data = do_ea(data, modrm, asize, segsize,
- &ins->oprs[(c >> 3) & 07], ins->rex);
- } else if (c >= 0130 && c <= 0132) {
- ins->oprs[c - 0130].offset = getu16(data);
+ &ins->oprs[(c >> 3) & 07], ins);
+ if (!data)
+ return FALSE;
+ ins->oprs[c & 07].basereg = ((modrm >> 3)&7)+
+ (ins->rex & REX_R ? 8 : 0);
+ } else if (c >= 0140 && c <= 0143) {
+ ins->oprs[c - 0140].offset = getu16(data);
data += 2;
- } else if (c >= 0140 && c <= 0142) {
- ins->oprs[c - 0140].offset = getu32(data);
+ } else if (c >= 0150 && c <= 0153) {
+ ins->oprs[c - 0150].offset = getu32(data);
data += 4;
+ } else if (c >= 0160 && c <= 0167) {
+ ins->rex |= (c & 4) ? REX_D|REX_OC : REX_D;
+ ins->drexdst = c & 3;
+ } else if (c == 0170) {
+ if (*data++)
+ return FALSE;
+ } else if (c == 0171) {
+ data = do_drex(data, ins);
+ if (!data)
+ return FALSE;
} else if (c >= 0200 && c <= 0277) {
int modrm = *data++;
if (((modrm >> 3) & 07) != (c & 07))
return FALSE; /* spare field doesn't match up */
data = do_ea(data, modrm, asize, segsize,
- &ins->oprs[(c >> 3) & 07], ins->rex);
- } else if (c >= 0300 && c <= 0302) {
+ &ins->oprs[(c >> 3) & 07], ins);
+ if (!data)
+ return FALSE;
+ } else if (c >= 0300 && c <= 0303) {
a_used = TRUE;
} else if (c == 0310) {
if (asize != 16)
@@ -605,6 +645,10 @@ static int matches(const struct itemplate *t, uint8_t *data,
}
}
+ /* REX cannot be combined with DREX */
+ if ((ins->rex & REX_D) && (prefix->rex))
+ return FALSE;
+
/*
* Check for unused rep or a/o prefixes.
*/
@@ -627,9 +671,11 @@ int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize,
int32_t offset, int autosync, uint32_t prefer)
{
const struct itemplate * const *p, * const *best_p;
+ const struct disasm_index *ix;
+ uint8_t *dp;
int length, best_length = 0;
char *segover;
- int i, slen, colon;
+ int i, slen, colon, n;
uint8_t *origdata;
int works;
insn tmp_ins, ins;
@@ -684,7 +730,14 @@ int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize,
best_p = NULL;
best_pref = INT_MAX;
- for (p = itable[*data]; *p; p++) {
+ dp = data;
+ ix = itable + *dp++;
+ while (ix->n == (size_t)-1) {
+ ix = (const struct disasm_index *)ix->p + *dp++;
+ }
+
+ p = (const struct itemplate * const *)ix->p;
+ for (n = ix->n; n; n--, p++) {
if ((length = matches(*p, data, &prefix, segsize, &tmp_ins))) {
works = TRUE;
/*
@@ -692,19 +745,21 @@ int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize,
* XXX: Need to make sure this is actually correct.
*/
for (i = 0; i < (*p)->operands; i++) {
- if (
- /* If it's a mem-only EA but we have a register, die. */
- ((tmp_ins.oprs[i].segment & SEG_RMREG) &&
- !(MEMORY & ~(*p)->opd[i])) ||
- /* If it's a reg-only EA but we have a memory ref, die. */
- (!(tmp_ins.oprs[i].segment & SEG_RMREG) &&
- !(REG_EA & ~(*p)->opd[i]) &&
- !((*p)->opd[i] & REG_SMASK)) ||
- /* Register type mismatch (eg FS vs REG_DESS): die. */
- ((((*p)->opd[i] & (REGISTER | FPUREG)) ||
- (tmp_ins.oprs[i].segment & SEG_RMREG)) &&
- !whichreg((*p)->opd[i],
- tmp_ins.oprs[i].basereg, tmp_ins.rex))) {
+ if (!((*p)->opd[i] & SAME_AS) &&
+ (
+ /* If it's a mem-only EA but we have a register, die. */
+ ((tmp_ins.oprs[i].segment & SEG_RMREG) &&
+ !(MEMORY & ~(*p)->opd[i])) ||
+ /* If it's a reg-only EA but we have a memory ref, die. */
+ (!(tmp_ins.oprs[i].segment & SEG_RMREG) &&
+ !(REG_EA & ~(*p)->opd[i]) &&
+ !((*p)->opd[i] & REG_SMASK)) ||
+ /* Register type mismatch (eg FS vs REG_DESS): die. */
+ ((((*p)->opd[i] & (REGISTER | FPUREG)) ||
+ (tmp_ins.oprs[i].segment & SEG_RMREG)) &&
+ !whichreg((*p)->opd[i],
+ tmp_ins.oprs[i].basereg, tmp_ins.rex))
+ )) {
works = FALSE;
break;
}
@@ -793,107 +848,116 @@ int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize,
colon = FALSE;
length += data - origdata; /* fix up for prefixes */
for (i = 0; i < (*p)->operands; i++) {
+ opflags_t t = (*p)->opd[i];
+ const operand *o = &ins.oprs[i];
+ int64_t offs;
+
+ if (t & SAME_AS) {
+ o = &ins.oprs[t & ~SAME_AS];
+ t = (*p)->opd[t & ~SAME_AS];
+ }
+
output[slen++] = (colon ? ':' : i == 0 ? ' ' : ',');
- if (ins.oprs[i].segment & SEG_RELATIVE) {
- ins.oprs[i].offset += offset + length;
+ offs = o->offset;
+ if (o->segment & SEG_RELATIVE) {
+ offs += offset + length;
/*
* sort out wraparound
*/
- if (!(ins.oprs[i].segment & (SEG_32BIT|SEG_64BIT)))
- ins.oprs[i].offset &= 0xffff;
+ if (!(o->segment & (SEG_32BIT|SEG_64BIT)))
+ offs &= 0xffff;
/*
* add sync marker, if autosync is on
*/
if (autosync)
- add_sync(ins.oprs[i].offset, 0L);
+ add_sync(offs, 0L);
}
- if ((*p)->opd[i] & COLON)
+ if (t & COLON)
colon = TRUE;
else
colon = FALSE;
- if (((*p)->opd[i] & (REGISTER | FPUREG)) ||
- (ins.oprs[i].segment & SEG_RMREG)) {
- ins.oprs[i].basereg = whichreg((*p)->opd[i],
- ins.oprs[i].basereg, ins.rex);
- if ((*p)->opd[i] & TO)
+ if ((t & (REGISTER | FPUREG)) ||
+ (o->segment & SEG_RMREG)) {
+ enum reg_enum reg;
+ reg = whichreg(t, o->basereg, ins.rex);
+ if (t & TO)
slen += snprintf(output + slen, outbufsize - slen, "to ");
slen += snprintf(output + slen, outbufsize - slen, "%s",
- reg_names[ins.oprs[i].basereg -
- EXPR_REG_START]);
- } else if (!(UNITY & ~(*p)->opd[i])) {
+ reg_names[reg - EXPR_REG_START]);
+ } else if (!(UNITY & ~t)) {
output[slen++] = '1';
- } else if ((*p)->opd[i] & IMMEDIATE) {
- if ((*p)->opd[i] & BITS8) {
+ } else if (t & IMMEDIATE) {
+ if (t & BITS8) {
slen +=
snprintf(output + slen, outbufsize - slen, "byte ");
- if (ins.oprs[i].segment & SEG_SIGNED) {
- if (ins.oprs[i].offset < 0) {
- ins.oprs[i].offset *= -1;
+ if (o->segment & SEG_SIGNED) {
+ if (offs < 0) {
+ offs *= -1;
output[slen++] = '-';
} else
output[slen++] = '+';
}
- } else if ((*p)->opd[i] & BITS16) {
+ } else if (t & BITS16) {
slen +=
snprintf(output + slen, outbufsize - slen, "word ");
- } else if ((*p)->opd[i] & BITS32) {
+ } else if (t & BITS32) {
slen +=
snprintf(output + slen, outbufsize - slen, "dword ");
- } else if ((*p)->opd[i] & BITS64) {
+ } else if (t & BITS64) {
slen +=
snprintf(output + slen, outbufsize - slen, "qword ");
- } else if ((*p)->opd[i] & NEAR) {
+ } else if (t & NEAR) {
slen +=
snprintf(output + slen, outbufsize - slen, "near ");
- } else if ((*p)->opd[i] & SHORT) {
+ } else if (t & SHORT) {
slen +=
snprintf(output + slen, outbufsize - slen, "short ");
}
slen +=
snprintf(output + slen, outbufsize - slen, "0x%"PRIx64"",
- ins.oprs[i].offset);
- } else if (!(MEM_OFFS & ~(*p)->opd[i])) {
+ offs);
+ } else if (!(MEM_OFFS & ~t)) {
slen +=
snprintf(output + slen, outbufsize - slen, "[%s%s%s0x%"PRIx64"]",
(segover ? segover : ""),
(segover ? ":" : ""),
- (ins.oprs[i].addr_size ==
- 32 ? "dword " : ins.oprs[i].addr_size ==
- 16 ? "word " : ""), ins.oprs[i].offset);
+ (o->addr_size ==
+ 32 ? "dword " : o->addr_size ==
+ 16 ? "word " : ""), offs);
segover = NULL;
- } else if (!(REGMEM & ~(*p)->opd[i])) {
+ } else if (!(REGMEM & ~t)) {
int started = FALSE;
- if ((*p)->opd[i] & BITS8)
+ if (t & BITS8)
slen +=
snprintf(output + slen, outbufsize - slen, "byte ");
- if ((*p)->opd[i] & BITS16)
+ if (t & BITS16)
slen +=
snprintf(output + slen, outbufsize - slen, "word ");
- if ((*p)->opd[i] & BITS32)
+ if (t & BITS32)
slen +=
snprintf(output + slen, outbufsize - slen, "dword ");
- if ((*p)->opd[i] & BITS64)
+ if (t & BITS64)
slen +=
snprintf(output + slen, outbufsize - slen, "qword ");
- if ((*p)->opd[i] & BITS80)
+ if (t & BITS80)
slen +=
snprintf(output + slen, outbufsize - slen, "tword ");
- if ((*p)->opd[i] & FAR)
+ if (t & FAR)
slen += snprintf(output + slen, outbufsize - slen, "far ");
- if ((*p)->opd[i] & NEAR)
+ if (t & NEAR)
slen +=
snprintf(output + slen, outbufsize - slen, "near ");
output[slen++] = '[';
- if (ins.oprs[i].addr_size)
+ if (o->addr_size)
slen += snprintf(output + slen, outbufsize - slen, "%s",
- (ins.oprs[i].addr_size == 64 ? "qword " :
- ins.oprs[i].addr_size == 32 ? "dword " :
- ins.oprs[i].addr_size == 16 ? "word " :
+ (o->addr_size == 64 ? "qword " :
+ o->addr_size == 32 ? "dword " :
+ o->addr_size == 16 ? "word " :
""));
- if (ins.oprs[i].eaflags & EAF_REL)
+ if (o->eaflags & EAF_REL)
slen += snprintf(output + slen, outbufsize - slen, "rel ");
if (segover) {
slen +=
@@ -901,27 +965,27 @@ int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize,
segover);
segover = NULL;
}
- if (ins.oprs[i].basereg != -1) {
+ if (o->basereg != -1) {
slen += snprintf(output + slen, outbufsize - slen, "%s",
- reg_names[(ins.oprs[i].basereg -
+ reg_names[(o->basereg -
EXPR_REG_START)]);
started = TRUE;
}
- if (ins.oprs[i].indexreg != -1) {
+ if (o->indexreg != -1) {
if (started)
output[slen++] = '+';
slen += snprintf(output + slen, outbufsize - slen, "%s",
- reg_names[(ins.oprs[i].indexreg -
+ reg_names[(o->indexreg -
EXPR_REG_START)]);
- if (ins.oprs[i].scale > 1)
+ if (o->scale > 1)
slen +=
snprintf(output + slen, outbufsize - slen, "*%d",
- ins.oprs[i].scale);
+ o->scale);
started = TRUE;
}
- if (ins.oprs[i].segment & SEG_DISP8) {
+ if (o->segment & SEG_DISP8) {
int minus = 0;
- int8_t offset = ins.oprs[i].offset;
+ int8_t offset = offs;
if (offset < 0) {
minus = 1;
offset = -offset;
@@ -929,9 +993,9 @@ int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize,
slen +=
snprintf(output + slen, outbufsize - slen, "%s0x%"PRIx8"",
minus ? "-" : "+", offset);
- } else if (ins.oprs[i].segment & SEG_DISP16) {
+ } else if (o->segment & SEG_DISP16) {
int minus = 0;
- int16_t offset = ins.oprs[i].offset;
+ int16_t offset = offs;
if (offset < 0) {
minus = 1;
offset = -offset;
@@ -939,9 +1003,9 @@ int32_t disasm(uint8_t *data, char *output, int outbufsize, int segsize,
slen +=
snprintf(output + slen, outbufsize - slen, "%s0x%"PRIx16"",
minus ? "-" : started ? "+" : "", offset);
- } else if (ins.oprs[i].segment & SEG_DISP32) {
+ } else if (o->segment & SEG_DISP32) {
char *prefix = "";
- int32_t offset = ins.oprs[i].offset;
+ int32_t offset = offs;
if (offset < 0) {
offset = -offset;
prefix = "-";
diff --git a/doc/nasmdoc.src b/doc/nasmdoc.src
index 13ae013d..c79cd39b 100644
--- a/doc/nasmdoc.src
+++ b/doc/nasmdoc.src
@@ -151,6 +151,7 @@ convention
\IR{ms-dos} MS-DOS
\IR{ms-dos device drivers} MS-DOS device drivers
\IR{multipush} \c{multipush} macro
+\IR{nan} NaN
\IR{nasm version} NASM version
\IR{netbsd} NetBSD
\IR{omf} OMF
@@ -1093,7 +1094,7 @@ syntax in which register names must be prefixed by a \c{%} sign), or
they can be \i{effective addresses} (see \k{effaddr}), constants
(\k{const}) or expressions (\k{expr}).
-For \i{floating-point} instructions, NASM accepts a wide range of
+For x87 \i{floating-point} instructions, NASM accepts a wide range of
syntaxes: you can use two-operand forms like MASM supports, or you
can use NASM's native single-operand forms in most cases.
\# Details of
@@ -1107,7 +1108,7 @@ For example, you can code:
\c fadd st1,st0 ; this sets st1 := st1 + st0
\c fadd to st1 ; so does this
-Almost any floating-point instruction that references memory must
+Almost any x87 floating-point instruction that references memory must
use one of the prefixes \i\c{DWORD}, \i\c{QWORD} or \i\c{TWORD} to
indicate what size of \i{memory operand} it refers to.
@@ -1115,19 +1116,19 @@ indicate what size of \i{memory operand} it refers to.
\H{pseudop} \i{Pseudo-Instructions}
Pseudo-instructions are things which, though not real x86 machine
-instructions, are used in the instruction field anyway because
-that's the most convenient place to put them. The current
-pseudo-instructions are \i\c{DB}, \i\c{DW}, \i\c{DD}, \i\c{DQ} and
-\i\c{DT}, their \i{uninitialized} counterparts \i\c{RESB},
-\i\c{RESW}, \i\c{RESD}, \i\c{RESQ} and \i\c{REST}, the \i\c{INCBIN}
+instructions, are used in the instruction field anyway because that's
+the most convenient place to put them. The current pseudo-instructions
+are \i\c{DB}, \i\c{DW}, \i\c{DD}, \i\c{DQ}, \i\c{DT} and \i\c{DO};
+their \i{uninitialized} counterparts \i\c{RESB}, \i\c{RESW},
+\i\c{RESD}, \i\c{RESQ}, \i\c{REST} and \i\c{RESO}; the \i\c{INCBIN}
command, the \i\c{EQU} command, and the \i\c{TIMES} prefix.
\S{db} \c{DB} and friends: Declaring initialized Data
-\i\c{DB}, \i\c{DW}, \i\c{DD}, \i\c{DQ} and \i\c{DT} are used, much
-as in MASM, to declare initialized data in the output file. They can
-be invoked in a wide range of ways:
+\i\c{DB}, \i\c{DW}, \i\c{DD}, \i\c{DQ}, \i\c{DT} and \i\c{DO} are
+used, much as in MASM, to declare initialized data in the output
+file. They can be invoked in a wide range of ways:
\I{floating-point}\I{character constant}\I{string constant}
\c db 0x55 ; just the byte 0x55
@@ -1144,20 +1145,21 @@ be invoked in a wide range of ways:
\c dq 1.234567e20 ; double-precision float
\c dt 1.234567e20 ; extended-precision float
-\c{DT} does not accept \i{numeric constants} as operands.
+\c{DT} and \c{DO} do not accept \i{numeric constants} as operands.
+\c{DB} does not accept \i{floating-point} numbers as operands.
\S{resb} \c{RESB} and friends: Declaring \i{Uninitialized} Data
-\i\c{RESB}, \i\c{RESW}, \i\c{RESD}, \i\c{RESQ} and \i\c{REST} are
-designed to be used in the BSS section of a module: they declare
-\e{uninitialized} storage space. Each takes a single operand, which
-is the number of bytes, words, doublewords or whatever to reserve.
-As stated in \k{qsother}, NASM does not support the MASM/TASM syntax
-of reserving uninitialized space by writing \I\c{?}\c{DW ?} or
-similar things: this is what it does instead. The operand to a
-\c{RESB}-type pseudo-instruction is a \i\e{critical expression}: see
-\k{crit}.
+\i\c{RESB}, \i\c{RESW}, \i\c{RESD}, \i\c{RESQ}, \i\c{REST} and
+\i\c{RESO} are designed to be used in the BSS section of a module:
+they declare \e{uninitialized} storage space. Each takes a single
+operand, which is the number of bytes, words, doublewords or whatever
+to reserve. As stated in \k{qsother}, NASM does not support the
+MASM/TASM syntax of reserving uninitialized space by writing
+\I\c{?}\c{DW ?} or similar things: this is what it does instead. The
+operand to a \c{RESB}-type pseudo-instruction is a \i\e{critical
+expression}: see \k{crit}.
For example:
@@ -1390,20 +1392,28 @@ when they are operands to \c{dw}.
\S{fltconst} \I{floating-point, constants}Floating-Point Constants
\i{Floating-point} constants are acceptable only as arguments to
-\i\c{DD}, \i\c{DQ} and \i\c{DT}. They are expressed in the
-traditional form: digits, then a period, then optionally more
-digits, then optionally an \c{E} followed by an exponent. The period
-is mandatory, so that NASM can distinguish between \c{dd 1}, which
-declares an integer constant, and \c{dd 1.0} which declares a
-floating-point constant.
+\i\c{DW}, \i\c{DD}, \i\c{DQ}, \i\c{DT}, and \i\c{DO}. They are
+expressed in the traditional form: digits, then a period, then
+optionally more digits, then optionally an \c{E} followed by an
+exponent. The period is mandatory, so that NASM can distinguish
+between \c{dd 1}, which declares an integer constant, and \c{dd 1.0}
+which declares a floating-point constant.
+
+NASM also supports C99-style hexadecimal floating-point: \c{0x},
+hexadecimal digits, period, optionally more hexadecimal digits, then
+optionally a \c{P} followed by a \e{binary} (not hexadecimal) exponent
+in decimal notation.
Some examples:
+\c dw -0.5 ; IEEE half precision
\c dd 1.2 ; an easy one
+\c dd 0x1p+2 ; 1.0x2^2 = 4.0
\c dq 1.e10 ; 10,000,000,000
\c dq 1.e+10 ; synonymous with 1.e10
\c dq 1.e-10 ; 0.000 000 000 1
\c dt 3.141592653589793238462 ; pi
+\c do 1.e+4000 ; IEEE quad precision
NASM cannot do compile-time arithmetic on floating-point constants.
This is because NASM is designed to be portable - although it always
@@ -1415,18 +1425,21 @@ do floating arithmetic it would have to include its own complete set
of floating-point routines, which would significantly increase the
size of the assembler for very little benefit.
+The special tokens \i\c{__Infinity__}, \i\c{__QNaN__} (or
+\i\c{__NaN__}) and \i\c{__SNaN__} can be used to generate
+\I{infinity}infinities, quiet \i{NaN}s, and signalling NaNs,
+respectively. These are normally used as macros:
-\H{expr} \i{Expressions}
+\c %define Inf __Infinity__
+\c %define NaN __QNaN__
+\c
+\c dq +1.5, -Inf, NaN ; Double-precision constants
-Expressions in NASM are similar in syntax to those in C.
+\H{expr} \i{Expressions}
-NASM does not guarantee the size of the integers used to evaluate
-expressions at compile time: since NASM can compile and run on
-64-bit systems quite happily, don't assume that expressions are
-evaluated in 32-bit registers and so try to make deliberate use of
-\i{integer overflow}. It might not always work. The only thing NASM
-will guarantee is what's guaranteed by ANSI C: you always have \e{at
-least} 32 bits to work in.
+Expressions in NASM are similar in syntax to those in C. Expressions
+are evaluated as 64-bit integers which are then adjusted to the
+appropriate size.
NASM supports two special tokens in expressions, allowing
calculations to involve the current assembly position: the
@@ -1560,11 +1573,11 @@ invent one using the macro processor.
When assembling with the optimizer set to level 2 or higher (see
\k{opt-On}), NASM will use size specifiers (\c{BYTE}, \c{WORD},
-\c{DWORD}, \c{QWORD}, or \c{TWORD}), but will give them the smallest
-possible size. The keyword \c{STRICT} can be used to inhibit
+\c{DWORD}, \c{QWORD}, \c{TWORD} or \c{OWORD}), but will give them the
+smallest possible size. The keyword \c{STRICT} can be used to inhibit
optimization and force a particular operand to be emitted in the
-specified size. For example, with the optimizer on, and in
-\c{BITS 16} mode,
+specified size. For example, with the optimizer on, and in \c{BITS 16}
+mode,
\c push dword 33
@@ -3425,15 +3438,21 @@ using 16-bit data need an 0x66 and those working on 16-bit addresses
need an 0x67.
When NASM is in \c{BITS 64} mode, most instructions operate the same
-as they do for \c{BITS 32} mode. However, 16-bit addresses are depreciated
-in the x86-64 architecture extension and the 0x67 prefix is used for 32-bit
-addressing. This is due to the default of 64-bit addressing. When the \c{REX}
-prefix is used, the processor does not know how to address the AH, BH, CH or
-DH (high 8-bit legacy) registers. This because the x86-64 has added a new
-set of registers and the capability to address the low 8-bits of the SP, BP
-SI and DI registers as SPL, BPL, SIL and DIL, respectively; but only when
-the REX prefix is used. In summary, the \c{REX} prefix causes the addressing
-of AH, BH, CH and DH to be replaced by SPL, BPL, SIL and DIL.
+as they do for \c{BITS 32} mode. However, there are 8 more general and
+SSE registers, and 16-bit addressing is no longer supported.
+
+The default address size is 64 bits; 32-bit addressing can be selected
+with the 0x67 prefix. The default operand size is still 32 bits,
+however, and the 0x66 prefix selects 16-bit operand size. The \c{REX}
+prefix is used both to select 64-bit operand size, and to access the
+new registers. NASM automatically inserts REX prefixes when
+necessary.
+
+When the \c{REX} prefix is used, the processor does not know how to
+address the AH, BH, CH or DH (high 8-bit legacy) registers. Instead,
+it is possible to access the low 8-bits of the SP, BP, SI and DI
+registers as SPL, BPL, SIL and DIL, respectively; but only when the
+REX prefix is used.
The \c{BITS} directive has an exactly equivalent primitive form,
\c{[BITS 16]}, \c{[BITS 32]} and \c{[BITS 64]}. The user-level form is
diff --git a/float.c b/float.c
index 099e23f2..d22aa19c 100644
--- a/float.c
+++ b/float.c
@@ -8,6 +8,7 @@
* initial version 13/ix/96 by Simon Tatham
*/
+#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -18,8 +19,8 @@
#define TRUE 1
#define FALSE 0
-#define MANT_WORDS 6 /* 64 bits + 32 for accuracy == 96 */
-#define MANT_DIGITS 28 /* 29 digits don't fit in 96 bits */
+#define MANT_WORDS 10 /* 112 bits + 48 for accuracy == 160 */
+#define MANT_DIGITS 49 /* 50 digits don't fit in 160 bits */
/*
* guaranteed top bit of from is set
@@ -47,9 +48,8 @@ static int ieee_multiply(uint16_t *to, uint16_t *from)
temp[i] &= 0xFFFF;
}
if (temp[0] & 0x8000) {
- for (i = 0; i < MANT_WORDS; i++)
- to[i] = temp[i] & 0xFFFF;
- return 0;
+ memcpy(to, temp, 2*MANT_WORDS);
+ return 0;
} else {
for (i = 0; i < MANT_WORDS; i++)
to[i] = (temp[i] << 1) + !!(temp[i + 1] & 0x8000);
@@ -57,6 +57,91 @@ static int ieee_multiply(uint16_t *to, uint16_t *from)
}
}
+static int hexval(char c)
+{
+ if (c >= '0' && c <= '9')
+ return c-'0';
+ else if (c >= 'a' && c <= 'f')
+ return c-'a'+10;
+ else
+ return c-'A'+10;
+}
+
+static void ieee_flconvert_hex(char *string, uint16_t *mant,
+ int32_t *exponent, efunc error)
+{
+ static const int log2tbl[16] =
+ { -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3 };
+ uint16_t mult[MANT_WORDS+1], *mp;
+ int ms;
+ int32_t twopwr;
+ int seendot, seendigit;
+ unsigned char c;
+
+ twopwr = 0;
+ seendot = seendigit = 0;
+
+ memset(mult, 0, sizeof mult);
+
+ while ((c = *string++) != '\0') {
+ if (c == '.') {
+ if (!seendot)
+ seendot = TRUE;
+ else {
+ error(ERR_NONFATAL,
+ "too many periods in floating-point constant");
+ return;
+ }
+ } else if (isxdigit(c)) {
+ int v = hexval(c);
+
+ if (!seendigit && v) {
+ int l = log2tbl[v];
+
+ seendigit = 1;
+ mp = mult;
+ ms = 15-l;
+
+ twopwr = seendot ? twopwr-4+l : l-3;
+ }
+
+ if (seendigit) {
+ if (ms <= 0) {
+ *mp |= v >> -ms;
+ mp++;
+ if (mp > &mult[MANT_WORDS])
+ mp = &mult[MANT_WORDS]; /* Guard slot */
+ ms += 16;
+ }
+ *mp |= v << ms;
+ ms -= 4;
+
+ if (!seendot)
+ twopwr += 4;
+ } else {
+ if (seendot)
+ twopwr -= 4;
+ }
+ } else if (c == 'p' || c == 'P') {
+ twopwr += atoi(string);
+ break;
+ } else {
+ error(ERR_NONFATAL,
+ "floating-point constant: `%c' is invalid character",
+ c);
+ return;
+ }
+ }
+
+ if (!seendigit) {
+ memset(mant, 0, 2*MANT_WORDS); /* Zero */
+ *exponent = 0;
+ } else {
+ memcpy(mant, mult, 2*MANT_WORDS);
+ *exponent = twopwr;
+ }
+}
+
static void ieee_flconvert(char *string, uint16_t *mant,
int32_t *exponent, efunc error)
{
@@ -67,6 +152,11 @@ static void ieee_flconvert(char *string, uint16_t *mant,
int32_t tenpwr, twopwr;
int extratwos, started, seendot;
+ if (string[0] == '0' && (string[1] == 'x' || string[1] == 'X')) {
+ ieee_flconvert_hex(string+2, mant, exponent, error);
+ return;
+ }
+
p = digits;
tenpwr = 0;
started = seendot = FALSE;
@@ -213,123 +303,135 @@ static int ieee_round(uint16_t *mant, int i)
#define put(a,b) ( (*(a)=(b)), ((a)[1]=(b)>>8) )
-static int to_double(char *str, int32_t sign, uint8_t *result,
- efunc error)
+/* Set a bit, using *bigendian* bit numbering (0 = MSB) */
+static void set_bit(uint16_t *mant, int bit)
{
- uint16_t mant[MANT_WORDS];
- int32_t exponent;
+ mant[bit >> 4] |= 1 << (~bit & 15);
+}
- sign = (sign < 0 ? 0x8000L : 0L);
+/* Produce standard IEEE formats, with implicit "1" bit; this makes
+ the following assumptions:
- ieee_flconvert(str, mant, &exponent, error);
- if (mant[0] & 0x8000) {
- /*
- * Non-zero.
- */
- exponent--;
- if (exponent >= -1022 && exponent <= 1024) {
- /*
- * Normalised.
- */
- exponent += 1023;
- ieee_shr(mant, 11);
- ieee_round(mant, 4);
- if (mant[0] & 0x20) /* did we scale up by one? */
- ieee_shr(mant, 1), exponent++;
- mant[0] &= 0xF; /* remove leading one */
- put(result + 6, (exponent << 4) | mant[0] | sign);
- put(result + 4, mant[1]);
- put(result + 2, mant[2]);
- put(result + 0, mant[3]);
- } else if (exponent < -1022 && exponent >= -1074) {
- /*
- * Denormal.
- */
- int shift = -(exponent + 1011);
- int sh = shift % 16, wds = shift / 16;
- ieee_shr(mant, sh);
- if (ieee_round(mant, 4 - wds)
- || (sh > 0 && (mant[0] & (0x8000 >> (sh - 1))))) {
- ieee_shr(mant, 1);
- if (sh == 0)
- mant[0] |= 0x8000;
- exponent++;
- }
- put(result + 6, (wds == 0 ? mant[0] : 0) | sign);
- put(result + 4, (wds <= 1 ? mant[1 - wds] : 0));
- put(result + 2, (wds <= 2 ? mant[2 - wds] : 0));
- put(result + 0, (wds <= 3 ? mant[3 - wds] : 0));
- } else {
- if (exponent > 0) {
- error(ERR_NONFATAL, "overflow in floating-point constant");
- return 0;
- } else
- memset(result, 0, 8);
- }
- } else {
- /*
- * Zero.
- */
- memset(result, 0, 8);
- }
- return 1; /* success */
-}
+ - the sign bit is the MSB, followed by the exponent.
+ - the sign bit plus exponent fit in 16 bits.
+ - the exponent bias is 2^(n-1)-1 for an n-bit exponent */
+
+struct ieee_format {
+ int words;
+ int mantissa; /* Bits in the mantissa */
+ int exponent; /* Bits in the exponent */
+};
+static const struct ieee_format ieee_16 = { 1, 10, 5 };
+static const struct ieee_format ieee_32 = { 2, 23, 8 };
+static const struct ieee_format ieee_64 = { 4, 52, 11 };
+static const struct ieee_format ieee_128 = { 8, 112, 15 };
+
+/* Produce all the standard IEEE formats: 16, 32, 64, and 128 bits */
static int to_float(char *str, int32_t sign, uint8_t *result,
- efunc error)
+ const struct ieee_format *fmt, efunc error)
{
- uint16_t mant[MANT_WORDS];
+ uint16_t mant[MANT_WORDS], *mp;
int32_t exponent;
+ int32_t expmax = 1 << (fmt->exponent-1);
+ uint16_t implicit_one = 0x8000 >> fmt->exponent;
+ int i;
sign = (sign < 0 ? 0x8000L : 0L);
- ieee_flconvert(str, mant, &exponent, error);
- if (mant[0] & 0x8000) {
- /*
- * Non-zero.
- */
- exponent--;
- if (exponent >= -126 && exponent <= 128) {
- /*
- * Normalised.
- */
- exponent += 127;
- ieee_shr(mant, 8);
- ieee_round(mant, 2);
- if (mant[0] & 0x100) /* did we scale up by one? */
- ieee_shr(mant, 1), exponent++;
- mant[0] &= 0x7F; /* remove leading one */
- put(result + 2, (exponent << 7) | mant[0] | sign);
- put(result + 0, mant[1]);
- } else if (exponent < -126 && exponent >= -149) {
- /*
- * Denormal.
- */
- int shift = -(exponent + 118);
- int sh = shift % 16, wds = shift / 16;
- ieee_shr(mant, sh);
- if (ieee_round(mant, 2 - wds)
- || (sh > 0 && (mant[0] & (0x8000 >> (sh - 1))))) {
- ieee_shr(mant, 1);
- if (sh == 0)
- mant[0] |= 0x8000;
- exponent++;
- }
- put(result + 2, (wds == 0 ? mant[0] : 0) | sign);
- put(result + 0, (wds <= 1 ? mant[1 - wds] : 0));
- } else {
- if (exponent > 0) {
- error(ERR_NONFATAL, "overflow in floating-point constant");
- return 0;
- } else
- memset(result, 0, 4);
- }
+ if (str[0] == '_') {
+ /* NaN or Infinity */
+ int32_t expmask = (1 << fmt->exponent)-1;
+
+ memset(mant, 0, sizeof mant);
+ mant[0] = expmask << (15-fmt->exponent); /* Exponent: all bits one */
+
+ switch (str[2]) {
+ case 'n': /* __nan__ */
+ case 'N':
+ case 'q': /* __qnan__ */
+ case 'Q':
+ set_bit(mant, fmt->exponent+1); /* Highest bit in mantissa */
+ break;
+ case 's': /* __snan__ */
+ case 'S':
+ set_bit(mant, fmt->exponent+fmt->mantissa); /* Last bit */
+ break;
+ case 'i': /* __infinity__ */
+ case 'I':
+ break;
+ }
} else {
- memset(result, 0, 4);
+ ieee_flconvert(str, mant, &exponent, error);
+ if (mant[0] & 0x8000) {
+ /*
+ * Non-zero.
+ */
+ exponent--;
+ if (exponent >= 2-expmax && exponent <= expmax) {
+ /*
+ * Normalised.
+ */
+ exponent += expmax;
+ ieee_shr(mant, fmt->exponent);
+ ieee_round(mant, fmt->words);
+ /* did we scale up by one? */
+ if (mant[0] & (implicit_one << 1)) {
+ ieee_shr(mant, 1);
+ exponent++;
+ }
+
+ mant[0] &= (implicit_one-1); /* remove leading one */
+ mant[0] |= exponent << (15 - fmt->exponent);
+ } else if (exponent < 2-expmax &&
+ exponent >= 2-expmax-fmt->mantissa) {
+ /*
+ * Denormal.
+ */
+ int shift = -(exponent + expmax-2-fmt->exponent);
+ int sh = shift % 16, wds = shift / 16;
+ ieee_shr(mant, sh);
+ if (ieee_round(mant, fmt->words - wds)
+ || (sh > 0 && (mant[0] & (0x8000 >> (sh - 1))))) {
+ ieee_shr(mant, 1);
+ if (sh == 0)
+ mant[0] |= 0x8000;
+ exponent++;
+ }
+
+ if (wds) {
+ for (i = fmt->words-1; i >= wds; i--)
+ mant[i] = mant[i-wds];
+ for (; i >= 0; i--)
+ mant[i] = 0;
+ }
+ } else {
+ if (exponent > 0) {
+ error(ERR_NONFATAL, "overflow in floating-point constant");
+ return 0;
+ } else {
+ memset(mant, 0, 2*fmt->words);
+ }
+ }
+ } else {
+ /* Zero */
+ memset(mant, 0, 2*fmt->words);
+ }
}
- return 1;
+
+ mant[0] |= sign;
+
+ for (mp = &mant[fmt->words], i = 0; i < fmt->words; i++) {
+ uint16_t m = *--mp;
+ put(result, m);
+ result += 2;
+ }
+
+ return 1; /* success */
}
+/* 80-bit format with 64-bit mantissa *including an explicit integer 1*
+ and 15-bit exponent. */
static int to_ldoub(char *str, int32_t sign, uint8_t *result,
efunc error)
{
@@ -338,6 +440,31 @@ static int to_ldoub(char *str, int32_t sign, uint8_t *result,
sign = (sign < 0 ? 0x8000L : 0L);
+ if (str[0] == '_') {
+ uint16_t is_snan = 0, is_qnan = 0x8000;
+ switch (str[2]) {
+ case 'n':
+ case 'N':
+ case 'q':
+ case 'Q':
+ is_qnan = 0xc000;
+ break;
+ case 's':
+ case 'S':
+ is_snan = 1;
+ break;
+ case 'i':
+ case 'I':
+ break;
+ }
+ put(result + 0, is_snan);
+ put(result + 2, 0);
+ put(result + 4, 0);
+ put(result + 6, is_qnan);
+ put(result + 8, 0x7fff|sign);
+ return 1;
+ }
+
ieee_flconvert(str, mant, &exponent, error);
if (mant[0] & 0x8000) {
/*
@@ -351,11 +478,11 @@ static int to_ldoub(char *str, int32_t sign, uint8_t *result,
exponent += 16383;
if (ieee_round(mant, 4)) /* did we scale up by one? */
ieee_shr(mant, 1), mant[0] |= 0x8000, exponent++;
- put(result + 8, exponent | sign);
- put(result + 6, mant[0]);
- put(result + 4, mant[1]);
- put(result + 2, mant[2]);
put(result + 0, mant[3]);
+ put(result + 2, mant[2]);
+ put(result + 4, mant[1]);
+ put(result + 6, mant[0]);
+ put(result + 8, exponent | sign);
} else if (exponent < -16383 && exponent >= -16446) {
/*
* Denormal.
@@ -370,23 +497,29 @@ static int to_ldoub(char *str, int32_t sign, uint8_t *result,
mant[0] |= 0x8000;
exponent++;
}
- put(result + 8, sign);
- put(result + 6, (wds == 0 ? mant[0] : 0));
- put(result + 4, (wds <= 1 ? mant[1 - wds] : 0));
- put(result + 2, (wds <= 2 ? mant[2 - wds] : 0));
put(result + 0, (wds <= 3 ? mant[3 - wds] : 0));
+ put(result + 2, (wds <= 2 ? mant[2 - wds] : 0));
+ put(result + 4, (wds <= 1 ? mant[1 - wds] : 0));
+ put(result + 6, (wds == 0 ? mant[0] : 0));
+ put(result + 8, sign);
} else {
if (exponent > 0) {
error(ERR_NONFATAL, "overflow in floating-point constant");
return 0;
- } else
- memset(result, 0, 10);
+ } else {
+ goto zero;
+ }
}
} else {
/*
* Zero.
*/
- memset(result, 0, 10);
+ zero:
+ put(result + 0, 0);
+ put(result + 2, 0);
+ put(result + 4, 0);
+ put(result + 6, 0);
+ put(result + 8, sign);
}
return 1;
}
@@ -394,13 +527,18 @@ static int to_ldoub(char *str, int32_t sign, uint8_t *result,
int float_const(char *number, int32_t sign, uint8_t *result, int bytes,
efunc error)
{
- if (bytes == 4)
- return to_float(number, sign, result, error);
- else if (bytes == 8)
- return to_double(number, sign, result, error);
- else if (bytes == 10)
+ switch (bytes) {
+ case 2:
+ return to_float(number, sign, result, &ieee_16, error);
+ case 4:
+ return to_float(number, sign, result, &ieee_32, error);
+ case 8:
+ return to_float(number, sign, result, &ieee_64, error);
+ case 10:
return to_ldoub(number, sign, result, error);
- else {
+ case 16:
+ return to_float(number, sign, result, &ieee_128, error);
+ default:
error(ERR_PANIC, "strange value %d passed to float_const", bytes);
return 0;
}
diff --git a/insns.dat b/insns.dat
index 5043b0b3..61af07f5 100644
--- a/insns.dat
+++ b/insns.dat
@@ -14,6 +14,22 @@
; see the comment at the top of assemble.c. For a detailed description
; of the flags (fourth field), please see insns.h.
;
+
+; Special instructions...
+DB ignore ignore ignore
+DW ignore ignore ignore
+DD ignore ignore ignore
+DQ ignore ignore ignore
+DT ignore ignore ignore
+DO ignore ignore ignore
+RESB imm \340 8086
+RESW ignore ignore ignore
+RESD ignore ignore ignore
+RESQ ignore ignore ignore
+REST ignore ignore ignore
+RESO ignore ignore ignore
+
+; Conventional instructions
AAA void \1\x37 8086,NOLONG
AAD void \2\xD5\x0A 8086,NOLONG
AAD imm \1\xD5\24 8086,SB,NOLONG
@@ -47,14 +63,14 @@ ADC reg_eax,imm \321\1\x15\41 386,SM
ADC reg_rax,sbyte \321\1\x83\202\15 X64,SM,ND
ADC reg_rax,imm \321\1\x15\41 X64,SM
ADC rm8,imm \300\1\x80\202\21 8086,SM
-ADC rm16,imm \320\300\134\1\x81\202\131 8086,SM
-ADC rm32,imm \321\300\144\1\x81\202\141 386,SM
-ADC rm64,imm \324\300\144\1\x81\202\141 X64,SM
+ADC rm16,imm \320\300\145\1\x81\202\141 8086,SM
+ADC rm32,imm \321\300\155\1\x81\202\151 386,SM
+ADC rm64,imm \324\300\155\1\x81\202\151 X64,SM
ADC mem,imm8 \300\1\x80\202\21 8086,SM
-ADC mem,imm16 \320\300\134\1\x81\202\131 8086,SM
-ADC mem,imm32 \321\300\144\1\x81\202\141 386,SM
-ADD mem,reg8 \300\17\101 8086,SM
-ADD reg8,reg8 \17\101 8086
+ADC mem,imm16 \320\300\145\1\x81\202\141 8086,SM
+ADC mem,imm32 \321\300\155\1\x81\202\151 386,SM
+ADD mem,reg8 \300\170\101 8086,SM
+ADD reg8,reg8 \170\101 8086
ADD mem,reg16 \320\300\1\x01\101 8086,SM
ADD reg16,reg16 \320\1\x01\101 8086
ADD mem,reg32 \321\300\1\x01\101 386,SM
@@ -80,12 +96,12 @@ ADD reg_eax,imm \321\1\x05\41 386,SM
ADD reg_rax,sbyte \321\1\x83\200\15 X64,SM,ND
ADD reg_rax,imm \323\1\x05\41 X64,SM
ADD rm8,imm \300\1\x80\200\21 8086,SM
-ADD rm16,imm \320\300\134\1\x81\200\131 8086,SM
-ADD rm32,imm \321\300\144\1\x81\200\141 386,SM
-ADD rm64,imm \324\300\144\1\x81\200\141 X64,SM
+ADD rm16,imm \320\300\145\1\x81\200\141 8086,SM
+ADD rm32,imm \321\300\155\1\x81\200\151 386,SM
+ADD rm64,imm \324\300\155\1\x81\200\151 X64,SM
ADD mem,imm8 \300\1\x80\200\21 8086,SM
-ADD mem,imm16 \320\300\134\1\x81\200\131 8086,SM
-ADD mem,imm32 \321\300\144\1\x81\200\141 386,SM
+ADD mem,imm16 \320\300\145\1\x81\200\141 8086,SM
+ADD mem,imm32 \321\300\155\1\x81\200\151 386,SM
AND mem,reg8 \300\1\x20\101 8086,SM
AND reg8,reg8 \1\x20\101 8086
AND mem,reg16 \320\300\1\x21\101 8086,SM
@@ -113,12 +129,12 @@ AND reg_eax,imm \321\1\x25\41 386,SM
AND reg_rax,sbyte \321\1\x83\204\15 X64,SM,ND
AND reg_rax,imm \324\1\x25\41 X64,SM
AND rm8,imm \300\1\x80\204\21 8086,SM
-AND rm16,imm \320\300\134\1\x81\204\131 8086,SM
-AND rm32,imm \321\300\144\1\x81\204\141 386,SM
-AND rm64,imm \324\300\144\1\x81\204\141 X64,SM
+AND rm16,imm \320\300\145\1\x81\204\141 8086,SM
+AND rm32,imm \321\300\155\1\x81\204\151 386,SM
+AND rm64,imm \324\300\155\1\x81\204\151 X64,SM
AND mem,imm8 \300\1\x80\204\21 8086,SM
-AND mem,imm16 \320\300\134\1\x81\204\131 8086,SM
-AND mem,imm32 \321\300\144\1\x81\204\141 386,SM
+AND mem,imm16 \320\300\145\1\x81\204\141 8086,SM
+AND mem,imm32 \321\300\155\1\x81\204\151 386,SM
ARPL mem,reg16 \300\1\x63\101 286,PROT,SM,NOLONG
ARPL reg16,reg16 \1\x63\101 286,PROT,NOLONG
BOUND reg16,mem \320\301\1\x62\110 186,NOLONG
@@ -175,13 +191,13 @@ BTS rm32,imm \321\300\2\x0F\xBA\205\25 386,SB
BTS rm64,imm \324\300\2\x0F\xBA\205\25 X64,SB
CALL imm \322\1\xE8\64 8086
CALL imm|near \322\1\xE8\64 8086
-CALL imm|far \322\1\x9A\34\37 8086,ND,NOLONG
+CALL imm|far \322\1\x9A\34\74 8086,ND,NOLONG
CALL imm16 \320\1\xE8\64 8086
CALL imm16|near \320\1\xE8\64 8086
-CALL imm16|far \320\1\x9A\34\37 8086,ND,NOLONG
+CALL imm16|far \320\1\x9A\34\74 8086,ND,NOLONG
CALL imm32 \321\1\xE8\64 386
CALL imm32|near \321\1\xE8\64 386
-CALL imm32|far \321\1\x9A\34\37 386,ND,NOLONG
+CALL imm32|far \321\1\x9A\34\74 386,ND,NOLONG
CALL imm:imm \322\1\x9A\35\30 8086,NOLONG
CALL imm16:imm \320\1\x9A\31\30 8086,NOLONG
CALL imm:imm16 \320\1\x9A\31\30 8086,NOLONG
@@ -238,12 +254,12 @@ CMP reg_eax,imm \321\1\x3D\41 386,SM
CMP reg_rax,sbyte \321\1\x83\207\15 X64,SM,ND
CMP reg_rax,imm \321\1\x3D\41 X64,SM
CMP rm8,imm \300\1\x80\207\21 8086,SM
-CMP rm16,imm \320\300\134\1\x81\207\131 8086,SM
-CMP rm32,imm \321\300\144\1\x81\207\141 386,SM
-CMP rm64,imm \324\300\144\1\x81\207\141 X64,SM
+CMP rm16,imm \320\300\145\1\x81\207\141 8086,SM
+CMP rm32,imm \321\300\155\1\x81\207\151 386,SM
+CMP rm64,imm \324\300\155\1\x81\207\151 X64,SM
CMP mem,imm8 \300\1\x80\207\21 8086,SM
-CMP mem,imm16 \320\300\134\1\x81\207\131 8086,SM
-CMP mem,imm32 \321\300\144\1\x81\207\141 386,SM
+CMP mem,imm16 \320\300\145\1\x81\207\141 8086,SM
+CMP mem,imm32 \321\300\155\1\x81\207\151 386,SM
CMPSB void \335\1\xA6 8086
CMPSD void \335\321\1\xA7 386
CMPSQ void \335\324\1\xA7 X64
@@ -270,8 +286,6 @@ CWD void \320\1\x99 8086
CWDE void \321\1\x98 386
DAA void \1\x27 8086,NOLONG
DAS void \1\x2F 8086,NOLONG
-DB ignore ignore ignore
-DD ignore ignore ignore
DEC reg16 \320\10\x48 8086,NOLONG
DEC reg32 \321\10\x48 386,NOLONG
DEC rm8 \300\1\xFE\201 8086
@@ -282,9 +296,6 @@ DIV rm8 \300\1\xF6\206 8086
DIV rm16 \320\300\1\xF7\206 8086
DIV rm32 \321\300\1\xF7\206 386
DIV rm64 \324\300\1\xF7\206 X64
-DQ ignore ignore ignore
-DT ignore ignore ignore
-DW ignore ignore ignore
EMMS void \2\x0F\x77 PENT,MMX
ENTER imm,imm \1\xC8\30\25 186
EQU imm \0 8086
@@ -497,38 +508,38 @@ IMUL reg64,reg64 \324\2\x0F\xAF\110 X64
IMUL reg16,mem,imm8 \320\301\1\x6B\110\16 186,SM
IMUL reg16,mem,sbyte \320\301\1\x6B\110\16 186,SM,ND
IMUL reg16,mem,imm16 \320\301\1\x69\110\32 186,SM
-IMUL reg16,mem,imm \320\301\135\1\x69\110\132 186,SM,ND
+IMUL reg16,mem,imm \320\301\146\1\x69\110\142 186,SM,ND
IMUL reg16,reg16,imm8 \320\1\x6B\110\16 186
IMUL reg16,reg16,sbyte \320\1\x6B\110\16 186,SM,ND
IMUL reg16,reg16,imm16 \320\1\x69\110\32 186
-IMUL reg16,reg16,imm \320\135\1\x69\110\132 186,SM,ND
+IMUL reg16,reg16,imm \320\146\1\x69\110\142 186,SM,ND
IMUL reg32,mem,imm8 \321\301\1\x6B\110\16 386,SM
IMUL reg32,mem,sbyte \321\301\1\x6B\110\16 386,SM,ND
IMUL reg32,mem,imm32 \321\301\1\x69\110\42 386,SM
-IMUL reg32,mem,imm \321\301\145\1\x69\110\142 386,SM,ND
+IMUL reg32,mem,imm \321\301\156\1\x69\110\152 386,SM,ND
IMUL reg32,reg32,imm8 \321\1\x6B\110\16 386
IMUL reg32,reg32,sbyte \321\1\x6B\110\16 386,SM,ND
IMUL reg32,reg32,imm32 \321\1\x69\110\42 386
-IMUL reg32,reg32,imm \321\145\1\x69\110\142 386,SM,ND
+IMUL reg32,reg32,imm \321\156\1\x69\110\152 386,SM,ND
IMUL reg64,mem,imm8 \324\301\1\x6B\110\16 X64,SM
IMUL reg64,mem,sbyte \324\301\1\x6B\110\16 X64,SM,ND
IMUL reg64,mem,imm32 \324\301\1\x69\110\42 X64,SM
-IMUL reg64,mem,imm \324\301\145\1\x69\110\142 X64,SM,ND
+IMUL reg64,mem,imm \324\301\156\1\x69\110\152 X64,SM,ND
IMUL reg64,reg64,imm8 \324\1\x6B\110\16 X64
IMUL reg64,reg64,sbyte \324\1\x6B\110\16 X64,SM,ND
IMUL reg64,reg64,imm32 \324\1\x69\110\42 X64
-IMUL reg64,reg64,imm \324\145\1\x69\110\142 X64,SM,ND
+IMUL reg64,reg64,imm \324\156\1\x69\110\152 X64,SM,ND
IMUL reg16,imm8 \320\1\x6B\100\15 186
IMUL reg16,sbyte \320\1\x6B\100\15 186,SM,ND
IMUL reg16,imm16 \320\1\x69\100\31 186
-IMUL reg16,imm \320\134\1\x69\100\131 186,SM,ND
+IMUL reg16,imm \320\145\1\x69\100\141 186,SM,ND
IMUL reg32,imm8 \321\1\x6B\100\15 386
IMUL reg32,sbyte \321\1\x6B\100\15 386,SM,ND
IMUL reg32,imm32 \321\1\x69\100\41 386
-IMUL reg32,imm \321\144\1\x69\100\141 386,SM,ND
+IMUL reg32,imm \321\155\1\x69\100\151 386,SM,ND
IMUL reg64,sbyte \324\1\x6B\100\15 X64,SM,ND
IMUL reg64,imm32 \324\1\x69\100\41 X64
-IMUL reg64,imm \324\144\1\x69\100\141 X64,SM,ND
+IMUL reg64,imm \324\155\1\x69\100\151 X64,SM,ND
IN reg_al,imm \1\xE4\25 8086,SB
IN reg_ax,imm \320\1\xE5\25 8086,SB
IN reg_eax,imm \321\1\xE5\25 386,SB
@@ -564,13 +575,13 @@ JMP imm|short \1\xEB\50 8086
JMP imm \371\1\xEB\50 8086,ND
JMP imm \322\1\xE9\64 8086
JMP imm|near \322\1\xE9\64 8086,ND
-JMP imm|far \322\1\xEA\34\37 8086,ND,NOLONG
+JMP imm|far \322\1\xEA\34\74 8086,ND,NOLONG
JMP imm16 \320\1\xE9\64 8086
JMP imm16|near \320\1\xE9\64 8086,ND
-JMP imm16|far \320\1\xEA\34\37 8086,ND,NOLONG
+JMP imm16|far \320\1\xEA\34\74 8086,ND,NOLONG
JMP imm32 \321\1\xE9\64 386
JMP imm32|near \321\1\xE9\64 386,ND
-JMP imm32|far \321\1\xEA\34\37 386,ND,NOLONG
+JMP imm32|far \321\1\xEA\34\74 386,ND,NOLONG
JMP imm:imm \322\1\xEA\35\30 8086,NOLONG
JMP imm16:imm \320\1\xEA\31\30 8086,NOLONG
JMP imm:imm16 \320\1\xEA\31\30 8086,NOLONG
@@ -618,9 +629,9 @@ LGDT mem \300\2\x0F\x01\202 286,PRIV
LGS reg16,mem \320\301\2\x0F\xB5\110 386
LGS reg32,mem \321\301\2\x0F\xB5\110 386
LIDT mem \300\2\x0F\x01\203 286,PRIV
-LLDT mem \300\1\x0F\17\202 286,PROT,PRIV
-LLDT mem16 \300\1\x0F\17\202 286,PROT,PRIV
-LLDT reg16 \1\x0F\17\202 286,PROT,PRIV
+LLDT mem \300\1\x0F\170\202 286,PROT,PRIV
+LLDT mem16 \300\1\x0F\170\202 286,PROT,PRIV
+LLDT reg16 \1\x0F\170\202 286,PROT,PRIV
LMSW mem \300\2\x0F\x01\206 286,PRIV
LMSW mem16 \300\2\x0F\x01\206 286,PRIV
LMSW reg16 \2\x0F\x01\206 286,PRIV
@@ -658,9 +669,9 @@ LSL reg64,mem \324\301\2\x0F\x03\110 X64,SM
LSL reg64,reg64 \324\2\x0F\x03\110 X64,PROT
LSS reg16,mem \320\301\2\x0F\xB2\110 386
LSS reg32,mem \321\301\2\x0F\xB2\110 386
-LTR mem \300\1\x0F\17\203 286,PROT,PRIV
-LTR mem16 \300\1\x0F\17\203 286,PROT,PRIV,NOLONG
-LTR reg16 \1\x0F\17\203 286,PROT,PRIV,NOLONG
+LTR mem \300\1\x0F\170\203 286,PROT,PRIV
+LTR mem16 \300\1\x0F\170\203 286,PROT,PRIV,NOLONG
+LTR reg16 \1\x0F\170\203 286,PROT,PRIV,NOLONG
MFENCE void \3\x0F\xAE\xF0 X64,AMD
MONITOR void \3\x0F\x01\xC8 PRESCOTT
MONITOR reg_eax,reg_ecx,reg_edx \3\x0F\x01\xC8 PRESCOTT,ND
@@ -791,12 +802,12 @@ OR reg_eax,imm \321\1\x0D\41 386,SM
OR reg_rax,sbyte \321\1\x83\201\15 X64,SM,ND
OR reg_rax,imm \321\1\x0D\41 X64,SM
OR rm8,imm \300\1\x80\201\21 8086,SM
-OR rm16,imm \320\300\134\1\x81\201\131 8086,SM
-OR rm32,imm \321\300\144\1\x81\201\141 386,SM
-OR rm64,imm \324\300\144\1\x81\201\141 X64,SM
+OR rm16,imm \320\300\145\1\x81\201\141 8086,SM
+OR rm32,imm \321\300\155\1\x81\201\151 386,SM
+OR rm64,imm \324\300\155\1\x81\201\151 X64,SM
OR mem,imm8 \300\1\x80\201\21 8086,SM
-OR mem,imm16 \320\300\134\1\x81\201\131 8086,SM
-OR mem,imm32 \321\300\144\1\x81\201\141 386,SM
+OR mem,imm16 \320\300\145\1\x81\201\141 8086,SM
+OR mem,imm32 \321\300\155\1\x81\201\151 386,SM
OUT imm,reg_al \1\xE6\24 8086,SB
OUT imm,reg_ax \320\1\xE7\24 8086,SB
OUT imm,reg_eax \321\1\xE7\24 386,SB
@@ -990,9 +1001,9 @@ PUSH reg_dess \6 8086,NOLONG
PUSH reg_fsgs \1\x0F\7 386
PUSH imm8 \1\x6A\14 186
PUSH sbyte \1\x6A\14 186,ND
-PUSH imm16 \320\133\1\x68\130 186
-PUSH imm32 \321\143\1\x68\140 386,NOLONG
-PUSH imm64 \321\143\1\x68\140 X64
+PUSH imm16 \320\144\1\x68\140 186
+PUSH imm32 \321\154\1\x68\150 386,NOLONG
+PUSH imm64 \321\154\1\x68\150 X64
PUSH imm \1\x68\34 186
PUSHA void \322\1\x60 186,NOLONG
PUSHAD void \321\1\x60 386,NOLONG
@@ -1032,11 +1043,6 @@ RDMSR void \2\x0F\x32 PENT,PRIV
RDPMC void \2\x0F\x33 P6
RDTSC void \2\x0F\x31 PENT
RDTSCP void \3\x0F\x01\xF9 X64
-RESB imm \340 8086
-RESD ignore ignore ignore
-RESQ ignore ignore ignore
-REST ignore ignore ignore
-RESW ignore ignore ignore
RET void \1\xC3 8086
RET imm \1\xC2\30 8086,SW
RETF void \1\xCB 8086
@@ -1124,12 +1130,12 @@ SBB reg_eax,imm \321\1\x1D\41 386,SM
SBB reg_rax,sbyte \321\1\x83\203\15 X64,SM,ND
SBB reg_rax,imm \321\1\x1D\41 X64,SM
SBB rm8,imm \300\1\x80\203\21 8086,SM
-SBB rm16,imm \320\300\134\1\x81\203\131 8086,SM
-SBB rm32,imm \321\300\144\1\x81\203\141 386,SM
-SBB rm64,imm \324\300\144\1\x81\203\141 X64,SM
+SBB rm16,imm \320\300\145\1\x81\203\141 8086,SM
+SBB rm32,imm \321\300\155\1\x81\203\151 386,SM
+SBB rm64,imm \324\300\155\1\x81\203\151 X64,SM
SBB mem,imm8 \300\1\x80\203\21 8086,SM
-SBB mem,imm16 \320\300\134\1\x81\203\131 8086,SM
-SBB mem,imm32 \321\300\144\1\x81\203\141 386,SM
+SBB mem,imm16 \320\300\145\1\x81\203\141 8086,SM
+SBB mem,imm32 \321\300\155\1\x81\203\151 386,SM
SCASB void \335\1\xAE 8086
SCASD void \335\321\1\xAF 386
SCASQ void \335\324\1\xAF X64
@@ -1185,10 +1191,10 @@ SHRD reg32,reg32,reg_cl \321\2\x0F\xAD\101 386
SHRD mem,reg64,reg_cl \300\324\2\x0F\xAD\101 X64,SM
SHRD reg64,reg64,reg_cl \324\2\x0F\xAD\101 X64
SIDT mem \300\2\x0F\x01\201 286
-SLDT mem \300\1\x0F\17\200 286
-SLDT mem16 \300\1\x0F\17\200 286
-SLDT reg16 \320\1\x0F\17\200 286
-SLDT reg32 \321\1\x0F\17\200 386
+SLDT mem \300\1\x0F\170\200 286
+SLDT mem16 \300\1\x0F\170\200 286
+SLDT reg16 \320\1\x0F\170\200 286
+SLDT reg32 \321\1\x0F\170\200 386
SKINIT void \3\x0F\x01\xDE X64
SMI void \1\xF1 386,UNDOC
SMINT void \2\x0F\x38 P6,CYRIX
@@ -1206,11 +1212,11 @@ STOSB void \1\xAA 8086
STOSD void \321\1\xAB 386
STOSQ void \324\1\xAB X64
STOSW void \320\1\xAB 8086
-STR mem \300\1\x0F\17\201 286,PROT
-STR mem16 \300\1\x0F\17\201 286,PROT
-STR reg16 \320\1\x0F\17\201 286,PROT
-STR reg32 \321\1\x0F\17\201 386,PROT
-STR reg64 \324\1\x0F\17\201 X64
+STR mem \300\1\x0F\170\201 286,PROT
+STR mem16 \300\1\x0F\170\201 286,PROT
+STR reg16 \320\1\x0F\170\201 286,PROT
+STR reg32 \321\1\x0F\170\201 386,PROT
+STR reg64 \324\1\x0F\170\201 X64
SUB mem,reg8 \300\1\x28\101 8086,SM
SUB reg8,reg8 \1\x28\101 8086
SUB mem,reg16 \320\300\1\x29\101 8086,SM
@@ -1238,12 +1244,12 @@ SUB reg_eax,imm \321\1\x2D\41 386,SM
SUB reg_rax,sbyte \321\1\x83\205\15 X64,SM,ND
SUB reg_rax,imm \321\1\x2D\41 X64,SM
SUB rm8,imm \300\1\x80\205\21 8086,SM
-SUB rm16,imm \320\300\134\1\x81\205\131 8086,SM
-SUB rm32,imm \321\300\144\1\x81\205\141 386,SM
-SUB rm64,imm \324\300\144\1\x81\205\141 X64,SM
+SUB rm16,imm \320\300\145\1\x81\205\141 8086,SM
+SUB rm32,imm \321\300\155\1\x81\205\151 386,SM
+SUB rm64,imm \324\300\155\1\x81\205\151 X64,SM
SUB mem,imm8 \300\1\x80\205\21 8086,SM
-SUB mem,imm16 \320\300\134\1\x81\205\131 8086,SM
-SUB mem,imm32 \321\300\144\1\x81\205\141 386,SM
+SUB mem,imm16 \320\300\145\1\x81\205\141 8086,SM
+SUB mem,imm32 \321\300\155\1\x81\205\151 386,SM
SVDC mem80,reg_sreg \300\2\x0F\x78\101 486,CYRIX,SMM
SVLDT mem80 \300\2\x0F\x7A\200 486,CYRIX,SMM
SVTS mem80 \300\2\x0F\x7C\200 486,CYRIX,SMM
@@ -1290,12 +1296,12 @@ UMOV reg16,mem \320\301\2\x0F\x13\110 386,UNDOC,SM
UMOV reg16,reg16 \320\2\x0F\x13\110 386,UNDOC
UMOV reg32,mem \321\301\2\x0F\x13\110 386,UNDOC,SM
UMOV reg32,reg32 \321\2\x0F\x13\110 386,UNDOC
-VERR mem \300\1\x0F\17\204 286,PROT
-VERR mem16 \300\1\x0F\17\204 286,PROT
-VERR reg16 \1\x0F\17\204 286,PROT
-VERW mem \300\1\x0F\17\205 286,PROT
-VERW mem16 \300\1\x0F\17\205 286,PROT
-VERW reg16 \1\x0F\17\205 286,PROT
+VERR mem \300\1\x0F\170\204 286,PROT
+VERR mem16 \300\1\x0F\170\204 286,PROT
+VERR reg16 \1\x0F\170\204 286,PROT
+VERW mem \300\1\x0F\170\205 286,PROT
+VERW mem16 \300\1\x0F\170\205 286,PROT
+VERW reg16 \1\x0F\170\205 286,PROT
WAIT void \1\x9B 8086
FWAIT void \1\x9B 8086
WBINVD void \2\x0F\x09 486,PRIV
@@ -1363,12 +1369,12 @@ XOR reg_eax,imm \321\1\x35\41 386,SM
XOR reg_rax,sbyte \321\1\x83\206\15 X64,SM,ND
XOR reg_rax,imm \321\1\x35\41 X64,SM
XOR rm8,imm \300\1\x80\206\21 8086,SM
-XOR rm16,imm \320\300\134\1\x81\206\131 8086,SM
-XOR rm32,imm \321\300\144\1\x81\206\141 386,SM
-XOR rm64,imm \324\300\144\1\x81\206\141 X64,SM
+XOR rm16,imm \320\300\145\1\x81\206\141 8086,SM
+XOR rm32,imm \321\300\155\1\x81\206\151 386,SM
+XOR rm64,imm \324\300\155\1\x81\206\151 X64,SM
XOR mem,imm8 \300\1\x80\206\21 8086,SM
-XOR mem,imm16 \320\300\134\1\x81\206\131 8086,SM
-XOR mem,imm32 \321\300\144\1\x81\206\141 386,SM
+XOR mem,imm16 \320\300\145\1\x81\206\141 8086,SM
+XOR mem,imm32 \321\300\155\1\x81\206\151 386,SM
XSTORE void \3\x0F\xA7\xC0 P6,CYRIX
CMOVcc reg16,mem \320\301\1\x0F\330\x40\110 P6,SM
CMOVcc reg16,reg16 \320\1\x0F\330\x40\110 P6
@@ -2023,3 +2029,169 @@ PCMPGTQ xmmreg,xmmrm \366\3\x0F\x38\x37\110 SSE42
POPCNT reg16,rm16 \320\333\2\x0F\xB8\110 NEHALEM
POPCNT reg32,rm32 \321\333\2\x0F\xB8\110 NEHALEM
POPCNT reg64,rm32 \324\333\2\x0F\xB8\110 NEHALEM,X64
+
+; AMD SSE5 instructions
+
+; Four operands with DREX
+FMADDPS xmmreg,=0,xmmreg,xmmrm \160\2\x0F\x24\170\132 SSE5,AMD
+FMADDPS xmmreg,=0,xmmrm,xmmreg \164\2\x0F\x24\170\123 SSE5,AMD
+FMADDPS xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x04\121 SSE5,AMD
+FMADDPS xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x04\112 SSE5,AMD
+FMADDPD xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x01\132 SSE5,AMD
+FMADDPD xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x01\123 SSE5,AMD
+FMADDPD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x05\121 SSE5,AMD
+FMADDPD xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x05\112 SSE5,AMD
+FMADDSS xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x02\132 SSE5,AMD
+FMADDSS xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x02\123 SSE5,AMD
+FMADDSS xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x06\121 SSE5,AMD
+FMADDSS xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x06\112 SSE5,AMD
+FMADDSD xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x03\132 SSE5,AMD
+FMADDSD xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x03\123 SSE5,AMD
+FMADDSD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x07\121 SSE5,AMD
+FMADDSD xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x07\112 SSE5,AMD
+FMSUBPS xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x08\132 SSE5,AMD
+FMSUBPS xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x08\123 SSE5,AMD
+FMSUBPS xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x0C\121 SSE5,AMD
+FMSUBPS xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x0C\112 SSE5,AMD
+FMSUBPD xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x09\132 SSE5,AMD
+FMSUBPD xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x09\123 SSE5,AMD
+FMSUBPD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x0D\121 SSE5,AMD
+FMSUBPD xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x0D\112 SSE5,AMD
+FMSUBSS xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x0A\132 SSE5,AMD
+FMSUBSS xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x0A\123 SSE5,AMD
+FMSUBSS xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x0E\121 SSE5,AMD
+FMSUBSS xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x0E\112 SSE5,AMD
+FMSUBSD xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x0B\132 SSE5,AMD
+FMSUBSD xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x0B\123 SSE5,AMD
+FMSUBSD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x0F\121 SSE5,AMD
+FMSUBSD xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x0F\112 SSE5,AMD
+FMNADDPS xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x10\132 SSE5,AMD
+FMNADDPS xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x10\123 SSE5,AMD
+FMNADDPS xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x14\121 SSE5,AMD
+FMNADDPS xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x14\112 SSE5,AMD
+FMNADDPD xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x11\132 SSE5,AMD
+FMNADDPD xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x11\123 SSE5,AMD
+FMNADDPD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x15\121 SSE5,AMD
+FMNADDPD xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x15\112 SSE5,AMD
+FMNADDSS xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x12\132 SSE5,AMD
+FMNADDSS xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x12\123 SSE5,AMD
+FMNADDSS xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x16\121 SSE5,AMD
+FMNADDSS xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x16\112 SSE5,AMD
+FMNADDSD xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x13\132 SSE5,AMD
+FMNADDSD xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x13\123 SSE5,AMD
+FMNADDSD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x17\121 SSE5,AMD
+FMNADDSD xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x17\112 SSE5,AMD
+FMNSUBPS xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x18\132 SSE5,AMD
+FMNSUBPS xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x18\123 SSE5,AMD
+FMNSUBPS xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x1C\121 SSE5,AMD
+FMNSUBPS xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x1C\112 SSE5,AMD
+FMNSUBPD xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x19\132 SSE5,AMD
+FMNSUBPD xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x19\123 SSE5,AMD
+FMNSUBPD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x1D\121 SSE5,AMD
+FMNSUBPD xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x1D\112 SSE5,AMD
+FMNSUBSS xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x1A\132 SSE5,AMD
+FMNSUBSS xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x1A\123 SSE5,AMD
+FMNSUBSS xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x1E\121 SSE5,AMD
+FMNSUBSS xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x1E\112 SSE5,AMD
+FMNSUBSD xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x1B\132 SSE5,AMD
+FMNSUBSD xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x1B\123 SSE5,AMD
+FMNSUBSD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x1F\121 SSE5,AMD
+FMNSUBSD xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x1F\112 SSE5,AMD
+COMPS xmmreg,xmmreg,xmmrm,imm \160\3\x0F\x25\x2C\121\27 SSE5,AMD
+COMPD xmmreg,xmmreg,xmmrm,imm \160\3\x0F\x25\x2D\121\27 SSE5,AMD
+COMSS xmmreg,xmmreg,xmmrm,imm \160\3\x0F\x25\x2E\121\27 SSE5,AMD
+COMSD xmmreg,xmmreg,xmmrm,imm \160\3\x0F\x25\x2F\121\27 SSE5,AMD
+PCOMB xmmreg,xmmreg,xmmrm,imm \160\3\x0F\x25\x4C\121\27 SSE5,AMD
+PCOMW xmmreg,xmmreg,xmmrm,imm \160\3\x0F\x25\x4D\121\27 SSE5,AMD
+PCOMD xmmreg,xmmreg,xmmrm,imm \160\3\x0F\x25\x4E\121\27 SSE5,AMD
+PCOMQ xmmreg,xmmreg,xmmrm,imm \160\3\x0F\x25\x4F\121\27 SSE5,AMD
+PCOMUB xmmreg,xmmreg,xmmrm,imm \160\3\x0F\x25\x6C\121\27 SSE5,AMD
+PCOMUW xmmreg,xmmreg,xmmrm,imm \160\3\x0F\x25\x6D\121\27 SSE5,AMD
+PCOMUD xmmreg,xmmreg,xmmrm,imm \160\3\x0F\x25\x6E\121\27 SSE5,AMD
+PCOMUQ xmmreg,xmmreg,xmmrm,imm \160\3\x0F\x25\x6F\121\27 SSE5,AMD
+PERMPS xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x20\132 SSE5,AMD
+PERMPS xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x20\123 SSE5,AMD
+PERMPS xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x24\121 SSE5,AMD
+PERMPS xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x24\112 SSE5,AMD
+PERMPD xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x21\132 SSE5,AMD
+PERMPD xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x21\123 SSE5,AMD
+PERMPD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x25\121 SSE5,AMD
+PERMPD xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x25\112 SSE5,AMD
+PCMOV xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x22\132 SSE5,AMD
+PCMOV xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x22\123 SSE5,AMD
+PCMOV xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x26\121 SSE5,AMD
+PCMOV xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x26\112 SSE5,AMD
+PPERM xmmreg,=0,xmmreg,xmmrm \160\3\x0F\x24\x23\132 SSE5,AMD
+PPERM xmmreg,=0,xmmrm,xmmreg \164\3\x0F\x24\x23\123 SSE5,AMD
+PPERM xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x27\121 SSE5,AMD
+PPERM xmmreg,xmmrm,xmmreg,=0 \164\3\x0F\x24\x27\112 SSE5,AMD
+PMACSSWW xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x85\121 SSE5,AMD
+PMACSWW xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x95\121 SSE5,AMD
+PMACSSWD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x86\121 SSE5,AMD
+PMACSWD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x96\121 SSE5,AMD
+PMACSSDD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x8E\121 SSE5,AMD
+PMACSDD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x9E\121 SSE5,AMD
+PMACSSDQL xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x87\121 SSE5,AMD
+PMACSDQL xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x97\121 SSE5,AMD
+PMACSSDQH xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x8F\121 SSE5,AMD
+PMACSDQH xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\x9F\121 SSE5,AMD
+PMADCSSWD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\xA6\121 SSE5,AMD
+PMADCSWD xmmreg,xmmreg,xmmrm,=0 \160\3\x0F\x24\xB6\121 SSE5,AMD
+
+; Three operands with DREX
+PROTB xmmreg,xmmreg,xmmrm \160\3\x0F\x24\x40\121 SSE5,AMD
+PROTB xmmreg,xmmrm,xmmreg \164\3\x0F\x24\x40\112 SSE5,AMD
+PROTW xmmreg,xmmreg,xmmrm \160\3\x0F\x24\x41\121 SSE5,AMD
+PROTW xmmreg,xmmrm,xmmreg \164\3\x0F\x24\x41\112 SSE5,AMD
+PROTD xmmreg,xmmreg,xmmrm \160\3\x0F\x24\x42\121 SSE5,AMD
+PROTD xmmreg,xmmrm,xmmreg \164\3\x0F\x24\x42\112 SSE5,AMD
+PROTQ xmmreg,xmmreg,xmmrm \160\3\x0F\x24\x43\121 SSE5,AMD
+PROTQ xmmreg,xmmrm,xmmreg \164\3\x0F\x24\x43\112 SSE5,AMD
+PSHLB xmmreg,xmmreg,xmmrm \160\3\x0F\x24\x44\121 SSE5,AMD
+PSHLB xmmreg,xmmrm,xmmreg \164\3\x0F\x24\x44\112 SSE5,AMD
+PSHLW xmmreg,xmmreg,xmmrm \160\3\x0F\x24\x45\121 SSE5,AMD
+PSHLW xmmreg,xmmrm,xmmreg \164\3\x0F\x24\x45\112 SSE5,AMD
+PSHLD xmmreg,xmmreg,xmmrm \160\3\x0F\x24\x46\121 SSE5,AMD
+PSHLD xmmreg,xmmrm,xmmreg \164\3\x0F\x24\x46\112 SSE5,AMD
+PSHLQ xmmreg,xmmreg,xmmrm \160\3\x0F\x24\x47\121 SSE5,AMD
+PSHLQ xmmreg,xmmrm,xmmreg \164\3\x0F\x24\x47\112 SSE5,AMD
+PSHAB xmmreg,xmmreg,xmmrm \160\3\x0F\x24\x48\121 SSE5,AMD
+PSHAB xmmreg,xmmrm,xmmreg \164\3\x0F\x24\x48\112 SSE5,AMD
+PSHAW xmmreg,xmmreg,xmmrm \160\3\x0F\x24\x49\121 SSE5,AMD
+PSHAW xmmreg,xmmrm,xmmreg \164\3\x0F\x24\x49\112 SSE5,AMD
+PSHAD xmmreg,xmmreg,xmmrm \160\3\x0F\x24\x4A\121 SSE5,AMD
+PSHAD xmmreg,xmmrm,xmmreg \164\3\x0F\x24\x4A\112 SSE5,AMD
+PSHAQ xmmreg,xmmreg,xmmrm \160\3\x0F\x24\x4B\121 SSE5,AMD
+PSHAQ xmmreg,xmmrm,xmmreg \164\3\x0F\x24\x4B\112 SSE5,AMD
+
+; Non-DREX
+FRCZPS xmmreg,xmmrm \3\x0F\x7A\x10\110 SSE5,AMD
+FRCZPD xmmreg,xmmrm \3\x0F\x7A\x11\110 SSE5,AMD
+FRCZSS xmmreg,xmmrm \3\x0F\x7A\x12\110 SSE5,AMD
+FRCZSD xmmreg,xmmrm \3\x0F\x7A\x13\110 SSE5,AMD
+CVTPH2PS xmmreg,xmmrm \3\x0F\x7A\x30\110 SSE5,AMD,SQ
+CVTPS2PH xmmrm,xmmreg \3\x0F\x7A\x31\101 SSE5,AMD,SQ
+PHADDBW xmmreg,xmmrm \3\x0F\x7A\x41\110 SSE5,AMD
+PHADDBD xmmreg,xmmrm \3\x0F\x7A\x42\110 SSE5,AMD
+PHADDBQ xmmreg,xmmrm \3\x0F\x7A\x43\110 SSE5,AMD
+PHADDWD xmmreg,xmmrm \3\x0F\x7A\x46\110 SSE5,AMD
+PHADDWQ xmmreg,xmmrm \3\x0F\x7A\x47\110 SSE5,AMD
+PHADDDQ xmmreg,xmmrm \3\x0F\x7A\x4B\110 SSE5,AMD
+PHADDUBW xmmreg,xmmrm \3\x0F\x7A\x51\110 SSE5,AMD
+PHADDUBD xmmreg,xmmrm \3\x0F\x7A\x52\110 SSE5,AMD
+PHADDUBQ xmmreg,xmmrm \3\x0F\x7A\x53\110 SSE5,AMD
+PHADDUWD xmmreg,xmmrm \3\x0F\x7A\x56\110 SSE5,AMD
+PHADDUWQ xmmreg,xmmrm \3\x0F\x7A\x57\110 SSE5,AMD
+PHADDUDQ xmmreg,xmmrm \3\x0F\x7A\x5B\110 SSE5,AMD
+PHSUBBW xmmreg,xmmrm \3\x0F\x7A\x61\110 SSE5,AMD
+PHSUBWD xmmreg,xmmrm \3\x0F\x7A\x62\110 SSE5,AMD
+PHSUBDQ xmmreg,xmmrm \3\x0F\x7A\x63\110 SSE5,AMD
+PROTB xmmreg,xmmrm,imm \3\x0F\x7B\x40\110\26 SSE5,AMD
+PROTW xmmreg,xmmrm,imm \3\x0F\x7B\x41\110\26 SSE5,AMD
+PROTD xmmreg,xmmrm,imm \3\x0F\x7B\x42\110\26 SSE5,AMD
+PROTQ xmmreg,xmmrm,imm \3\x0F\x7B\x43\110\26 SSE5,AMD
+PTEST xmmreg,xmmrm \366\3\x0F\x38\x17\110 SSE5,AMD
+ROUNDPS xmmreg,xmmrm,imm \366\3\x0F\x3A\x08\110\26 SSE5,AMD
+ROUNDPD xmmreg,xmmrm,imm \366\3\x0F\x3A\x08\110\26 SSE5,AMD
+ROUNDSS xmmreg,xmmrm,imm \366\3\x0F\x3A\x08\110\26 SSE5,AMD
+ROUNDSD xmmreg,xmmrm,imm \366\3\x0F\x3A\x08\110\26 SSE5,AMD
diff --git a/insns.h b/insns.h
index 4deccf94..314737af 100644
--- a/insns.h
+++ b/insns.h
@@ -9,26 +9,35 @@
#ifndef NASM_INSNS_H
#define NASM_INSNS_H
-#include "insnsi.h" /* instruction opcode enum */
+#include "nasm.h"
/* max length of any instruction, register name etc. */
-#if MAX_INSLEN > 9 /* MAX_INSLEN defined in insnsi.h */
+#if MAX_INSLEN > 12 /* MAX_INSLEN defined in insnsi.h */
#define MAX_KEYWORD MAX_INSLEN
#else
-#define MAX_KEYWORD 9
+#define MAX_KEYWORD 12
#endif
struct itemplate {
enum opcode opcode; /* the token, passed from "parser.c" */
int operands; /* number of operands */
- int32_t opd[3]; /* bit flags for operand types */
+ opflags_t opd[MAX_OPERANDS]; /* bit flags for operand types */
const char *code; /* the code it assembles to */
uint32_t flags; /* some flags */
};
+/* Disassembler table structure */
+/* If n == -1, then p points to another table of 256
+ struct disasm_index, otherwise p points to a list of n
+ struct itemplates to consider. */
+struct disasm_index {
+ const void *p;
+ int n;
+};
+
/* Tables for the assembler and disassembler, respectively */
extern const struct itemplate * const nasm_instructions[];
-extern const struct itemplate * const * const itable[];
+extern const struct disasm_index itable[256];
/*
* this define is used to signify the end of an itemplate
@@ -66,12 +75,15 @@ extern const struct itemplate * const * const itable[];
#define IF_SM2 0x00000002UL /* size match first two operands */
#define IF_SB 0x00000004UL /* unsized operands can't be non-byte */
#define IF_SW 0x00000008UL /* unsized operands can't be non-word */
-#define IF_SD 0x00000010UL /* unsized operands can't be non-dword */
-#define IF_SQ 0x00000020UL /* unsized operands can't be non-qword */
-#define IF_AR0 0x00000040UL /* SB, SW, SD applies to argument 0 */
-#define IF_AR1 0x00000080UL /* SB, SW, SD applies to argument 1 */
-#define IF_AR2 0x000000C0UL /* SB, SW, SD applies to argument 2 */
-#define IF_ARMASK 0x000000C0UL /* mask for unsized argument spec */
+#define IF_SD 0x0000000CUL /* unsized operands can't be non-dword */
+#define IF_SQ 0x00000010UL /* unsized operands can't be non-qword */
+#define IF_SO 0x00000014UL /* unsized operands can't be non-oword */
+#define IF_SMASK 0x0000001CUL /* mask for unsized argument size */
+#define IF_AR0 0x00000020UL /* SB, SW, SD applies to argument 0 */
+#define IF_AR1 0x00000040UL /* SB, SW, SD applies to argument 1 */
+#define IF_AR2 0x00000060UL /* SB, SW, SD applies to argument 2 */
+#define IF_AR3 0x00000080UL /* SB, SW, SD applies to argument 3 */
+#define IF_ARMASK 0x000000E0UL /* mask for unsized argument spec */
#define IF_PRIV 0x00000100UL /* it's a privileged instruction */
#define IF_SMM 0x00000200UL /* it's only valid in SMM */
#define IF_PROT 0x00000400UL /* it's protected mode only */
@@ -88,6 +100,7 @@ extern const struct itemplate * const * const itable[];
#define IF_SSSE3 0x00200000UL /* it's an SSSE3 instruction */
#define IF_SSE41 0x00400000UL /* it's an SSE4.1 instruction */
#define IF_SSE42 0x00800000UL /* it's an SSE4.2 instruction */
+#define IF_SSE5 0x00800000UL /* HACK NEED TO REORGANIZE THESE BITS */
#define IF_PMASK 0xFF000000UL /* the mask for processor types */
#define IF_PLEVEL 0x0F000000UL /* the mask for processor instr. level */
/* also the highest possible processor */
diff --git a/insns.pl b/insns.pl
index 421f16aa..356c183d 100644
--- a/insns.pl
+++ b/insns.pl
@@ -7,6 +7,10 @@
# redistributable under the licence given in the file "Licence"
# distributed in the NASM archive.
+# Opcode prefixes which need their own opcode tables
+# LONGER PREFIXES FIRST!
+@disasm_prefixes = qw(0F0F 0F24 0F25 0F38 0F3A 0F7A 0F);
+
print STDERR "Reading insns.dat...\n";
@args = ();
@@ -26,6 +30,8 @@ foreach $arg ( @ARGV ) {
$fname = "insns.dat" unless $fname = $args[0];
open (F, $fname) || die "unable to open $fname";
+%dinstables = ();
+
$line = 0;
$insns = 0;
while (<F>) {
@@ -50,9 +56,11 @@ while (<F>) {
}
if ($formatted && !$nd) {
push @big, $formatted;
- foreach $i (&startbyte($_[2])) {
- $aname = sprintf "dd_%02X",$i;
- push @$aname, $#big;
+ foreach $i (startseq($_[2])) {
+ if (!defined($dinstables{$i})) {
+ $dinstables{$i} = [];
+ }
+ push(@{$dinstables{$i}}, $#big);
}
}
}
@@ -102,26 +110,42 @@ if ( !defined($output) || $output eq 'd' ) {
print D "\n";
print D "static const struct itemplate instrux[] = {\n";
+ $n = 0;
foreach $j (@big) {
- print D " $j\n";
+ printf D " /* %4d */ %s\n", $n++, $j;
}
- print D " ITEMPLATE_END\n};\n\n";
-
- for ($c=0; $c<256; $c++) {
- $h = sprintf "%02X", $c;
- print D "static const struct itemplate * const itable_${h}[] = {\n";
- $aname = "dd_$h";
- foreach $j (@$aname) {
+ print D "};\n";
+
+ foreach $h (sort(keys(%dinstables))) {
+ print D "\nstatic const struct itemplate * const itable_${h}[] = {\n";
+ foreach $j (@{$dinstables{$h}}) {
print D " instrux + $j,\n";
}
- print D " NULL\n};\n\n";
- }
-
- print D "const struct itemplate * const * const itable[] = {\n";
- for ($c=0; $c<256; $c++) {
- printf D " itable_%02X,\n", $c;
+ print D "};\n";
}
+
+ foreach $h (@disasm_prefixes, '') {
+ $is_prefix{$h} = 1;
+ print D "\n";
+ print D "static " unless ($h eq '');
+ print D "const struct disasm_index ";
+ print D ($h eq '') ? 'itable' : "itable_$h";
+ print D "[256] = {\n";
+ for ($c = 0; $c < 256; $c++) {
+ $nn = sprintf("%s%02X", $h, $c);
+ if ($is_prefix{$nn}) {
+ die "$0: ambiguous decoding of $nn\n"
+ if (defined($dinstables{$nn}));
+ printf D " { itable_%s, -1 },\n", $nn;
+ } elsif (defined($dinstables{$nn})) {
+ printf D " { itable_%s, %u },\n",
+ $nn, scalar(@{$dinstables{$nn}});
+ } else {
+ printf D " { NULL, 0 },\n";
+ }
+ }
print D "};\n";
+ }
close D;
}
@@ -203,60 +227,130 @@ if ( !defined($output) || $output eq 'n' ) {
printf STDERR "Done: %d instructions\n", $insns;
sub format {
- local ($opcode, $operands, $codes, $flags) = @_;
- local $num, $nd = 0;
-
- return (undef, undef) if $operands eq "ignore";
+ my ($opcode, $operands, $codes, $flags) = @_;
+ my $num, $nd = 0;
- # format the operands
- $operands =~ s/:/|colon,/g;
- $operands =~ s/mem(\d+)/mem|bits$1/g;
- $operands =~ s/mem/memory/g;
- $operands =~ s/memory_offs/mem_offs/g;
- $operands =~ s/imm(\d+)/imm|bits$1/g;
- $operands =~ s/imm/immediate/g;
- $operands =~ s/rm(\d+)/rm_gpr|bits$1/g;
- $operands =~ s/mmxrm/rm_mmx/g;
- $operands =~ s/xmmrm/rm_xmm/g;
- $num = 3;
- $operands = '0,0,0', $num = 0 if $operands eq 'void';
- $operands .= ',0', $num-- while $operands !~ /,.*,/;
- $operands =~ tr/a-z/A-Z/;
+ return (undef, undef) if $operands eq "ignore";
+
+ # format the operands
+ $operands =~ s/:/|colon,/g;
+ $operands =~ s/mem(\d+)/mem|bits$1/g;
+ $operands =~ s/mem/memory/g;
+ $operands =~ s/memory_offs/mem_offs/g;
+ $operands =~ s/imm(\d+)/imm|bits$1/g;
+ $operands =~ s/imm/immediate/g;
+ $operands =~ s/rm(\d+)/rm_gpr|bits$1/g;
+ $operands =~ s/mmxrm/rm_mmx/g;
+ $operands =~ s/xmmrm/rm_xmm/g;
+ $operands =~ s/\=([0-9]+)/same_as|$1/g;
+ if ($operands eq 'void') {
+ @ops = ();
+ } else {
+ @ops = split(/\,/, $operands);
+ }
+ $num = scalar(@ops);
+ while (scalar(@ops) < 4) {
+ push(@ops, '0');
+ }
+ $operands = join(',', @ops);
+ $operands =~ tr/a-z/A-Z/;
+
+ # format the flags
+ $flags =~ s/,/|IF_/g;
+ $flags =~ s/(\|IF_ND|IF_ND\|)//, $nd = 1 if $flags =~ /IF_ND/;
+ $flags = "IF_" . $flags;
+
+ ("{I_$opcode, $num, {$operands}, \"$codes\", $flags},", $nd);
+}
- # format the flags
- $flags =~ s/,/|IF_/g;
- $flags =~ s/(\|IF_ND|IF_ND\|)//, $nd = 1 if $flags =~ /IF_ND/;
- $flags = "IF_" . $flags;
+sub hexlist($$$) {
+ my($prefix, $start, $n) = @_;
+ my $i;
+ my @l = ();
- ("{I_$opcode, $num, {$operands}, \"$codes\", $flags},", $nd);
+ for ($i = 0; $i < $n; $i++) {
+ push(@l, sprintf("%s%02X", $prefix, $start+$i));
+ }
+ return @l;
}
# Here we determine the range of possible starting bytes for a given
# instruction. We need only consider the codes:
# \1 \2 \3 mean literal bytes, of course
# \4 \5 \6 \7 mean PUSH/POP of segment registers: special case
-# \10 \11 \12 mean byte plus register value
-# \17 means byte zero
+# \1[0123] mean byte plus register value
+# \170 means byte zero
# \330 means byte plus condition code
# \0 or \340 mean give up and return empty set
-sub startbyte {
- local ($codes) = @_;
- local $word, @range;
+sub startseq($) {
+ my ($codestr) = @_;
+ my $word, @range;
+ my @codes = ();
+ my $c = $codestr;
+ my $c0, $c1, $i;
+ my $prefix = '';
+
+ # Although these are C-syntax strings, by convention they should have
+ # only octal escapes (for directives) and hexadecimal escapes
+ # (for verbatim bytes)
+ while ($c ne '') {
+ if ($c =~ /^\\x([0-9a-f]+)(.*)$/i) {
+ push(@codes, hex $1);
+ $c = $2;
+ next;
+ } elsif ($c =~ /^\\([0-7]{1,3})(.*)$/) {
+ push(@codes, oct $1);
+ $c = $2;
+ next;
+ } else {
+ die "$0: unknown code format in \"$codestr\"\n";
+ }
+ }
+
+ while ($c0 = shift(@codes)) {
+ $c1 = $codes[0];
+ if ($c0 == 01 || $c0 == 02 || $c0 == 03 || $c0 == 0170) {
+ # Fixed byte string
+ my $fbs = $prefix;
+ while (1) {
+ if ($c0 == 01 || $c0 == 02 || $c0 == 03) {
+ while ($c0--) {
+ $fbs .= sprintf("%02X", shift(@codes));
+ }
+ } elsif ($c0 == 0170) {
+ $fbs .= '00';
+ } else {
+ last;
+ }
+ $c0 = shift(@codes);
+ }
+
+ foreach $pfx (@disasm_prefixes) {
+ if ($fbs =~ /^$pfx(.*)$/) {
+ $prefix = $pfx;
+ $fbs = $1;
+ last;
+ }
+ }
- while (1) {
- die "couldn't get code in '$codes'" if $codes !~ /^(\\[^\\]+)(\\.*)?$/;
- $word = $1, $codes = $2;
- return (hex $1) if $word =~ /^\\[123]$/ && $codes =~ /^\\x(..)/;
- return (0x07, 0x17, 0x1F) if $word eq "\\4";
- return (0xA1, 0xA9) if $word eq "\\5";
- return (0x06, 0x0E, 0x16, 0x1E) if $word eq "\\6";
- return (0xA0, 0xA8) if $word eq "\\7";
- $start=hex $1, $r=8, last if $word =~ /^\\1[012]$/ && $codes =~/^\\x(..)/;
- return (0) if $word eq "\\17";
- $start=hex $1, $r=16, last if $word =~ /^\\330$/ && $codes =~ /^\\x(..)/;
- return () if $word eq "\\0" || $word eq "\\340";
+ if ($fbs ne '') {
+ return ($prefix.substr($fbs,0,2));
+ }
+ } elsif ($c0 == 04) {
+ return ("07", "17", "1F");
+ } elsif ($c0 == 05) {
+ return ("A1", "A9");
+ } elsif ($c0 == 06) {
+ return ("06", "0E", "16", "1E");
+ } elsif ($c0 == 07) {
+ return ("A0", "A8");
+ } elsif ($c0 >= 010 && $c0 <= 013) {
+ return hexlist($prefix, $c1, 8);
+ } elsif ($c0 == 0330) {
+ return hexlist($prefix, $c1, 16);
+ } elsif ($c0 == 0 || $c0 == 0340) {
+ return ();
+ }
}
- @range = ();
- push @range, $start++ while ($r-- > 0);
- @range;
+ return ();
}
diff --git a/nasm.h b/nasm.h
index 4ae93b61..f4afad36 100644
--- a/nasm.h
+++ b/nasm.h
@@ -375,7 +375,7 @@ enum {
*
* The bits are assigned as follows:
*
- * Bits 0-7: sizes
+ * Bits 0-7, 29: sizes
* 0: 8 bits (BYTE)
* 1: 16 bits (WORD)
* 2: 32 bits (DWORD)
@@ -384,6 +384,7 @@ enum {
* 5: FAR
* 6: NEAR
* 7: SHORT
+ * 29: 128 bits (OWORD)
*
* Bits 8-11 modifiers
* 8: TO
@@ -438,21 +439,29 @@ enum {
* 25: RM_MMX (MMXREG)
* 26: RM_XMM (XMMREG)
*
- * Bits 27-31 are currently unallocated.
+ * Bits 27-28 & 31 are currently unallocated.
+ *
+ * 30: SAME_AS
+ * Special flag only used in instruction patterns; means this operand
+ * has to be identical to another operand. Currently only supported
+ * for registers.
*/
+typedef uint32_t opflags_t;
+
/* Size, and other attributes, of the operand */
#define BITS8 0x00000001L
#define BITS16 0x00000002L
#define BITS32 0x00000004L
#define BITS64 0x00000008L /* x64 and FPU only */
#define BITS80 0x00000010L /* FPU only */
+#define BITS128 0x20000000L
#define FAR 0x00000020L /* grotty: this means 16:16 or */
/* 16:32, like in CALL/JMP */
#define NEAR 0x00000040L
#define SHORT 0x00000080L /* and this means what it says :) */
-#define SIZE_MASK 0x000000FFL /* all the size attributes */
+#define SIZE_MASK 0x200000FFL /* all the size attributes */
/* Modifiers */
#define MODIFIER_MASK 0x00000f00L
@@ -527,6 +536,9 @@ enum {
#define UNITY 0x00012000L /* for shift/rotate instructions */
#define SBYTE 0x00022000L /* for op r16/32,immediate instrs. */
+/* special flags */
+#define SAME_AS 0x40000000L
+
/* Register names automatically generated from regs.dat */
#include "regs.h"
@@ -540,6 +552,8 @@ enum ccode { /* condition code names */
/*
* REX flags
*/
+#define REX_OC 0x0200 /* DREX suffix has the OC0 bit set */
+#define REX_D 0x0100 /* Instruction uses DREX instead of REX */
#define REX_H 0x80 /* High register present, REX forbidden */
#define REX_P 0x40 /* REX prefix present/required */
#define REX_L 0x20 /* Use LOCK prefix instead of REX.R */
@@ -607,6 +621,7 @@ typedef struct extop { /* extended operand */
} extop;
#define MAXPREFIX 4
+#define MAX_OPERANDS 4
typedef struct { /* an instruction itself */
char *label; /* the label defined, or NULL */
@@ -616,12 +631,13 @@ typedef struct { /* an instruction itself */
enum ccode condition; /* the condition code, if Jcc/SETcc */
int operands; /* how many operands? 0-3
* (more if db et al) */
- operand oprs[3]; /* the operands, defined as above */
+ operand oprs[MAX_OPERANDS]; /* the operands, defined as above */
extop *eops; /* extended operands */
int eops_float; /* true if DD and floating */
int32_t times; /* repeat count (TIMES prefix) */
int forw_ref; /* is there a forward reference? */
- uint8_t rex; /* Special REX Prefix */
+ int rex; /* Special REX Prefix */
+ int drexdst; /* Destination register for DREX suffix */
} insn;
enum geninfo { GI_SWITCH };
@@ -945,8 +961,8 @@ struct dfmt {
*/
enum special_tokens {
- S_ABS, S_BYTE, S_DWORD, S_FAR, S_LONG, S_NEAR, S_NOSPLIT, S_QWORD, S_REL,
- S_SHORT, S_STRICT, S_TO, S_TWORD, S_WORD
+ S_ABS, S_BYTE, S_DWORD, S_FAR, S_LONG, S_NEAR, S_NOSPLIT,
+ S_OWORD, S_QWORD, S_REL, S_SHORT, S_STRICT, S_TO, S_TWORD, S_WORD
};
/*
diff --git a/parser.c b/parser.c
index 1c7b8d9b..31c3612a 100644
--- a/parser.c
+++ b/parser.c
@@ -175,23 +175,25 @@ insn *parse_line(int pass, char *buffer, insn * result,
* For the moment, EQU has the same difficulty, so we'll
* include that.
*/
- if (result->opcode == I_RESB || result->opcode == I_RESW || result->opcode == I_RESD || result->opcode == I_RESQ || result->opcode == I_REST || result->opcode == I_EQU || result->opcode == I_INCBIN) { /* fbk */
+ if (result->opcode == I_RESB || result->opcode == I_RESW ||
+ result->opcode == I_RESD || result->opcode == I_RESQ ||
+ result->opcode == I_REST || result->opcode == I_RESO ||
+ result->opcode == I_EQU || result->opcode == I_INCBIN) {
critical = pass0;
} else
critical = (pass == 2 ? 2 : 0);
- if (result->opcode == I_DB ||
- result->opcode == I_DW ||
- result->opcode == I_DD ||
- result->opcode == I_DQ ||
- result->opcode == I_DT || result->opcode == I_INCBIN) {
+ if (result->opcode == I_DB || result->opcode == I_DW ||
+ result->opcode == I_DD || result->opcode == I_DQ ||
+ result->opcode == I_DT || result->opcode == I_DO ||
+ result->opcode == I_INCBIN) {
extop *eop, **tail = &result->eops, **fixptr;
int oper_num = 0;
result->eops_float = FALSE;
/*
- * Begin to read the DB/DW/DD/DQ/DT/INCBIN operands.
+ * Begin to read the DB/DW/DD/DQ/DT/DO/INCBIN operands.
*/
while (1) {
i = stdscan(NULL, &tokval);
@@ -212,45 +214,56 @@ insn *parse_line(int pass, char *buffer, insn * result,
continue;
}
- if ((i == TOKEN_FLOAT && is_comma_next()) || i == '-') {
- int32_t sign = +1L;
+ if ((i == TOKEN_FLOAT && is_comma_next())
+ || i == '-' || i == '+') {
+ int32_t sign = +1;
- if (i == '-') {
+ if (i == '+' || i == '-') {
char *save = stdscan_bufptr;
+ int token = i;
+ sign = (i == '-') ? -1 : 1;
i = stdscan(NULL, &tokval);
- sign = -1L;
if (i != TOKEN_FLOAT || !is_comma_next()) {
stdscan_bufptr = save;
- i = tokval.t_type = '-';
+ i = tokval.t_type = token;
}
}
if (i == TOKEN_FLOAT) {
eop->type = EOT_DB_STRING;
result->eops_float = TRUE;
- if (result->opcode == I_DD)
+ switch (result->opcode) {
+ case I_DW:
+ eop->stringlen = 2;
+ break;
+ case I_DD:
eop->stringlen = 4;
- else if (result->opcode == I_DQ)
+ break;
+ case I_DQ:
eop->stringlen = 8;
- else if (result->opcode == I_DT)
+ break;
+ case I_DT:
eop->stringlen = 10;
- else {
+ break;
+ case I_DO:
+ eop->stringlen = 16;
+ break;
+ default:
error(ERR_NONFATAL, "floating-point constant"
- " encountered in `D%c' instruction",
- result->opcode == I_DW ? 'W' : 'B');
+ " encountered in `db' instruction");
/*
* fix suggested by Pedro Gimeno... original line
* was:
* eop->type = EOT_NOTHING;
*/
eop->stringlen = 0;
+ break;
}
- eop =
- nasm_realloc(eop, sizeof(extop) + eop->stringlen);
+ eop = nasm_realloc(eop, sizeof(extop) + eop->stringlen);
tail = &eop->next;
*fixptr = eop;
eop->stringval = (char *)eop + sizeof(extop);
- if (eop->stringlen < 4 ||
+ if (!eop->stringlen ||
!float_const(tokval.t_charptr, sign,
(uint8_t *)eop->stringval,
eop->stringlen, error))
@@ -339,10 +352,10 @@ insn *parse_line(int pass, char *buffer, insn * result,
return result;
}
- /* right. Now we begin to parse the operands. There may be up to three
+ /* right. Now we begin to parse the operands. There may be up to four
* of these, separated by commas, and terminated by a zero token. */
- for (operand = 0; operand < 3; operand++) {
+ for (operand = 0; operand < MAX_OPERANDS; operand++) {
expr *value; /* used most of the time */
int mref; /* is this going to be a memory ref? */
int bracket; /* is it a [] mref, or a & mref? */
@@ -384,6 +397,11 @@ insn *parse_line(int pass, char *buffer, insn * result,
result->oprs[operand].type |= BITS80;
setsize = 1;
break;
+ case S_OWORD:
+ if (!setsize)
+ result->oprs[operand].type |= BITS128;
+ setsize = 1;
+ break;
case S_TO:
result->oprs[operand].type |= TO;
break;
@@ -440,6 +458,9 @@ insn *parse_line(int pass, char *buffer, insn * result,
case S_TWORD:
result->oprs[operand].type |= BITS80;
break;
+ case S_OWORD:
+ result->oprs[operand].type |= BITS128;
+ break;
default:
error(ERR_NONFATAL,
"invalid operand size specification");
@@ -751,7 +772,7 @@ insn *parse_line(int pass, char *buffer, insn * result,
result->oprs[operand++].type = 0;
/*
- * Transform RESW, RESD, RESQ, REST into RESB.
+ * Transform RESW, RESD, RESQ, REST, RESO into RESB.
*/
switch (result->opcode) {
case I_RESW:
@@ -770,6 +791,10 @@ insn *parse_line(int pass, char *buffer, insn * result,
result->opcode = I_RESB;
result->oprs[0].offset *= 10;
break;
+ case I_RESO:
+ result->opcode = I_RESB;
+ result->oprs[0].offset *= 16;
+ break;
default:
break;
}
diff --git a/perllib/phash.ph b/perllib/phash.ph
index 60334272..3bb3a05b 100644
--- a/perllib/phash.ph
+++ b/perllib/phash.ph
@@ -42,8 +42,8 @@ sub prehash($$$) {
foreach $c (unpack("C*", $key)) {
$ko1 = $k1; $ko2 = $k2;
- $k1 = int32(rot($ko1,$s0)-rot($ko2, $s1)+$c);
- $k2 = int32(rot($ko2,$s2)-rot($ko1, $s3)+$c);
+ $k1 = int32(rot($ko1,$s0)^int32(rot($ko2, $s1)+$c));
+ $k2 = int32(rot($ko2,$s2)^int32(rot($ko1, $s3)+$c));
}
# Create a bipartite graph...
diff --git a/pptok.pl b/pptok.pl
index a0425b7c..a835bf3e 100755
--- a/pptok.pl
+++ b/pptok.pl
@@ -191,8 +191,8 @@ if ($what eq 'c') {
print OUT " while ((c = *p++) != 0) {\n";
print OUT " uint32_t kn1, kn2;\n";
print OUT " c |= 0x20; /* convert to lower case */\n";
- printf OUT " kn1 = rot(k1,%2d) - rot(k2,%2d) + c;\n", ${$sv}[0], ${$sv}[1];
- printf OUT " kn2 = rot(k2,%2d) - rot(k1,%2d) + c;\n", ${$sv}[2], ${$sv}[3];
+ printf OUT " kn1 = rot(k1,%2d)^(rot(k2,%2d) + c);\n", ${$sv}[0], ${$sv}[1];
+ printf OUT " kn2 = rot(k2,%2d)^(rot(k1,%2d) + c);\n", ${$sv}[2], ${$sv}[3];
print OUT " k1 = kn1; k2 = kn2;\n";
print OUT " }\n";
print OUT "\n";
diff --git a/stdscan.c b/stdscan.c
index d4ad696d..aecbd4a7 100644
--- a/stdscan.c
+++ b/stdscan.c
@@ -75,7 +75,6 @@ int stdscan(void *private_data, struct tokenval *tv)
(*stdscan_bufptr == '$' && isidstart(stdscan_bufptr[1]))) {
/* now we've got an identifier */
int is_sym = FALSE;
- int t;
if (*stdscan_bufptr == '$') {
is_sym = TRUE;
@@ -99,10 +98,7 @@ int stdscan(void *private_data, struct tokenval *tv)
*r = '\0';
/* right, so we have an identifier sitting in temp storage. now,
* is it actually a register or instruction name, or what? */
- if ((t = nasm_token_hash(ourcopy, tv)) != -1)
- return t;
- else
- return tv->t_type = TOKEN_ID;
+ return nasm_token_hash(ourcopy, tv);
} else if (*stdscan_bufptr == '$' && !isnumchar(stdscan_bufptr[1])) {
/*
* It's a $ sign with no following hex number; this must
@@ -130,7 +126,9 @@ int stdscan(void *private_data, struct tokenval *tv)
stdscan_bufptr++;
while (isnumchar(*stdscan_bufptr) ||
((stdscan_bufptr[-1] == 'e'
- || stdscan_bufptr[-1] == 'E')
+ || stdscan_bufptr[-1] == 'E'
+ || stdscan_bufptr[-1] == 'p'
+ || stdscan_bufptr[-1] == 'P')
&& (*stdscan_bufptr == '-' || *stdscan_bufptr == '+'))) {
stdscan_bufptr++;
}
diff --git a/test/float.asm b/test/float.asm
new file mode 100644
index 00000000..bcb2ec28
--- /dev/null
+++ b/test/float.asm
@@ -0,0 +1,133 @@
+;
+; Test of floating-point formats
+;
+
+; 16-bit
+ dw 1.0
+ dw +1.0
+ dw -1.0
+ dw 0.0
+ dw +0.0
+ dw -0.0
+ dw 1.83203125
+ dw +1.83203125
+ dw -1.83203125
+ dw 1.83203125e3
+ dw +1.83203125e3
+ dw -1.83203125e3
+ dw 1.83203125e-3
+ dw +1.83203125e-3
+ dw -1.83203125e-3
+ dw 1.83203125e-6 ; Denormal!
+ dw +1.83203125e-6 ; Denormal!
+ dw -1.83203125e-6 ; Denormal!
+ dw __Infinity__
+ dw +__Infinity__
+ dw -__Infinity__
+ dw __NaN__
+ dw __QNaN__
+ dw __SNaN__
+
+; 32-bit
+ dd 1.0
+ dd +1.0
+ dd -1.0
+ dd 0.0
+ dd +0.0
+ dd -0.0
+ dd 1.83203125
+ dd +1.83203125
+ dd -1.83203125
+ dd 1.83203125e15
+ dd +1.83203125e15
+ dd -1.83203125e15
+ dd 1.83203125e-15
+ dd +1.83203125e-15
+ dd -1.83203125e-15
+ dd 1.83203125e-40 ; Denormal!
+ dd +1.83203125e-40 ; Denormal!
+ dd -1.83203125e-40 ; Denormal!
+ dd __Infinity__
+ dd +__Infinity__
+ dd -__Infinity__
+ dd __NaN__
+ dd __QNaN__
+ dd __SNaN__
+
+; 64-bit
+ dq 1.0
+ dq +1.0
+ dq -1.0
+ dq 0.0
+ dq +0.0
+ dq -0.0
+ dq 1.83203125
+ dq +1.83203125
+ dq -1.83203125
+ dq 1.83203125e300
+ dq +1.83203125e300
+ dq -1.83203125e300
+ dq 1.83203125e-300
+ dq +1.83203125e-300
+ dq -1.83203125e-300
+ dq 1.83203125e-320 ; Denormal!
+ dq +1.83203125e-320 ; Denormal!
+ dq -1.83203125e-320 ; Denormal!
+ dq __Infinity__
+ dq +__Infinity__
+ dq -__Infinity__
+ dq __NaN__
+ dq __QNaN__
+ dq __SNaN__
+
+; 80-bit
+ dt 1.0
+ dt +1.0
+ dt -1.0
+ dt 0.0
+ dt +0.0
+ dt -0.0
+ dt 1.83203125
+ dt +1.83203125
+ dt -1.83203125
+ dt 1.83203125e+4000
+ dt +1.83203125e+4000
+ dt -1.83203125e+4000
+ dt 1.83203125e-4000
+ dt +1.83203125e-4000
+ dt -1.83203125e-4000
+ dt 1.83203125e-4940 ; Denormal!
+ dt +1.83203125e-4940 ; Denormal!
+ dt -1.83203125e-4940 ; Denormal!
+ dt __Infinity__
+ dt +__Infinity__
+ dt -__Infinity__
+ dt __NaN__
+ dt __QNaN__
+ dt __SNaN__
+
+; 128-bit
+ do 1.0
+ do +1.0
+ do -1.0
+ do 0.0
+ do +0.0
+ do -0.0
+ do 1.83203125
+ do +1.83203125
+ do -1.83203125
+ do 1.83203125e+4000
+ do +1.83203125e+4000
+ do -1.83203125e+4000
+ do 1.83203125e-4000
+ do +1.83203125e-4000
+ do -1.83203125e-4000
+ do 1.83203125e-4940 ; Denormal!
+ do +1.83203125e-4940 ; Denormal!
+ do -1.83203125e-4940 ; Denormal!
+ do __Infinity__
+ do +__Infinity__
+ do -__Infinity__
+ do __NaN__
+ do __QNaN__
+ do __SNaN__
diff --git a/test/floatx.asm b/test/floatx.asm
new file mode 100644
index 00000000..f513ec83
--- /dev/null
+++ b/test/floatx.asm
@@ -0,0 +1,125 @@
+;
+; floatx.asm
+;
+; Test hexadecimal floating-point numbers
+
+; 16-bit
+ dw 1.0
+ dw 0x1.0
+ dw 2.0
+ dw 0x2.0
+ dw 0x1.0p+1
+ dw 0x1.0p-1
+ dw 0x0.0
+ dw 0x1.23456789
+ dw 0x0.123456789
+ dw 0x0.0000123456789
+ dw 0x1.23456789p10
+ dw 0x1.23456789p+10
+ dw 0x1.23456789p-10
+ dw 0x0.123456789p10
+ dw 0x0.123456789p+10
+ dw 0x0.123456789abcdef0123456789abcdef012345p-10
+ dw 0x0.0000123456789
+ dw 0x0.0000123456789p+10
+ dw 0x0.0000123456789p-10
+
+; 32-bit
+ dd 1.0
+ dd 0x1.0
+ dd 2.0
+ dd 0x2.0
+ dd 0x1.0p+1
+ dd 0x1.0p-1
+ dd 0x0.0
+ dd 0x1.23456789
+ dd 0x0.123456789
+ dd 0x0.0000123456789
+ dd 0x1.23456789p10
+ dd 0x1.23456789p+10
+ dd 0x1.23456789p-10
+ dd 0x0.123456789p10
+ dd 0x0.123456789p+10
+ dd 0x0.123456789abcdef0123456789abcdef012345p-10
+ dd 0x0.0000123456789
+ dd 0x0.0000123456789p+10
+ dd 0x0.0000123456789p-10
+ dd 0x123456789.0
+ dd 0x0000123456789.0
+ dd 0x123456789.0p+0
+ dd 0x123456789.0p+64
+
+; 64-bit
+ dq 1.0
+ dq 0x1.0
+ dq 2.0
+ dq 0x2.0
+ dq 0x1.0p+1
+ dq 0x1.0p-1
+ dq 0x0.0
+ dq 0x1.23456789
+ dq 0x0.123456789
+ dq 0x0.0000123456789
+ dq 0x1.23456789p10
+ dq 0x1.23456789p+10
+ dq 0x1.23456789p-10
+ dq 0x0.123456789p10
+ dq 0x0.123456789p+10
+ dq 0x0.123456789abcdef0123456789abcdef012345p-10
+ dq 0x0.0000123456789
+ dq 0x0.0000123456789p+10
+ dq 0x0.0000123456789p-10
+ dq 0x123456789.0
+ dq 0x0000123456789.0
+ dq 0x123456789.0p+0
+ dq 0x123456789.0p+300
+
+; 80-bit
+ dt 1.0
+ dt 0x1.0
+ dt 2.0
+ dt 0x2.0
+ dt 0x1.0p+1
+ dt 0x1.0p-1
+ dt 0x0.0
+ dt 0x1.23456789
+ dt 0x0.123456789
+ dt 0x0.0000123456789
+ dt 0x1.23456789p10
+ dt 0x1.23456789p+10
+ dt 0x1.23456789p-10
+ dt 0x0.123456789p10
+ dt 0x0.123456789p+10
+ dt 0x0.123456789abcdef0123456789abcdef012345p-10
+ dt 0x0.0000123456789
+ dt 0x0.0000123456789p+10
+ dt 0x0.0000123456789p-10
+ dt 0x123456789.0
+ dt 0x0000123456789.0
+ dt 0x123456789.0p+0
+ dt 0x123456789.0p+1024
+
+; 128-bit
+ do 1.0
+ do 0x1.0
+ do 2.0
+ do 0x2.0
+ do 0x1.0p+1
+ do 0x1.0p-1
+ do 0x0.0
+ do 0x1.23456789
+ do 0x0.123456789
+ do 0x0.0000123456789
+ do 0x1.23456789p10
+ do 0x1.23456789p+10
+ do 0x1.23456789p-10
+ do 0x0.123456789p10
+ do 0x0.123456789p+10
+ do 0x0.123456789abcdef0123456789abcdef012345p-10
+ do 0x0.0000123456789
+ do 0x0.0000123456789p+10
+ do 0x0.0000123456789p-10
+ do 0x123456789.0
+ do 0x0000123456789.0
+ do 0x123456789.0p+0
+ do 0x123456789.0p+1024
diff --git a/test/fmsub.asm b/test/fmsub.asm
new file mode 100644
index 00000000..7f087cd7
--- /dev/null
+++ b/test/fmsub.asm
@@ -0,0 +1,16 @@
+ bits 64
+
+ fmsubps xmm0,xmm0,xmm1,xmm2
+ fmsubps xmm0,xmm0,xmm1,[rax]
+ fmsubps xmm0,xmm0,xmm1,[rax+0x77]
+ fmsubps xmm0,xmm0,xmm1,[rax+0x7777]
+ fmsubps xmm1,xmm2,xmm3,xmm1
+ fmsubps xmm1,xmm2,[rax],xmm1
+ fmsubps xmm1,xmm2,[rax+0x77],xmm1
+ fmsubps xmm1,xmm2,[rax+0x7777],xmm1
+ fmsubps xmm0,[rax],xmm2,xmm0
+ fmsubps xmm0,[rax+0x77],xmm2,xmm0
+ fmsubps xmm0,[rax+0x7777],xmm2,xmm0
+ fmsubps xmm14,[rax],xmm2,xmm14
+ fmsubps xmm14,[rax+0x77],xmm2,xmm14
+ fmsubps xmm14,[rax+0x7777],xmm2,xmm14
diff --git a/tokens.dat b/tokens.dat
index 6acaba49..e7c1cb29 100644
--- a/tokens.dat
+++ b/tokens.dat
@@ -23,6 +23,7 @@ far
long
near
nosplit
+oword
qword
rel
short
@@ -31,6 +32,12 @@ to
tword
word
+% TOKEN_FLOAT, 0, 0
+__infinity__
+__nan__
+__qnan__
+__snan__
+
% TOKEN_*, 0, 0
seg
wrt
diff --git a/tokhash.pl b/tokhash.pl
index 5f1a9f4c..a63e55f3 100755
--- a/tokhash.pl
+++ b/tokhash.pl
@@ -187,21 +187,21 @@ print " const char *p = token;\n";
print "\n";
print " while ((c = *p++) != 0) {\n";
-printf " uint32_t kn1 = rot(k1,%2d) - rot(k2,%2d) + c;\n", ${$sv}[0], ${$sv}[1];
-printf " uint32_t kn2 = rot(k2,%2d) - rot(k1,%2d) + c;\n", ${$sv}[2], ${$sv}[3];
+printf " uint32_t kn1 = rot(k1,%2d)^(rot(k2,%2d) + c);\n", ${$sv}[0], ${$sv}[1];
+printf " uint32_t kn2 = rot(k2,%2d)^(rot(k1,%2d) + c);\n", ${$sv}[2], ${$sv}[3];
print " k1 = kn1; k2 = kn2;\n";
print " }\n";
print "\n";
printf " ix = hash1[k1 & 0x%x] + hash2[k2 & 0x%x];\n", $n-1, $n-1;
printf " if (ix >= %d)\n", scalar(@tokendata);
-print " return -1;\n";
+print " return tv->t_type = TOKEN_ID;\n";
print "\n";
print " data = &tokendata[ix];\n";
# print " fprintf(stderr, \"Looked for: %s found: %s\\n\", token, data->string);\n\n";
print " if (strcmp(data->string, token))\n";
-print " return -1;\n";
+print " return tv->t_type = TOKEN_ID;\n";
print "\n";
print " tv->t_integer = data->num;\n";
print " tv->t_inttwo = data->aux;\n";