summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: H. Peter Anvin <hpa@zytor.com> 2007-09-12 21:58:51 -0700
committer: H. Peter Anvin <hpa@zytor.com> 2007-09-12 21:58:51 -0700
commit: cb9b690ae6d4bc7124faa9600cb10320d288143b (patch)
tree: 262b89b46318a1ab85d850c4cf9880ab51feb219
parent: daffd793726d852001ba1fef3b7b0d87d5396bf1 (diff)
downloadnasm-cb9b690ae6d4bc7124faa9600cb10320d288143b.tar.gz
Add (untested!) SSSE3, SSE4.1, SSE4.2 instructions
Add the SSSE3, SSE4.1 and SSE4.2 instruction sets. Change \332 to be a literal 0xF2 prefix, by analogy with \333 for the 0xF3 prefix (the flag previously numbered \332 is now \335). This is necessary to get the REX prefix in the right place for instructions that use it. We will also have to go in and change the existing instruction patterns that use these prefixes.
-rw-r--r-- assemble.c 17
-rw-r--r-- disasm.c 9
-rw-r--r-- insns.dat 88
-rw-r--r-- insns.h 1
-rw-r--r-- nasm.h 4
-rw-r--r-- regs.dat 3
6 files changed, 104 insertions, 18 deletions
diff --git a/assemble.c b/assemble.c
index 9288aab9..54522712 100644
--- a/assemble.c
+++ b/assemble.c
@@ -64,10 +64,10 @@
* to the condition code value of the instruction.
* \331 - instruction not valid with REP prefix. Hint for
* disassembler only; for SSE instructions.
- * \332 - disassemble a rep (0xF3 byte) prefix as repe not rep.
- * \333 - REP prefix (0xF3 byte); for SSE instructions. Not encoded
- * as a literal byte in order to aid the disassembler.
+ * \332 - REPNE prefix (0xF2 byte) used as opcode extension.
+ * \333 - REP prefix (0xF3 byte) used as opcode extension.
* \334 - LOCK prefix used instead of REX.R
+ * \335 - disassemble a rep (0xF3 byte) prefix as repe not rep.
* \340 - reserve <operand 0> bytes of uninitialized storage.
* Operand 0 had better be a segmentless constant.
* \364 - operand-size prefix (0x66) not permitted
@@ -862,8 +862,8 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits,
codes++, length++;
break;
case 0331:
- case 0332:
break;
+ case 0332:
case 0333:
length++;
break;
@@ -871,6 +871,8 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits,
assert_no_prefix(ins, P_LOCK);
ins->rex |= REX_L;
break;
+ case 0335:
+ break;
case 0340:
case 0341:
case 0342:
@@ -1360,11 +1362,11 @@ static void gencode(int32_t segment, int32_t offset, int bits,
break;
case 0331:
- case 0332:
break;
+ case 0332:
case 0333:
- *bytes = 0xF3;
+ *bytes = c - 0332 + 0xF2;
out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG);
offset += 1;
break;
@@ -1378,6 +1380,9 @@ static void gencode(int32_t segment, int32_t offset, int bits,
ins->rex &= ~(REX_L|REX_R);
break;
+ case 0335:
+ break;
+
case 0340:
case 0341:
case 0342:
diff --git a/disasm.c b/disasm.c
index cf7cd710..0452c295 100644
--- a/disasm.c
+++ b/disasm.c
@@ -573,9 +573,9 @@ static int matches(const struct itemplate *t, uint8_t *data,
} else if (c == 0331) {
if (prefix->rep)
return FALSE;
- } else if (c == 0332) {
- if (drep == P_REP)
- drep = P_REPE;
+ } else if (c == 0332) {
+ if (prefix->rep != 0xF2)
+ return FALSE;
} else if (c == 0333) {
if (prefix->rep != 0xF3)
return FALSE;
@@ -585,6 +585,9 @@ static int matches(const struct itemplate *t, uint8_t *data,
ins->rex |= REX_R;
lock = 0;
}
+ } else if (c == 0335) {
+ if (drep == P_REP)
+ drep = P_REPE;
} else if (c == 0364) {
if (prefix->osp)
return FALSE;
diff --git a/insns.dat b/insns.dat
index 9b1fcade..a1ad815f 100644
--- a/insns.dat
+++ b/insns.dat
@@ -244,10 +244,10 @@ CMP rm64,imm \324\300\144\1\x81\207\141 X64,SM
CMP mem,imm8 \300\1\x80\207\21 8086,SM
CMP mem,imm16 \320\300\134\1\x81\207\131 8086,SM
CMP mem,imm32 \321\300\144\1\x81\207\141 386,SM
-CMPSB void \332\1\xA6 8086
-CMPSD void \332\321\1\xA7 386
-CMPSQ void \332\324\1\xA7 X64
-CMPSW void \332\320\1\xA7 8086
+CMPSB void \335\1\xA6 8086
+CMPSD void \335\321\1\xA7 386
+CMPSQ void \335\324\1\xA7 X64
+CMPSW void \335\320\1\xA7 8086
CMPXCHG mem,reg8 \300\2\x0F\xB0\101 PENT,SM
CMPXCHG reg8,reg8 \2\x0F\xB0\101 PENT
CMPXCHG mem,reg16 \320\300\2\x0F\xB1\101 PENT,SM
@@ -1127,10 +1127,10 @@ SBB rm64,imm \324\300\144\1\x81\203\141 X64,SM
SBB mem,imm8 \300\1\x80\203\21 8086,SM
SBB mem,imm16 \320\300\134\1\x81\203\131 8086,SM
SBB mem,imm32 \321\300\144\1\x81\203\141 386,SM
-SCASB void \332\1\xAE 8086
-SCASD void \332\321\1\xAF 386
-SCASQ void \332\324\1\xAF X64
-SCASW void \332\320\1\xAF 8086
+SCASB void \335\1\xAE 8086
+SCASD void \335\321\1\xAF 386
+SCASQ void \335\324\1\xAF X64
+SCASW void \335\320\1\xAF 8086
SFENCE void \3\x0F\xAE\xF8 X64,AMD
SGDT mem \300\2\x0F\x01\200 286
SHL rm8,unity \300\1\xD0\204 8086
@@ -1948,3 +1948,75 @@ PSIGNW mmxreg,mmxrm \364\3\x0F\x38\x09\110 SSSE3,MMX,SQ
PSIGNW xmmreg,xmmrm \366\3\x0F\x38\x09\110 SSSE3
PSIGND mmxreg,mmxrm \364\3\x0F\x38\x0A\110 SSSE3,MMX,SQ
PSIGND xmmreg,xmmrm \366\3\x0F\x38\x0A\110 SSSE3
+
+; Penryn New Instructions (SSE4.1)
+BLENDPD xmmreg,xmmrm,imm \366\3\x0F\x3A\x0D\110\26 SSE41
+BLENDPS xmmreg,xmmrm,imm \366\3\x0F\x3A\x0C\110\26 SSE41
+BLENDVPD xmmreg,xmmrm,xmm0 \366\3\x0F\x38\x15\110 SSE41
+BLENDVPS xmmreg,xmmrm,xmm0 \366\3\x0F\x38\x14\110 SSE41
+DPPD xmmreg,xmmrm,imm \366\3\x0F\x3A\x41\110\26 SSE41
+DPPS xmmreg,xmmrm,imm \366\3\x0F\x3A\x40\110\26 SSE41
+EXTRACTPS rm32,xmmreg,imm \366\3\x0F\x3A\x17\101\26 SSE41
+EXTRACTPS reg64,xmmreg,imm \324\366\3\x0F\x3A\x17\101\26 SSE41,X64
+INSERTPS xmmreg,xmmrm,imm \366\3\x0F\x3A\x21\110\26 SSE41,SD
+MOVNTDQA xmmreg,mem \366\3\x0F\x38\x2A\110 SSE41
+MPSADBW xmmreg,xmmrm,imm \366\3\x0F\x3A\x42\110\26 SSE41
+PACKUSDW xmmreg,xmmrm \366\3\x0F\x38\x2B\110 SSE41
+PBLENDVB xmmreg,xmmrm,xmm0 \366\3\x0F\x38\x10\110 SSE41
+PBLENDW xmmreg,xmmrm,imm \366\3\x0F\x3A\x0E\110\26 SSE41
+PCMPEQQ xmmreg,xmmrm \366\3\x0F\x38\x29\110 SSE41
+PEXTRB reg32,xmmreg,imm \366\3\x0F\x3A\x14\101\26 SSE41
+PEXTRB mem8,xmmreg,imm \366\3\x0F\x3A\x14\101\26 SSE41
+PEXTRB reg64,xmmreg,imm \324\366\3\x0F\x3A\x14\101\26 SSE41,X64
+PEXTRD rm32,xmmreg,imm \366\3\x0F\x3A\x16\101\26 SSE41
+PEXTRQ rm64,xmmreg,imm \324\366\3\x0F\x3A\x16\101\26 SSE41,X64
+PEXTRW reg32,xmmreg,imm \366\3\x0F\x3A\x15\101\26 SSE41
+PEXTRW mem16,xmmreg,imm \366\3\x0F\x3A\x15\101\26 SSE41
+PEXTRW reg64,xmmreg,imm \324\366\3\x0F\x3A\x15\101\26 SSE41,X64
+PHMINPOSUW xmmreg,xmmrm \366\3\x0F\x38\x41\110 SSE41
+PINSRB xmmreg,reg32,imm \366\3\x0F\x3A\x20\110\26 SSE41
+PINSRB xmmreg,mem8,imm \366\3\x0F\x3A\x20\110\26 SSE41
+PINSRD xmmreg,rm32,imm \366\3\x0F\x3A\x22\110\26 SSE41
+PINSRQ xmmreg,rm64,imm \324\366\3\x0F\x3A\x22\110\26 SSE41,X64
+PMAXSB xmmreg,xmmrm \366\3\x0F\x38\x3C\110 SSE41
+PMAXSD xmmreg,xmmrm \366\3\x0F\x38\x3D\110 SSE41
+PMAXUD xmmreg,xmmrm \366\3\x0F\x38\x3F\110 SSE41
+PMAXUW xmmreg,xmmrm \366\3\x0F\x38\x3E\110 SSE41
+PMINSB xmmreg,xmmrm \366\3\x0F\x38\x38\110 SSE41
+PMINSD xmmreg,xmmrm \366\3\x0F\x38\x39\110 SSE41
+PMINUD xmmreg,xmmrm \366\3\x0F\x38\x3B\110 SSE41
+PMINUW xmmreg,xmmrm \366\3\x0F\x38\x3A\110 SSE41
+PMOVSXBW xmmreg,xmmrm \366\3\x0F\x38\x20\110 SSE41,SQ
+PMOVSXBD xmmreg,xmmrm \366\3\x0F\x38\x21\110 SSE41,SD
+PMOVSXBQ xmmreg,xmmrm \366\3\x0F\x38\x22\110 SSE41,SW
+PMOVSXWD xmmreg,xmmrm \366\3\x0F\x38\x23\110 SSE41,SQ
+PMOVSXWQ xmmreg,xmmrm \366\3\x0F\x38\x24\110 SSE41,SD
+PMOVSXDQ xmmreg,xmmrm \366\3\x0F\x38\x25\110 SSE41,SQ
+PMOVZXBW xmmreg,xmmrm \366\3\x0F\x38\x30\110 SSE41,SQ
+PMOVZXBD xmmreg,xmmrm \366\3\x0F\x38\x31\110 SSE41,SD
+PMOVZXBQ xmmreg,xmmrm \366\3\x0F\x38\x32\110 SSE41,SW
+PMOVZXWD xmmreg,xmmrm \366\3\x0F\x38\x33\110 SSE41,SQ
+PMOVZXWQ xmmreg,xmmrm \366\3\x0F\x38\x34\110 SSE41,SD
+PMOVZXDQ xmmreg,xmmrm \366\3\x0F\x38\x35\110 SSE41,SQ
+PMULDQ xmmreg,xmmrm \366\3\x0F\x38\x28\110 SSE41
+PMULLD xmmreg,xmmrm \366\3\x0F\x38\x40\110 SSE41
+PTEST xmmreg,xmmrm \366\3\x0F\x38\x17\110 SSE41
+ROUNDPD xmmreg,xmmrm,imm \366\3\x0F\x3A\x09\110\26 SSE41
+ROUNDPS xmmreg,xmmrm,imm \366\3\x0F\x3A\x08\110\26 SSE41
+ROUNDSD xmmreg,xmmrm,imm \366\3\x0F\x3A\x0B\110\26 SSE41
+ROUNDSS xmmreg,xmmrm,imm \366\3\x0F\x3A\x0A\110\26 SSE41
+
+; Nehalem New Instructions (SSE4.2): CRC32 = F2 [REX.W] 0F 38 F0/F1 /r, POPCNT = F3 [REX.W] 0F B8 /r
+CRC32 reg32,rm8 \332\3\x0F\x38\xF0\110 SSE42
+CRC32 reg32,rm16 \320\332\3\x0F\x38\xF1\110 SSE42
+CRC32 reg32,rm32 \321\332\3\x0F\x38\xF1\110 SSE42
+CRC32 reg64,rm8 \324\332\3\x0F\x38\xF0\110 SSE42,X64
+CRC32 reg64,rm64 \324\332\3\x0F\x38\xF1\110 SSE42,X64
+PCMPESTRI xmmreg,xmmrm,imm \366\3\x0F\x3A\x61\110\26 SSE42
+PCMPESTRM xmmreg,xmmrm,imm \366\3\x0F\x3A\x60\110\26 SSE42
+PCMPISTRI xmmreg,xmmrm,imm \366\3\x0F\x3A\x63\110\26 SSE42
+PCMPISTRM xmmreg,xmmrm,imm \366\3\x0F\x3A\x62\110\26 SSE42
+PCMPGTQ xmmreg,xmmrm \366\3\x0F\x38\x37\110 SSE42
+POPCNT reg16,rm16 \320\333\2\x0F\xB8\110 NEHALEM
+POPCNT reg32,rm32 \321\333\2\x0F\xB8\110 NEHALEM
+POPCNT reg64,rm64 \324\333\2\x0F\xB8\110 NEHALEM,X64
diff --git a/insns.h b/insns.h
index 706ee7cd..4deccf94 100644
--- a/insns.h
+++ b/insns.h
@@ -103,6 +103,7 @@ extern const struct itemplate * const * const itable[];
#define IF_WILLAMETTE 0x08000000UL /* Willamette instructions */
#define IF_PRESCOTT 0x09000000UL /* Prescott instructions */
#define IF_X86_64 0x0A000000UL /* x86-64 instruction (long or legacy mode) */
+#define IF_NEHALEM 0x0B000000UL /* Nehalem instruction */
#define IF_X64 (IF_LONG|IF_X86_64)
#define IF_IA64 0x0F000000UL /* IA64 instructions (in x86 mode) */
#define IF_CYRIX 0x10000000UL /* Cyrix-specific instruction */
diff --git a/nasm.h b/nasm.h
index 34d6d05e..6e231b61 100644
--- a/nasm.h
+++ b/nasm.h
@@ -422,6 +422,9 @@ enum {
* With FPUREG:
* 16: FPU0
*
+ * With XMMREG:
+ * 16: XMM0
+ *
* With MEMORY:
* 16: MEM_OFFS (this is a simple offset)
* 17: IP_REL (IP-relative offset)
@@ -485,6 +488,7 @@ enum {
#define MMXREG 0x02009000L /* MMX register */
#define RM_XMM 0x04008000L /* XMM (SSE) operand */
#define XMMREG 0x04009000L /* XMM (SSE) register */
+#define XMM0 0x04019000L /* XMM register zero */
#define REG_CDT 0x00101004L /* CRn, DRn and TRn */
#define REG_CREG 0x00111004L /* CRn */
#define REG_DREG 0x00121004L /* DRn */
diff --git a/regs.dat b/regs.dat
index f70e3298..64fa2a0b 100644
--- a/regs.dat
+++ b/regs.dat
@@ -78,4 +78,5 @@ st1-7 FPUREG fpureg 1
mm0-7 MMXREG mmxreg 0
# SSE registers
-xmm0-15 XMMREG xmmreg 0
+xmm0 XMM0 xmmreg 0
+xmm1-15 XMMREG xmmreg 1