summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJin Kyu Song <jin.kyu.song@intel.com>2013-09-13 14:12:55 -0700
committerCyrill Gorcunov <gorcunov@gmail.com>2013-09-14 01:27:02 +0400
commitd4b2b5f17ce7b9fde637eec86bdf035b7b8abb22 (patch)
tree93a0e1b8f9a612fec2a462cf70650f92334b3198
parentc257bb6ae0d8f88e69a7ef30069aa0f1839de468 (diff)
downloadnasm-d4b2b5f17ce7b9fde637eec86bdf035b7b8abb22.tar.gz
AVX-512: Add AVX-512CD instructions
Added Conflict Detection (AVX-512CD) instructions. These instructions are supported if CPUID.(EAX=07H, ECX=0):EBX.AVX512CD[bit 28] = 1. Signed-off-by: Jin Kyu Song <jin.kyu.song@intel.com> Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
-rw-r--r--insns.dat10
-rw-r--r--insns.h1
-rw-r--r--test/avx512cd.asm105
3 files changed, 116 insertions, 0 deletions
diff --git a/insns.dat b/insns.dat
index ad72d613..3c59da2f 100644
--- a/insns.dat
+++ b/insns.dat
@@ -4054,6 +4054,16 @@ KUNPCKBW kreg,kreg,kreg [rvm: vex.nds.l1.66.0f.w0 4b /r ] AVX512,FUTURE
KXNORW kreg,kreg,kreg [rvm: vex.nds.l1.0f.w0 46 /r ] AVX512,FUTURE
KXORW kreg,kreg,kreg [rvm: vex.nds.l1.0f.w0 47 /r ] AVX512,FUTURE
+; AVX-512CD (Conflict Detection) instructions
+VPBROADCASTMB2Q zmmreg,kreg [rm: evex.512.f3.0f38.w1 2a /r ] AVX512CD,FUTURE
+VPBROADCASTMW2D zmmreg,kreg [rm: evex.512.f3.0f38.w0 3a /r ] AVX512CD,FUTURE
+VPCONFLICTD zmmreg|mask|z,zmmrm512|b32 [rm:fv: evex.512.66.0f38.w0 c4 /r ] AVX512CD,FUTURE
+VPCONFLICTQ zmmreg|mask|z,zmmrm512|b64 [rm:fv: evex.512.66.0f38.w1 c4 /r ] AVX512CD,FUTURE
+VPLZCNTD zmmreg|mask|z,zmmrm512|b32 [rm:fv: evex.512.66.0f38.w0 44 /r ] AVX512CD,FUTURE
+VPLZCNTQ zmmreg|mask|z,zmmrm512|b64 [rm:fv: evex.512.66.0f38.w1 44 /r ] AVX512CD,FUTURE
+VPTESTNMD kreg|mask,zmmreg,zmmrm512|b32 [rvm:fv: evex.nds.512.f3.0f38.w0 27 /r ] AVX512CD,FUTURE
+VPTESTNMQ kreg|mask,zmmreg,zmmrm512|b64 [rvm:fv: evex.nds.512.f3.0f38.w1 27 /r ] AVX512CD,FUTURE
+
;# Systematic names for the hinting nop instructions
; These should be last in the file
diff --git a/insns.h b/insns.h
index 19b27ae8..3959a057 100644
--- a/insns.h
+++ b/insns.h
@@ -128,6 +128,7 @@ extern const uint8_t nasm_bytecodes[];
#define IF_TBM 0x1300000000UL /* HACK NEED TO REORGANIZE THESE BITS */
#define IF_RTM 0x1400000000UL /* HACK NEED TO REORGANIZE THESE BITS */
#define IF_INVPCID 0x1500000000UL /* HACK NEED TO REORGANIZE THESE BITS */
+#define IF_AVX512CD 0x1600000000UL /* AVX-512 Conflict Detection insns */
#define IF_INSMASK 0xFF00000000UL /* the mask for instruction set types */
#define IF_PMASK 0xFF000000UL /* the mask for processor types */
#define IF_PLEVEL 0x0F000000UL /* the mask for processor instr. level */
diff --git a/test/avx512cd.asm b/test/avx512cd.asm
new file mode 100644
index 00000000..670a6fcc
--- /dev/null
+++ b/test/avx512cd.asm
@@ -0,0 +1,105 @@
+; AVX-512CD testcases from gas
+;------------------------
+;
+; This file is taken from there
+; https://gnu.googlesource.com/binutils/+/master/gas/testsuite/gas/i386/x86-64-avx512cd-intel.d
+; So the original author is "H.J. Lu" <hongjiu dot lu at intel dot com>
+;
+; Jin Kyu Song converted it for the nasm testing suite using gas2nasm.py
+
+%macro testcase 2
+ %ifdef BIN
+ db %1
+ %endif
+ %ifdef SRC
+ %2
+ %endif
+%endmacro
+
+
+bits 64
+
+testcase { 0x62, 0x02, 0x7d, 0x48, 0xc4, 0xf5 }, { vpconflictd zmm30,zmm29 }
+testcase { 0x62, 0x02, 0x7d, 0x4f, 0xc4, 0xf5 }, { vpconflictd zmm30\{k7\},zmm29 }
+testcase { 0x62, 0x02, 0x7d, 0xcf, 0xc4, 0xf5 }, { vpconflictd zmm30\{k7\}\{z\},zmm29 }
+testcase { 0x62, 0x62, 0x7d, 0x48, 0xc4, 0x31 }, { vpconflictd zmm30,ZWORD [rcx] }
+testcase { 0x62, 0x22, 0x7d, 0x48, 0xc4, 0xb4, 0xf0, 0x23, 0x01, 0x00, 0x00 }, { vpconflictd zmm30,ZWORD [rax+r14*8+0x123] }
+testcase { 0x62, 0x62, 0x7d, 0x58, 0xc4, 0x31 }, { vpconflictd zmm30,DWORD [rcx]\{1to16\} }
+testcase { 0x62, 0x62, 0x7d, 0x48, 0xc4, 0x72, 0x7f }, { vpconflictd zmm30,ZWORD [rdx+0x1fc0] }
+testcase { 0x62, 0x62, 0x7d, 0x48, 0xc4, 0xb2, 0x00, 0x20, 0x00, 0x00 }, { vpconflictd zmm30,ZWORD [rdx+0x2000] }
+testcase { 0x62, 0x62, 0x7d, 0x48, 0xc4, 0x72, 0x80 }, { vpconflictd zmm30,ZWORD [rdx-0x2000] }
+testcase { 0x62, 0x62, 0x7d, 0x48, 0xc4, 0xb2, 0xc0, 0xdf, 0xff, 0xff }, { vpconflictd zmm30,ZWORD [rdx-0x2040] }
+testcase { 0x62, 0x62, 0x7d, 0x58, 0xc4, 0x72, 0x7f }, { vpconflictd zmm30,DWORD [rdx+0x1fc]\{1to16\} }
+testcase { 0x62, 0x62, 0x7d, 0x58, 0xc4, 0xb2, 0x00, 0x02, 0x00, 0x00 }, { vpconflictd zmm30,DWORD [rdx+0x200]\{1to16\} }
+testcase { 0x62, 0x62, 0x7d, 0x58, 0xc4, 0x72, 0x80 }, { vpconflictd zmm30,DWORD [rdx-0x200]\{1to16\} }
+testcase { 0x62, 0x62, 0x7d, 0x58, 0xc4, 0xb2, 0xfc, 0xfd, 0xff, 0xff }, { vpconflictd zmm30,DWORD [rdx-0x204]\{1to16\} }
+testcase { 0x62, 0x02, 0xfd, 0x48, 0xc4, 0xf5 }, { vpconflictq zmm30,zmm29 }
+testcase { 0x62, 0x02, 0xfd, 0x4f, 0xc4, 0xf5 }, { vpconflictq zmm30\{k7\},zmm29 }
+testcase { 0x62, 0x02, 0xfd, 0xcf, 0xc4, 0xf5 }, { vpconflictq zmm30\{k7\}\{z\},zmm29 }
+testcase { 0x62, 0x62, 0xfd, 0x48, 0xc4, 0x31 }, { vpconflictq zmm30,ZWORD [rcx] }
+testcase { 0x62, 0x22, 0xfd, 0x48, 0xc4, 0xb4, 0xf0, 0x23, 0x01, 0x00, 0x00 }, { vpconflictq zmm30,ZWORD [rax+r14*8+0x123] }
+testcase { 0x62, 0x62, 0xfd, 0x58, 0xc4, 0x31 }, { vpconflictq zmm30,QWORD [rcx]\{1to8\} }
+testcase { 0x62, 0x62, 0xfd, 0x48, 0xc4, 0x72, 0x7f }, { vpconflictq zmm30,ZWORD [rdx+0x1fc0] }
+testcase { 0x62, 0x62, 0xfd, 0x48, 0xc4, 0xb2, 0x00, 0x20, 0x00, 0x00 }, { vpconflictq zmm30,ZWORD [rdx+0x2000] }
+testcase { 0x62, 0x62, 0xfd, 0x48, 0xc4, 0x72, 0x80 }, { vpconflictq zmm30,ZWORD [rdx-0x2000] }
+testcase { 0x62, 0x62, 0xfd, 0x48, 0xc4, 0xb2, 0xc0, 0xdf, 0xff, 0xff }, { vpconflictq zmm30,ZWORD [rdx-0x2040] }
+testcase { 0x62, 0x62, 0xfd, 0x58, 0xc4, 0x72, 0x7f }, { vpconflictq zmm30,QWORD [rdx+0x3f8]\{1to8\} }
+testcase { 0x62, 0x62, 0xfd, 0x58, 0xc4, 0xb2, 0x00, 0x04, 0x00, 0x00 }, { vpconflictq zmm30,QWORD [rdx+0x400]\{1to8\} }
+testcase { 0x62, 0x62, 0xfd, 0x58, 0xc4, 0x72, 0x80 }, { vpconflictq zmm30,QWORD [rdx-0x400]\{1to8\} }
+testcase { 0x62, 0x62, 0xfd, 0x58, 0xc4, 0xb2, 0xf8, 0xfb, 0xff, 0xff }, { vpconflictq zmm30,QWORD [rdx-0x408]\{1to8\} }
+testcase { 0x62, 0x02, 0x7d, 0x48, 0x44, 0xf5 }, { vplzcntd zmm30,zmm29 }
+testcase { 0x62, 0x02, 0x7d, 0x4f, 0x44, 0xf5 }, { vplzcntd zmm30\{k7\},zmm29 }
+testcase { 0x62, 0x02, 0x7d, 0xcf, 0x44, 0xf5 }, { vplzcntd zmm30\{k7\}\{z\},zmm29 }
+testcase { 0x62, 0x62, 0x7d, 0x48, 0x44, 0x31 }, { vplzcntd zmm30,ZWORD [rcx] }
+testcase { 0x62, 0x22, 0x7d, 0x48, 0x44, 0xb4, 0xf0, 0x23, 0x01, 0x00, 0x00 }, { vplzcntd zmm30,ZWORD [rax+r14*8+0x123] }
+testcase { 0x62, 0x62, 0x7d, 0x58, 0x44, 0x31 }, { vplzcntd zmm30,DWORD [rcx]\{1to16\} }
+testcase { 0x62, 0x62, 0x7d, 0x48, 0x44, 0x72, 0x7f }, { vplzcntd zmm30,ZWORD [rdx+0x1fc0] }
+testcase { 0x62, 0x62, 0x7d, 0x48, 0x44, 0xb2, 0x00, 0x20, 0x00, 0x00 }, { vplzcntd zmm30,ZWORD [rdx+0x2000] }
+testcase { 0x62, 0x62, 0x7d, 0x48, 0x44, 0x72, 0x80 }, { vplzcntd zmm30,ZWORD [rdx-0x2000] }
+testcase { 0x62, 0x62, 0x7d, 0x48, 0x44, 0xb2, 0xc0, 0xdf, 0xff, 0xff }, { vplzcntd zmm30,ZWORD [rdx-0x2040] }
+testcase { 0x62, 0x62, 0x7d, 0x58, 0x44, 0x72, 0x7f }, { vplzcntd zmm30,DWORD [rdx+0x1fc]\{1to16\} }
+testcase { 0x62, 0x62, 0x7d, 0x58, 0x44, 0xb2, 0x00, 0x02, 0x00, 0x00 }, { vplzcntd zmm30,DWORD [rdx+0x200]\{1to16\} }
+testcase { 0x62, 0x62, 0x7d, 0x58, 0x44, 0x72, 0x80 }, { vplzcntd zmm30,DWORD [rdx-0x200]\{1to16\} }
+testcase { 0x62, 0x62, 0x7d, 0x58, 0x44, 0xb2, 0xfc, 0xfd, 0xff, 0xff }, { vplzcntd zmm30,DWORD [rdx-0x204]\{1to16\} }
+testcase { 0x62, 0x02, 0xfd, 0x48, 0x44, 0xf5 }, { vplzcntq zmm30,zmm29 }
+testcase { 0x62, 0x02, 0xfd, 0x4f, 0x44, 0xf5 }, { vplzcntq zmm30\{k7\},zmm29 }
+testcase { 0x62, 0x02, 0xfd, 0xcf, 0x44, 0xf5 }, { vplzcntq zmm30\{k7\}\{z\},zmm29 }
+testcase { 0x62, 0x62, 0xfd, 0x48, 0x44, 0x31 }, { vplzcntq zmm30,ZWORD [rcx] }
+testcase { 0x62, 0x22, 0xfd, 0x48, 0x44, 0xb4, 0xf0, 0x23, 0x01, 0x00, 0x00 }, { vplzcntq zmm30,ZWORD [rax+r14*8+0x123] }
+testcase { 0x62, 0x62, 0xfd, 0x58, 0x44, 0x31 }, { vplzcntq zmm30,QWORD [rcx]\{1to8\} }
+testcase { 0x62, 0x62, 0xfd, 0x48, 0x44, 0x72, 0x7f }, { vplzcntq zmm30,ZWORD [rdx+0x1fc0] }
+testcase { 0x62, 0x62, 0xfd, 0x48, 0x44, 0xb2, 0x00, 0x20, 0x00, 0x00 }, { vplzcntq zmm30,ZWORD [rdx+0x2000] }
+testcase { 0x62, 0x62, 0xfd, 0x48, 0x44, 0x72, 0x80 }, { vplzcntq zmm30,ZWORD [rdx-0x2000] }
+testcase { 0x62, 0x62, 0xfd, 0x48, 0x44, 0xb2, 0xc0, 0xdf, 0xff, 0xff }, { vplzcntq zmm30,ZWORD [rdx-0x2040] }
+testcase { 0x62, 0x62, 0xfd, 0x58, 0x44, 0x72, 0x7f }, { vplzcntq zmm30,QWORD [rdx+0x3f8]\{1to8\} }
+testcase { 0x62, 0x62, 0xfd, 0x58, 0x44, 0xb2, 0x00, 0x04, 0x00, 0x00 }, { vplzcntq zmm30,QWORD [rdx+0x400]\{1to8\} }
+testcase { 0x62, 0x62, 0xfd, 0x58, 0x44, 0x72, 0x80 }, { vplzcntq zmm30,QWORD [rdx-0x400]\{1to8\} }
+testcase { 0x62, 0x62, 0xfd, 0x58, 0x44, 0xb2, 0xf8, 0xfb, 0xff, 0xff }, { vplzcntq zmm30,QWORD [rdx-0x408]\{1to8\} }
+testcase { 0x62, 0x92, 0x16, 0x40, 0x27, 0xec }, { vptestnmd k5,zmm29,zmm28 }
+testcase { 0x62, 0x92, 0x16, 0x47, 0x27, 0xec }, { vptestnmd k5\{k7\},zmm29,zmm28 }
+testcase { 0x62, 0xf2, 0x16, 0x40, 0x27, 0x29 }, { vptestnmd k5,zmm29,ZWORD [rcx] }
+testcase { 0x62, 0xb2, 0x16, 0x40, 0x27, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00 }, { vptestnmd k5,zmm29,ZWORD [rax+r14*8+0x123] }
+testcase { 0x62, 0xf2, 0x16, 0x50, 0x27, 0x29 }, { vptestnmd k5,zmm29,DWORD [rcx]\{1to16\} }
+testcase { 0x62, 0xf2, 0x16, 0x40, 0x27, 0x6a, 0x7f }, { vptestnmd k5,zmm29,ZWORD [rdx+0x1fc0] }
+testcase { 0x62, 0xf2, 0x16, 0x40, 0x27, 0xaa, 0x00, 0x20, 0x00, 0x00 }, { vptestnmd k5,zmm29,ZWORD [rdx+0x2000] }
+testcase { 0x62, 0xf2, 0x16, 0x40, 0x27, 0x6a, 0x80 }, { vptestnmd k5,zmm29,ZWORD [rdx-0x2000] }
+testcase { 0x62, 0xf2, 0x16, 0x40, 0x27, 0xaa, 0xc0, 0xdf, 0xff, 0xff }, { vptestnmd k5,zmm29,ZWORD [rdx-0x2040] }
+testcase { 0x62, 0xf2, 0x16, 0x50, 0x27, 0x6a, 0x7f }, { vptestnmd k5,zmm29,DWORD [rdx+0x1fc]\{1to16\} }
+testcase { 0x62, 0xf2, 0x16, 0x50, 0x27, 0xaa, 0x00, 0x02, 0x00, 0x00 }, { vptestnmd k5,zmm29,DWORD [rdx+0x200]\{1to16\} }
+testcase { 0x62, 0xf2, 0x16, 0x50, 0x27, 0x6a, 0x80 }, { vptestnmd k5,zmm29,DWORD [rdx-0x200]\{1to16\} }
+testcase { 0x62, 0xf2, 0x16, 0x50, 0x27, 0xaa, 0xfc, 0xfd, 0xff, 0xff }, { vptestnmd k5,zmm29,DWORD [rdx-0x204]\{1to16\} }
+testcase { 0x62, 0x92, 0x96, 0x40, 0x27, 0xec }, { vptestnmq k5,zmm29,zmm28 }
+testcase { 0x62, 0x92, 0x96, 0x47, 0x27, 0xec }, { vptestnmq k5\{k7\},zmm29,zmm28 }
+testcase { 0x62, 0xf2, 0x96, 0x40, 0x27, 0x29 }, { vptestnmq k5,zmm29,ZWORD [rcx] }
+testcase { 0x62, 0xb2, 0x96, 0x40, 0x27, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00 }, { vptestnmq k5,zmm29,ZWORD [rax+r14*8+0x123] }
+testcase { 0x62, 0xf2, 0x96, 0x50, 0x27, 0x29 }, { vptestnmq k5,zmm29,QWORD [rcx]\{1to8\} }
+testcase { 0x62, 0xf2, 0x96, 0x40, 0x27, 0x6a, 0x7f }, { vptestnmq k5,zmm29,ZWORD [rdx+0x1fc0] }
+testcase { 0x62, 0xf2, 0x96, 0x40, 0x27, 0xaa, 0x00, 0x20, 0x00, 0x00 }, { vptestnmq k5,zmm29,ZWORD [rdx+0x2000] }
+testcase { 0x62, 0xf2, 0x96, 0x40, 0x27, 0x6a, 0x80 }, { vptestnmq k5,zmm29,ZWORD [rdx-0x2000] }
+testcase { 0x62, 0xf2, 0x96, 0x40, 0x27, 0xaa, 0xc0, 0xdf, 0xff, 0xff }, { vptestnmq k5,zmm29,ZWORD [rdx-0x2040] }
+testcase { 0x62, 0xf2, 0x96, 0x50, 0x27, 0x6a, 0x7f }, { vptestnmq k5,zmm29,QWORD [rdx+0x3f8]\{1to8\} }
+testcase { 0x62, 0xf2, 0x96, 0x50, 0x27, 0xaa, 0x00, 0x04, 0x00, 0x00 }, { vptestnmq k5,zmm29,QWORD [rdx+0x400]\{1to8\} }
+testcase { 0x62, 0xf2, 0x96, 0x50, 0x27, 0x6a, 0x80 }, { vptestnmq k5,zmm29,QWORD [rdx-0x400]\{1to8\} }
+testcase { 0x62, 0xf2, 0x96, 0x50, 0x27, 0xaa, 0xf8, 0xfb, 0xff, 0xff }, { vptestnmq k5,zmm29,QWORD [rdx-0x408]\{1to8\} }
+testcase { 0x62, 0x62, 0x7e, 0x48, 0x3a, 0xf6 }, { vpbroadcastmw2d zmm30,k6 }
+testcase { 0x62, 0x62, 0xfe, 0x48, 0x2a, 0xf6 }, { vpbroadcastmb2q zmm30,k6 }