summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorH. Peter Anvin <hpa@linux.intel.com>2018-02-07 13:51:54 -0800
committerH. Peter Anvin <hpa@linux.intel.com>2018-02-07 13:51:54 -0800
commit1b53750430f03e5953e6b133d6dbae237c3f880f (patch)
treee8934dae69b539c48d7f63a2276b8fbcc8bc9b25
parent70c439b8de78595e9a465e3907aa282aa9c82984 (diff)
parentef7d18bfbb899fe8e44f4fae8d6a12e2742f69f5 (diff)
downloadnasm-1b53750430f03e5953e6b133d6dbae237c3f880f.tar.gz
Merge tag 'nasm-2.13.03'
NASM 2.13.03 Resolved Conflicts: include/iflag.h version x86/insns-iflags.ph Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
-rw-r--r--asm/assemble.c4
-rw-r--r--asm/directiv.c122
-rw-r--r--asm/nasm.c21
-rw-r--r--doc/changes.src6
-rw-r--r--doc/nasmdoc.src43
-rw-r--r--include/iflag.h122
-rw-r--r--include/insns.h6
-rw-r--r--test/vaesenc.asm22
-rw-r--r--x86/insns-iflags.ph212
9 files changed, 341 insertions, 217 deletions
diff --git a/asm/assemble.c b/asm/assemble.c
index fc72065e..561bba55 100644
--- a/asm/assemble.c
+++ b/asm/assemble.c
@@ -1,6 +1,6 @@
/* ----------------------------------------------------------------------- *
*
- * Copyright 1996-2017 The NASM Authors - All Rights Reserved
+ * Copyright 1996-2018 The NASM Authors - All Rights Reserved
* See the file AUTHORS included with the NASM distribution for
* the specific copyright holders.
*
@@ -1379,7 +1379,7 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
length++;
} else if ((ins->rex & REX_L) &&
!(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
- iflag_ffs(&cpu) >= IF_X86_64) {
+ iflag_cpu_level_ok(&cpu, IF_X86_64)) {
/* LOCK-as-REX.R */
assert_no_prefix(ins, PPS_LOCK);
lockcheck = false; /* Already errored, no need for warning */
diff --git a/asm/directiv.c b/asm/directiv.c
index 937f17af..7c741685 100644
--- a/asm/directiv.c
+++ b/asm/directiv.c
@@ -1,6 +1,6 @@
/* ----------------------------------------------------------------------- *
*
- * Copyright 1996-2017 The NASM Authors - All Rights Reserved
+ * Copyright 1996-2018 The NASM Authors - All Rights Reserved
* See the file AUTHORS included with the NASM distribution for
* the specific copyright holders.
*
@@ -56,78 +56,90 @@
#include "labels.h"
#include "iflag.h"
-static iflag_t get_cpu(char *value)
+struct cpunames {
+ const char *name;
+ unsigned int level;
+ /* Eventually a table of features */
+};
+
+static iflag_t get_cpu(const char *value)
{
iflag_t r;
+ const struct cpunames *cpu;
+ static const struct cpunames cpunames[] = {
+ { "8086", IF_8086 },
+ { "186", IF_186 },
+ { "286", IF_286 },
+ { "386", IF_386 },
+ { "486", IF_486 },
+ { "586", IF_PENT },
+ { "pentium", IF_PENT },
+ { "pentiummmx", IF_PENT },
+ { "686", IF_P6 },
+ { "p6", IF_P6 },
+ { "ppro", IF_P6 },
+ { "pentiumpro", IF_P6 },
+ { "p2", IF_P6 }, /* +MMX */
+ { "pentiumii", IF_P6 },
+ { "p3", IF_KATMAI },
+ { "katmai", IF_KATMAI },
+ { "p4", IF_WILLAMETTE },
+ { "willamette", IF_WILLAMETTE },
+ { "prescott", IF_PRESCOTT },
+ { "x64", IF_X86_64 },
+ { "x86-64", IF_X86_64 },
+ { "ia64", IF_IA64 },
+ { "ia-64", IF_IA64 },
+ { "itanium", IF_IA64 },
+ { "itanic", IF_IA64 },
+ { "merced", IF_IA64 },
+ { "any", IF_PLEVEL },
+ { "default", IF_PLEVEL },
+ { "all", IF_PLEVEL },
+ { NULL, IF_PLEVEL } /* Error and final default entry */
+ };
+
+ for (cpu = cpunames; cpu->name; cpu++) {
+ if (!strcmp(value, cpu->name))
+ break;
+ }
- iflag_clear_all(&r);
-
- if (!strcmp(value, "8086"))
- iflag_set(&r, IF_8086);
- else if (!strcmp(value, "186"))
- iflag_set(&r, IF_186);
- else if (!strcmp(value, "286"))
- iflag_set(&r, IF_286);
- else if (!strcmp(value, "386"))
- iflag_set(&r, IF_386);
- else if (!strcmp(value, "486"))
- iflag_set(&r, IF_486);
- else if (!strcmp(value, "586") ||
- !nasm_stricmp(value, "pentium"))
- iflag_set(&r, IF_PENT);
- else if (!strcmp(value, "686") ||
- !nasm_stricmp(value, "ppro") ||
- !nasm_stricmp(value, "pentiumpro") ||
- !nasm_stricmp(value, "p2"))
- iflag_set(&r, IF_P6);
- else if (!nasm_stricmp(value, "p3") ||
- !nasm_stricmp(value, "katmai"))
- iflag_set(&r, IF_KATMAI);
- else if (!nasm_stricmp(value, "p4") || /* is this right? -- jrc */
- !nasm_stricmp(value, "willamette"))
- iflag_set(&r, IF_WILLAMETTE);
- else if (!nasm_stricmp(value, "prescott"))
- iflag_set(&r, IF_PRESCOTT);
- else if (!nasm_stricmp(value, "x64") ||
- !nasm_stricmp(value, "x86-64"))
- iflag_set(&r, IF_X86_64);
- else if (!nasm_stricmp(value, "ia64") ||
- !nasm_stricmp(value, "ia-64") ||
- !nasm_stricmp(value, "itanium")||
- !nasm_stricmp(value, "itanic") ||
- !nasm_stricmp(value, "merced"))
- iflag_set(&r, IF_IA64);
- else {
- iflag_set(&r, IF_PLEVEL);
+ if (!cpu->name) {
nasm_error(pass0 < 2 ? ERR_NONFATAL : ERR_FATAL,
- "unknown 'cpu' type");
+ "unknown 'cpu' type '%s'", value);
}
+
+ iflag_set_cpu(&r, cpu->level);
return r;
}
-static int get_bits(char *value)
+static int get_bits(const char *value)
{
- int i;
+ int i = atoi(value);
- if ((i = atoi(value)) == 16)
- return i; /* set for a 16-bit segment */
- else if (i == 32) {
- if (iflag_ffs(&cpu) < IF_386) {
+ switch (i) {
+ case 16:
+ break; /* Always safe */
+ case 32:
+ if (!iflag_cpu_level_ok(&cpu, IF_386)) {
nasm_error(ERR_NONFATAL,
- "cannot specify 32-bit segment on processor below a 386");
+ "cannot specify 32-bit segment on processor below a 386");
i = 16;
}
- } else if (i == 64) {
- if (iflag_ffs(&cpu) < IF_X86_64) {
+ break;
+ case 64:
+ if (!iflag_cpu_level_ok(&cpu, IF_X86_64)) {
nasm_error(ERR_NONFATAL,
- "cannot specify 64-bit segment on processor below an x86-64");
+ "cannot specify 64-bit segment on processor below an x86-64");
i = 16;
}
- } else {
+ break;
+ default:
nasm_error(pass0 < 2 ? ERR_NONFATAL : ERR_FATAL,
- "`%s' is not a valid segment size; must be 16, 32 or 64",
- value);
+ "`%s' is not a valid segment size; must be 16, 32 or 64",
+ value);
i = 16;
+ break;
}
return i;
}
diff --git a/asm/nasm.c b/asm/nasm.c
index 0ffb036c..48b34cd4 100644
--- a/asm/nasm.c
+++ b/asm/nasm.c
@@ -323,8 +323,8 @@ int main(int argc, char **argv)
timestamp();
- iflag_set(&cpu, IF_PLEVEL);
- iflag_set(&cmd_cpu, IF_PLEVEL);
+ iflag_set_default_cpu(&cpu);
+ iflag_set_default_cpu(&cmd_cpu);
pass0 = 0;
want_usage = terminate_after_phase = false;
@@ -1289,8 +1289,21 @@ static void assemble_file(const char *fname, StrList **depend_ptr)
uint64_t prev_offset_changed;
unsigned int stall_count = 0; /* Make sure we make forward progress... */
- if (cmd_sb == 32 && iflag_ffs(&cmd_cpu) < IF_386)
- nasm_fatal(0, "command line: 32-bit segment size requires a higher cpu");
+ switch (cmd_sb) {
+ case 16:
+ break;
+ case 32:
+ if (!iflag_cpu_level_ok(&cmd_cpu, IF_386))
+ nasm_fatal(0, "command line: 32-bit segment size requires a higher cpu");
+ break;
+ case 64:
+ if (!iflag_cpu_level_ok(&cmd_cpu, IF_X86_64))
+ nasm_fatal(0, "command line: 64-bit segment size requires a higher cpu");
+ break;
+ default:
+ panic();
+ break;
+ }
pass_max = prev_offset_changed = (INT_MAX >> 1) + 2; /* Almost unlimited */
for (passn = 1; pass0 <= 2; passn++) {
diff --git a/doc/changes.src b/doc/changes.src
index 2076babe..e72457c2 100644
--- a/doc/changes.src
+++ b/doc/changes.src
@@ -7,6 +7,12 @@
The NASM 2 series supports x86-64, and is the production version of NASM
since 2007.
+\S{cl-2.13.03} Version 2.13.03
+
+\b Added AVX and AVX512 \c{VAES*} and \c{VPCLMULQDQ} instructions.
+
+\b Fixed missing dwarf record in x32 ELF output format.
+
\S{cl-2.13.02} Version 2.13.02
\b Fix false positive in testing of numeric overflows.
diff --git a/doc/nasmdoc.src b/doc/nasmdoc.src
index f998cc60..7b331d35 100644
--- a/doc/nasmdoc.src
+++ b/doc/nasmdoc.src
@@ -1,6 +1,6 @@
\# --------------------------------------------------------------------------
\#
-\# Copyright 1996-2017 The NASM Authors - All Rights Reserved
+\# Copyright 1996-2018 The NASM Authors - All Rights Reserved
\# See the file AUTHORS included with the NASM distribution for
\# the specific copyright holders.
\#
@@ -7704,10 +7704,15 @@ platforms pass arguments in registers rather than on the stack.
Furthermore, 64-bit platforms use SSE2 by default for floating point.
Please see the ABI documentation for your platform.
-64-bit platforms differ in the sizes of the fundamental datatypes, not
-just from 32-bit platforms but from each other. If a specific size
-data type is desired, it is probably best to use the types defined in
-the Standard C header \c{<inttypes.h>}.
+64-bit platforms differ in the sizes of the C/C++ fundamental
+datatypes, not just from 32-bit platforms but from each other. If a
+specific size data type is desired, it is probably best to use the
+types defined in the standard C header \c{<inttypes.h>}.
+
+All known 64-bit platforms except some embedded platforms require that
+the stack is 16-byte aligned at the entry to a function. In order to
+enforce that, the stack pointer (\c{RSP}) needs to be aligned on an
+\c{odd} multiple of 8 bytes before the \c{CALL} instruction.
In 64-bit mode, the default instruction size is still 32 bits. When
loading a value into a 32-bit register (but not an 8- or 16-bit
@@ -7755,12 +7760,30 @@ immediate as \c{DWORD}:
The length of these instructions are 10, 5 and 7 bytes, respectively.
+If optimization is enabled and NASM can determine at assembly time
+that a shorter instruction will suffice, the shorter instruction will
+be emitted unless of course \c{STRICT QWORD} or \c{STRICT DWORD} is
+specified (see \k{strict}):
+
+\c mov rax,1 ; Assembles as "mov eax,1" (5 bytes)
+\c mov rax,strict qword 1 ; Full 10-byte instruction
+\c mov rax,strict dword 1 ; 7-byte instruction
+\c mov rax,symbol ; 10 bytes, not known at assembly time
+\c lea rax,[rel symbol] ; 7 bytes, usually preferred by the ABI
+
+Note that \c{lea rax,[rel symbol]} is position-independent, whereas
+\c{mov rax,symbol} is not. Most ABIs prefer or even require
+position-independent code in 64-bit mode. However, the \c{MOV}
+instruction is able to reference a symbol anywhere in the 64-bit
+address space, whereas \c{LEA} is only able to access a symbol within
+2 GB of the instruction itself (see below.)
+
The only instructions which take a full \I{64-bit displacement}64-bit
\e{displacement} is loading or storing, using \c{MOV}, \c{AL}, \c{AX},
\c{EAX} or \c{RAX} (but no other registers) to an absolute 64-bit address.
Since this is a relatively rarely used instruction (64-bit code generally uses
relative addressing), the programmer has to explicitly declare the
-displacement size as \c{QWORD}:
+displacement size as \c{ABS QWORD}:
\c default abs
\c
@@ -7797,9 +7820,11 @@ calls, and thus are available for use by the function without saving.
Integer return values are passed in \c{RAX} and \c{RDX}, in that order.
Floating point is done using SSE registers, except for \c{long
-double}. Floating-point arguments are passed in \c{XMM0} to \c{XMM7};
-return is \c{XMM0} and \c{XMM1}. \c{long double} are passed on the
-stack, and returned in \c{ST0} and \c{ST1}.
+double}, which is 80 bits (\c{TWORD}) on most platforms (Android is
+one exception; there \c{long double} is 64 bits and treated the same
+as \c{double}.) Floating-point arguments are passed in \c{XMM0} to
+\c{XMM7}; return is \c{XMM0} and \c{XMM1}. \c{long double} are passed
+on the stack, and returned in \c{ST0} and \c{ST1}.
All SSE and x87 registers are destroyed by function calls.
diff --git a/include/iflag.h b/include/iflag.h
index 289e4272..e8fa6036 100644
--- a/include/iflag.h
+++ b/include/iflag.h
@@ -1,30 +1,29 @@
#ifndef NASM_IFLAG_H
#define NASM_IFLAG_H
-#include <string.h>
-
#include "compiler.h"
#include "ilog2.h"
+
+#include <string.h>
+
#include "iflaggen.h"
+#include "nasmlib.h" /* For ilog2_32() */
#define IF_GENBIT(bit) (UINT32_C(1) << (bit))
-static inline unsigned int iflag_test(const iflag_t *f, unsigned int bit)
+static inline bool iflag_test(const iflag_t *f, unsigned int bit)
{
- unsigned int index = bit / 32;
- return f->field[index] & (UINT32_C(1) << (bit - (index * 32)));
+ return !!(f->field[bit >> 5] & IF_GENBIT(bit & 31));
}
static inline void iflag_set(iflag_t *f, unsigned int bit)
{
- unsigned int index = bit / 32;
- f->field[index] |= (UINT32_C(1) << (bit - (index * 32)));
+ f->field[bit >> 5] |= IF_GENBIT(bit & 31);
}
static inline void iflag_clear(iflag_t *f, unsigned int bit)
{
- unsigned int index = bit / 32;
- f->field[index] &= ~(UINT32_C(1) << (bit - (index * 32)));
+ f->field[bit >> 5] &= ~IF_GENBIT(bit & 31);
}
static inline void iflag_clear_all(iflag_t *f)
@@ -34,39 +33,21 @@ static inline void iflag_clear_all(iflag_t *f)
static inline void iflag_set_all(iflag_t *f)
{
- memset(f, 0xff, sizeof(*f));
+ memset(f, ~0, sizeof(*f));
}
+#define iflag_for_each_field(v) for ((v) = 0; (v) < IF_FIELD_COUNT; (v)++)
+
static inline int iflag_cmp(const iflag_t *a, const iflag_t *b)
{
int i;
- for (i = sizeof(a->field) / sizeof(a->field[0]) - 1; i >= 0; i--) {
+ /* This is intentionally a reverse loop! */
+ for (i = IF_FIELD_COUNT-1; i >= 0; i--) {
if (a->field[i] == b->field[i])
continue;
- return (a->field[i] > b->field[i]) ? 1 : -1;
- }
-
- return 0;
-}
-
-static inline int iflag_cmp_cpu(const iflag_t *a, const iflag_t *b)
-{
- if (a->field[3] < b->field[3])
- return -1;
- else if (a->field[3] > b->field[3])
- return 1;
- return 0;
-}
-
-static inline unsigned int iflag_ffs(const iflag_t *a)
-{
- unsigned int i;
-
- for (i = 0; i < sizeof(a->field) / sizeof(a->field[0]); i++) {
- if (a->field[i])
- return ilog2_32(a->field[i]) + (i * 32);
+ return (int)(a->field[i] - b->field[i]);
}
return 0;
@@ -78,7 +59,7 @@ static inline unsigned int iflag_ffs(const iflag_t *a)
unsigned int i; \
iflag_t res; \
\
- for (i = 0; i < sizeof(a->field) / sizeof(a->field[0]); i++) \
+ iflag_for_each_field(i) \
res.field[i] = a->field[i] op b->field[i]; \
\
return res; \
@@ -86,13 +67,6 @@ static inline unsigned int iflag_ffs(const iflag_t *a)
IF_GEN_HELPER(xor, ^)
-
-/* Use this helper to test instruction template flags */
-#define itemp_has(itemp, bit) iflag_test(&insns_flags[(itemp)->iflag_idx], bit)
-
-
-/* Maximum processor level at moment */
-#define IF_PLEVEL IF_IA64
/* Some helpers which are to work with predefined masks */
#define IF_SMASK \
(IF_GENBIT(IF_SB) |\
@@ -118,23 +92,67 @@ IF_GEN_HELPER(xor, ^)
#define itemp_arg(itemp) _itemp_arg((itemp)->iflag_idx)
#define itemp_armask(itemp) _itemp_armask((itemp)->iflag_idx)
+/*
+ * IF_8086 is the first CPU level flag and IF_PLEVEL the last
+ */
+#if IF_8086 & 31
+#error "IF_8086 must be on a uint32_t boundary"
+#endif
+#define IF_PLEVEL IF_IA64
+#define IF_CPU_FIELD (IF_8086 >> 5)
+#define IF_CPU_LEVEL_MASK ((IF_GENBIT(IF_PLEVEL & 31) << 1) - 1)
+
+/*
+ * IF_PRIV is the first instruction filtering flag
+ */
+#if IF_PRIV & 31
+#error "IF_PRIV must be on a uint32_t boundary"
+#endif
+#define IF_FEATURE_FIELD (IF_PRIV >> 5)
+
+static inline int iflag_cmp_cpu(const iflag_t *a, const iflag_t *b)
+{
+ return (int)(a->field[IF_CPU_FIELD] - b->field[IF_CPU_FIELD]);
+}
+
+static inline uint32_t _iflag_cpu_level(const iflag_t *a)
+{
+ return a->field[IF_CPU_FIELD] & IF_CPU_LEVEL_MASK;
+}
+
static inline int iflag_cmp_cpu_level(const iflag_t *a, const iflag_t *b)
{
- iflag_t v1 = *a;
- iflag_t v2 = *b;
+ uint32_t aa = _iflag_cpu_level(a);
+ uint32_t bb = _iflag_cpu_level(b);
- iflag_clear(&v1, IF_CYRIX);
- iflag_clear(&v1, IF_AMD);
+ return (int)(aa - bb);
+}
- iflag_clear(&v2, IF_CYRIX);
- iflag_clear(&v2, IF_AMD);
+/* Returns true if the CPU level is at least a certain value */
+static inline bool iflag_cpu_level_ok(const iflag_t *a, unsigned int bit)
+{
+ return _iflag_cpu_level(a) >= IF_GENBIT(bit & 31);
+}
- if (v1.field[3] < v2.field[3])
- return -1;
- else if (v1.field[3] > v2.field[3])
- return 1;
+static inline void iflag_set_all_features(iflag_t *a)
+{
+ size_t i;
- return 0;
+ for (i = IF_FEATURE_FIELD; i < IF_CPU_FIELD; i++)
+ a->field[i] = ~UINT32_C(0);
+}
+
+static inline void iflag_set_cpu(iflag_t *a, unsigned int cpu)
+{
+ a->field[0] = 0; /* Not applicable to the CPU type */
+ iflag_set_all_features(a); /* All feature masking bits set for now */
+ a->field[IF_CPU_FIELD] &= ~IF_CPU_LEVEL_MASK;
+ iflag_set(a, cpu);
+}
+
+static inline void iflag_set_default_cpu(iflag_t *a)
+{
+ iflag_set_cpu(a, IF_PLEVEL);
}
static inline iflag_t _iflag_pfmask(const iflag_t *a)
diff --git a/include/insns.h b/include/insns.h
index ac2d7924..00de2887 100644
--- a/include/insns.h
+++ b/include/insns.h
@@ -23,6 +23,12 @@ struct itemplate {
uint32_t iflag_idx; /* some flags referenced by index */
};
+/* Use this helper to test instruction template flags */
+static inline bool itemp_has(const struct itemplate *itemp, unsigned int bit)
+{
+ return iflag_test(&insns_flags[itemp->iflag_idx], bit);
+}
+
/* Disassembler table structure */
/*
diff --git a/test/vaesenc.asm b/test/vaesenc.asm
new file mode 100644
index 00000000..9edca705
--- /dev/null
+++ b/test/vaesenc.asm
@@ -0,0 +1,22 @@
+;; BR 3392454, 3392460
+
+ bits 64
+ aesenc xmm0,xmm4
+ vaesenc zmm0,zmm0,zmm4
+ vpclmullqlqdq zmm1,zmm1,zmm5
+ vpclmulqdq zmm0, zmm1, zmm2, 0
+ vaesenclast zmm0, zmm1, zmm2
+
+ bits 32
+ aesenc xmm0,xmm4
+ vaesenc zmm0,zmm0,zmm4
+ vpclmullqlqdq zmm1,zmm1,zmm5
+ vpclmulqdq zmm0, zmm1, zmm2, 0
+ vaesenclast zmm0, zmm1, zmm2
+
+ bits 16
+ aesenc xmm0,xmm4
+ vaesenc zmm0,zmm0,zmm4
+ vpclmullqlqdq zmm1,zmm1,zmm5
+ vpclmulqdq zmm0, zmm1, zmm2, 0
+ vaesenclast zmm0, zmm1, zmm2
diff --git a/x86/insns-iflags.ph b/x86/insns-iflags.ph
index 989276f2..43bf70e8 100644
--- a/x86/insns-iflags.ph
+++ b/x86/insns-iflags.ph
@@ -64,109 +64,117 @@
# for a set of flags, so be careful moving bits (and
# don't forget to update C code generation then).
#
+sub dword_align($) {
+ my($n) = @_;
+
+ $$n = ($$n + 31) & ~31;
+ return $n;
+}
+
+my $f = 0;
my %insns_flag_bit = (
#
# dword bound, index 0 - specific flags
#
- "SM" => [ 0, "Size match"],
- "SM2" => [ 1, "Size match first two operands"],
- "SB" => [ 2, "Unsized operands can't be non-byte"],
- "SW" => [ 3, "Unsized operands can't be non-word"],
- "SD" => [ 4, "Unsized operands can't be non-dword"],
- "SQ" => [ 5, "Unsized operands can't be non-qword"],
- "SO" => [ 6, "Unsized operands can't be non-oword"],
- "SY" => [ 7, "Unsized operands can't be non-yword"],
- "SZ" => [ 8, "Unsized operands can't be non-zword"],
- "SIZE" => [ 9, "Unsized operands must match the bitsize"],
- "SX" => [ 10, "Unsized operands not allowed"],
- "AR0" => [ 11, "SB, SW, SD applies to argument 0"],
- "AR1" => [ 12, "SB, SW, SD applies to argument 1"],
- "AR2" => [ 13, "SB, SW, SD applies to argument 2"],
- "AR3" => [ 14, "SB, SW, SD applies to argument 3"],
- "AR4" => [ 15, "SB, SW, SD applies to argument 4"],
- "OPT" => [ 16, "Optimizing assembly only"],
+ "SM" => [$f++, "Size match"],
+ "SM2" => [$f++, "Size match first two operands"],
+ "SB" => [$f++, "Unsized operands can't be non-byte"],
+ "SW" => [$f++, "Unsized operands can't be non-word"],
+ "SD" => [$f++, "Unsized operands can't be non-dword"],
+ "SQ" => [$f++, "Unsized operands can't be non-qword"],
+ "SO" => [$f++, "Unsized operands can't be non-oword"],
+ "SY" => [$f++, "Unsized operands can't be non-yword"],
+ "SZ" => [$f++, "Unsized operands can't be non-zword"],
+ "SIZE" => [$f++, "Unsized operands must match the bitsize"],
+ "SX" => [$f++, "Unsized operands not allowed"],
+ "AR0" => [$f++, "SB, SW, SD applies to argument 0"],
+ "AR1" => [$f++, "SB, SW, SD applies to argument 1"],
+ "AR2" => [$f++, "SB, SW, SD applies to argument 2"],
+ "AR3" => [$f++, "SB, SW, SD applies to argument 3"],
+ "AR4" => [$f++, "SB, SW, SD applies to argument 4"],
+ "OPT" => [$f++, "Optimizing assembly only"],
#
- # dword bound, index 1 - instruction filtering flags
+ # dword bound - instruction filtering flags
#
- "PRIV" => [ 32, "Privileged instruction"],
- "SMM" => [ 33, "Only valid in SMM"],
- "PROT" => [ 34, "Protected mode only"],
- "LOCK" => [ 35, "Lockable if operand 0 is memory"],
- "NOLONG" => [ 36, "Not available in long mode"],
- "LONG" => [ 37, "Long mode"],
- "NOHLE" => [ 38, "HLE prefixes forbidden"],
- "MIB" => [ 39, "disassemble with split EA"],
- "BND" => [ 40, "BND (0xF2) prefix available"],
- "UNDOC" => [ 41, "Undocumented"],
- "HLE" => [ 42, "HLE prefixed"],
- "FPU" => [ 43, "FPU"],
- "MMX" => [ 44, "MMX"],
- "3DNOW" => [ 45, "3DNow!"],
- "SSE" => [ 46, "SSE (KNI, MMX2)"],
- "SSE2" => [ 47, "SSE2"],
- "SSE3" => [ 48, "SSE3 (PNI)"],
- "VMX" => [ 49, "VMX"],
- "SSSE3" => [ 50, "SSSE3"],
- "SSE4A" => [ 51, "AMD SSE4a"],
- "SSE41" => [ 52, "SSE4.1"],
- "SSE42" => [ 53, "SSE4.2"],
- "SSE5" => [ 54, "SSE5"],
- "AVX" => [ 55, "AVX (128b)"],
- "AVX2" => [ 56, "AVX2 (256b)"],
- "FMA" => [ 57, ""],
- "BMI1" => [ 58, ""],
- "BMI2" => [ 59, ""],
- "TBM" => [ 60, ""],
- "RTM" => [ 61, ""],
- "INVPCID" => [ 62, ""],
+ "PRIV" => [${dword_align(\$f)}++, "Privileged instruction"],
+ "SMM" => [$f++, "Only valid in SMM"],
+ "PROT" => [$f++, "Protected mode only"],
+ "LOCK" => [$f++, "Lockable if operand 0 is memory"],
+ "NOLONG" => [$f++, "Not available in long mode"],
+ "LONG" => [$f++, "Long mode"],
+ "NOHLE" => [$f++, "HLE prefixes forbidden"],
+ "MIB" => [$f++, "disassemble with split EA"],
+ "BND" => [$f++, "BND (0xF2) prefix available"],
+ "UNDOC" => [$f++, "Undocumented"],
+ "HLE" => [$f++, "HLE prefixed"],
+ "FPU" => [$f++, "FPU"],
+ "MMX" => [$f++, "MMX"],
+ "3DNOW" => [$f++, "3DNow!"],
+ "SSE" => [$f++, "SSE (KNI, MMX2)"],
+ "SSE2" => [$f++, "SSE2"],
+ "SSE3" => [$f++, "SSE3 (PNI)"],
+ "VMX" => [$f++, "VMX"],
+ "SSSE3" => [$f++, "SSSE3"],
+ "SSE4A" => [$f++, "AMD SSE4a"],
+ "SSE41" => [$f++, "SSE4.1"],
+ "SSE42" => [$f++, "SSE4.2"],
+ "SSE5" => [$f++, "SSE5"],
+ "AVX" => [$f++, "AVX (256-bit floating point)"],
+ "AVX2" => [$f++, "AVX2 (256-bit integer)"],
+ "FMA" => [$f++, ""],
+ "BMI1" => [$f++, ""],
+ "BMI2" => [$f++, ""],
+ "TBM" => [$f++, ""],
+ "RTM" => [$f++, ""],
+ "INVPCID" => [$f++, ""],
+ "AVX512" => [$f++, "AVX-512F (512-bit base architecture)"],
+ "AVX512CD" => [$f++, "AVX-512 Conflict Detection"],
+ "AVX512ER" => [$f++, "AVX-512 Exponential and Reciprocal"],
+ "AVX512PF" => [$f++, "AVX-512 Prefetch"],
+ "MPX" => [$f++, "MPX"],
+ "SHA" => [$f++, "SHA"],
+ "PREFETCHWT1" => [$f++, "PREFETCHWT1"],
+ "AVX512VL" => [$f++, "AVX-512 Vector Length Orthogonality"],
+ "AVX512DQ" => [$f++, "AVX-512 Dword and Qword"],
+ "AVX512BW" => [$f++, "AVX-512 Byte and Word"],
+ "AVX512IFMA" => [$f++, "AVX-512 IFMA instructions"],
+ "AVX512VBMI" => [$f++, "AVX-512 VBMI instructions"],
+ "AES" => [$f++, "AES instructions"],
+ "VAES" => [$f++, "AES AVX instructions"],
+ "VPCLMULQDQ" => [$f++, "Carry-Less Multiplication extention"],
- #
- # dword bound, index 2 - instruction filtering flags
- #
- "AVX512" => [ 64, "AVX-512F (512b)"],
- "AVX512CD" => [ 65, "AVX-512 Conflict Detection"],
- "AVX512ER" => [ 66, "AVX-512 Exponential and Reciprocal"],
- "AVX512PF" => [ 67, "AVX-512 Prefetch"],
- "MPX" => [ 68 ,"MPX"],
- "SHA" => [ 69 ,"SHA"],
- "PREFETCHWT1" => [ 70 ,"PREFETCHWT1"],
- "AVX512VL" => [ 71, "AVX-512 Vector Length Orthogonality"],
- "AVX512DQ" => [ 72, "AVX-512 Dword and Qword"],
- "AVX512BW" => [ 73, "AVX-512 Byte and Word"],
- "AVX512IFMA" => [ 74, "AVX-512 IFMA instructions"],
- "AVX512VBMI" => [ 75, "AVX-512 VBMI instructions"],
- "OBSOLETE" => [ 93, "Instruction removed from architecture"],
- "VEX" => [ 94, "VEX or XOP encoded instruction"],
- "EVEX" => [ 95, "EVEX encoded instruction"],
- "AES" => [ 96, "AES instructions"],
- "VAES" => [ 97, "AES AVX instructions"],
- "VPCLMULQDQ" => [ 98, "Carry-Less Multiplication extention"],
+ # Put these last
+ "OBSOLETE" => [$f++, "Instruction removed from architecture"],
+ "VEX" => [$f++, "VEX or XOP encoded instruction"],
+ "EVEX" => [$f++, "EVEX encoded instruction"],
#
- # dword bound, cpu type flags
+ # dword bound - cpu type flags
#
# The CYRIX and AMD flags should have the highest bit values; the
# disassembler selection algorithm depends on it.
#
- "8086" => [128, "8086"],
- "186" => [129, "186+"],
- "286" => [130, "286+"],
- "386" => [131, "386+"],
- "486" => [132, "486+"],
- "PENT" => [133, "Pentium"],
- "P6" => [134, "P6"],
- "KATMAI" => [135, "Katmai"],
- "WILLAMETTE" => [136, "Willamette"],
- "PRESCOTT" => [137, "Prescott"],
- "X86_64" => [138, "x86-64 (long or legacy mode)"],
- "NEHALEM" => [139, "Nehalem"],
- "WESTMERE" => [140, "Westmere"],
- "SANDYBRIDGE" => [141, "Sandy Bridge"],
- "FUTURE" => [142, "Future processor (not yet disclosed)"],
- "IA64" => [143, "IA64 (in x86 mode)"],
- "CYRIX" => [144, "Cyrix-specific"],
- "AMD" => [145, "AMD-specific"],
+ "8086" => [${dword_align(\$f)}++, "8086"],
+ "186" => [$f++, "186+"],
+ "286" => [$f++, "286+"],
+ "386" => [$f++, "386+"],
+ "486" => [$f++, "486+"],
+ "PENT" => [$f++, "Pentium"],
+ "P6" => [$f++, "P6"],
+ "KATMAI" => [$f++, "Katmai"],
+ "WILLAMETTE" => [$f++, "Willamette"],
+ "PRESCOTT" => [$f++, "Prescott"],
+ "X86_64" => [$f++, "x86-64 (long or legacy mode)"],
+ "NEHALEM" => [$f++, "Nehalem"],
+ "WESTMERE" => [$f++, "Westmere"],
+ "SANDYBRIDGE" => [$f++, "Sandy Bridge"],
+ "FUTURE" => [$f++, "Future processor (not yet disclosed)"],
+ "IA64" => [$f++, "IA64 (in x86 mode)"],
+
+ # Put these last
+ "CYRIX" => [$f++, "Cyrix-specific"],
+ "AMD" => [$f++, "AMD-specific"],
);
my %insns_flag_hash = ();
@@ -176,9 +184,9 @@ my $iflag_words;
sub get_flag_words() {
my $max = -1;
- foreach my $key (keys(%insns_flag_bit)) {
- if (${$insns_flag_bit{$key}}[0] > $max) {
- $max = ${$insns_flag_bit{$key}}[0];
+ foreach my $vp (values(%insns_flag_bit)) {
+ if ($vp->[0] > $max) {
+ $max = $vp->[0];
}
}
@@ -218,14 +226,28 @@ sub write_iflaggen_h() {
print N "#ifndef NASM_IFLAGGEN_H\n";
print N "#define NASM_IFLAGGEN_H 1\n\n";
- foreach my $key (sort { $insns_flag_bit{$a}[0] <=> $insns_flag_bit{$b}[0] } keys(%insns_flag_bit)) {
+ my @flagnames = keys(%insns_flag_bit);
+ @flagnames = sort {
+ $insns_flag_bit{$a}->[0] <=> $insns_flag_bit{$b}->[0]
+ } @flagnames;
+ my $next = 0;
+ foreach my $key (@flagnames) {
+ my $v = $insns_flag_bit{$key};
+ if ($v->[0] > $next) {
+ printf N "%-31s /* %-64s */\n", '',
+ ($next != $v->[0]-1) ?
+ sprintf("%d...%d unused", $next, $v->[0]-1) :
+ sprintf("%d unused", $next);
+ }
print N sprintf("#define IF_%-16s %3d /* %-64s */\n",
- $key, $insns_flag_bit{$key}[0], $insns_flag_bit{$key}[1]);
+ $key, $v->[0], $v->[1]);
+ $next = $v->[0] + 1;
}
print N "\n";
+ printf N "#define IF_FIELD_COUNT %d\n", $iflag_words;
print N "typedef struct {\n";
- printf N " uint32_t field[%d];\n", $iflag_words;
+ print N " uint32_t field[IF_FIELD_COUNT];\n";
print N "} iflag_t;\n";
print N "\n";