Merge tag 'nasm-2.13.03'

NASM 2.13.03 Resolved Conflicts: include/iflag.h version x86/insns-iflags.ph Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
author: H. Peter Anvin <hpa@linux.intel.com> 2018-02-07 13:51:54 -0800
committer: H. Peter Anvin <hpa@linux.intel.com> 2018-02-07 13:51:54 -0800
commit: 1b53750430f03e5953e6b133d6dbae237c3f880f (patch)
tree: e8934dae69b539c48d7f63a2276b8fbcc8bc9b25
parent: 70c439b8de78595e9a465e3907aa282aa9c82984 (diff)
parent: ef7d18bfbb899fe8e44f4fae8d6a12e2742f69f5 (diff)
download: nasm-1b53750430f03e5953e6b133d6dbae237c3f880f.tar.gz
9 files changed, 341 insertions, 217 deletions
diff --git a/asm/assemble.c b/asm/assemble.c
index fc72065e..561bba55 100644
--- a/asm/assemble.c
+++ b/asm/assemble.c
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------- *
  *
- *   Copyright 1996-2017 The NASM Authors - All Rights Reserved
+ *   Copyright 1996-2018 The NASM Authors - All Rights Reserved
  *   See the file AUTHORS included with the NASM distribution for
  *   the specific copyright holders.
  *
@@ -1379,7 +1379,7 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
             length++;
         } else if ((ins->rex & REX_L) &&
                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
-                   iflag_ffs(&cpu) >= IF_X86_64) {
+                   iflag_cpu_level_ok(&cpu, IF_X86_64)) {
             /* LOCK-as-REX.R */
             assert_no_prefix(ins, PPS_LOCK);
             lockcheck = false;  /* Already errored, no need for warning */
diff --git a/asm/directiv.c b/asm/directiv.c
index 937f17af..7c741685 100644
--- a/asm/directiv.c
+++ b/asm/directiv.c
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------- *
  *
- *   Copyright 1996-2017 The NASM Authors - All Rights Reserved
+ *   Copyright 1996-2018 The NASM Authors - All Rights Reserved
  *   See the file AUTHORS included with the NASM distribution for
  *   the specific copyright holders.
  *
@@ -56,78 +56,90 @@
 #include "labels.h"
 #include "iflag.h"
 
-static iflag_t get_cpu(char *value)
+struct cpunames {
+    const char *name;
+    unsigned int level;
+    /* Eventually a table of features */
+};
+
+static iflag_t get_cpu(const char *value)
 {
     iflag_t r;
+    const struct cpunames *cpu;
+    static const struct cpunames cpunames[] = {
+        { "8086", IF_8086 },
+        { "186",  IF_186  },
+        { "286",  IF_286  },
+        { "386",  IF_386  },
+        { "486",  IF_486  },
+        { "586",  IF_PENT },
+        { "pentium", IF_PENT },
+        { "pentiummmx", IF_PENT },
+        { "686",  IF_P6 },
+        { "p6",   IF_P6 },
+        { "ppro", IF_P6 },
+        { "pentiumpro", IF_P6 },
+        { "p2", IF_P6 },        /* +MMX */
+        { "pentiumii", IF_P6 },
+        { "p3", IF_KATMAI },
+        { "katmai", IF_KATMAI },
+        { "p4", IF_WILLAMETTE },
+        { "willamette", IF_WILLAMETTE },
+        { "prescott", IF_PRESCOTT },
+        { "x64", IF_X86_64 },
+        { "x86-64", IF_X86_64 },
+        { "ia64", IF_IA64 },
+        { "ia-64", IF_IA64 },
+        { "itanium", IF_IA64 },
+        { "itanic", IF_IA64 },
+        { "merced", IF_IA64 },
+        { "any", IF_PLEVEL },
+        { "default", IF_PLEVEL },
+        { "all", IF_PLEVEL },
+        { NULL, IF_PLEVEL }     /* Error and final default entry */
+    };
+
+    for (cpu = cpunames; cpu->name; cpu++) {
+        if (!strcmp(value, cpu->name))
+            break;
+    }
 
-    iflag_clear_all(&r);
-
-    if (!strcmp(value, "8086"))
-        iflag_set(&r, IF_8086);
-    else if (!strcmp(value, "186"))
-        iflag_set(&r, IF_186);
-    else if (!strcmp(value, "286"))
-        iflag_set(&r, IF_286);
-    else if (!strcmp(value, "386"))
-        iflag_set(&r, IF_386);
-    else if (!strcmp(value, "486"))
-        iflag_set(&r, IF_486);
-    else if (!strcmp(value, "586") ||
-             !nasm_stricmp(value, "pentium"))
-        iflag_set(&r, IF_PENT);
-    else if (!strcmp(value, "686")              ||
-             !nasm_stricmp(value, "ppro")       ||
-             !nasm_stricmp(value, "pentiumpro") ||
-             !nasm_stricmp(value, "p2"))
-        iflag_set(&r, IF_P6);
-    else if (!nasm_stricmp(value, "p3") ||
-             !nasm_stricmp(value, "katmai"))
-        iflag_set(&r, IF_KATMAI);
-    else if (!nasm_stricmp(value, "p4") ||   /* is this right? -- jrc */
-             !nasm_stricmp(value, "willamette"))
-        iflag_set(&r, IF_WILLAMETTE);
-    else if (!nasm_stricmp(value, "prescott"))
-        iflag_set(&r, IF_PRESCOTT);
-    else if (!nasm_stricmp(value, "x64") ||
-             !nasm_stricmp(value, "x86-64"))
-        iflag_set(&r, IF_X86_64);
-    else if (!nasm_stricmp(value, "ia64")   ||
-             !nasm_stricmp(value, "ia-64")  ||
-             !nasm_stricmp(value, "itanium")||
-             !nasm_stricmp(value, "itanic") ||
-             !nasm_stricmp(value, "merced"))
-        iflag_set(&r, IF_IA64);
-    else {
-        iflag_set(&r, IF_PLEVEL);
+    if (!cpu->name) {
         nasm_error(pass0 < 2 ? ERR_NONFATAL : ERR_FATAL,
-                   "unknown 'cpu' type");
+                   "unknown 'cpu' type '%s'", value);
     }
+
+    iflag_set_cpu(&r, cpu->level);
     return r;
 }
 
-static int get_bits(char *value)
+static int get_bits(const char *value)
 {
-    int i;
+    int i = atoi(value);
 
-    if ((i = atoi(value)) == 16)
-        return i;               /* set for a 16-bit segment */
-    else if (i == 32) {
-        if (iflag_ffs(&cpu) < IF_386) {
+    switch (i) {
+    case 16:
+        break;                  /* Always safe */
+    case 32:
+        if (!iflag_cpu_level_ok(&cpu, IF_386)) {
             nasm_error(ERR_NONFATAL,
-                         "cannot specify 32-bit segment on processor below a 386");
+                       "cannot specify 32-bit segment on processor below a 386");
             i = 16;
         }
-    } else if (i == 64) {
-        if (iflag_ffs(&cpu) < IF_X86_64) {
+        break;
+    case 64:
+        if (!iflag_cpu_level_ok(&cpu, IF_X86_64)) {
             nasm_error(ERR_NONFATAL,
-                         "cannot specify 64-bit segment on processor below an x86-64");
+                       "cannot specify 64-bit segment on processor below an x86-64");
             i = 16;
         }
-    } else {
+        break;
+    default:
         nasm_error(pass0 < 2 ? ERR_NONFATAL : ERR_FATAL,
-                     "`%s' is not a valid segment size; must be 16, 32 or 64",
-                     value);
+                   "`%s' is not a valid segment size; must be 16, 32 or 64",
+                   value);
         i = 16;
+        break;
     }
     return i;
 }
diff --git a/asm/nasm.c b/asm/nasm.c
index 0ffb036c..48b34cd4 100644
--- a/asm/nasm.c
+++ b/asm/nasm.c
@@ -323,8 +323,8 @@ int main(int argc, char **argv)
 
     timestamp();
 
-    iflag_set(&cpu, IF_PLEVEL);
-    iflag_set(&cmd_cpu, IF_PLEVEL);
+    iflag_set_default_cpu(&cpu);
+    iflag_set_default_cpu(&cmd_cpu);
 
     pass0 = 0;
     want_usage = terminate_after_phase = false;
@@ -1289,8 +1289,21 @@ static void assemble_file(const char *fname, StrList **depend_ptr)
     uint64_t prev_offset_changed;
     unsigned int stall_count = 0; /* Make sure we make forward progress... */
 
-    if (cmd_sb == 32 && iflag_ffs(&cmd_cpu) < IF_386)
-	nasm_fatal(0, "command line: 32-bit segment size requires a higher cpu");
+    switch (cmd_sb) {
+    case 16:
+        break;
+    case 32:
+        if (!iflag_cpu_level_ok(&cmd_cpu, IF_386))
+            nasm_fatal(0, "command line: 32-bit segment size requires a higher cpu");
+        break;
+    case 64:
+        if (!iflag_cpu_level_ok(&cmd_cpu, IF_X86_64))
+            nasm_fatal(0, "command line: 64-bit segment size requires a higher cpu");
+        break;
+    default:
+        panic();
+        break;
+    }
 
     pass_max = prev_offset_changed = (INT_MAX >> 1) + 2; /* Almost unlimited */
     for (passn = 1; pass0 <= 2; passn++) {
diff --git a/doc/changes.src b/doc/changes.src
index 2076babe..e72457c2 100644
--- a/doc/changes.src
+++ b/doc/changes.src
@@ -7,6 +7,12 @@
 The NASM 2 series supports x86-64, and is the production version of NASM
 since 2007.
 
+\S{cl-2.13.03} Version 2.13.03
+
+\b Added AVX and AVX512 \c{VAES*} and \c{VPCLMULQDQ} instructions.
+
+\b Fixed missing dwarf record in x32 ELF output format.
+
 \S{cl-2.13.02} Version 2.13.02
 
 \b Fix false positive in testing of numeric overflows.
diff --git a/doc/nasmdoc.src b/doc/nasmdoc.src
index f998cc60..7b331d35 100644
--- a/doc/nasmdoc.src
+++ b/doc/nasmdoc.src
@@ -1,6 +1,6 @@
 \# --------------------------------------------------------------------------
 \#
-\#   Copyright 1996-2017 The NASM Authors - All Rights Reserved
+\#   Copyright 1996-2018 The NASM Authors - All Rights Reserved
 \#   See the file AUTHORS included with the NASM distribution for
 \#   the specific copyright holders.
 \#
@@ -7704,10 +7704,15 @@ platforms pass arguments in registers rather than on the stack.
 Furthermore, 64-bit platforms use SSE2 by default for floating point.
 Please see the ABI documentation for your platform.
 
-64-bit platforms differ in the sizes of the fundamental datatypes, not
-just from 32-bit platforms but from each other.  If a specific size
-data type is desired, it is probably best to use the types defined in
-the Standard C header \c{<inttypes.h>}.
+64-bit platforms differ in the sizes of the C/C++ fundamental
+datatypes, not just from 32-bit platforms but from each other.  If a
+specific size data type is desired, it is probably best to use the
+types defined in the standard C header \c{<inttypes.h>}.
+
+All known 64-bit platforms except some embedded platforms require that
+the stack pointer (\c{RSP}) is 16-byte aligned immediately before a
+\c{CALL} instruction.  Since \c{CALL} pushes an 8-byte return address,
+\c{RSP} is an \e{odd} multiple of 8 bytes at the entry to a function.
 
 In 64-bit mode, the default instruction size is still 32 bits.  When
 loading a value into a 32-bit register (but not an 8- or 16-bit
@@ -7755,12 +7760,30 @@ immediate as \c{DWORD}:
 
 The length of these instructions are 10, 5 and 7 bytes, respectively.
 
+If optimization is enabled and NASM can determine at assembly time
+that a shorter instruction will suffice, the shorter instruction will
+be emitted unless of course \c{STRICT QWORD} or \c{STRICT DWORD} is
+specified (see \k{strict}):
+
+\c      mov rax,1		; Assembles as "mov eax,1" (5 bytes)
+\c      mov rax,strict qword 1  ; Full 10-byte instruction
+\c	mov rax,strict dword 1	; 7-byte instruction
+\c      mov rax,symbol          ; 10 bytes, not known at assembly time
+\c      lea rax,[rel symbol]    ; 7 bytes, usually preferred by the ABI
+
+Note that \c{lea rax,[rel symbol]} is position-independent, whereas
+\c{mov rax,symbol} is not.  Most ABIs prefer or even require
+position-independent code in 64-bit mode.  However, the \c{MOV}
+instruction is able to reference a symbol anywhere in the 64-bit
+address space, whereas \c{LEA} is only able to access a symbol within
+2 GB of the instruction itself (see below.)
+
 The only instructions which take a full \I{64-bit displacement}64-bit
 \e{displacement} is loading or storing, using \c{MOV}, \c{AL}, \c{AX},
 \c{EAX} or \c{RAX} (but no other registers) to an absolute 64-bit address.
 Since this is a relatively rarely used instruction (64-bit code generally uses
 relative addressing), the programmer has to explicitly declare the
-displacement size as \c{QWORD}:
+displacement size as \c{ABS QWORD}:
 
 \c      default abs
 \c
@@ -7797,9 +7820,11 @@ calls, and thus are available for use by the function without saving.
 Integer return values are passed in \c{RAX} and \c{RDX}, in that order.
 
 Floating point is done using SSE registers, except for \c{long
-double}.  Floating-point arguments are passed in \c{XMM0} to \c{XMM7};
-return is \c{XMM0} and \c{XMM1}.  \c{long double} are passed on the
-stack, and returned in \c{ST0} and \c{ST1}.
+double}, which is 80 bits (\c{TWORD}) on most platforms (Android is
+one exception; there \c{long double} is 64 bits and treated the same
+as \c{double}.)  Floating-point arguments are passed in \c{XMM0} to
+\c{XMM7}; return is \c{XMM0} and \c{XMM1}.  \c{long double} is passed
+on the stack, and returned in \c{ST0} and \c{ST1}.
 
 All SSE and x87 registers are destroyed by function calls.
 
diff --git a/include/iflag.h b/include/iflag.h
index 289e4272..e8fa6036 100644
--- a/include/iflag.h
+++ b/include/iflag.h
@@ -1,30 +1,29 @@
 #ifndef NASM_IFLAG_H
 #define NASM_IFLAG_H
 
-#include <string.h>
-
 #include "compiler.h"
 #include "ilog2.h"
+
+#include <string.h>
+
 #include "iflaggen.h"
+#include "nasmlib.h"            /* For ilog2_32() */
 
 #define IF_GENBIT(bit)          (UINT32_C(1) << (bit))
 
-static inline unsigned int iflag_test(const iflag_t *f, unsigned int bit)
+static inline bool iflag_test(const iflag_t *f, unsigned int bit)
 {
-    unsigned int index = bit / 32;
-    return f->field[index] & (UINT32_C(1) << (bit - (index * 32)));
+    return !!(f->field[bit >> 5] & IF_GENBIT(bit & 31));
 }
 
 static inline void iflag_set(iflag_t *f, unsigned int bit)
 {
-    unsigned int index = bit / 32;
-    f->field[index] |= (UINT32_C(1) << (bit - (index * 32)));
+    f->field[bit >> 5] |= IF_GENBIT(bit & 31);
 }
 
 static inline void iflag_clear(iflag_t *f, unsigned int bit)
 {
-    unsigned int index = bit / 32;
-    f->field[index] &= ~(UINT32_C(1) << (bit - (index * 32)));
+    f->field[bit >> 5] &= ~IF_GENBIT(bit & 31);
 }
 
 static inline void iflag_clear_all(iflag_t *f)
@@ -34,39 +33,21 @@ static inline void iflag_clear_all(iflag_t *f)
 
 static inline void iflag_set_all(iflag_t *f)
 {
-     memset(f, 0xff, sizeof(*f));
+     memset(f, ~0, sizeof(*f));
 }
 
+#define iflag_for_each_field(v) for ((v) = 0; (v) < IF_FIELD_COUNT; (v)++)
+
 static inline int iflag_cmp(const iflag_t *a, const iflag_t *b)
 {
     int i;
 
-    for (i = sizeof(a->field) / sizeof(a->field[0]) - 1; i >= 0; i--) {
+    /* This is intentionally a reverse loop! */
+    for (i = IF_FIELD_COUNT-1; i >= 0; i--) {
         if (a->field[i] == b->field[i])
             continue;
 
-        return (a->field[i] > b->field[i]) ? 1 : -1;
-    }
-
-    return 0;
-}
-
-static inline int iflag_cmp_cpu(const iflag_t *a, const iflag_t *b)
-{
-    if (a->field[3] < b->field[3])
-        return -1;
-    else if (a->field[3] > b->field[3])
-        return 1;
-    return 0;
-}
-
-static inline unsigned int iflag_ffs(const iflag_t *a)
-{
-    unsigned int i;
-
-    for (i = 0; i < sizeof(a->field) / sizeof(a->field[0]); i++) {
-        if (a->field[i])
-            return ilog2_32(a->field[i]) + (i * 32);
+        return (int)(a->field[i] - b->field[i]);
     }
 
     return 0;
@@ -78,7 +59,7 @@ static inline unsigned int iflag_ffs(const iflag_t *a)
         unsigned int i;                                                 \
         iflag_t res;                                                    \
                                                                         \
-        for (i = 0; i < sizeof(a->field) / sizeof(a->field[0]); i++)    \
+        iflag_for_each_field(i)                                         \
             res.field[i] = a->field[i] op b->field[i];                  \
                                                                         \
         return res;                                                     \
@@ -86,13 +67,6 @@ static inline unsigned int iflag_ffs(const iflag_t *a)
 
 IF_GEN_HELPER(xor, ^)
 
-
-/* Use this helper to test instruction template flags */
-#define itemp_has(itemp, bit)   iflag_test(&insns_flags[(itemp)->iflag_idx], bit)
-
-
-/* Maximum processor level at moment */
-#define IF_PLEVEL               IF_IA64
 /* Some helpers which are to work with predefined masks */
 #define IF_SMASK        \
     (IF_GENBIT(IF_SB)  |\
@@ -118,23 +92,67 @@ IF_GEN_HELPER(xor, ^)
 #define itemp_arg(itemp)        _itemp_arg((itemp)->iflag_idx)
 #define itemp_armask(itemp)     _itemp_armask((itemp)->iflag_idx)
 
+/*
+ * IF_8086 is the first CPU level flag and IF_PLEVEL the last
+ */
+#if IF_8086 & 31
+#error "IF_8086 must be on a uint32_t boundary"
+#endif
+#define IF_PLEVEL               IF_IA64
+#define IF_CPU_FIELD	       (IF_8086 >> 5)
+#define IF_CPU_LEVEL_MASK      ((IF_GENBIT(IF_PLEVEL & 31) << 1) - 1)
+
+/*
+ * IF_PRIV is the first instruction filtering flag
+ */
+#if IF_PRIV & 31
+#error "IF_PRIV must be on a uint32_t boundary"
+#endif
+#define IF_FEATURE_FIELD	(IF_PRIV >> 5)
+
+static inline int iflag_cmp_cpu(const iflag_t *a, const iflag_t *b)
+{
+    return (int)(a->field[IF_CPU_FIELD] - b->field[IF_CPU_FIELD]);
+}
+
+static inline uint32_t _iflag_cpu_level(const iflag_t *a)
+{
+    return a->field[IF_CPU_FIELD] & IF_CPU_LEVEL_MASK;
+}
+
 static inline int iflag_cmp_cpu_level(const iflag_t *a, const iflag_t *b)
 {
-    iflag_t v1 = *a;
-    iflag_t v2 = *b;
+    uint32_t aa = _iflag_cpu_level(a);
+    uint32_t bb = _iflag_cpu_level(b);
 
-    iflag_clear(&v1, IF_CYRIX);
-    iflag_clear(&v1, IF_AMD);
+    return (int)(aa - bb);
+}
 
-    iflag_clear(&v2, IF_CYRIX);
-    iflag_clear(&v2, IF_AMD);
+/* Returns true if the CPU level is at least a certain value */
+static inline bool iflag_cpu_level_ok(const iflag_t *a, unsigned int bit)
+{
+    return _iflag_cpu_level(a) >= IF_GENBIT(bit & 31);
+}
 
-    if (v1.field[3] < v2.field[3])
-        return -1;
-    else if (v1.field[3] > v2.field[3])
-        return 1;
+static inline void iflag_set_all_features(iflag_t *a)
+{
+    size_t i;
 
-    return 0;
+    for (i = IF_FEATURE_FIELD; i < IF_CPU_FIELD; i++)
+        a->field[i] = ~UINT32_C(0);
+}
+
+static inline void iflag_set_cpu(iflag_t *a, unsigned int cpu)
+{
+    a->field[0] = 0;     /* Not applicable to the CPU type */
+    iflag_set_all_features(a);    /* All feature masking bits set for now */
+    a->field[IF_CPU_FIELD] &= ~IF_CPU_LEVEL_MASK;
+    iflag_set(a, cpu);
+}
+
+static inline void iflag_set_default_cpu(iflag_t *a)
+{
+    iflag_set_cpu(a, IF_PLEVEL);
 }
 
 static inline iflag_t _iflag_pfmask(const iflag_t *a)
diff --git a/include/insns.h b/include/insns.h
index ac2d7924..00de2887 100644
--- a/include/insns.h
+++ b/include/insns.h
@@ -23,6 +23,12 @@ struct itemplate {
     uint32_t        iflag_idx;          /* some flags referenced by index */
 };
 
+/* Use this helper to test instruction template flags */
+static inline bool itemp_has(const struct itemplate *itemp, unsigned int bit)
+{
+    return iflag_test(&insns_flags[itemp->iflag_idx], bit);
+}
+
 /* Disassembler table structure */
 
 /*
diff --git a/test/vaesenc.asm b/test/vaesenc.asm
new file mode 100644
index 00000000..9edca705
--- /dev/null
+++ b/test/vaesenc.asm
@@ -0,0 +1,22 @@
+;; BR 3392454, 3392460
+
+	bits 64
+	aesenc xmm0,xmm4
+	vaesenc zmm0,zmm0,zmm4
+	vpclmullqlqdq zmm1,zmm1,zmm5
+	vpclmulqdq zmm0, zmm1, zmm2, 0
+	vaesenclast zmm0, zmm1, zmm2
+
+	bits 32
+	aesenc xmm0,xmm4
+	vaesenc zmm0,zmm0,zmm4
+	vpclmullqlqdq zmm1,zmm1,zmm5
+	vpclmulqdq zmm0, zmm1, zmm2, 0
+	vaesenclast zmm0, zmm1, zmm2
+
+	bits 16
+	aesenc xmm0,xmm4
+	vaesenc zmm0,zmm0,zmm4
+	vpclmullqlqdq zmm1,zmm1,zmm5
+	vpclmulqdq zmm0, zmm1, zmm2, 0
+	vaesenclast zmm0, zmm1, zmm2
diff --git a/x86/insns-iflags.ph b/x86/insns-iflags.ph
index 989276f2..43bf70e8 100644
--- a/x86/insns-iflags.ph
+++ b/x86/insns-iflags.ph
@@ -64,109 +64,117 @@
 # for a set of flags, so be careful moving bits (and
 # don't forget to update C code generation then).
 #
+sub dword_align($) {
+    my($n) = @_;
+
+    $$n = ($$n + 31) & ~31;
+    return $n;
+}
+
+my $f = 0;
 my %insns_flag_bit = (
     #
     # dword bound, index 0 - specific flags
     #
-    "SM"                => [  0, "Size match"],
-    "SM2"               => [  1, "Size match first two operands"],
-    "SB"                => [  2, "Unsized operands can't be non-byte"],
-    "SW"                => [  3, "Unsized operands can't be non-word"],
-    "SD"                => [  4, "Unsized operands can't be non-dword"],
-    "SQ"                => [  5, "Unsized operands can't be non-qword"],
-    "SO"                => [  6, "Unsized operands can't be non-oword"],
-    "SY"                => [  7, "Unsized operands can't be non-yword"],
-    "SZ"                => [  8, "Unsized operands can't be non-zword"],
-    "SIZE"              => [  9, "Unsized operands must match the bitsize"],
-    "SX"                => [ 10, "Unsized operands not allowed"],
-    "AR0"               => [ 11, "SB, SW, SD applies to argument 0"],
-    "AR1"               => [ 12, "SB, SW, SD applies to argument 1"],
-    "AR2"               => [ 13, "SB, SW, SD applies to argument 2"],
-    "AR3"               => [ 14, "SB, SW, SD applies to argument 3"],
-    "AR4"               => [ 15, "SB, SW, SD applies to argument 4"],
-    "OPT"               => [ 16, "Optimizing assembly only"],
+    "SM"                => [$f++, "Size match"],
+    "SM2"               => [$f++, "Size match first two operands"],
+    "SB"                => [$f++, "Unsized operands can't be non-byte"],
+    "SW"                => [$f++, "Unsized operands can't be non-word"],
+    "SD"                => [$f++, "Unsized operands can't be non-dword"],
+    "SQ"                => [$f++, "Unsized operands can't be non-qword"],
+    "SO"                => [$f++, "Unsized operands can't be non-oword"],
+    "SY"                => [$f++, "Unsized operands can't be non-yword"],
+    "SZ"                => [$f++, "Unsized operands can't be non-zword"],
+    "SIZE"              => [$f++, "Unsized operands must match the bitsize"],
+    "SX"                => [$f++, "Unsized operands not allowed"],
+    "AR0"               => [$f++, "SB, SW, SD applies to argument 0"],
+    "AR1"               => [$f++, "SB, SW, SD applies to argument 1"],
+    "AR2"               => [$f++, "SB, SW, SD applies to argument 2"],
+    "AR3"               => [$f++, "SB, SW, SD applies to argument 3"],
+    "AR4"               => [$f++, "SB, SW, SD applies to argument 4"],
+    "OPT"               => [$f++, "Optimizing assembly only"],
 
     #
-    # dword bound, index 1 - instruction filtering flags
+    # dword bound - instruction filtering flags
     #
-    "PRIV"              => [ 32, "Privileged instruction"],
-    "SMM"               => [ 33, "Only valid in SMM"],
-    "PROT"              => [ 34, "Protected mode only"],
-    "LOCK"              => [ 35, "Lockable if operand 0 is memory"],
-    "NOLONG"            => [ 36, "Not available in long mode"],
-    "LONG"              => [ 37, "Long mode"],
-    "NOHLE"             => [ 38, "HLE prefixes forbidden"],
-    "MIB"               => [ 39, "disassemble with split EA"],
-    "BND"               => [ 40, "BND (0xF2) prefix available"],
-    "UNDOC"             => [ 41, "Undocumented"],
-    "HLE"               => [ 42, "HLE prefixed"],
-    "FPU"               => [ 43, "FPU"],
-    "MMX"               => [ 44, "MMX"],
-    "3DNOW"             => [ 45, "3DNow!"],
-    "SSE"               => [ 46, "SSE (KNI, MMX2)"],
-    "SSE2"              => [ 47, "SSE2"],
-    "SSE3"              => [ 48, "SSE3 (PNI)"],
-    "VMX"               => [ 49, "VMX"],
-    "SSSE3"             => [ 50, "SSSE3"],
-    "SSE4A"             => [ 51, "AMD SSE4a"],
-    "SSE41"             => [ 52, "SSE4.1"],
-    "SSE42"             => [ 53, "SSE4.2"],
-    "SSE5"              => [ 54, "SSE5"],
-    "AVX"               => [ 55, "AVX (128b)"],
-    "AVX2"              => [ 56, "AVX2 (256b)"],
-    "FMA"               => [ 57, ""],
-    "BMI1"              => [ 58, ""],
-    "BMI2"              => [ 59, ""],
-    "TBM"               => [ 60, ""],
-    "RTM"               => [ 61, ""],
-    "INVPCID"           => [ 62, ""],
+    "PRIV"              => [${dword_align(\$f)}++, "Privileged instruction"],
+    "SMM"               => [$f++, "Only valid in SMM"],
+    "PROT"              => [$f++, "Protected mode only"],
+    "LOCK"              => [$f++, "Lockable if operand 0 is memory"],
+    "NOLONG"            => [$f++, "Not available in long mode"],
+    "LONG"              => [$f++, "Long mode"],
+    "NOHLE"             => [$f++, "HLE prefixes forbidden"],
+    "MIB"               => [$f++, "disassemble with split EA"],
+    "BND"               => [$f++, "BND (0xF2) prefix available"],
+    "UNDOC"             => [$f++, "Undocumented"],
+    "HLE"               => [$f++, "HLE prefixed"],
+    "FPU"               => [$f++, "FPU"],
+    "MMX"               => [$f++, "MMX"],
+    "3DNOW"             => [$f++, "3DNow!"],
+    "SSE"               => [$f++, "SSE (KNI, MMX2)"],
+    "SSE2"              => [$f++, "SSE2"],
+    "SSE3"              => [$f++, "SSE3 (PNI)"],
+    "VMX"               => [$f++, "VMX"],
+    "SSSE3"             => [$f++, "SSSE3"],
+    "SSE4A"             => [$f++, "AMD SSE4a"],
+    "SSE41"             => [$f++, "SSE4.1"],
+    "SSE42"             => [$f++, "SSE4.2"],
+    "SSE5"              => [$f++, "SSE5"],
+    "AVX"               => [$f++, "AVX  (256-bit floating point)"],
+    "AVX2"              => [$f++, "AVX2 (256-bit integer)"],
+    "FMA"               => [$f++, ""],
+    "BMI1"              => [$f++, ""],
+    "BMI2"              => [$f++, ""],
+    "TBM"               => [$f++, ""],
+    "RTM"               => [$f++, ""],
+    "INVPCID"           => [$f++, ""],
+    "AVX512"            => [$f++, "AVX-512F (512-bit base architecture)"],
+    "AVX512CD"          => [$f++, "AVX-512 Conflict Detection"],
+    "AVX512ER"          => [$f++, "AVX-512 Exponential and Reciprocal"],
+    "AVX512PF"          => [$f++, "AVX-512 Prefetch"],
+    "MPX"               => [$f++, "MPX"],
+    "SHA"               => [$f++, "SHA"],
+    "PREFETCHWT1"       => [$f++, "PREFETCHWT1"],
+    "AVX512VL"          => [$f++, "AVX-512 Vector Length Orthogonality"],
+    "AVX512DQ"          => [$f++, "AVX-512 Dword and Qword"],
+    "AVX512BW"          => [$f++, "AVX-512 Byte and Word"],
+    "AVX512IFMA"        => [$f++, "AVX-512 IFMA instructions"],
+    "AVX512VBMI"        => [$f++, "AVX-512 VBMI instructions"],
+    "AES"               => [$f++, "AES instructions"],
+    "VAES"              => [$f++, "AES AVX instructions"],
+    "VPCLMULQDQ"        => [$f++, "Carry-Less Multiplication extention"],
 
-    #
-    # dword bound, index 2 - instruction filtering flags
-    #
-    "AVX512"            => [ 64, "AVX-512F (512b)"],
-    "AVX512CD"          => [ 65, "AVX-512 Conflict Detection"],
-    "AVX512ER"          => [ 66, "AVX-512 Exponential and Reciprocal"],
-    "AVX512PF"          => [ 67, "AVX-512 Prefetch"],
-    "MPX"               => [ 68	,"MPX"],
-    "SHA"               => [ 69	,"SHA"],
-    "PREFETCHWT1"       => [ 70	,"PREFETCHWT1"],
-    "AVX512VL"          => [ 71, "AVX-512 Vector Length Orthogonality"],
-    "AVX512DQ"          => [ 72, "AVX-512 Dword and Qword"],
-    "AVX512BW"          => [ 73, "AVX-512 Byte and Word"],
-    "AVX512IFMA"        => [ 74, "AVX-512 IFMA instructions"],
-    "AVX512VBMI"        => [ 75, "AVX-512 VBMI instructions"],
-    "OBSOLETE"          => [ 93, "Instruction removed from architecture"],
-    "VEX"               => [ 94, "VEX or XOP encoded instruction"],
-    "EVEX"              => [ 95, "EVEX encoded instruction"],
-    "AES"               => [ 96, "AES instructions"],
-    "VAES"              => [ 97, "AES AVX instructions"],
-    "VPCLMULQDQ"        => [ 98, "Carry-Less Multiplication extention"],
+    # Put these last
+    "OBSOLETE"          => [$f++, "Instruction removed from architecture"],
+    "VEX"               => [$f++, "VEX or XOP encoded instruction"],
+    "EVEX"              => [$f++, "EVEX encoded instruction"],
 
     #
-    # dword bound, cpu type flags
+    # dword bound - cpu type flags
     #
     # The CYRIX and AMD flags should have the highest bit values; the
     # disassembler selection algorithm depends on it.
     #
-    "8086"              => [128, "8086"],
-    "186"               => [129, "186+"],
-    "286"               => [130, "286+"],
-    "386"               => [131, "386+"],
-    "486"               => [132, "486+"],
-    "PENT"              => [133, "Pentium"],
-    "P6"                => [134, "P6"],
-    "KATMAI"            => [135, "Katmai"],
-    "WILLAMETTE"        => [136, "Willamette"],
-    "PRESCOTT"          => [137, "Prescott"],
-    "X86_64"            => [138, "x86-64 (long or legacy mode)"],
-    "NEHALEM"           => [139, "Nehalem"],
-    "WESTMERE"          => [140, "Westmere"],
-    "SANDYBRIDGE"       => [141, "Sandy Bridge"],
-    "FUTURE"            => [142, "Future processor (not yet disclosed)"],
-    "IA64"              => [143, "IA64 (in x86 mode)"],
-    "CYRIX"             => [144, "Cyrix-specific"],
-    "AMD"               => [145, "AMD-specific"],
+    "8086"              => [${dword_align(\$f)}++, "8086"],
+    "186"               => [$f++, "186+"],
+    "286"               => [$f++, "286+"],
+    "386"               => [$f++, "386+"],
+    "486"               => [$f++, "486+"],
+    "PENT"              => [$f++, "Pentium"],
+    "P6"                => [$f++, "P6"],
+    "KATMAI"            => [$f++, "Katmai"],
+    "WILLAMETTE"        => [$f++, "Willamette"],
+    "PRESCOTT"          => [$f++, "Prescott"],
+    "X86_64"            => [$f++, "x86-64 (long or legacy mode)"],
+    "NEHALEM"           => [$f++, "Nehalem"],
+    "WESTMERE"          => [$f++, "Westmere"],
+    "SANDYBRIDGE"       => [$f++, "Sandy Bridge"],
+    "FUTURE"            => [$f++, "Future processor (not yet disclosed)"],
+    "IA64"              => [$f++, "IA64 (in x86 mode)"],
+
+    # Put these last
+    "CYRIX"             => [$f++, "Cyrix-specific"],
+    "AMD"               => [$f++, "AMD-specific"],
 );
 
 my %insns_flag_hash = ();
@@ -176,9 +184,9 @@ my $iflag_words;
 sub get_flag_words() {
     my $max = -1;
 
-    foreach my $key (keys(%insns_flag_bit)) {
-	if (${$insns_flag_bit{$key}}[0] > $max) {
-	    $max = ${$insns_flag_bit{$key}}[0];
+    foreach my $vp (values(%insns_flag_bit)) {
+	if ($vp->[0] > $max) {
+	    $max = $vp->[0];
 	}
     }
 
@@ -218,14 +226,28 @@ sub write_iflaggen_h() {
     print N "#ifndef NASM_IFLAGGEN_H\n";
     print N "#define NASM_IFLAGGEN_H 1\n\n";
 
-    foreach my $key (sort { $insns_flag_bit{$a}[0] <=> $insns_flag_bit{$b}[0] } keys(%insns_flag_bit)) {
+    my @flagnames = keys(%insns_flag_bit);
+    @flagnames = sort {
+	$insns_flag_bit{$a}->[0] <=> $insns_flag_bit{$b}->[0]
+    } @flagnames;
+    my $next = 0;
+    foreach my $key (@flagnames) {
+	my $v = $insns_flag_bit{$key};
+	if ($v->[0] > $next) {
+	    printf N "%-31s /* %-64s */\n", '',
+		($next != $v->[0]-1) ?
+		sprintf("%d...%d unused", $next, $v->[0]-1) :
+		sprintf("%d unused", $next);
+	}
         print N sprintf("#define IF_%-16s %3d /* %-64s */\n",
-            $key, $insns_flag_bit{$key}[0], $insns_flag_bit{$key}[1]);
+			$key, $v->[0], $v->[1]);
+	$next = $v->[0] + 1;
     }
 
     print N "\n";
+    printf N "#define IF_FIELD_COUNT %d\n", $iflag_words;
     print N "typedef struct {\n";
-    printf N "    uint32_t field[%d];\n", $iflag_words;
+    print N "    uint32_t field[IF_FIELD_COUNT];\n";
     print N "} iflag_t;\n";
 
     print N "\n";
author	H. Peter Anvin <hpa@linux.intel.com>	2018-02-07 13:51:54 -0800
committer	H. Peter Anvin <hpa@linux.intel.com>	2018-02-07 13:51:54 -0800
commit	1b53750430f03e5953e6b133d6dbae237c3f880f (patch)
tree	e8934dae69b539c48d7f63a2276b8fbcc8bc9b25
parent	70c439b8de78595e9a465e3907aa282aa9c82984 (diff)
parent	ef7d18bfbb899fe8e44f4fae8d6a12e2742f69f5 (diff)
download	nasm-1b53750430f03e5953e6b133d6dbae237c3f880f.tar.gz