iflag: automatically assign values, saner handling of CPU levels

Automatically assign values to the instruction flags. We ended up with a case where pushing flags into the next dword caused comparison failures, because other places in the code explicitly compared field[3]. This commit creates the defines necessary to keep that from happening, and it also cleans up a fair bit of the iflag code. This resolves BR 3392454. Reported-by: Tomasz Kantecki <tomasz.kantecki@intel.com> Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
author: H. Peter Anvin <hpa@linux.intel.com> 2018-02-06 14:43:07 -0800
committer: H. Peter Anvin <hpa@linux.intel.com> 2018-02-06 14:43:07 -0800
commit: a7ecf2646d6c80b994be7d340140379d580050cf (patch)
tree: 50fc3d8fb36d4a96dff236ddaf2ee8420f27f020
parent: 4d7e680cb015e5d77764ff1cc83a417ecb73e31b (diff)
download: nasm-a7ecf2646d6c80b994be7d340140379d580050cf.tar.gz
7 files changed, 283 insertions, 209 deletions
diff --git a/asm/assemble.c b/asm/assemble.c
index 5e7f6fa1..3bfccec0 100644
--- a/asm/assemble.c
+++ b/asm/assemble.c
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------- *
  *
- *   Copyright 1996-2017 The NASM Authors - All Rights Reserved
+ *   Copyright 1996-2018 The NASM Authors - All Rights Reserved
  *   See the file AUTHORS included with the NASM distribution for
  *   the specific copyright holders.
  *
@@ -1353,7 +1353,7 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
             length++;
         } else if ((ins->rex & REX_L) &&
                    !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) &&
-                   iflag_ffs(&cpu) >= IF_X86_64) {
+                   iflag_cpu_level_ok(&cpu, IF_X86_64)) {
             /* LOCK-as-REX.R */
             assert_no_prefix(ins, PPS_LOCK);
             lockcheck = false;  /* Already errored, no need for warning */
diff --git a/asm/directiv.c b/asm/directiv.c
index 354b591a..4c30323a 100644
--- a/asm/directiv.c
+++ b/asm/directiv.c
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------- *
  *
- *   Copyright 1996-2017 The NASM Authors - All Rights Reserved
+ *   Copyright 1996-2018 The NASM Authors - All Rights Reserved
  *   See the file AUTHORS included with the NASM distribution for
  *   the specific copyright holders.
  *
@@ -55,78 +55,90 @@
 #include "labels.h"
 #include "iflag.h"
 
-static iflag_t get_cpu(char *value)
+struct cpunames {
+    const char *name;
+    unsigned int level;
+    /* Eventually a table of features */
+};
+
+static iflag_t get_cpu(const char *value)
 {
     iflag_t r;
+    const struct cpunames *cpu;
+    static const struct cpunames cpunames[] = {
+        { "8086", IF_8086 },
+        { "186",  IF_186  },
+        { "286",  IF_286  },
+        { "386",  IF_386  },
+        { "486",  IF_486  },
+        { "586",  IF_PENT },
+        { "pentium", IF_PENT },
+        { "pentiummmx", IF_PENT },
+        { "686",  IF_P6 },
+        { "p6",   IF_P6 },
+        { "ppro", IF_P6 },
+        { "pentiumpro", IF_P6 },
+        { "p2", IF_P6 },        /* +MMX */
+        { "pentiumii", IF_P6 },
+        { "p3", IF_KATMAI },
+        { "katmai", IF_KATMAI },
+        { "p4", IF_WILLAMETTE },
+        { "willamette", IF_WILLAMETTE },
+        { "prescott", IF_PRESCOTT },
+        { "x64", IF_X86_64 },
+        { "x86-64", IF_X86_64 },
+        { "ia64", IF_IA64 },
+        { "ia-64", IF_IA64 },
+        { "itanium", IF_IA64 },
+        { "itanic", IF_IA64 },
+        { "merced", IF_IA64 },
+        { "any", IF_PLEVEL },
+        { "default", IF_PLEVEL },
+        { "all", IF_PLEVEL },
+        { NULL, IF_PLEVEL }     /* Error and final default entry */
+    };
+
+    for (cpu = cpunames; cpu->name; cpu++) {
+        if (!strcmp(value, cpu->name))
+            break;
+    }
 
-    iflag_clear_all(&r);
-
-    if (!strcmp(value, "8086"))
-        iflag_set(&r, IF_8086);
-    else if (!strcmp(value, "186"))
-        iflag_set(&r, IF_186);
-    else if (!strcmp(value, "286"))
-        iflag_set(&r, IF_286);
-    else if (!strcmp(value, "386"))
-        iflag_set(&r, IF_386);
-    else if (!strcmp(value, "486"))
-        iflag_set(&r, IF_486);
-    else if (!strcmp(value, "586") ||
-             !nasm_stricmp(value, "pentium"))
-        iflag_set(&r, IF_PENT);
-    else if (!strcmp(value, "686")              ||
-             !nasm_stricmp(value, "ppro")       ||
-             !nasm_stricmp(value, "pentiumpro") ||
-             !nasm_stricmp(value, "p2"))
-        iflag_set(&r, IF_P6);
-    else if (!nasm_stricmp(value, "p3") ||
-             !nasm_stricmp(value, "katmai"))
-        iflag_set(&r, IF_KATMAI);
-    else if (!nasm_stricmp(value, "p4") ||   /* is this right? -- jrc */
-             !nasm_stricmp(value, "willamette"))
-        iflag_set(&r, IF_WILLAMETTE);
-    else if (!nasm_stricmp(value, "prescott"))
-        iflag_set(&r, IF_PRESCOTT);
-    else if (!nasm_stricmp(value, "x64") ||
-             !nasm_stricmp(value, "x86-64"))
-        iflag_set(&r, IF_X86_64);
-    else if (!nasm_stricmp(value, "ia64")   ||
-             !nasm_stricmp(value, "ia-64")  ||
-             !nasm_stricmp(value, "itanium")||
-             !nasm_stricmp(value, "itanic") ||
-             !nasm_stricmp(value, "merced"))
-        iflag_set(&r, IF_IA64);
-    else {
-        iflag_set(&r, IF_PLEVEL);
+    if (!cpu->name) {
         nasm_error(pass0 < 2 ? ERR_NONFATAL : ERR_FATAL,
-                   "unknown 'cpu' type");
+                   "unknown 'cpu' type '%s'", value);
     }
+
+    iflag_set_cpu(&r, cpu->level);
     return r;
 }
 
-static int get_bits(char *value)
+static int get_bits(const char *value)
 {
-    int i;
+    int i = atoi(value);
 
-    if ((i = atoi(value)) == 16)
-        return i;               /* set for a 16-bit segment */
-    else if (i == 32) {
-        if (iflag_ffs(&cpu) < IF_386) {
+    switch (i) {
+    case 16:
+        break;                  /* Always safe */
+    case 32:
+        if (!iflag_cpu_level_ok(&cpu, IF_386)) {
             nasm_error(ERR_NONFATAL,
-                         "cannot specify 32-bit segment on processor below a 386");
+                       "cannot specify 32-bit segment on processor below a 386");
             i = 16;
         }
-    } else if (i == 64) {
-        if (iflag_ffs(&cpu) < IF_X86_64) {
+        break;
+    case 64:
+        if (!iflag_cpu_level_ok(&cpu, IF_X86_64)) {
             nasm_error(ERR_NONFATAL,
-                         "cannot specify 64-bit segment on processor below an x86-64");
+                       "cannot specify 64-bit segment on processor below an x86-64");
             i = 16;
         }
-    } else {
+        break;
+    default:
         nasm_error(pass0 < 2 ? ERR_NONFATAL : ERR_FATAL,
-                     "`%s' is not a valid segment size; must be 16, 32 or 64",
-                     value);
+                   "`%s' is not a valid segment size; must be 16, 32 or 64",
+                   value);
         i = 16;
+        break;
     }
     return i;
 }
diff --git a/asm/nasm.c b/asm/nasm.c
index b07ed452..8497ec9b 100644
--- a/asm/nasm.c
+++ b/asm/nasm.c
@@ -322,8 +322,8 @@ int main(int argc, char **argv)
 
     timestamp();
 
-    iflag_set(&cpu, IF_PLEVEL);
-    iflag_set(&cmd_cpu, IF_PLEVEL);
+    iflag_set_default_cpu(&cpu);
+    iflag_set_default_cpu(&cmd_cpu);
 
     pass0 = 0;
     want_usage = terminate_after_phase = false;
@@ -1306,8 +1306,21 @@ static void assemble_file(char *fname, StrList **depend_ptr)
     uint64_t prev_offset_changed;
     unsigned int stall_count = 0; /* Make sure we make forward progress... */
 
-    if (cmd_sb == 32 && iflag_ffs(&cmd_cpu) < IF_386)
-	nasm_fatal(0, "command line: 32-bit segment size requires a higher cpu");
+    switch (cmd_sb) {
+    case 16:
+        break;
+    case 32:
+        if (!iflag_cpu_level_ok(&cmd_cpu, IF_386))
+            nasm_fatal(0, "command line: 32-bit segment size requires a higher cpu");
+        break;
+    case 64:
+        if (!iflag_cpu_level_ok(&cmd_cpu, IF_X86_64))
+            nasm_fatal(0, "command line: 64-bit segment size requires a higher cpu");
+        break;
+    default:
+        panic();
+        break;
+    }
 
     pass_max = prev_offset_changed = (INT_MAX >> 1) + 2; /* Almost unlimited */
     for (passn = 1; pass0 <= 2; passn++) {
diff --git a/include/iflag.h b/include/iflag.h
index 02787de9..48e61238 100644
--- a/include/iflag.h
+++ b/include/iflag.h
@@ -1,32 +1,28 @@
 #ifndef NASM_IFLAG_H
 #define NASM_IFLAG_H
 
-#include <string.h>
-
 #include "compiler.h"
 
-int ilog2_32(uint32_t v);
+#include <string.h>
 
 #include "iflaggen.h"
+#include "nasmlib.h"            /* For ilog2_32() */
 
 #define IF_GENBIT(bit)          (UINT32_C(1) << (bit))
 
-static inline unsigned int iflag_test(const iflag_t *f, unsigned int bit)
+static inline bool iflag_test(const iflag_t *f, unsigned int bit)
 {
-    unsigned int index = bit / 32;
-    return f->field[index] & (UINT32_C(1) << (bit - (index * 32)));
+    return !!(f->field[bit >> 5] & IF_GENBIT(bit & 31));
 }
 
 static inline void iflag_set(iflag_t *f, unsigned int bit)
 {
-    unsigned int index = bit / 32;
-    f->field[index] |= (UINT32_C(1) << (bit - (index * 32)));
+    f->field[bit >> 5] |= IF_GENBIT(bit & 31);
 }
 
 static inline void iflag_clear(iflag_t *f, unsigned int bit)
 {
-    unsigned int index = bit / 32;
-    f->field[index] &= ~(UINT32_C(1) << (bit - (index * 32)));
+    f->field[bit >> 5] &= ~IF_GENBIT(bit & 31);
 }
 
 static inline void iflag_clear_all(iflag_t *f)
@@ -36,39 +32,21 @@ static inline void iflag_clear_all(iflag_t *f)
 
 static inline void iflag_set_all(iflag_t *f)
 {
-     memset(f, 0xff, sizeof(*f));
+     memset(f, ~0, sizeof(*f));
 }
 
+#define iflag_for_each_field(v) for ((v) = 0; (v) < IF_FIELD_COUNT; (v)++)
+
 static inline int iflag_cmp(const iflag_t *a, const iflag_t *b)
 {
     int i;
 
-    for (i = sizeof(a->field) / sizeof(a->field[0]) - 1; i >= 0; i--) {
+    /* This is intentionally a reverse loop! */
+    for (i = IF_FIELD_COUNT-1; i >= 0; i--) {
         if (a->field[i] == b->field[i])
             continue;
 
-        return (a->field[i] > b->field[i]) ? 1 : -1;
-    }
-
-    return 0;
-}
-
-static inline int iflag_cmp_cpu(const iflag_t *a, const iflag_t *b)
-{
-    if (a->field[3] < b->field[3])
-        return -1;
-    else if (a->field[3] > b->field[3])
-        return 1;
-    return 0;
-}
-
-static inline unsigned int iflag_ffs(const iflag_t *a)
-{
-    unsigned int i;
-
-    for (i = 0; i < sizeof(a->field) / sizeof(a->field[0]); i++) {
-        if (a->field[i])
-            return ilog2_32(a->field[i]) + (i * 32);
+        return (int)(a->field[i] - b->field[i]);
     }
 
     return 0;
@@ -80,7 +58,7 @@ static inline unsigned int iflag_ffs(const iflag_t *a)
         unsigned int i;                                                 \
         iflag_t res;                                                    \
                                                                         \
-        for (i = 0; i < sizeof(a->field) / sizeof(a->field[0]); i++)    \
+        iflag_for_each_field(i)                                         \
             res.field[i] = a->field[i] op b->field[i];                  \
                                                                         \
         return res;                                                     \
@@ -88,13 +66,6 @@ static inline unsigned int iflag_ffs(const iflag_t *a)
 
 IF_GEN_HELPER(xor, ^)
 
-
-/* Use this helper to test instruction template flags */
-#define itemp_has(itemp, bit)   iflag_test(&insns_flags[(itemp)->iflag_idx], bit)
-
-
-/* Maximum processor level at moment */
-#define IF_PLEVEL               IF_IA64
 /* Some helpers which are to work with predefined masks */
 #define IF_SMASK        \
     (IF_GENBIT(IF_SB)  |\
@@ -120,23 +91,67 @@ IF_GEN_HELPER(xor, ^)
 #define itemp_arg(itemp)        _itemp_arg((itemp)->iflag_idx)
 #define itemp_armask(itemp)     _itemp_armask((itemp)->iflag_idx)
 
+/*
+ * IF_8086 is the first CPU level flag and IF_PLEVEL the last
+ */
+#if IF_8086 & 31
+#error "IF_8086 must be on a uint32_t boundary"
+#endif
+#define IF_PLEVEL               IF_IA64
+#define IF_CPU_FIELD	       (IF_8086 >> 5)
+#define IF_CPU_LEVEL_MASK      ((IF_GENBIT(IF_PLEVEL & 31) << 1) - 1)
+
+/*
+ * IF_PRIV is the first instruction filtering flag
+ */
+#if IF_PRIV & 31
+#error "IF_PRIV must be on a uint32_t boundary"
+#endif
+#define IF_FEATURE_FIELD	(IF_PRIV >> 5)
+
+static inline int iflag_cmp_cpu(const iflag_t *a, const iflag_t *b)
+{
+    return (int)(a->field[IF_CPU_FIELD] - b->field[IF_CPU_FIELD]);
+}
+
+static inline uint32_t _iflag_cpu_level(const iflag_t *a)
+{
+    return a->field[IF_CPU_FIELD] & IF_CPU_LEVEL_MASK;
+}
+
 static inline int iflag_cmp_cpu_level(const iflag_t *a, const iflag_t *b)
 {
-    iflag_t v1 = *a;
-    iflag_t v2 = *b;
+    uint32_t aa = _iflag_cpu_level(a);
+    uint32_t bb = _iflag_cpu_level(b);
+
+    return (int)(aa - bb);
+}
+
+/* Returns true if the CPU level is at least a certain value */
+static inline bool iflag_cpu_level_ok(const iflag_t *a, unsigned int bit)
+{
+    return _iflag_cpu_level(a) >= IF_GENBIT(bit & 31);
+}
 
-    iflag_clear(&v1, IF_CYRIX);
-    iflag_clear(&v1, IF_AMD);
+static inline void iflag_set_all_features(iflag_t *a)
+{
+    size_t i;
 
-    iflag_clear(&v2, IF_CYRIX);
-    iflag_clear(&v2, IF_AMD);
+    for (i = IF_FEATURE_FIELD; i < IF_CPU_FIELD; i++)
+        a->field[i] = ~UINT32_C(0);
+}
 
-    if (v1.field[3] < v2.field[3])
-        return -1;
-    else if (v1.field[3] > v2.field[3])
-        return 1;
+static inline void iflag_set_cpu(iflag_t *a, unsigned int cpu)
+{
+    a->field[0] = 0;     /* Not applicable to the CPU type */
+    iflag_set_all_features(a);    /* All feature masking bits set for now */
+    a->field[IF_CPU_FIELD] &= ~IF_CPU_LEVEL_MASK;
+    iflag_set(a, cpu);
+}
 
-    return 0;
+static inline void iflag_set_default_cpu(iflag_t *a)
+{
+    iflag_set_cpu(a, IF_PLEVEL);
 }
 
 static inline iflag_t _iflag_pfmask(const iflag_t *a)
diff --git a/include/insns.h b/include/insns.h
index cba686ce..23d0ea91 100644
--- a/include/insns.h
+++ b/include/insns.h
@@ -23,6 +23,12 @@ struct itemplate {
     uint32_t        iflag_idx;          /* some flags referenced by index */
 };
 
+/* Use this helper to test instruction template flags */
+static inline bool itemp_has(const struct itemplate *itemp, unsigned int bit)
+{
+    return iflag_test(&insns_flags[itemp->iflag_idx], bit);
+}
+
 /* Disassembler table structure */
 
 /*
diff --git a/test/vaesenc.asm b/test/vaesenc.asm
new file mode 100644
index 00000000..5a629ab0
--- /dev/null
+++ b/test/vaesenc.asm
@@ -0,0 +1,6 @@
+;; BR 3392454
+
+	bits 64
+	aesenc xmm0,xmm4
+	vaesenc zmm0,zmm0,zmm4
+	vpclmullqlqdq zmm1,zmm1,zmm5
diff --git a/x86/insns-iflags.ph b/x86/insns-iflags.ph
index 989276f2..43bf70e8 100644
--- a/x86/insns-iflags.ph
+++ b/x86/insns-iflags.ph
@@ -64,109 +64,117 @@
 # for a set of flags, so be careful moving bits (and
 # don't forget to update C code generation then).
 #
+sub dword_align($) {
+    my($n) = @_;
+
+    $$n = ($$n + 31) & ~31;
+    return $n;
+}
+
+my $f = 0;
 my %insns_flag_bit = (
     #
     # dword bound, index 0 - specific flags
     #
-    "SM"                => [  0, "Size match"],
-    "SM2"               => [  1, "Size match first two operands"],
-    "SB"                => [  2, "Unsized operands can't be non-byte"],
-    "SW"                => [  3, "Unsized operands can't be non-word"],
-    "SD"                => [  4, "Unsized operands can't be non-dword"],
-    "SQ"                => [  5, "Unsized operands can't be non-qword"],
-    "SO"                => [  6, "Unsized operands can't be non-oword"],
-    "SY"                => [  7, "Unsized operands can't be non-yword"],
-    "SZ"                => [  8, "Unsized operands can't be non-zword"],
-    "SIZE"              => [  9, "Unsized operands must match the bitsize"],
-    "SX"                => [ 10, "Unsized operands not allowed"],
-    "AR0"               => [ 11, "SB, SW, SD applies to argument 0"],
-    "AR1"               => [ 12, "SB, SW, SD applies to argument 1"],
-    "AR2"               => [ 13, "SB, SW, SD applies to argument 2"],
-    "AR3"               => [ 14, "SB, SW, SD applies to argument 3"],
-    "AR4"               => [ 15, "SB, SW, SD applies to argument 4"],
-    "OPT"               => [ 16, "Optimizing assembly only"],
+    "SM"                => [$f++, "Size match"],
+    "SM2"               => [$f++, "Size match first two operands"],
+    "SB"                => [$f++, "Unsized operands can't be non-byte"],
+    "SW"                => [$f++, "Unsized operands can't be non-word"],
+    "SD"                => [$f++, "Unsized operands can't be non-dword"],
+    "SQ"                => [$f++, "Unsized operands can't be non-qword"],
+    "SO"                => [$f++, "Unsized operands can't be non-oword"],
+    "SY"                => [$f++, "Unsized operands can't be non-yword"],
+    "SZ"                => [$f++, "Unsized operands can't be non-zword"],
+    "SIZE"              => [$f++, "Unsized operands must match the bitsize"],
+    "SX"                => [$f++, "Unsized operands not allowed"],
+    "AR0"               => [$f++, "SB, SW, SD applies to argument 0"],
+    "AR1"               => [$f++, "SB, SW, SD applies to argument 1"],
+    "AR2"               => [$f++, "SB, SW, SD applies to argument 2"],
+    "AR3"               => [$f++, "SB, SW, SD applies to argument 3"],
+    "AR4"               => [$f++, "SB, SW, SD applies to argument 4"],
+    "OPT"               => [$f++, "Optimizing assembly only"],
 
     #
-    # dword bound, index 1 - instruction filtering flags
+    # dword bound - instruction filtering flags
     #
-    "PRIV"              => [ 32, "Privileged instruction"],
-    "SMM"               => [ 33, "Only valid in SMM"],
-    "PROT"              => [ 34, "Protected mode only"],
-    "LOCK"              => [ 35, "Lockable if operand 0 is memory"],
-    "NOLONG"            => [ 36, "Not available in long mode"],
-    "LONG"              => [ 37, "Long mode"],
-    "NOHLE"             => [ 38, "HLE prefixes forbidden"],
-    "MIB"               => [ 39, "disassemble with split EA"],
-    "BND"               => [ 40, "BND (0xF2) prefix available"],
-    "UNDOC"             => [ 41, "Undocumented"],
-    "HLE"               => [ 42, "HLE prefixed"],
-    "FPU"               => [ 43, "FPU"],
-    "MMX"               => [ 44, "MMX"],
-    "3DNOW"             => [ 45, "3DNow!"],
-    "SSE"               => [ 46, "SSE (KNI, MMX2)"],
-    "SSE2"              => [ 47, "SSE2"],
-    "SSE3"              => [ 48, "SSE3 (PNI)"],
-    "VMX"               => [ 49, "VMX"],
-    "SSSE3"             => [ 50, "SSSE3"],
-    "SSE4A"             => [ 51, "AMD SSE4a"],
-    "SSE41"             => [ 52, "SSE4.1"],
-    "SSE42"             => [ 53, "SSE4.2"],
-    "SSE5"              => [ 54, "SSE5"],
-    "AVX"               => [ 55, "AVX (128b)"],
-    "AVX2"              => [ 56, "AVX2 (256b)"],
-    "FMA"               => [ 57, ""],
-    "BMI1"              => [ 58, ""],
-    "BMI2"              => [ 59, ""],
-    "TBM"               => [ 60, ""],
-    "RTM"               => [ 61, ""],
-    "INVPCID"           => [ 62, ""],
+    "PRIV"              => [${dword_align(\$f)}++, "Privileged instruction"],
+    "SMM"               => [$f++, "Only valid in SMM"],
+    "PROT"              => [$f++, "Protected mode only"],
+    "LOCK"              => [$f++, "Lockable if operand 0 is memory"],
+    "NOLONG"            => [$f++, "Not available in long mode"],
+    "LONG"              => [$f++, "Long mode"],
+    "NOHLE"             => [$f++, "HLE prefixes forbidden"],
+    "MIB"               => [$f++, "disassemble with split EA"],
+    "BND"               => [$f++, "BND (0xF2) prefix available"],
+    "UNDOC"             => [$f++, "Undocumented"],
+    "HLE"               => [$f++, "HLE prefixed"],
+    "FPU"               => [$f++, "FPU"],
+    "MMX"               => [$f++, "MMX"],
+    "3DNOW"             => [$f++, "3DNow!"],
+    "SSE"               => [$f++, "SSE (KNI, MMX2)"],
+    "SSE2"              => [$f++, "SSE2"],
+    "SSE3"              => [$f++, "SSE3 (PNI)"],
+    "VMX"               => [$f++, "VMX"],
+    "SSSE3"             => [$f++, "SSSE3"],
+    "SSE4A"             => [$f++, "AMD SSE4a"],
+    "SSE41"             => [$f++, "SSE4.1"],
+    "SSE42"             => [$f++, "SSE4.2"],
+    "SSE5"              => [$f++, "SSE5"],
+    "AVX"               => [$f++, "AVX  (256-bit floating point)"],
+    "AVX2"              => [$f++, "AVX2 (256-bit integer)"],
+    "FMA"               => [$f++, ""],
+    "BMI1"              => [$f++, ""],
+    "BMI2"              => [$f++, ""],
+    "TBM"               => [$f++, ""],
+    "RTM"               => [$f++, ""],
+    "INVPCID"           => [$f++, ""],
+    "AVX512"            => [$f++, "AVX-512F (512-bit base architecture)"],
+    "AVX512CD"          => [$f++, "AVX-512 Conflict Detection"],
+    "AVX512ER"          => [$f++, "AVX-512 Exponential and Reciprocal"],
+    "AVX512PF"          => [$f++, "AVX-512 Prefetch"],
+    "MPX"               => [$f++, "MPX"],
+    "SHA"               => [$f++, "SHA"],
+    "PREFETCHWT1"       => [$f++, "PREFETCHWT1"],
+    "AVX512VL"          => [$f++, "AVX-512 Vector Length Orthogonality"],
+    "AVX512DQ"          => [$f++, "AVX-512 Dword and Qword"],
+    "AVX512BW"          => [$f++, "AVX-512 Byte and Word"],
+    "AVX512IFMA"        => [$f++, "AVX-512 IFMA instructions"],
+    "AVX512VBMI"        => [$f++, "AVX-512 VBMI instructions"],
+    "AES"               => [$f++, "AES instructions"],
+    "VAES"              => [$f++, "AES AVX instructions"],
+    "VPCLMULQDQ"        => [$f++, "Carry-Less Multiplication extention"],
 
-    #
-    # dword bound, index 2 - instruction filtering flags
-    #
-    "AVX512"            => [ 64, "AVX-512F (512b)"],
-    "AVX512CD"          => [ 65, "AVX-512 Conflict Detection"],
-    "AVX512ER"          => [ 66, "AVX-512 Exponential and Reciprocal"],
-    "AVX512PF"          => [ 67, "AVX-512 Prefetch"],
-    "MPX"               => [ 68	,"MPX"],
-    "SHA"               => [ 69	,"SHA"],
-    "PREFETCHWT1"       => [ 70	,"PREFETCHWT1"],
-    "AVX512VL"          => [ 71, "AVX-512 Vector Length Orthogonality"],
-    "AVX512DQ"          => [ 72, "AVX-512 Dword and Qword"],
-    "AVX512BW"          => [ 73, "AVX-512 Byte and Word"],
-    "AVX512IFMA"        => [ 74, "AVX-512 IFMA instructions"],
-    "AVX512VBMI"        => [ 75, "AVX-512 VBMI instructions"],
-    "OBSOLETE"          => [ 93, "Instruction removed from architecture"],
-    "VEX"               => [ 94, "VEX or XOP encoded instruction"],
-    "EVEX"              => [ 95, "EVEX encoded instruction"],
-    "AES"               => [ 96, "AES instructions"],
-    "VAES"              => [ 97, "AES AVX instructions"],
-    "VPCLMULQDQ"        => [ 98, "Carry-Less Multiplication extention"],
+    # Put these last
+    "OBSOLETE"          => [$f++, "Instruction removed from architecture"],
+    "VEX"               => [$f++, "VEX or XOP encoded instruction"],
+    "EVEX"              => [$f++, "EVEX encoded instruction"],
 
     #
-    # dword bound, cpu type flags
+    # dword bound - cpu type flags
     #
     # The CYRIX and AMD flags should have the highest bit values; the
     # disassembler selection algorithm depends on it.
     #
-    "8086"              => [128, "8086"],
-    "186"               => [129, "186+"],
-    "286"               => [130, "286+"],
-    "386"               => [131, "386+"],
-    "486"               => [132, "486+"],
-    "PENT"              => [133, "Pentium"],
-    "P6"                => [134, "P6"],
-    "KATMAI"            => [135, "Katmai"],
-    "WILLAMETTE"        => [136, "Willamette"],
-    "PRESCOTT"          => [137, "Prescott"],
-    "X86_64"            => [138, "x86-64 (long or legacy mode)"],
-    "NEHALEM"           => [139, "Nehalem"],
-    "WESTMERE"          => [140, "Westmere"],
-    "SANDYBRIDGE"       => [141, "Sandy Bridge"],
-    "FUTURE"            => [142, "Future processor (not yet disclosed)"],
-    "IA64"              => [143, "IA64 (in x86 mode)"],
-    "CYRIX"             => [144, "Cyrix-specific"],
-    "AMD"               => [145, "AMD-specific"],
+    "8086"              => [${dword_align(\$f)}++, "8086"],
+    "186"               => [$f++, "186+"],
+    "286"               => [$f++, "286+"],
+    "386"               => [$f++, "386+"],
+    "486"               => [$f++, "486+"],
+    "PENT"              => [$f++, "Pentium"],
+    "P6"                => [$f++, "P6"],
+    "KATMAI"            => [$f++, "Katmai"],
+    "WILLAMETTE"        => [$f++, "Willamette"],
+    "PRESCOTT"          => [$f++, "Prescott"],
+    "X86_64"            => [$f++, "x86-64 (long or legacy mode)"],
+    "NEHALEM"           => [$f++, "Nehalem"],
+    "WESTMERE"          => [$f++, "Westmere"],
+    "SANDYBRIDGE"       => [$f++, "Sandy Bridge"],
+    "FUTURE"            => [$f++, "Future processor (not yet disclosed)"],
+    "IA64"              => [$f++, "IA64 (in x86 mode)"],
+
+    # Put these last
+    "CYRIX"             => [$f++, "Cyrix-specific"],
+    "AMD"               => [$f++, "AMD-specific"],
 );
 
 my %insns_flag_hash = ();
@@ -176,9 +184,9 @@ my $iflag_words;
 sub get_flag_words() {
     my $max = -1;
 
-    foreach my $key (keys(%insns_flag_bit)) {
-	if (${$insns_flag_bit{$key}}[0] > $max) {
-	    $max = ${$insns_flag_bit{$key}}[0];
+    foreach my $vp (values(%insns_flag_bit)) {
+	if ($vp->[0] > $max) {
+	    $max = $vp->[0];
 	}
     }
 
@@ -218,14 +226,28 @@ sub write_iflaggen_h() {
     print N "#ifndef NASM_IFLAGGEN_H\n";
     print N "#define NASM_IFLAGGEN_H 1\n\n";
 
-    foreach my $key (sort { $insns_flag_bit{$a}[0] <=> $insns_flag_bit{$b}[0] } keys(%insns_flag_bit)) {
+    my @flagnames = keys(%insns_flag_bit);
+    @flagnames = sort {
+	$insns_flag_bit{$a}->[0] <=> $insns_flag_bit{$b}->[0]
+    } @flagnames;
+    my $next = 0;
+    foreach my $key (@flagnames) {
+	my $v = $insns_flag_bit{$key};
+	if ($v->[0] > $next) {
+	    printf N "%-31s /* %-64s */\n", '',
+		($next != $v->[0]-1) ?
+		sprintf("%d...%d unused", $next, $v->[0]-1) :
+		sprintf("%d unused", $next);
+	}
         print N sprintf("#define IF_%-16s %3d /* %-64s */\n",
-            $key, $insns_flag_bit{$key}[0], $insns_flag_bit{$key}[1]);
+			$key, $v->[0], $v->[1]);
+	$next = $v->[0] + 1;
     }
 
     print N "\n";
+    printf N "#define IF_FIELD_COUNT %d\n", $iflag_words;
     print N "typedef struct {\n";
-    printf N "    uint32_t field[%d];\n", $iflag_words;
+    print N "    uint32_t field[IF_FIELD_COUNT];\n";
     print N "} iflag_t;\n";
 
     print N "\n";
author	H. Peter Anvin <hpa@linux.intel.com>	2018-02-06 14:43:07 -0800
committer	H. Peter Anvin <hpa@linux.intel.com>	2018-02-06 14:43:07 -0800
commit	a7ecf2646d6c80b994be7d340140379d580050cf (patch)
tree	50fc3d8fb36d4a96dff236ddaf2ee8420f27f020
parent	4d7e680cb015e5d77764ff1cc83a417ecb73e31b (diff)
download	nasm-a7ecf2646d6c80b994be7d340140379d580050cf.tar.gz