diff options
author | H. Peter Anvin <hpa@zytor.com> | 2018-02-22 14:53:46 -0800 |
---|---|---|
committer | H. Peter Anvin <hpa@zytor.com> | 2018-02-22 14:53:46 -0800 |
commit | 281f5bd92c3f2eb820e29d5d8893d718e6428372 (patch) | |
tree | aeb41dd4ecdf24f108f680bf7717b3cfbed55aed | |
parent | 6686fc627ec2f805bc74599912c46d0ee1a4047c (diff) | |
parent | 4dbf3a96a4b17add396ea4592a7fde7cb8083d52 (diff) | |
download | nasm-281f5bd92c3f2eb820e29d5d8893d718e6428372.tar.gz |
Merge branch 'master' of ssh://repo.or.cz/srv/git/nasm
-rw-r--r-- | aclocal.m4 | 27 | ||||
-rw-r--r-- | asm/assemble.c | 4 | ||||
-rw-r--r-- | asm/directiv.c | 122 | ||||
-rw-r--r-- | asm/nasm.c | 129 | ||||
-rw-r--r-- | configure.ac | 7 | ||||
-rw-r--r-- | doc/changes.src | 8 | ||||
-rw-r--r-- | doc/nasmdoc.src | 43 | ||||
-rw-r--r-- | include/compiler.h | 24 | ||||
-rw-r--r-- | include/iflag.h | 121 | ||||
-rw-r--r-- | include/insns.h | 6 | ||||
-rw-r--r-- | include/nasmlib.h | 4 | ||||
-rw-r--r-- | output/outelf.c | 3 | ||||
-rw-r--r-- | test/ret.asm | 56 | ||||
-rw-r--r-- | test/vaesenc.asm | 22 | ||||
-rw-r--r-- | x86/insns-iflags.ph | 212 | ||||
-rw-r--r-- | x86/insns.dat | 20 |
16 files changed, 524 insertions, 284 deletions
@@ -172,3 +172,30 @@ AC_DEFUN(_PA_ADD_HEADER, AC_DEFUN(PA_ADD_HEADERS, [m4_map_args_w([$1],[_PA_ADD_HEADER(],[)])]) + +dnl -------------------------------------------------------------------------- +dnl PA_CHECK_BAD_STDC_INLINE +dnl +dnl Some versions of gcc seem to apply -Wmissing-prototypes to C99 +dnl inline functions, which means we need to use GNU inline syntax +dnl -------------------------------------------------------------------------- +AC_DEFUN(PA_CHECK_BAD_STDC_INLINE, +[AC_MSG_CHECKING([if $CC supports C99 external inlines]) + AC_COMPILE_IFELSE([AC_LANG_SOURCE([ +AC_INCLUDES_DEFAULT + +/* Don't mistake GNU inlines for c99 */ +#ifdef __GNUC_GNU_INLINE__ +# error "Using gnu inline standard" +#endif + +inline int foo(int x) +{ + return x+1; +} + ])], + [AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_STDC_INLINE, 1, + [Define to 1 if your compiler supports C99 extern inline])], + [AC_MSG_RESULT([no]) + PA_ADD_CFLAGS([-fgnu89-inline])])]) diff --git a/asm/assemble.c b/asm/assemble.c index fc72065e..561bba55 100644 --- a/asm/assemble.c +++ b/asm/assemble.c @@ -1,6 +1,6 @@ /* ----------------------------------------------------------------------- * * - * Copyright 1996-2017 The NASM Authors - All Rights Reserved + * Copyright 1996-2018 The NASM Authors - All Rights Reserved * See the file AUTHORS included with the NASM distribution for * the specific copyright holders. 
* @@ -1379,7 +1379,7 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits, length++; } else if ((ins->rex & REX_L) && !(ins->rex & (REX_P|REX_W|REX_X|REX_B)) && - iflag_ffs(&cpu) >= IF_X86_64) { + iflag_cpu_level_ok(&cpu, IF_X86_64)) { /* LOCK-as-REX.R */ assert_no_prefix(ins, PPS_LOCK); lockcheck = false; /* Already errored, no need for warning */ diff --git a/asm/directiv.c b/asm/directiv.c index 937f17af..7c741685 100644 --- a/asm/directiv.c +++ b/asm/directiv.c @@ -1,6 +1,6 @@ /* ----------------------------------------------------------------------- * * - * Copyright 1996-2017 The NASM Authors - All Rights Reserved + * Copyright 1996-2018 The NASM Authors - All Rights Reserved * See the file AUTHORS included with the NASM distribution for * the specific copyright holders. * @@ -56,78 +56,90 @@ #include "labels.h" #include "iflag.h" -static iflag_t get_cpu(char *value) +struct cpunames { + const char *name; + unsigned int level; + /* Eventually a table of features */ +}; + +static iflag_t get_cpu(const char *value) { iflag_t r; + const struct cpunames *cpu; + static const struct cpunames cpunames[] = { + { "8086", IF_8086 }, + { "186", IF_186 }, + { "286", IF_286 }, + { "386", IF_386 }, + { "486", IF_486 }, + { "586", IF_PENT }, + { "pentium", IF_PENT }, + { "pentiummmx", IF_PENT }, + { "686", IF_P6 }, + { "p6", IF_P6 }, + { "ppro", IF_P6 }, + { "pentiumpro", IF_P6 }, + { "p2", IF_P6 }, /* +MMX */ + { "pentiumii", IF_P6 }, + { "p3", IF_KATMAI }, + { "katmai", IF_KATMAI }, + { "p4", IF_WILLAMETTE }, + { "willamette", IF_WILLAMETTE }, + { "prescott", IF_PRESCOTT }, + { "x64", IF_X86_64 }, + { "x86-64", IF_X86_64 }, + { "ia64", IF_IA64 }, + { "ia-64", IF_IA64 }, + { "itanium", IF_IA64 }, + { "itanic", IF_IA64 }, + { "merced", IF_IA64 }, + { "any", IF_PLEVEL }, + { "default", IF_PLEVEL }, + { "all", IF_PLEVEL }, + { NULL, IF_PLEVEL } /* Error and final default entry */ + }; + + for (cpu = cpunames; cpu->name; cpu++) { + if (!strcmp(value, 
cpu->name)) + break; + } - iflag_clear_all(&r); - - if (!strcmp(value, "8086")) - iflag_set(&r, IF_8086); - else if (!strcmp(value, "186")) - iflag_set(&r, IF_186); - else if (!strcmp(value, "286")) - iflag_set(&r, IF_286); - else if (!strcmp(value, "386")) - iflag_set(&r, IF_386); - else if (!strcmp(value, "486")) - iflag_set(&r, IF_486); - else if (!strcmp(value, "586") || - !nasm_stricmp(value, "pentium")) - iflag_set(&r, IF_PENT); - else if (!strcmp(value, "686") || - !nasm_stricmp(value, "ppro") || - !nasm_stricmp(value, "pentiumpro") || - !nasm_stricmp(value, "p2")) - iflag_set(&r, IF_P6); - else if (!nasm_stricmp(value, "p3") || - !nasm_stricmp(value, "katmai")) - iflag_set(&r, IF_KATMAI); - else if (!nasm_stricmp(value, "p4") || /* is this right? -- jrc */ - !nasm_stricmp(value, "willamette")) - iflag_set(&r, IF_WILLAMETTE); - else if (!nasm_stricmp(value, "prescott")) - iflag_set(&r, IF_PRESCOTT); - else if (!nasm_stricmp(value, "x64") || - !nasm_stricmp(value, "x86-64")) - iflag_set(&r, IF_X86_64); - else if (!nasm_stricmp(value, "ia64") || - !nasm_stricmp(value, "ia-64") || - !nasm_stricmp(value, "itanium")|| - !nasm_stricmp(value, "itanic") || - !nasm_stricmp(value, "merced")) - iflag_set(&r, IF_IA64); - else { - iflag_set(&r, IF_PLEVEL); + if (!cpu->name) { nasm_error(pass0 < 2 ? 
ERR_NONFATAL : ERR_FATAL, - "unknown 'cpu' type"); + "unknown 'cpu' type '%s'", value); } + + iflag_set_cpu(&r, cpu->level); return r; } -static int get_bits(char *value) +static int get_bits(const char *value) { - int i; + int i = atoi(value); - if ((i = atoi(value)) == 16) - return i; /* set for a 16-bit segment */ - else if (i == 32) { - if (iflag_ffs(&cpu) < IF_386) { + switch (i) { + case 16: + break; /* Always safe */ + case 32: + if (!iflag_cpu_level_ok(&cpu, IF_386)) { nasm_error(ERR_NONFATAL, - "cannot specify 32-bit segment on processor below a 386"); + "cannot specify 32-bit segment on processor below a 386"); i = 16; } - } else if (i == 64) { - if (iflag_ffs(&cpu) < IF_X86_64) { + break; + case 64: + if (!iflag_cpu_level_ok(&cpu, IF_X86_64)) { nasm_error(ERR_NONFATAL, - "cannot specify 64-bit segment on processor below an x86-64"); + "cannot specify 64-bit segment on processor below an x86-64"); i = 16; } - } else { + break; + default: nasm_error(pass0 < 2 ? ERR_NONFATAL : ERR_FATAL, - "`%s' is not a valid segment size; must be 16, 32 or 64", - value); + "`%s' is not a valid segment size; must be 16, 32 or 64", + value); i = 16; + break; } return i; } @@ -1,6 +1,6 @@ /* ----------------------------------------------------------------------- * * - * Copyright 1996-2017 The NASM Authors - All Rights Reserved + * Copyright 1996-2018 The NASM Authors - All Rights Reserved * See the file AUTHORS included with the NASM distribution for * the specific copyright holders. 
* @@ -86,6 +86,11 @@ static void usage(void); static bool using_debug_info, opt_verbose_info; static const char *debug_format; +#ifndef ABORT_ON_PANIC +# define ABORT_ON_PANIC 0 +#endif +static bool abort_on_panic = ABORT_ON_PANIC; + bool tasm_compatible_mode = false; int pass0, passn; static int pass1, pass2; /* XXX: Get rid of these, they are redundant */ @@ -323,8 +328,8 @@ int main(int argc, char **argv) timestamp(); - iflag_set(&cpu, IF_PLEVEL); - iflag_set(&cmd_cpu, IF_PLEVEL); + iflag_set_default_cpu(&cpu); + iflag_set_default_cpu(&cmd_cpu); pass0 = 0; want_usage = terminate_after_phase = false; @@ -690,19 +695,25 @@ static char *quote_for_wmake(const char *str) return os; } -struct textargs { - const char *label; - int value; -}; - enum text_options { + OPT_BOGUS, + OPT_VERSION, + OPT_ABORT_ON_PANIC, OPT_PREFIX, OPT_POSTFIX }; +struct textargs { + const char *label; + enum text_options opt; + bool need_arg; +}; static const struct textargs textopts[] = { - {"prefix", OPT_PREFIX}, - {"postfix", OPT_POSTFIX}, - {NULL, 0} + {"v", OPT_VERSION, false}, + {"version", OPT_VERSION, false}, + {"abort-on-panic", OPT_ABORT_ON_PANIC, false}, + {"prefix", OPT_PREFIX, true}, + {"postfix", OPT_POSTFIX, true}, + {NULL, OPT_BOGUS, false} }; static void show_version(void) @@ -1022,61 +1033,49 @@ static bool process_arg(char *p, char *q, int pass) case '-': { - int s; + const struct textargs *tx; if (p[2] == 0) { /* -- => stop processing options */ - stopoptions = 1; + stopoptions = true; break; } - if (!nasm_stricmp(p, "--v")) - show_version(); - - if (!nasm_stricmp(p, "--version")) - show_version(); + for (tx = textopts; tx->label; tx++) { + if (!nasm_stricmp(p + 2, tx->label)) + break; + } - for (s = 0; textopts[s].label; s++) { - if (!nasm_stricmp(p + 2, textopts[s].label)) { + if (tx->need_arg) { + if (!q) { + nasm_error(ERR_NONFATAL | ERR_NOFILE | ERR_USAGE, + "option `--%s' requires an argument", + p + 2); break; } + advance = true; } - switch (s) { + switch (tx->opt) 
{ + case OPT_VERSION: + show_version(); + break; + case OPT_ABORT_ON_PANIC: + abort_on_panic = true; + break; case OPT_PREFIX: + if (pass == 2) + strlcpy(lprefix, q, PREFIX_MAX); + break; case OPT_POSTFIX: - { - if (!q) { - nasm_error(ERR_NONFATAL | ERR_NOFILE | - ERR_USAGE, - "option `--%s' requires an argument", - p + 2); - break; - } else { - advance = 1, param = q; - } - - switch (s) { - case OPT_PREFIX: - if (pass == 2) - strlcpy(lprefix, param, PREFIX_MAX); - break; - case OPT_POSTFIX: - if (pass == 2) - strlcpy(lpostfix, param, POSTFIX_MAX); - break; - default: - panic(); - break; - } - break; - } - + if (pass == 2) + strlcpy(lpostfix, q, POSTFIX_MAX); + break; + case OPT_BOGUS: + nasm_error(ERR_NONFATAL | ERR_NOFILE | ERR_USAGE, + "unrecognized option `--%s'", p + 2); + break; default: - { - nasm_error(ERR_NONFATAL | ERR_NOFILE | ERR_USAGE, - "unrecognised option `--%s'", p + 2); - break; - } + panic(); } break; } @@ -1289,8 +1288,21 @@ static void assemble_file(const char *fname, StrList **depend_ptr) uint64_t prev_offset_changed; unsigned int stall_count = 0; /* Make sure we make forward progress... 
*/ - if (cmd_sb == 32 && iflag_ffs(&cmd_cpu) < IF_386) - nasm_fatal(0, "command line: 32-bit segment size requires a higher cpu"); + switch (cmd_sb) { + case 16: + break; + case 32: + if (!iflag_cpu_level_ok(&cmd_cpu, IF_386)) + nasm_fatal(0, "command line: 32-bit segment size requires a higher cpu"); + break; + case 64: + if (!iflag_cpu_level_ok(&cmd_cpu, IF_X86_64)) + nasm_fatal(0, "command line: 64-bit segment size requires a higher cpu"); + break; + default: + panic(); + break; + } pass_max = prev_offset_changed = (INT_MAX >> 1) + 2; /* Almost unlimited */ for (passn = 1; pass0 <= 2; passn++) { @@ -1812,9 +1824,10 @@ static void nasm_verror_common(int severity, const char *fmt, va_list args) break; /* placate silly compilers */ case ERR_PANIC: fflush(NULL); -#ifdef ABORT_ON_PANIC - abort(); /* halt, catch fire, dump core/stop debugger */ -#endif + + if (abort_on_panic) + abort(); /* halt, catch fire, dump core/stop debugger */ + if (ofile) { fclose(ofile); remove(outname); diff --git a/configure.ac b/configure.ac index 4e27bb3e..44c9e179 100644 --- a/configure.ac +++ b/configure.ac @@ -288,6 +288,13 @@ PA_ARG_ENABLED([werror], ) dnl +dnl On some versions of gcc, -Werror=missing-prototypes causes problems +dnl with C99-style external inlines. Test this *after* adding the -Werror +dnl options. +dnl +PA_CHECK_BAD_STDC_INLINE + +dnl dnl support ccache dnl PA_ARG_ENABLED([ccache], [compile with ccache], [CC="ccache $CC"], []) diff --git a/doc/changes.src b/doc/changes.src index 54a12e21..ddfe6b38 100644 --- a/doc/changes.src +++ b/doc/changes.src @@ -7,10 +7,18 @@ The NASM 2 series supports x86-64, and is the production version of NASM since 2007. +\S{cl-2.13.04} Version 2.13.04 + +\b Added \c{-W}, \c{-D}, and \c{-Q} suffix aliases for \c{RET} + instructions so the operand sizes of these instructions can be + encoded without using \c{o16}, \c{o32} or \c{o64}. + \S{cl-2.13.03} Version 2.13.03 \b Added AVX and AVX512 \c{VAES*} and \c{VPCLMULQDQ} instructions. 
+\b Fixed missing dwarf record in x32 ELF output format. + \S{cl-2.13.02} Version 2.13.02 \b Fix false positive in testing of numeric overflows. diff --git a/doc/nasmdoc.src b/doc/nasmdoc.src index f998cc60..7b331d35 100644 --- a/doc/nasmdoc.src +++ b/doc/nasmdoc.src @@ -1,6 +1,6 @@ \# -------------------------------------------------------------------------- \# -\# Copyright 1996-2017 The NASM Authors - All Rights Reserved +\# Copyright 1996-2018 The NASM Authors - All Rights Reserved \# See the file AUTHORS included with the NASM distribution for \# the specific copyright holders. \# @@ -7704,10 +7704,15 @@ platforms pass arguments in registers rather than on the stack. Furthermore, 64-bit platforms use SSE2 by default for floating point. Please see the ABI documentation for your platform. -64-bit platforms differ in the sizes of the fundamental datatypes, not -just from 32-bit platforms but from each other. If a specific size -data type is desired, it is probably best to use the types defined in -the Standard C header \c{<inttypes.h>}. +64-bit platforms differ in the sizes of the C/C++ fundamental +datatypes, not just from 32-bit platforms but from each other. If a +specific size data type is desired, it is probably best to use the +types defined in the standard C header \c{<inttypes.h>}. + +All known 64-bit platforms except some embedded platforms require that +the stack is 16-byte aligned at the entry to a function. In order to +enforce that, the stack pointer (\c{RSP}) needs to be aligned on an +\c{odd} multiple of 8 bytes before the \c{CALL} instruction. In 64-bit mode, the default instruction size is still 32 bits. When loading a value into a 32-bit register (but not an 8- or 16-bit @@ -7755,12 +7760,30 @@ immediate as \c{DWORD}: The length of these instructions are 10, 5 and 7 bytes, respectively. 
+If optimization is enabled and NASM can determine at assembly time +that a shorter instruction will suffice, the shorter instruction will +be emitted unless of course \c{STRICT QWORD} or \c{STRICT DWORD} is +specified (see \k{strict}): + +\c mov rax,1 ; Assembles as "mov eax,1" (5 bytes) +\c mov rax,strict qword 1 ; Full 10-byte instruction +\c mov rax,strict dword 1 ; 7-byte instruction +\c mov rax,symbol ; 10 bytes, not known at assembly time +\c lea rax,[rel symbol] ; 7 bytes, usually preferred by the ABI + +Note that \c{lea rax,[rel symbol]} is position-independent, whereas +\c{mov rax,symbol} is not. Most ABIs prefer or even require +position-independent code in 64-bit mode. However, the \c{MOV} +instruction is able to reference a symbol anywhere in the 64-bit +address space, whereas \c{LEA} is only able to access a symbol +within 2 GB of the instruction itself (see below.) + The only instructions which take a full \I{64-bit displacement}64-bit \e{displacement} is loading or storing, using \c{MOV}, \c{AL}, \c{AX}, \c{EAX} or \c{RAX} (but no other registers) to an absolute 64-bit address. Since this is a relatively rarely used instruction (64-bit code generally uses relative addressing), the programmer has to explicitly declare the -displacement size as \c{QWORD}: +displacement size as \c{ABS QWORD}: \c default abs \c @@ -7797,9 +7820,11 @@ calls, and thus are available for use by the function without saving. Integer return values are passed in \c{RAX} and \c{RDX}, in that order. Floating point is done using SSE registers, except for \c{long -double}. Floating-point arguments are passed in \c{XMM0} to \c{XMM7}; -return is \c{XMM0} and \c{XMM1}. \c{long double} are passed on the -stack, and returned in \c{ST0} and \c{ST1}. +double}, which is 80 bits (\c{TWORD}) on most platforms (Android is +one exception; there \c{long double} is 64 bits and treated the same +as \c{double}.)
Floating-point arguments are passed in \c{XMM0} to +\c{XMM7}; return is \c{XMM0} and \c{XMM1}. \c{long double} are passed +on the stack, and returned in \c{ST0} and \c{ST1}. All SSE and x87 registers are destroyed by function calls. diff --git a/include/compiler.h b/include/compiler.h index 6c7e20c5..aba773e7 100644 --- a/include/compiler.h +++ b/include/compiler.h @@ -214,15 +214,20 @@ size_t strnlen(const char *s, size_t maxlen); /* * Hack to support external-linkage inline functions */ -#ifdef __GNUC__ -# ifdef __GNUC_STDC_INLINE__ -# define HAVE_STDC_INLINE -# else -# define HAVE_GNU_INLINE -# endif -#elif defined(__STDC_VERSION__) -# if __STDC_VERSION__ >= 199901L -# define HAVE_STDC_INLINE +#ifndef HAVE_STDC_INLINE +# ifdef __GNUC__ +# ifdef __GNUC_STDC_INLINE__ +# define HAVE_STDC_INLINE +# else +# define HAVE_GNU_INLINE +# endif +# elif defined(__GNUC_GNU_INLINE__) +/* Some other compiler implementing only GNU inline semantics? */ +# define HAVE_GNU_INLINE +# elif defined(__STDC_VERSION__) +# if __STDC_VERSION__ >= 199901L +# define HAVE_STDC_INLINE +# endif # endif #endif @@ -230,6 +235,7 @@ size_t strnlen(const char *s, size_t maxlen); # define extern_inline inline #elif defined(HAVE_GNU_INLINE) # define extern_inline extern inline +# define inline_prototypes #else # define inline_prototypes #endif diff --git a/include/iflag.h b/include/iflag.h index 289e4272..5280703e 100644 --- a/include/iflag.h +++ b/include/iflag.h @@ -1,30 +1,28 @@ #ifndef NASM_IFLAG_H #define NASM_IFLAG_H -#include <string.h> - #include "compiler.h" #include "ilog2.h" + +#include <string.h> + #include "iflaggen.h" #define IF_GENBIT(bit) (UINT32_C(1) << (bit)) -static inline unsigned int iflag_test(const iflag_t *f, unsigned int bit) +static inline bool iflag_test(const iflag_t *f, unsigned int bit) { - unsigned int index = bit / 32; - return f->field[index] & (UINT32_C(1) << (bit - (index * 32))); + return !!(f->field[bit >> 5] & IF_GENBIT(bit & 31)); } static inline void 
iflag_set(iflag_t *f, unsigned int bit) { - unsigned int index = bit / 32; - f->field[index] |= (UINT32_C(1) << (bit - (index * 32))); + f->field[bit >> 5] |= IF_GENBIT(bit & 31); } static inline void iflag_clear(iflag_t *f, unsigned int bit) { - unsigned int index = bit / 32; - f->field[index] &= ~(UINT32_C(1) << (bit - (index * 32))); + f->field[bit >> 5] &= ~IF_GENBIT(bit & 31); } static inline void iflag_clear_all(iflag_t *f) @@ -34,39 +32,21 @@ static inline void iflag_clear_all(iflag_t *f) static inline void iflag_set_all(iflag_t *f) { - memset(f, 0xff, sizeof(*f)); + memset(f, ~0, sizeof(*f)); } +#define iflag_for_each_field(v) for ((v) = 0; (v) < IF_FIELD_COUNT; (v)++) + static inline int iflag_cmp(const iflag_t *a, const iflag_t *b) { int i; - for (i = sizeof(a->field) / sizeof(a->field[0]) - 1; i >= 0; i--) { + /* This is intentionally a reverse loop! */ + for (i = IF_FIELD_COUNT-1; i >= 0; i--) { if (a->field[i] == b->field[i]) continue; - return (a->field[i] > b->field[i]) ? 
1 : -1; - } - - return 0; -} - -static inline int iflag_cmp_cpu(const iflag_t *a, const iflag_t *b) -{ - if (a->field[3] < b->field[3]) - return -1; - else if (a->field[3] > b->field[3]) - return 1; - return 0; -} - -static inline unsigned int iflag_ffs(const iflag_t *a) -{ - unsigned int i; - - for (i = 0; i < sizeof(a->field) / sizeof(a->field[0]); i++) { - if (a->field[i]) - return ilog2_32(a->field[i]) + (i * 32); + return (int)(a->field[i] - b->field[i]); } return 0; @@ -78,7 +58,7 @@ static inline unsigned int iflag_ffs(const iflag_t *a) unsigned int i; \ iflag_t res; \ \ - for (i = 0; i < sizeof(a->field) / sizeof(a->field[0]); i++) \ + iflag_for_each_field(i) \ res.field[i] = a->field[i] op b->field[i]; \ \ return res; \ @@ -86,13 +66,6 @@ static inline unsigned int iflag_ffs(const iflag_t *a) IF_GEN_HELPER(xor, ^) - -/* Use this helper to test instruction template flags */ -#define itemp_has(itemp, bit) iflag_test(&insns_flags[(itemp)->iflag_idx], bit) - - -/* Maximum processor level at moment */ -#define IF_PLEVEL IF_IA64 /* Some helpers which are to work with predefined masks */ #define IF_SMASK \ (IF_GENBIT(IF_SB) |\ @@ -118,23 +91,67 @@ IF_GEN_HELPER(xor, ^) #define itemp_arg(itemp) _itemp_arg((itemp)->iflag_idx) #define itemp_armask(itemp) _itemp_armask((itemp)->iflag_idx) +/* + * IF_8086 is the first CPU level flag and IF_PLEVEL the last + */ +#if IF_8086 & 31 +#error "IF_8086 must be on a uint32_t boundary" +#endif +#define IF_PLEVEL IF_IA64 +#define IF_CPU_FIELD (IF_8086 >> 5) +#define IF_CPU_LEVEL_MASK ((IF_GENBIT(IF_PLEVEL & 31) << 1) - 1) + +/* + * IF_PRIV is the first instruction filtering flag + */ +#if IF_PRIV & 31 +#error "IF_PRIV must be on a uint32_t boundary" +#endif +#define IF_FEATURE_FIELD (IF_PRIV >> 5) + +static inline int iflag_cmp_cpu(const iflag_t *a, const iflag_t *b) +{ + return (int)(a->field[IF_CPU_FIELD] - b->field[IF_CPU_FIELD]); +} + +static inline uint32_t _iflag_cpu_level(const iflag_t *a) +{ + return
a->field[IF_CPU_FIELD] & IF_CPU_LEVEL_MASK; +} + static inline int iflag_cmp_cpu_level(const iflag_t *a, const iflag_t *b) { - iflag_t v1 = *a; - iflag_t v2 = *b; + uint32_t aa = _iflag_cpu_level(a); + uint32_t bb = _iflag_cpu_level(b); - iflag_clear(&v1, IF_CYRIX); - iflag_clear(&v1, IF_AMD); + return (int)(aa - bb); +} - iflag_clear(&v2, IF_CYRIX); - iflag_clear(&v2, IF_AMD); +/* Returns true if the CPU level is at least a certain value */ +static inline bool iflag_cpu_level_ok(const iflag_t *a, unsigned int bit) +{ + return _iflag_cpu_level(a) >= IF_GENBIT(bit & 31); +} - if (v1.field[3] < v2.field[3]) - return -1; - else if (v1.field[3] > v2.field[3]) - return 1; +static inline void iflag_set_all_features(iflag_t *a) +{ + size_t i; - return 0; + for (i = IF_FEATURE_FIELD; i < IF_CPU_FIELD; i++) + a->field[i] = ~UINT32_C(0); +} + +static inline void iflag_set_cpu(iflag_t *a, unsigned int cpu) +{ + a->field[0] = 0; /* Not applicable to the CPU type */ + iflag_set_all_features(a); /* All feature masking bits set for now */ + a->field[IF_CPU_FIELD] &= ~IF_CPU_LEVEL_MASK; + iflag_set(a, cpu); +} + +static inline void iflag_set_default_cpu(iflag_t *a) +{ + iflag_set_cpu(a, IF_PLEVEL); } static inline iflag_t _iflag_pfmask(const iflag_t *a) diff --git a/include/insns.h b/include/insns.h index ac2d7924..00de2887 100644 --- a/include/insns.h +++ b/include/insns.h @@ -23,6 +23,12 @@ struct itemplate { uint32_t iflag_idx; /* some flags referenced by index */ }; +/* Use this helper to test instruction template flags */ +static inline bool itemp_has(const struct itemplate *itemp, unsigned int bit) +{ + return iflag_test(&insns_flags[itemp->iflag_idx], bit); +} + /* Disassembler table structure */ /* diff --git a/include/nasmlib.h b/include/nasmlib.h index ae0473a1..bb1becdf 100644 --- a/include/nasmlib.h +++ b/include/nasmlib.h @@ -189,11 +189,9 @@ int64_t readnum(char *str, bool *error); int64_t readstrnum(char *str, int length, bool *warn); /* - * seg_init: Initialise the 
segment-number allocator. * seg_alloc: allocate a hitherto unused segment number. */ -void pure_func seg_init(void); -int32_t pure_func seg_alloc(void); +int32_t seg_alloc(void); /* * Add/replace or remove an extension to the end of a filename diff --git a/output/outelf.c b/output/outelf.c index 3ca2c7c3..7ac2d54b 100644 --- a/output/outelf.c +++ b/output/outelf.c @@ -3273,6 +3273,9 @@ static void dwarf_generate(void) if (is_elf32()) { WRITELONG(pbuf,0); /* null beginning offset */ WRITELONG(pbuf,0); /* null ending offset */ + } else if (is_elfx32()) { + WRITELONG(pbuf,0); /* null beginning offset */ + WRITELONG(pbuf,0); /* null ending offset */ } else { nasm_assert(is_elf64()); WRITEDLONG(pbuf,0); /* null beginning offset */ diff --git a/test/ret.asm b/test/ret.asm new file mode 100644 index 00000000..a6aa5332 --- /dev/null +++ b/test/ret.asm @@ -0,0 +1,56 @@ + ;; All the flavors of RET +%ifndef ERROR + %define ERROR 0 +%endif + + + bits 16 + + ret + retn + retf + retw + retnw + retfw + retd + retnd + retfd +%if ERROR + retq + retnq + retfq +%endif + + bits 32 + + ret + retn + retf + retw + retnw + retfw + retd + retnd + retfd +%if ERROR + retq + retnq + retfq +%endif + + bits 64 + + ret + retn + retf ; Probably should have been RETFQ, but: legacy... 
+ retw + retnw + retfw +%if ERROR + retd + retnd +%endif + retfd + retq + retnq + retfq diff --git a/test/vaesenc.asm b/test/vaesenc.asm new file mode 100644 index 00000000..9edca705 --- /dev/null +++ b/test/vaesenc.asm @@ -0,0 +1,22 @@ +;; BR 3392454, 3392460 + + bits 64 + aesenc xmm0,xmm4 + vaesenc zmm0,zmm0,zmm4 + vpclmullqlqdq zmm1,zmm1,zmm5 + vpclmulqdq zmm0, zmm1, zmm2, 0 + vaesenclast zmm0, zmm1, zmm2 + + bits 32 + aesenc xmm0,xmm4 + vaesenc zmm0,zmm0,zmm4 + vpclmullqlqdq zmm1,zmm1,zmm5 + vpclmulqdq zmm0, zmm1, zmm2, 0 + vaesenclast zmm0, zmm1, zmm2 + + bits 16 + aesenc xmm0,xmm4 + vaesenc zmm0,zmm0,zmm4 + vpclmullqlqdq zmm1,zmm1,zmm5 + vpclmulqdq zmm0, zmm1, zmm2, 0 + vaesenclast zmm0, zmm1, zmm2 diff --git a/x86/insns-iflags.ph b/x86/insns-iflags.ph index 989276f2..43bf70e8 100644 --- a/x86/insns-iflags.ph +++ b/x86/insns-iflags.ph @@ -64,109 +64,117 @@ # for a set of flags, so be careful moving bits (and # don't forget to update C code generation then). # +sub dword_align($) { + my($n) = @_; + + $$n = ($$n + 31) & ~31; + return $n; +} + +my $f = 0; my %insns_flag_bit = ( # # dword bound, index 0 - specific flags # - "SM" => [ 0, "Size match"], - "SM2" => [ 1, "Size match first two operands"], - "SB" => [ 2, "Unsized operands can't be non-byte"], - "SW" => [ 3, "Unsized operands can't be non-word"], - "SD" => [ 4, "Unsized operands can't be non-dword"], - "SQ" => [ 5, "Unsized operands can't be non-qword"], - "SO" => [ 6, "Unsized operands can't be non-oword"], - "SY" => [ 7, "Unsized operands can't be non-yword"], - "SZ" => [ 8, "Unsized operands can't be non-zword"], - "SIZE" => [ 9, "Unsized operands must match the bitsize"], - "SX" => [ 10, "Unsized operands not allowed"], - "AR0" => [ 11, "SB, SW, SD applies to argument 0"], - "AR1" => [ 12, "SB, SW, SD applies to argument 1"], - "AR2" => [ 13, "SB, SW, SD applies to argument 2"], - "AR3" => [ 14, "SB, SW, SD applies to argument 3"], - "AR4" => [ 15, "SB, SW, SD applies to argument 4"], - "OPT" => [ 
16, "Optimizing assembly only"], + "SM" => [$f++, "Size match"], + "SM2" => [$f++, "Size match first two operands"], + "SB" => [$f++, "Unsized operands can't be non-byte"], + "SW" => [$f++, "Unsized operands can't be non-word"], + "SD" => [$f++, "Unsized operands can't be non-dword"], + "SQ" => [$f++, "Unsized operands can't be non-qword"], + "SO" => [$f++, "Unsized operands can't be non-oword"], + "SY" => [$f++, "Unsized operands can't be non-yword"], + "SZ" => [$f++, "Unsized operands can't be non-zword"], + "SIZE" => [$f++, "Unsized operands must match the bitsize"], + "SX" => [$f++, "Unsized operands not allowed"], + "AR0" => [$f++, "SB, SW, SD applies to argument 0"], + "AR1" => [$f++, "SB, SW, SD applies to argument 1"], + "AR2" => [$f++, "SB, SW, SD applies to argument 2"], + "AR3" => [$f++, "SB, SW, SD applies to argument 3"], + "AR4" => [$f++, "SB, SW, SD applies to argument 4"], + "OPT" => [$f++, "Optimizing assembly only"], # - # dword bound, index 1 - instruction filtering flags + # dword bound - instruction filtering flags # - "PRIV" => [ 32, "Privileged instruction"], - "SMM" => [ 33, "Only valid in SMM"], - "PROT" => [ 34, "Protected mode only"], - "LOCK" => [ 35, "Lockable if operand 0 is memory"], - "NOLONG" => [ 36, "Not available in long mode"], - "LONG" => [ 37, "Long mode"], - "NOHLE" => [ 38, "HLE prefixes forbidden"], - "MIB" => [ 39, "disassemble with split EA"], - "BND" => [ 40, "BND (0xF2) prefix available"], - "UNDOC" => [ 41, "Undocumented"], - "HLE" => [ 42, "HLE prefixed"], - "FPU" => [ 43, "FPU"], - "MMX" => [ 44, "MMX"], - "3DNOW" => [ 45, "3DNow!"], - "SSE" => [ 46, "SSE (KNI, MMX2)"], - "SSE2" => [ 47, "SSE2"], - "SSE3" => [ 48, "SSE3 (PNI)"], - "VMX" => [ 49, "VMX"], - "SSSE3" => [ 50, "SSSE3"], - "SSE4A" => [ 51, "AMD SSE4a"], - "SSE41" => [ 52, "SSE4.1"], - "SSE42" => [ 53, "SSE4.2"], - "SSE5" => [ 54, "SSE5"], - "AVX" => [ 55, "AVX (128b)"], - "AVX2" => [ 56, "AVX2 (256b)"], - "FMA" => [ 57, ""], - "BMI1" => [ 58, ""], - "BMI2" 
=> [ 59, ""], - "TBM" => [ 60, ""], - "RTM" => [ 61, ""], - "INVPCID" => [ 62, ""], + "PRIV" => [${dword_align(\$f)}++, "Privileged instruction"], + "SMM" => [$f++, "Only valid in SMM"], + "PROT" => [$f++, "Protected mode only"], + "LOCK" => [$f++, "Lockable if operand 0 is memory"], + "NOLONG" => [$f++, "Not available in long mode"], + "LONG" => [$f++, "Long mode"], + "NOHLE" => [$f++, "HLE prefixes forbidden"], + "MIB" => [$f++, "disassemble with split EA"], + "BND" => [$f++, "BND (0xF2) prefix available"], + "UNDOC" => [$f++, "Undocumented"], + "HLE" => [$f++, "HLE prefixed"], + "FPU" => [$f++, "FPU"], + "MMX" => [$f++, "MMX"], + "3DNOW" => [$f++, "3DNow!"], + "SSE" => [$f++, "SSE (KNI, MMX2)"], + "SSE2" => [$f++, "SSE2"], + "SSE3" => [$f++, "SSE3 (PNI)"], + "VMX" => [$f++, "VMX"], + "SSSE3" => [$f++, "SSSE3"], + "SSE4A" => [$f++, "AMD SSE4a"], + "SSE41" => [$f++, "SSE4.1"], + "SSE42" => [$f++, "SSE4.2"], + "SSE5" => [$f++, "SSE5"], + "AVX" => [$f++, "AVX (256-bit floating point)"], + "AVX2" => [$f++, "AVX2 (256-bit integer)"], + "FMA" => [$f++, ""], + "BMI1" => [$f++, ""], + "BMI2" => [$f++, ""], + "TBM" => [$f++, ""], + "RTM" => [$f++, ""], + "INVPCID" => [$f++, ""], + "AVX512" => [$f++, "AVX-512F (512-bit base architecture)"], + "AVX512CD" => [$f++, "AVX-512 Conflict Detection"], + "AVX512ER" => [$f++, "AVX-512 Exponential and Reciprocal"], + "AVX512PF" => [$f++, "AVX-512 Prefetch"], + "MPX" => [$f++, "MPX"], + "SHA" => [$f++, "SHA"], + "PREFETCHWT1" => [$f++, "PREFETCHWT1"], + "AVX512VL" => [$f++, "AVX-512 Vector Length Orthogonality"], + "AVX512DQ" => [$f++, "AVX-512 Dword and Qword"], + "AVX512BW" => [$f++, "AVX-512 Byte and Word"], + "AVX512IFMA" => [$f++, "AVX-512 IFMA instructions"], + "AVX512VBMI" => [$f++, "AVX-512 VBMI instructions"], + "AES" => [$f++, "AES instructions"], + "VAES" => [$f++, "AES AVX instructions"], + "VPCLMULQDQ" => [$f++, "Carry-Less Multiplication extention"], - # - # dword bound, index 2 - instruction filtering flags - # - 
"AVX512" => [ 64, "AVX-512F (512b)"], - "AVX512CD" => [ 65, "AVX-512 Conflict Detection"], - "AVX512ER" => [ 66, "AVX-512 Exponential and Reciprocal"], - "AVX512PF" => [ 67, "AVX-512 Prefetch"], - "MPX" => [ 68 ,"MPX"], - "SHA" => [ 69 ,"SHA"], - "PREFETCHWT1" => [ 70 ,"PREFETCHWT1"], - "AVX512VL" => [ 71, "AVX-512 Vector Length Orthogonality"], - "AVX512DQ" => [ 72, "AVX-512 Dword and Qword"], - "AVX512BW" => [ 73, "AVX-512 Byte and Word"], - "AVX512IFMA" => [ 74, "AVX-512 IFMA instructions"], - "AVX512VBMI" => [ 75, "AVX-512 VBMI instructions"], - "OBSOLETE" => [ 93, "Instruction removed from architecture"], - "VEX" => [ 94, "VEX or XOP encoded instruction"], - "EVEX" => [ 95, "EVEX encoded instruction"], - "AES" => [ 96, "AES instructions"], - "VAES" => [ 97, "AES AVX instructions"], - "VPCLMULQDQ" => [ 98, "Carry-Less Multiplication extention"], + # Put these last + "OBSOLETE" => [$f++, "Instruction removed from architecture"], + "VEX" => [$f++, "VEX or XOP encoded instruction"], + "EVEX" => [$f++, "EVEX encoded instruction"], # - # dword bound, cpu type flags + # dword bound - cpu type flags # # The CYRIX and AMD flags should have the highest bit values; the # disassembler selection algorithm depends on it. 
# - "8086" => [128, "8086"], - "186" => [129, "186+"], - "286" => [130, "286+"], - "386" => [131, "386+"], - "486" => [132, "486+"], - "PENT" => [133, "Pentium"], - "P6" => [134, "P6"], - "KATMAI" => [135, "Katmai"], - "WILLAMETTE" => [136, "Willamette"], - "PRESCOTT" => [137, "Prescott"], - "X86_64" => [138, "x86-64 (long or legacy mode)"], - "NEHALEM" => [139, "Nehalem"], - "WESTMERE" => [140, "Westmere"], - "SANDYBRIDGE" => [141, "Sandy Bridge"], - "FUTURE" => [142, "Future processor (not yet disclosed)"], - "IA64" => [143, "IA64 (in x86 mode)"], - "CYRIX" => [144, "Cyrix-specific"], - "AMD" => [145, "AMD-specific"], + "8086" => [${dword_align(\$f)}++, "8086"], + "186" => [$f++, "186+"], + "286" => [$f++, "286+"], + "386" => [$f++, "386+"], + "486" => [$f++, "486+"], + "PENT" => [$f++, "Pentium"], + "P6" => [$f++, "P6"], + "KATMAI" => [$f++, "Katmai"], + "WILLAMETTE" => [$f++, "Willamette"], + "PRESCOTT" => [$f++, "Prescott"], + "X86_64" => [$f++, "x86-64 (long or legacy mode)"], + "NEHALEM" => [$f++, "Nehalem"], + "WESTMERE" => [$f++, "Westmere"], + "SANDYBRIDGE" => [$f++, "Sandy Bridge"], + "FUTURE" => [$f++, "Future processor (not yet disclosed)"], + "IA64" => [$f++, "IA64 (in x86 mode)"], + + # Put these last + "CYRIX" => [$f++, "Cyrix-specific"], + "AMD" => [$f++, "AMD-specific"], ); my %insns_flag_hash = (); @@ -176,9 +184,9 @@ my $iflag_words; sub get_flag_words() { my $max = -1; - foreach my $key (keys(%insns_flag_bit)) { - if (${$insns_flag_bit{$key}}[0] > $max) { - $max = ${$insns_flag_bit{$key}}[0]; + foreach my $vp (values(%insns_flag_bit)) { + if ($vp->[0] > $max) { + $max = $vp->[0]; } } @@ -218,14 +226,28 @@ sub write_iflaggen_h() { print N "#ifndef NASM_IFLAGGEN_H\n"; print N "#define NASM_IFLAGGEN_H 1\n\n"; - foreach my $key (sort { $insns_flag_bit{$a}[0] <=> $insns_flag_bit{$b}[0] } keys(%insns_flag_bit)) { + my @flagnames = keys(%insns_flag_bit); + @flagnames = sort { + $insns_flag_bit{$a}->[0] <=> $insns_flag_bit{$b}->[0] + } @flagnames; + my 
$next = 0; + foreach my $key (@flagnames) { + my $v = $insns_flag_bit{$key}; + if ($v->[0] > $next) { + printf N "%-31s /* %-64s */\n", '', + ($next != $v->[0]-1) ? + sprintf("%d...%d unused", $next, $v->[0]-1) : + sprintf("%d unused", $next); + } print N sprintf("#define IF_%-16s %3d /* %-64s */\n", - $key, $insns_flag_bit{$key}[0], $insns_flag_bit{$key}[1]); + $key, $v->[0], $v->[1]); + $next = $v->[0] + 1; } print N "\n"; + printf N "#define IF_FIELD_COUNT %d\n", $iflag_words; print N "typedef struct {\n"; - printf N " uint32_t field[%d];\n", $iflag_words; + print N " uint32_t field[IF_FIELD_COUNT];\n"; print N "} iflag_t;\n"; print N "\n"; diff --git a/x86/insns.dat b/x86/insns.dat index 99746302..1bda0e3c 100644 --- a/x86/insns.dat +++ b/x86/insns.dat @@ -1,6 +1,6 @@ ;; -------------------------------------------------------------------------- ;; -;; Copyright 1996-2017 The NASM Authors - All Rights Reserved +;; Copyright 1996-2018 The NASM Authors - All Rights Reserved ;; See the file AUTHORS included with the NASM distribution for ;; the specific copyright holders. ;; @@ -1129,6 +1129,24 @@ RETF void [ cb] 8086 RETF imm [i: ca iw] 8086,SW RETN void [ c3] 8086,BND RETN imm [i: c2 iw] 8086,SW,BND +RETW void [ o16 c3] 8086,BND +RETW imm [i: c2 iw] 8086,SW,BND +RETFW void [ o16 cb] 8086 +RETFW imm [i: o16 ca iw] 8086,SW +RETNW void [ o16 c3] 8086,BND +RETNW imm [i: o16 c2 iw] 8086,SW,BND +RETD void [ o32 c3] 8086,BND,NOLONG +RETD imm [i: o32 c2 iw] 8086,SW,BND,NOLONG +RETFD void [ o32 cb] 8086 +RETFD imm [i: o32 ca iw] 8086,SW +RETND void [ o32 c3] 8086,BND,NOLONG +RETND imm [i: o32 c2 iw] 8086,SW,BND,NOLONG +RETQ void [ o64nw c3] X64,BND +RETQ imm [i: o64nw c2 iw] X64,SW,BND +RETFQ void [ o64 cb] X64 +RETFQ imm [i: o64 ca iw] X64,SW +RETNQ void [ o64nw c3] X64,BND +RETNQ imm [i: o64nw c2 iw] X64,SW,BND ROL rm8,unity [m-: d0 /0] 8086 ROL rm8,reg_cl [m-: d2 /0] 8086 |