diff options
Diffstat (limited to 'gcc/config/i386')
-rw-r--r-- | gcc/config/i386/cpuid.h | 1 | ||||
-rw-r--r-- | gcc/config/i386/driver-i386.c | 6 | ||||
-rw-r--r-- | gcc/config/i386/freebsd.h | 31 | ||||
-rw-r--r-- | gcc/config/i386/i386-builtin-types.def | 1 | ||||
-rw-r--r-- | gcc/config/i386/i386-c.c | 2 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 291 | ||||
-rw-r--r-- | gcc/config/i386/i386.h | 3 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 99 | ||||
-rw-r--r-- | gcc/config/i386/i386.opt | 4 | ||||
-rw-r--r-- | gcc/config/i386/mwaitxintrin.h | 50 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 10 | ||||
-rw-r--r-- | gcc/config/i386/x86intrin.h | 1 |
12 files changed, 370 insertions, 129 deletions
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index f931969459..f3ad4dbb8f 100644 --- a/gcc/config/i386/cpuid.h +++ b/gcc/config/i386/cpuid.h @@ -57,6 +57,7 @@ #define bit_LWP (1 << 15) #define bit_FMA4 (1 << 16) #define bit_TBM (1 << 21) +#define bit_MWAITX (1 << 29) /* %edx */ #define bit_MMXEXT (1 << 22) diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c index c69149d7cb..1c6c22172d 100644 --- a/gcc/config/i386/driver-i386.c +++ b/gcc/config/i386/driver-i386.c @@ -413,7 +413,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) unsigned int has_clflushopt = 0, has_xsavec = 0, has_xsaves = 0; unsigned int has_avx512dq = 0, has_avx512bw = 0, has_avx512vl = 0; unsigned int has_avx512vbmi = 0, has_avx512ifma = 0, has_clwb = 0; - unsigned int has_pcommit = 0; + unsigned int has_pcommit = 0, has_mwaitx = 0; bool arch; @@ -532,6 +532,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) has_longmode = edx & bit_LM; has_3dnowp = edx & bit_3DNOWP; has_3dnow = edx & bit_3DNOW; + has_mwaitx = ecx & bit_MWAITX; } /* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv. */ @@ -953,6 +954,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) const char *avx512vbmi = has_avx512vbmi ? " -mavx512vbmi" : " -mno-avx512vbmi"; const char *clwb = has_clwb ? " -mclwb" : " -mno-clwb"; const char *pcommit = has_pcommit ? " -mpcommit" : " -mno-pcommit"; + const char *mwaitx = has_mwaitx ? 
" -mmwaitx" : " -mno-mwaitx"; options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3, sse4a, cx16, sahf, movbe, aes, sha, pclmul, @@ -962,7 +964,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) fxsr, xsave, xsaveopt, avx512f, avx512er, avx512cd, avx512pf, prefetchwt1, clflushopt, xsavec, xsaves, avx512dq, avx512bw, avx512vl, - avx512ifma, avx512vbmi, clwb, pcommit, NULL); + avx512ifma, avx512vbmi, clwb, pcommit, mwaitx, NULL); } done: diff --git a/gcc/config/i386/freebsd.h b/gcc/config/i386/freebsd.h index ff73aeaaa1..6ce160ed40 100644 --- a/gcc/config/i386/freebsd.h +++ b/gcc/config/i386/freebsd.h @@ -59,28 +59,15 @@ along with GCC; see the file COPYING3. If not see #define SUBTARGET_EXTRA_SPECS \ { "fbsd_dynamic_linker", FBSD_DYNAMIC_LINKER } -/* Provide a STARTFILE_SPEC appropriate for FreeBSD. Here we add - the magical crtbegin.o file (see crtstuff.c) which provides part - of the support for getting C++ file-scope static object constructed - before entering `main'. */ - -#undef STARTFILE_SPEC -#define STARTFILE_SPEC \ - "%{!shared: \ - %{pg:gcrt1.o%s} %{!pg:%{p:gcrt1.o%s} \ - %{!p:%{profile:gcrt1.o%s} \ - %{!profile:crt1.o%s}}}} \ - crti.o%s %{!shared:crtbegin.o%s} %{shared:crtbeginS.o%s}" - -/* Provide a ENDFILE_SPEC appropriate for FreeBSD. Here we tack on - the magical crtend.o file (see crtstuff.c) which provides part of - the support for getting C++ file-scope static object constructed - before entering `main', followed by a normal "finalizer" file, - `crtn.o'. */ - -#undef ENDFILE_SPEC -#define ENDFILE_SPEC \ - "%{!shared:crtend.o%s} %{shared:crtendS.o%s} crtn.o%s" +/* Use the STARTFILE_SPEC from config/freebsd-spec.h. */ + +#undef STARTFILE_SPEC +#define STARTFILE_SPEC FBSD_STARTFILE_SPEC + +/* Use the ENDFILE_SPEC from config/freebsd-spec.h. */ + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC FBSD_ENDFILE_SPEC /* Provide a LINK_SPEC appropriate for FreeBSD. 
Here we provide support for the special GCC options -static and -shared, which allow us to diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def index 864d0ea23a..2459c440ba 100644 --- a/gcc/config/i386/i386-builtin-types.def +++ b/gcc/config/i386/i386-builtin-types.def @@ -595,6 +595,7 @@ DEF_FUNCTION_TYPE (VOID, PV4DI, V4DI) DEF_FUNCTION_TYPE (VOID, PV4SF, V4SF) DEF_FUNCTION_TYPE (VOID, PV8SF, V8SF) DEF_FUNCTION_TYPE (VOID, UNSIGNED, UNSIGNED) +DEF_FUNCTION_TYPE (VOID, UNSIGNED, UNSIGNED, UNSIGNED) DEF_FUNCTION_TYPE (VOID, PV8DI, V8DI) # Instructions returning mask diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c index 03045fb723..f3f90df06e 100644 --- a/gcc/config/i386/i386-c.c +++ b/gcc/config/i386/i386-c.c @@ -432,6 +432,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__PCOMMIT__"); if (isa_flag & OPTION_MASK_ISA_CLWB) def_or_undef (parse_in, "__CLWB__"); + if (isa_flag & OPTION_MASK_ISA_MWAITX) + def_or_undef (parse_in, "__MWAITX__"); } diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index d6c2de8198..7c28a559a4 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -2374,6 +2374,7 @@ static rtx (*ix86_gen_sub3) (rtx, rtx, rtx); static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx); static rtx (*ix86_gen_one_cmpl2) (rtx, rtx); static rtx (*ix86_gen_monitor) (rtx, rtx, rtx); +static rtx (*ix86_gen_monitorx) (rtx, rtx, rtx); static rtx (*ix86_gen_andsp) (rtx, rtx, rtx); static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx); static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx); @@ -2677,6 +2678,7 @@ ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch, { "-mmpx", OPTION_MASK_ISA_MPX }, { "-mclwb", OPTION_MASK_ISA_CLWB }, { "-mpcommit", OPTION_MASK_ISA_PCOMMIT }, + { "-mmwaitx", OPTION_MASK_ISA_MWAITX }, }; /* Flag options. 
*/ @@ -2988,6 +2990,17 @@ ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset) return; } + if ((stringop_alg) i == rep_prefix_8_byte + && !TARGET_64BIT) + { + /* rep; movq isn't available in 32-bit code. */ + error ("stringop strategy name %s specified for option %s " + "not supported for 32-bit code", + alg_name, + is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy="); + return; + } + input_ranges[n].max = maxs; input_ranges[n].alg = (stringop_alg) i; if (!strcmp (align, "align")) @@ -3179,6 +3192,7 @@ ix86_option_override_internal (bool main_args_p, #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54) #define PTA_CLWB (HOST_WIDE_INT_1 << 55) #define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56) +#define PTA_MWAITX (HOST_WIDE_INT_1 << 57) #define PTA_CORE2 \ (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \ @@ -3332,7 +3346,7 @@ ix86_option_override_internal (bool main_args_p, | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND - | PTA_MOVBE}, + | PTA_MOVBE | PTA_MWAITX}, {"btver1", PROCESSOR_BTVER1, CPU_GENERIC, PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW @@ -3776,6 +3790,9 @@ ix86_option_override_internal (bool main_args_p, opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA; if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)) x86_prefetch_sse = true; + if (processor_alias_table[i].flags & PTA_MWAITX + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MWAITX)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MWAITX; break; } @@ -4206,6 +4223,7 @@ ix86_option_override_internal (bool main_args_p, ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi; ix86_gen_probe_stack_range = gen_probe_stack_rangedi; ix86_gen_monitor = gen_sse3_monitor_di; + ix86_gen_monitorx = gen_monitorx_di; } else { @@ -4218,6 +4236,7 @@ ix86_option_override_internal 
(bool main_args_p, ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi; ix86_gen_probe_stack_range = gen_probe_stack_rangesi; ix86_gen_monitor = gen_sse3_monitor_si; + ix86_gen_monitorx = gen_monitorx_si; } #ifdef USE_IX86_CLD @@ -4742,6 +4761,7 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[], IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma), IX86_ATTR_ISA ("clwb", OPT_mclwb), IX86_ATTR_ISA ("pcommit", OPT_mpcommit), + IX86_ATTR_ISA ("mwaitx", OPT_mmwaitx), /* enum options */ IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), @@ -5867,7 +5887,10 @@ ix86_function_regparm (const_tree type, const_tree decl) /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and DFmode (2) arguments in SSE registers for a function with the indicated TYPE and DECL. DECL may be NULL when calling function - indirectly or considering a libcall. Otherwise return 0. */ + indirectly or considering a libcall. Return -1 if any FP parameter + should be rejected by error. This is used in situation we imply SSE + calling convention but the function is called from another function with + SSE disabled. Otherwise return 0. */ static int ix86_function_sseregparm (const_tree type, const_tree decl, bool warn) @@ -5916,14 +5939,13 @@ ix86_function_sseregparm (const_tree type, const_tree decl, bool warn) { /* Refuse to produce wrong code when local function with SSE enabled is called from SSE disabled function. - We may work hard to work out these scenarios but hopefully - it doesnot matter in practice. */ + FIXME: We need a way to detect these cases cross-ltrans partition + and avoid using SSE calling conventions on local functions called + from function with SSE disabled. For now at least delay the + warning until we know we are going to produce wrong code. 
+ See PR66047 */ if (!TARGET_SSE && warn) - { - error ("calling %qD with SSE caling convention without " - "SSE/SSE2 enabled", decl); - return 0; - } + return -1; return TARGET_SSE2_P (target_opts_for_fn (target->decl) ->x_ix86_isa_flags) ? 2 : 1; } @@ -6118,6 +6140,7 @@ bool ix86_function_arg_regno_p (int regno) { int i; + enum calling_abi call_abi; const int *parm_regs; if (TARGET_MPX && BND_REGNO_P (regno)) @@ -6143,16 +6166,18 @@ ix86_function_arg_regno_p (int regno) /* TODO: The function should depend on current function ABI but builtins.c would need updating then. Therefore we use the default ABI. */ + call_abi = ix86_cfun_abi (); /* RAX is used as hidden argument to va_arg functions. */ - if (ix86_abi == SYSV_ABI && regno == AX_REG) + if (call_abi == SYSV_ABI && regno == AX_REG) return true; - if (ix86_abi == MS_ABI) + if (call_abi == MS_ABI) parm_regs = x86_64_ms_abi_int_parameter_registers; else parm_regs = x86_64_int_parameter_registers; - for (i = 0; i < (ix86_abi == MS_ABI + + for (i = 0; i < (call_abi == MS_ABI ? 
X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++) if (regno == parm_regs[i]) return true; @@ -6479,6 +6504,7 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ cum->bnd_regno = FIRST_BND_REG; cum->bnds_in_bt = 0; cum->force_bnd_pass = 0; + cum->decl = fndecl; if (!TARGET_64BIT) { @@ -7424,6 +7450,7 @@ function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode, HOST_WIDE_INT words) { int res = 0; + bool error_p = NULL; switch (mode) { @@ -7456,9 +7483,13 @@ function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode, gcc_unreachable (); case DFmode: + if (cum->float_in_sse == -1) + error_p = 1; if (cum->float_in_sse < 2) break; case SFmode: + if (cum->float_in_sse == -1) + error_p = 1; if (cum->float_in_sse < 1) break; /* FALLTHRU */ @@ -7514,6 +7545,14 @@ function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode, } break; } + if (error_p) + { + cum->float_in_sse = 0; + error ("calling %qD with SSE calling convention without " + "SSE/SSE2 enabled", cum->decl); + sorry ("this is a GCC bug that can be worked around by adding " + "attribute used to function called"); + } return res; } @@ -7646,10 +7685,11 @@ ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode, (otherwise it is an extra parameter matching an ellipsis). */ static rtx -function_arg_32 (const CUMULATIVE_ARGS *cum, machine_mode mode, +function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode, machine_mode orig_mode, const_tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words) { + bool error_p = false; /* Avoid the AL settings for the Unix64 ABI. 
*/ if (mode == VOIDmode) return constm1_rtx; @@ -7690,9 +7730,13 @@ function_arg_32 (const CUMULATIVE_ARGS *cum, machine_mode mode, break; case DFmode: + if (cum->float_in_sse == -1) + error_p = 1; if (cum->float_in_sse < 2) break; case SFmode: + if (cum->float_in_sse == -1) + error_p = 1; if (cum->float_in_sse < 1) break; /* FALLTHRU */ @@ -7751,6 +7795,14 @@ function_arg_32 (const CUMULATIVE_ARGS *cum, machine_mode mode, } break; } + if (error_p) + { + cum->float_in_sse = 0; + error ("calling %qD with SSE calling convention without " + "SSE/SSE2 enabled", cum->decl); + sorry ("this is a GCC bug that can be worked around by adding " + "attribute used to function called"); + } return NULL_RTX; } @@ -8154,10 +8206,10 @@ ix86_function_value_regno_p (const unsigned int regno) case AX_REG: return true; case DX_REG: - return (!TARGET_64BIT || ix86_abi != MS_ABI); + return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI); case DI_REG: case SI_REG: - return TARGET_64BIT && ix86_abi != MS_ABI; + return TARGET_64BIT && ix86_cfun_abi () != MS_ABI; case FIRST_BND_REG: return chkp_function_instrumented_p (current_function_decl); @@ -8168,7 +8220,7 @@ ix86_function_value_regno_p (const unsigned int regno) /* TODO: The function should depend on current function ABI but builtins.c would need updating then. Therefore we use the default ABI. 
*/ - if (TARGET_64BIT && ix86_abi == MS_ABI) + if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI) return false; return TARGET_FLOAT_RETURNS_IN_80387; @@ -8230,8 +8282,15 @@ function_value_32 (machine_mode orig_mode, machine_mode mode, if ((fn || fntype) && (mode == SFmode || mode == DFmode)) { int sse_level = ix86_function_sseregparm (fntype, fn, false); - if ((sse_level >= 1 && mode == SFmode) - || (sse_level == 2 && mode == DFmode)) + if (sse_level == -1) + { + error ("calling %qD with SSE caling convention without " + "SSE/SSE2 enabled", fn); + sorry ("this is a GCC bug that can be worked around by adding " + "attribute used to function called"); + } + else if ((sse_level >= 1 && mode == SFmode) + || (sse_level == 2 && mode == DFmode)) regno = FIRST_SSE_REG; } @@ -22943,7 +23002,7 @@ ix86_split_long_move (rtx operands[]) Do an lea to the last part and use only one colliding move. */ else if (collisions > 1) { - rtx base; + rtx base, addr, tls_base = NULL_RTX; collisions = 1; @@ -22954,10 +23013,50 @@ ix86_split_long_move (rtx operands[]) if (GET_MODE (base) != Pmode) base = gen_rtx_REG (Pmode, REGNO (base)); - emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0))); + addr = XEXP (part[1][0], 0); + if (TARGET_TLS_DIRECT_SEG_REFS) + { + struct ix86_address parts; + int ok = ix86_decompose_address (addr, &parts); + gcc_assert (ok); + if (parts.seg == DEFAULT_TLS_SEG_REG) + { + /* It is not valid to use %gs: or %fs: in + lea though, so we need to remove it from the + address used for lea and add it to each individual + memory loads instead. 
*/ + addr = copy_rtx (addr); + rtx *x = &addr; + while (GET_CODE (*x) == PLUS) + { + for (i = 0; i < 2; i++) + { + rtx u = XEXP (*x, i); + if (GET_CODE (u) == ZERO_EXTEND) + u = XEXP (u, 0); + if (GET_CODE (u) == UNSPEC + && XINT (u, 1) == UNSPEC_TP) + { + tls_base = XEXP (*x, i); + *x = XEXP (*x, 1 - i); + break; + } + } + if (tls_base) + break; + x = &XEXP (*x, 0); + } + gcc_assert (tls_base); + } + } + emit_insn (gen_rtx_SET (VOIDmode, base, addr)); + if (tls_base) + base = gen_rtx_PLUS (GET_MODE (base), base, tls_base); part[1][0] = replace_equiv_address (part[1][0], base); for (i = 1; i < nparts; i++) { + if (tls_base) + base = copy_rtx (base); tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i); part[1][i] = replace_equiv_address (part[1][i], tmp); } @@ -30581,6 +30680,10 @@ enum ix86_builtins IX86_BUILTIN_CVTPS2PH, IX86_BUILTIN_CVTPS2PH256, + /* MONITORX and MWAITX instructions. */ + IX86_BUILTIN_MONITORX, + IX86_BUILTIN_MWAITX, + /* CFString built-in for darwin */ IX86_BUILTIN_CFSTRING, @@ -34199,6 +34302,12 @@ ix86_init_mmx_sse_builtins (void) def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb", VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB); + /* MONITORX and MWAITX. 
*/ + def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx", + VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX); + def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx", + VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX); + /* Add FMA4 multi-arg argument instructions */ for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++) { @@ -38971,6 +39080,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, return 0; case IX86_BUILTIN_MONITOR: + case IX86_BUILTIN_MONITORX: arg0 = CALL_EXPR_ARG (exp, 0); arg1 = CALL_EXPR_ARG (exp, 1); arg2 = CALL_EXPR_ARG (exp, 2); @@ -38983,7 +39093,10 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, op1 = copy_to_mode_reg (SImode, op1); if (!REG_P (op2)) op2 = copy_to_mode_reg (SImode, op2); - emit_insn (ix86_gen_monitor (op0, op1, op2)); + + emit_insn (fcode == IX86_BUILTIN_MONITOR + ? ix86_gen_monitor (op0, op1, op2) + : ix86_gen_monitorx (op0, op1, op2)); return 0; case IX86_BUILTIN_MWAIT: @@ -38998,6 +39111,22 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, emit_insn (gen_sse3_mwait (op0, op1)); return 0; + case IX86_BUILTIN_MWAITX: + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + arg2 = CALL_EXPR_ARG (exp, 2); + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + op2 = expand_normal (arg2); + if (!REG_P (op0)) + op0 = copy_to_mode_reg (SImode, op0); + if (!REG_P (op1)) + op1 = copy_to_mode_reg (SImode, op1); + if (!REG_P (op2)) + op2 = copy_to_mode_reg (SImode, op2); + emit_insn (gen_mwaitx (op0, op1, op2)); + return 0; + case IX86_BUILTIN_VEC_INIT_V2SI: case IX86_BUILTIN_VEC_INIT_V4HI: case IX86_BUILTIN_VEC_INIT_V8QI: @@ -44740,6 +44869,8 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt) { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df } }; int i, j, n; + machine_mode mmode = VOIDmode; + rtx (*gen_blendm) (rtx, rtx, rtx, rtx); switch (mode) { @@ -44956,81 +45087,65 @@ half: case V8DFmode: if 
(TARGET_AVX512F) { - tmp = gen_reg_rtx (mode); - emit_insn (gen_rtx_SET (VOIDmode, tmp, - gen_rtx_VEC_DUPLICATE (mode, val))); - emit_insn (gen_avx512f_blendmv8df (target, tmp, target, - force_reg (QImode, GEN_INT (1 << elt)))); - return; + mmode = QImode; + gen_blendm = gen_avx512f_blendmv8df; } - else - break; + break; + case V8DImode: if (TARGET_AVX512F) { - tmp = gen_reg_rtx (mode); - emit_insn (gen_rtx_SET (VOIDmode, tmp, - gen_rtx_VEC_DUPLICATE (mode, val))); - emit_insn (gen_avx512f_blendmv8di (target, tmp, target, - force_reg (QImode, GEN_INT (1 << elt)))); - return; + mmode = QImode; + gen_blendm = gen_avx512f_blendmv8di; } - else - break; + break; + case V16SFmode: if (TARGET_AVX512F) { - tmp = gen_reg_rtx (mode); - emit_insn (gen_rtx_SET (VOIDmode, tmp, - gen_rtx_VEC_DUPLICATE (mode, val))); - emit_insn (gen_avx512f_blendmv16sf (target, tmp, target, - force_reg (HImode, GEN_INT (1 << elt)))); - return; + mmode = HImode; + gen_blendm = gen_avx512f_blendmv16sf; } - else - break; + break; + case V16SImode: if (TARGET_AVX512F) { - tmp = gen_reg_rtx (mode); - emit_insn (gen_rtx_SET (VOIDmode, tmp, - gen_rtx_VEC_DUPLICATE (mode, val))); - emit_insn (gen_avx512f_blendmv16si (target, tmp, target, - force_reg (HImode, GEN_INT (1 << elt)))); - return; + mmode = HImode; + gen_blendm = gen_avx512f_blendmv16si; } - else - break; + break; + case V32HImode: if (TARGET_AVX512F && TARGET_AVX512BW) { - tmp = gen_reg_rtx (mode); - emit_insn (gen_rtx_SET (VOIDmode, tmp, - gen_rtx_VEC_DUPLICATE (mode, val))); - emit_insn (gen_avx512bw_blendmv32hi (target, tmp, target, - force_reg (SImode, GEN_INT (1 << elt)))); - return; + mmode = SImode; + gen_blendm = gen_avx512bw_blendmv32hi; } - else - break; + break; + case V64QImode: if (TARGET_AVX512F && TARGET_AVX512BW) { - tmp = gen_reg_rtx (mode); - emit_insn (gen_rtx_SET (VOIDmode, tmp, - gen_rtx_VEC_DUPLICATE (mode, val))); - emit_insn (gen_avx512bw_blendmv64qi (target, tmp, target, - force_reg (DImode, GEN_INT (1 << elt)))); - 
return; + mmode = DImode; + gen_blendm = gen_avx512bw_blendmv64qi; } - else - break; + break; default: break; } - if (use_vec_merge) + if (mmode != VOIDmode) + { + tmp = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (VOIDmode, tmp, + gen_rtx_VEC_DUPLICATE (mode, val))); + emit_insn (gen_blendm (target, tmp, target, + force_reg (mmode, + gen_int_mode (1 << elt, mmode)))); + } + else if (use_vec_merge) { tmp = gen_rtx_VEC_DUPLICATE (mode, val); tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt)); @@ -46892,15 +47007,16 @@ expand_vselect_vconcat (rtx target, rtx op0, rtx op1, static bool expand_vec_perm_blend (struct expand_vec_perm_d *d) { - machine_mode vmode = d->vmode; + machine_mode mmode, vmode = d->vmode; unsigned i, mask, nelt = d->nelt; - rtx target, op0, op1, x; + rtx target, op0, op1, maskop, x; rtx rperm[32], vperm; if (d->one_operand_p) return false; if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64 - && GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4) + && (TARGET_AVX512BW + || GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4)) ; else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32) ; @@ -47074,8 +47190,33 @@ expand_vec_perm_blend (struct expand_vec_perm_d *d) gcc_unreachable (); } + switch (vmode) + { + case V8DFmode: + case V8DImode: + mmode = QImode; + break; + case V16SFmode: + case V16SImode: + mmode = HImode; + break; + case V32HImode: + mmode = SImode; + break; + case V64QImode: + mmode = DImode; + break; + default: + mmode = VOIDmode; + } + + if (mmode != VOIDmode) + maskop = force_reg (mmode, gen_int_mode (mask, mmode)); + else + maskop = GEN_INT (mask); + /* This matches five different patterns with the different modes. 
*/ - x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask)); + x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop); x = gen_rtx_SET (VOIDmode, target, x); emit_insn (x); if (target != d->target) @@ -51606,7 +51747,7 @@ ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop) for (i = 0; i < loop->num_nodes; i++) FOR_BB_INSNS (bbs[i], insn) if (NONDEBUG_INSN_P (insn)) - FOR_EACH_SUBRTX (iter, array, insn, NONCONST) + FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST) if (const_rtx x = *iter) if (MEM_P (x)) { diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 1e755d3a35..de43f06bef 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -154,6 +154,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define TARGET_PCOMMIT_P(x) TARGET_ISA_PCOMMIT_P(x) #define TARGET_CLWB TARGET_ISA_CLWB #define TARGET_CLWB_P(x) TARGET_ISA_CLWB_P(x) +#define TARGET_MWAITX TARGET_ISA_MWAITX +#define TARGET_MWAITX_P(x) TARGET_ISA_MWAITX_P(x) #define TARGET_LP64 TARGET_ABI_64 #define TARGET_LP64_P(x) TARGET_ABI_64_P(x) @@ -1682,6 +1684,7 @@ typedef struct ix86_args { int stdarg; /* Set to 1 if function is stdarg. */ enum calling_abi call_abi; /* Set to SYSV_ABI for sysv abi. Otherwise MS_ABI for ms abi. */ + tree decl; /* Callee decl. 
*/ } CUMULATIVE_ARGS; /* Initialize a variable CUM of type CUMULATIVE_ARGS diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index e1c82fefc0..6b6f44c8a1 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -261,6 +261,11 @@ ;; For CLFLUSHOPT support UNSPECV_CLFLUSHOPT + + ;; For MONITORX and MWAITX support + UNSPECV_MONITORX + UNSPECV_MWAITX + ]) ;; Constants to represent rounding modes in the ROUND instruction @@ -10843,6 +10848,7 @@ [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (zero_extend:DI (match_dup 2)))] { + operands[1] = shallow_copy_rtx (operands[1]); PUT_MODE (operands[1], QImode); operands[2] = gen_lowpart (QImode, operands[0]); }) @@ -10860,6 +10866,7 @@ (parallel [(set (match_dup 0) (zero_extend:SI (match_dup 2))) (clobber (reg:CC FLAGS_REG))])] { + operands[1] = shallow_copy_rtx (operands[1]); PUT_MODE (operands[1], QImode); operands[2] = gen_lowpart (QImode, operands[0]); }) @@ -10875,6 +10882,7 @@ [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (zero_extend:SI (match_dup 2)))] { + operands[1] = shallow_copy_rtx (operands[1]); PUT_MODE (operands[1], QImode); operands[2] = gen_lowpart (QImode, operands[0]); }) @@ -10912,7 +10920,10 @@ (const_int 0)))] "" [(set (match_dup 0) (match_dup 1))] - "PUT_MODE (operands[1], QImode);") +{ + operands[1] = shallow_copy_rtx (operands[1]); + PUT_MODE (operands[1], QImode); +}) (define_split [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand")) @@ -10921,7 +10932,10 @@ (const_int 0)))] "" [(set (match_dup 0) (match_dup 1))] - "PUT_MODE (operands[1], QImode);") +{ + operands[1] = shallow_copy_rtx (operands[1]); + PUT_MODE (operands[1], QImode); +}) (define_split [(set (match_operand:QI 0 "nonimmediate_operand") @@ -10931,15 +10945,15 @@ "" [(set (match_dup 0) (match_dup 1))] { - rtx new_op1 = copy_rtx (operands[1]); - operands[1] = new_op1; - PUT_MODE (new_op1, QImode); - PUT_CODE (new_op1, ix86_reverse_condition (GET_CODE (new_op1), - GET_MODE (XEXP 
(new_op1, 0)))); + operands[1] = shallow_copy_rtx (operands[1]); + PUT_MODE (operands[1], QImode); + PUT_CODE (operands[1], + ix86_reverse_condition (GET_CODE (operands[1]), + GET_MODE (XEXP (operands[1], 0)))); /* Make sure that (a) the CCmode we have for the flags is strong enough for the reversed compare or (b) we have a valid FP compare. */ - if (! ix86_comparison_operator (new_op1, VOIDmode)) + if (! ix86_comparison_operator (operands[1], VOIDmode)) FAIL; }) @@ -10951,15 +10965,15 @@ "" [(set (match_dup 0) (match_dup 1))] { - rtx new_op1 = copy_rtx (operands[1]); - operands[1] = new_op1; - PUT_MODE (new_op1, QImode); - PUT_CODE (new_op1, ix86_reverse_condition (GET_CODE (new_op1), - GET_MODE (XEXP (new_op1, 0)))); + operands[1] = shallow_copy_rtx (operands[1]); + PUT_MODE (operands[1], QImode); + PUT_CODE (operands[1], + ix86_reverse_condition (GET_CODE (operands[1]), + GET_MODE (XEXP (operands[1], 0)))); /* Make sure that (a) the CCmode we have for the flags is strong enough for the reversed compare or (b) we have a valid FP compare. */ - if (! ix86_comparison_operator (new_op1, VOIDmode)) + if (! 
ix86_comparison_operator (operands[1], VOIDmode)) FAIL; }) @@ -11078,7 +11092,10 @@ (if_then_else (match_dup 0) (label_ref (match_dup 1)) (pc)))] - "PUT_MODE (operands[0], VOIDmode);") +{ + operands[0] = shallow_copy_rtx (operands[0]); + PUT_MODE (operands[0], VOIDmode); +}) (define_split [(set (pc) @@ -11093,15 +11110,15 @@ (label_ref (match_dup 1)) (pc)))] { - rtx new_op0 = copy_rtx (operands[0]); - operands[0] = new_op0; - PUT_MODE (new_op0, VOIDmode); - PUT_CODE (new_op0, ix86_reverse_condition (GET_CODE (new_op0), - GET_MODE (XEXP (new_op0, 0)))); + operands[0] = shallow_copy_rtx (operands[0]); + PUT_MODE (operands[0], VOIDmode); + PUT_CODE (operands[0], + ix86_reverse_condition (GET_CODE (operands[0]), + GET_MODE (XEXP (operands[0], 0)))); /* Make sure that (a) the CCmode we have for the flags is strong enough for the reversed compare or (b) we have a valid FP compare. */ - if (! ix86_comparison_operator (new_op0, VOIDmode)) + if (! ix86_comparison_operator (operands[0], VOIDmode)) FAIL; }) @@ -11138,7 +11155,7 @@ (pc)))] { operands[2] = simplify_gen_subreg (<MODE>mode, operands[2], QImode, 0); - + operands[0] = shallow_copy_rtx (operands[0]); PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0]))); }) @@ -11171,7 +11188,7 @@ (pc)))] { operands[2] = simplify_gen_subreg (<MODE>mode, operands[2], SImode, 0); - + operands[0] = shallow_copy_rtx (operands[0]); PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0]))); }) @@ -11207,7 +11224,7 @@ (pc)))] { operands[2] = simplify_gen_subreg (<MODE>mode, operands[2], SImode, 0); - + operands[0] = shallow_copy_rtx (operands[0]); PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0]))); }) @@ -11239,7 +11256,7 @@ (pc)))] { operands[2] = simplify_gen_subreg (SImode, operands[2], QImode, 0); - + operands[0] = shallow_copy_rtx (operands[0]); PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0]))); }) @@ -11275,7 +11292,10 @@ (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) 
(const_int 0)]) (label_ref (match_dup 4)) (pc)))] - "PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));") +{ + operands[0] = shallow_copy_rtx (operands[0]); + PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0]))); +}) ;; Define combination compare-and-branch fp compare instructions to help ;; combine. @@ -17241,6 +17261,7 @@ operands[1] = gen_lowpart (SImode, operands[1]); if (GET_CODE (operands[3]) != ASHIFT) operands[2] = gen_lowpart (SImode, operands[2]); + operands[3] = shallow_copy_rtx (operands[3]); PUT_MODE (operands[3], SImode); }) @@ -18847,6 +18868,32 @@ (set_attr "atom_sse_attr" "fence") (set_attr "memory" "unknown")]) +;; MONITORX and MWAITX +(define_insn "mwaitx" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "c") + (match_operand:SI 1 "register_operand" "a") + (match_operand:SI 2 "register_operand" "b")] + UNSPECV_MWAITX)] + "TARGET_MWAITX" +;; 64bit version is "mwaitx %rax,%rcx,%rbx". But only lower 32bits are used. +;; Since 32bit register operands are implicitly zero extended to 64bit, +;; we only need to set up 32bit registers. + "mwaitx" + [(set_attr "length" "3")]) + +(define_insn "monitorx_<mode>" + [(unspec_volatile [(match_operand:P 0 "register_operand" "a") + (match_operand:SI 1 "register_operand" "c") + (match_operand:SI 2 "register_operand" "d")] + UNSPECV_MONITORX)] + "TARGET_MWAITX" +;; 64bit version is "monitorx %rax,%rcx,%rdx". But only lower 32bits in +;; RCX and RDX are used. Since 32bit register operands are implicitly +;; zero extended to 64bit, we only need to set up 32bit registers. 
+ "%^monitorx" + [(set (attr "length") + (symbol_ref ("(Pmode != word_mode) + 3")))]) + ;; MPX instructions (define_expand "<mode>_mk" diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 301430c238..dd46e26de3 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -859,6 +859,10 @@ mmpx Target Report Mask(ISA_MPX) Var(ix86_isa_flags) Save Support MPX code generation +mmwaitx +Target Report Mask(ISA_MWAITX) Var(ix86_isa_flags) Save +Support MWAITX and MONITORX built-in functions and code generation + mstack-protector-guard= Target RejectNegative Joined Enum(stack_protector_guard) Var(ix86_stack_protector_guard) Init(SSP_TLS) Use given stack-protector guard diff --git a/gcc/config/i386/mwaitxintrin.h b/gcc/config/i386/mwaitxintrin.h new file mode 100644 index 0000000000..d7112dad20 --- /dev/null +++ b/gcc/config/i386/mwaitxintrin.h @@ -0,0 +1,50 @@ +/* Copyright (C) 2012-2015 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. 
*/ + +#ifndef _MWAITXINTRIN_H_INCLUDED +#define _MWAITXINTRIN_H_INCLUDED + +#ifndef __MWAITX__ +#pragma GCC push_options +#pragma GCC target("mwaitx") +#define __DISABLE_MWAITX__ +#endif /* __MWAITX__ */ + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_monitorx (void const * __P, unsigned int __E, unsigned int __H) +{ + __builtin_ia32_monitorx (__P, __E, __H); +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mwaitx (unsigned int __E, unsigned int __H, unsigned int __C) +{ + __builtin_ia32_mwaitx (__E, __H, __C); +} + +#ifdef __DISABLE_MWAITX__ +#undef __DISABLE_MWAITX__ +#pragma GCC pop_options +#endif /* __DISABLE_MWAITX__ */ + +#endif /* _MWAITXINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 6d3b54a28c..58caf1aeb9 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -9523,7 +9523,7 @@ (mult:V4DI (sign_extend:V4DI (vec_select:V4SI - (match_operand:V8SI 1 "nonimmediate_operand" "v") + (match_operand:V8SI 1 "nonimmediate_operand" "%v") (parallel [(const_int 0) (const_int 2) (const_int 4) (const_int 6)]))) (sign_extend:V4DI @@ -13161,10 +13161,12 @@ (set_attr "atom_sse_attr" "fence") (set_attr "memory" "unknown")]) - +;; As per AMD and Intel ISA manuals, the first operand is extensions +;; and it goes to %ecx. The second operand received is hints and it goes +;; to %eax. (define_insn "sse3_mwait" - [(unspec_volatile [(match_operand:SI 0 "register_operand" "a") - (match_operand:SI 1 "register_operand" "c")] + [(unspec_volatile [(match_operand:SI 0 "register_operand" "c") + (match_operand:SI 1 "register_operand" "a")] UNSPECV_MWAIT)] "TARGET_SSE3" ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used. 
diff --git a/gcc/config/i386/x86intrin.h b/gcc/config/i386/x86intrin.h index a51188679e..6f7b1f66a6 100644 --- a/gcc/config/i386/x86intrin.h +++ b/gcc/config/i386/x86intrin.h @@ -85,4 +85,5 @@ #include <xsavecintrin.h> +#include <mwaitxintrin.h> #endif /* _X86INTRIN_H_INCLUDED */ |