Diffstat (limited to 'gcc/config/i386')
-rw-r--r--  gcc/config/i386/cpuid.h                  1
-rw-r--r--  gcc/config/i386/driver-i386.c            6
-rw-r--r--  gcc/config/i386/freebsd.h               31
-rw-r--r--  gcc/config/i386/i386-builtin-types.def   1
-rw-r--r--  gcc/config/i386/i386-c.c                 2
-rw-r--r--  gcc/config/i386/i386.c                 291
-rw-r--r--  gcc/config/i386/i386.h                   3
-rw-r--r--  gcc/config/i386/i386.md                 99
-rw-r--r--  gcc/config/i386/i386.opt                 4
-rw-r--r--  gcc/config/i386/mwaitxintrin.h          50
-rw-r--r--  gcc/config/i386/sse.md                  10
-rw-r--r--  gcc/config/i386/x86intrin.h              1
12 files changed, 370 insertions, 129 deletions
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
index f931969459..f3ad4dbb8f 100644
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -57,6 +57,7 @@
#define bit_LWP (1 << 15)
#define bit_FMA4 (1 << 16)
#define bit_TBM (1 << 21)
+#define bit_MWAITX (1 << 29)
/* %edx */
#define bit_MMXEXT (1 << 22)
diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
index c69149d7cb..1c6c22172d 100644
--- a/gcc/config/i386/driver-i386.c
+++ b/gcc/config/i386/driver-i386.c
@@ -413,7 +413,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
unsigned int has_clflushopt = 0, has_xsavec = 0, has_xsaves = 0;
unsigned int has_avx512dq = 0, has_avx512bw = 0, has_avx512vl = 0;
unsigned int has_avx512vbmi = 0, has_avx512ifma = 0, has_clwb = 0;
- unsigned int has_pcommit = 0;
+ unsigned int has_pcommit = 0, has_mwaitx = 0;
bool arch;
@@ -532,6 +532,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
has_longmode = edx & bit_LM;
has_3dnowp = edx & bit_3DNOWP;
has_3dnow = edx & bit_3DNOW;
+ has_mwaitx = ecx & bit_MWAITX;
}
/* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv. */
@@ -953,6 +954,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
const char *avx512vbmi = has_avx512vbmi ? " -mavx512vbmi" : " -mno-avx512vbmi";
const char *clwb = has_clwb ? " -mclwb" : " -mno-clwb";
const char *pcommit = has_pcommit ? " -mpcommit" : " -mno-pcommit";
+ const char *mwaitx = has_mwaitx ? " -mmwaitx" : " -mno-mwaitx";
options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
sse4a, cx16, sahf, movbe, aes, sha, pclmul,
@@ -962,7 +964,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
fxsr, xsave, xsaveopt, avx512f, avx512er,
avx512cd, avx512pf, prefetchwt1, clflushopt,
xsavec, xsaves, avx512dq, avx512bw, avx512vl,
- avx512ifma, avx512vbmi, clwb, pcommit, NULL);
+ avx512ifma, avx512vbmi, clwb, pcommit, mwaitx, NULL);
}
done:
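
For reference, MWAITX is reported in %ecx of the extended CPUID leaf 0x80000001, the same block the hunk above extends. A minimal standalone check in the spirit of host_detect_local_cpu might look like the sketch below; have_mwaitx is a hypothetical helper, and bit_MWAITX assumes the patched <cpuid.h>.

    #include <cpuid.h>

    /* Sketch: query the extended feature leaf the driver reads and test
       the new bit (bit 29 of %ecx, per the cpuid.h hunk above).  */
    static int
    have_mwaitx (void)
    {
      unsigned int eax, ebx, ecx, edx;
      if (!__get_cpuid (0x80000001, &eax, &ebx, &ecx, &edx))
        return 0;   /* Extended leaf not available.  */
      return (ecx & bit_MWAITX) != 0;
    }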
diff --git a/gcc/config/i386/freebsd.h b/gcc/config/i386/freebsd.h
index ff73aeaaa1..6ce160ed40 100644
--- a/gcc/config/i386/freebsd.h
+++ b/gcc/config/i386/freebsd.h
@@ -59,28 +59,15 @@ along with GCC; see the file COPYING3. If not see
#define SUBTARGET_EXTRA_SPECS \
{ "fbsd_dynamic_linker", FBSD_DYNAMIC_LINKER }
-/* Provide a STARTFILE_SPEC appropriate for FreeBSD. Here we add
- the magical crtbegin.o file (see crtstuff.c) which provides part
- of the support for getting C++ file-scope static object constructed
- before entering `main'. */
-
-#undef STARTFILE_SPEC
-#define STARTFILE_SPEC \
- "%{!shared: \
- %{pg:gcrt1.o%s} %{!pg:%{p:gcrt1.o%s} \
- %{!p:%{profile:gcrt1.o%s} \
- %{!profile:crt1.o%s}}}} \
- crti.o%s %{!shared:crtbegin.o%s} %{shared:crtbeginS.o%s}"
-
-/* Provide a ENDFILE_SPEC appropriate for FreeBSD. Here we tack on
- the magical crtend.o file (see crtstuff.c) which provides part of
- the support for getting C++ file-scope static object constructed
- before entering `main', followed by a normal "finalizer" file,
- `crtn.o'. */
-
-#undef ENDFILE_SPEC
-#define ENDFILE_SPEC \
- "%{!shared:crtend.o%s} %{shared:crtendS.o%s} crtn.o%s"
+/* Use the STARTFILE_SPEC from config/freebsd-spec.h. */
+
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC FBSD_STARTFILE_SPEC
+
+/* Use the ENDFILE_SPEC from config/freebsd-spec.h. */
+
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC FBSD_ENDFILE_SPEC
/* Provide a LINK_SPEC appropriate for FreeBSD. Here we provide support
for the special GCC options -static and -shared, which allow us to
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 864d0ea23a..2459c440ba 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -595,6 +595,7 @@ DEF_FUNCTION_TYPE (VOID, PV4DI, V4DI)
DEF_FUNCTION_TYPE (VOID, PV4SF, V4SF)
DEF_FUNCTION_TYPE (VOID, PV8SF, V8SF)
DEF_FUNCTION_TYPE (VOID, UNSIGNED, UNSIGNED)
+DEF_FUNCTION_TYPE (VOID, UNSIGNED, UNSIGNED, UNSIGNED)
DEF_FUNCTION_TYPE (VOID, PV8DI, V8DI)
# Instructions returning mask
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index 03045fb723..f3f90df06e 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -432,6 +432,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__PCOMMIT__");
if (isa_flag & OPTION_MASK_ISA_CLWB)
def_or_undef (parse_in, "__CLWB__");
+ if (isa_flag & OPTION_MASK_ISA_MWAITX)
+ def_or_undef (parse_in, "__MWAITX__");
}
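
Because ix86_target_macros_internal now defines __MWAITX__ whenever OPTION_MASK_ISA_MWAITX is on, user code can guard intrinsic use at compile time. A hedged sketch (HAVE_MWAITX is a made-up convenience macro):

    /* Sketch: only pull in the MWAITX intrinsics when the macro defined
       above is visible, e.g. under -mmwaitx or target("mwaitx").  */
    #ifdef __MWAITX__
    # include <x86intrin.h>
    # define HAVE_MWAITX 1
    #else
    # define HAVE_MWAITX 0
    #endif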
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index d6c2de8198..7c28a559a4 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2374,6 +2374,7 @@ static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
+static rtx (*ix86_gen_monitorx) (rtx, rtx, rtx);
static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
@@ -2677,6 +2678,7 @@ ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
{ "-mmpx", OPTION_MASK_ISA_MPX },
{ "-mclwb", OPTION_MASK_ISA_CLWB },
{ "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
+ { "-mmwaitx", OPTION_MASK_ISA_MWAITX },
};
/* Flag options. */
@@ -2988,6 +2990,17 @@ ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
return;
}
+ if ((stringop_alg) i == rep_prefix_8_byte
+ && !TARGET_64BIT)
+ {
+ /* rep; movq isn't available in 32-bit code. */
+ error ("stringop strategy name %s specified for option %s "
+ "not supported for 32-bit code",
+ alg_name,
+ is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
+ return;
+ }
+
input_ranges[n].max = maxs;
input_ranges[n].alg = (stringop_alg) i;
if (!strcmp (align, "align"))
@@ -3179,6 +3192,7 @@ ix86_option_override_internal (bool main_args_p,
#define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
#define PTA_CLWB (HOST_WIDE_INT_1 << 55)
#define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
+#define PTA_MWAITX (HOST_WIDE_INT_1 << 57)
#define PTA_CORE2 \
(PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
@@ -3332,7 +3346,7 @@ ix86_option_override_internal (bool main_args_p,
| PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
| PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
| PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
- | PTA_MOVBE},
+ | PTA_MOVBE | PTA_MWAITX},
{"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
| PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
@@ -3776,6 +3790,9 @@ ix86_option_override_internal (bool main_args_p,
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
x86_prefetch_sse = true;
+ if (processor_alias_table[i].flags & PTA_MWAITX
+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MWAITX))
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MWAITX;
break;
}
@@ -4206,6 +4223,7 @@ ix86_option_override_internal (bool main_args_p,
ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
ix86_gen_monitor = gen_sse3_monitor_di;
+ ix86_gen_monitorx = gen_monitorx_di;
}
else
{
@@ -4218,6 +4236,7 @@ ix86_option_override_internal (bool main_args_p,
ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
ix86_gen_monitor = gen_sse3_monitor_si;
+ ix86_gen_monitorx = gen_monitorx_si;
}
#ifdef USE_IX86_CLD
@@ -4742,6 +4761,7 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
IX86_ATTR_ISA ("clwb", OPT_mclwb),
IX86_ATTR_ISA ("pcommit", OPT_mpcommit),
+ IX86_ATTR_ISA ("mwaitx", OPT_mmwaitx),
/* enum options */
IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
@@ -5867,7 +5887,10 @@ ix86_function_regparm (const_tree type, const_tree decl)
/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
DFmode (2) arguments in SSE registers for a function with the
indicated TYPE and DECL. DECL may be NULL when calling function
- indirectly or considering a libcall. Otherwise return 0. */
+ indirectly or considering a libcall. Return -1 if any FP parameter
+ should be rejected by error. This is used in situations where we imply the SSE
+ calling convention but the function is called from another function with
+ SSE disabled. Otherwise return 0. */
static int
ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
@@ -5916,14 +5939,13 @@ ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
{
/* Refuse to produce wrong code when local function with SSE enabled
is called from SSE disabled function.
- We may work hard to work out these scenarios but hopefully
- it doesnot matter in practice. */
+ FIXME: We need a way to detect these cases across ltrans partitions
+ and avoid using SSE calling conventions on local functions called
+ from functions with SSE disabled. For now at least delay the
+ warning until we know we are going to produce wrong code.
+ See PR66047. */
if (!TARGET_SSE && warn)
- {
- error ("calling %qD with SSE caling convention without "
- "SSE/SSE2 enabled", decl);
- return 0;
- }
+ return -1;
return TARGET_SSE2_P (target_opts_for_fn (target->decl)
->x_ix86_isa_flags) ? 2 : 1;
}
@@ -6118,6 +6140,7 @@ bool
ix86_function_arg_regno_p (int regno)
{
int i;
+ enum calling_abi call_abi;
const int *parm_regs;
if (TARGET_MPX && BND_REGNO_P (regno))
@@ -6143,16 +6166,18 @@ ix86_function_arg_regno_p (int regno)
/* TODO: The function should depend on current function ABI but
builtins.c would need updating then. Therefore we use the
default ABI. */
+ call_abi = ix86_cfun_abi ();
/* RAX is used as hidden argument to va_arg functions. */
- if (ix86_abi == SYSV_ABI && regno == AX_REG)
+ if (call_abi == SYSV_ABI && regno == AX_REG)
return true;
- if (ix86_abi == MS_ABI)
+ if (call_abi == MS_ABI)
parm_regs = x86_64_ms_abi_int_parameter_registers;
else
parm_regs = x86_64_int_parameter_registers;
- for (i = 0; i < (ix86_abi == MS_ABI
+
+ for (i = 0; i < (call_abi == MS_ABI
? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
if (regno == parm_regs[i])
return true;
@@ -6479,6 +6504,7 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
cum->bnd_regno = FIRST_BND_REG;
cum->bnds_in_bt = 0;
cum->force_bnd_pass = 0;
+ cum->decl = fndecl;
if (!TARGET_64BIT)
{
@@ -7424,6 +7450,7 @@ function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
HOST_WIDE_INT words)
{
int res = 0;
+ bool error_p = false;
switch (mode)
{
@@ -7456,9 +7483,13 @@ function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
gcc_unreachable ();
case DFmode:
+ if (cum->float_in_sse == -1)
+ error_p = 1;
if (cum->float_in_sse < 2)
break;
case SFmode:
+ if (cum->float_in_sse == -1)
+ error_p = 1;
if (cum->float_in_sse < 1)
break;
/* FALLTHRU */
@@ -7514,6 +7545,14 @@ function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
}
break;
}
+ if (error_p)
+ {
+ cum->float_in_sse = 0;
+ error ("calling %qD with SSE calling convention without "
+ "SSE/SSE2 enabled", cum->decl);
+ sorry ("this is a GCC bug that can be worked around by adding "
+ "attribute used to function called");
+ }
return res;
}
@@ -7646,10 +7685,11 @@ ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
(otherwise it is an extra parameter matching an ellipsis). */
static rtx
-function_arg_32 (const CUMULATIVE_ARGS *cum, machine_mode mode,
+function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
machine_mode orig_mode, const_tree type,
HOST_WIDE_INT bytes, HOST_WIDE_INT words)
{
+ bool error_p = false;
/* Avoid the AL settings for the Unix64 ABI. */
if (mode == VOIDmode)
return constm1_rtx;
@@ -7690,9 +7730,13 @@ function_arg_32 (const CUMULATIVE_ARGS *cum, machine_mode mode,
break;
case DFmode:
+ if (cum->float_in_sse == -1)
+ error_p = 1;
if (cum->float_in_sse < 2)
break;
case SFmode:
+ if (cum->float_in_sse == -1)
+ error_p = 1;
if (cum->float_in_sse < 1)
break;
/* FALLTHRU */
@@ -7751,6 +7795,14 @@ function_arg_32 (const CUMULATIVE_ARGS *cum, machine_mode mode,
}
break;
}
+ if (error_p)
+ {
+ cum->float_in_sse = 0;
+ error ("calling %qD with SSE calling convention without "
+ "SSE/SSE2 enabled", cum->decl);
+ sorry ("this is a GCC bug that can be worked around by adding "
+ "attribute used to function called");
+ }
return NULL_RTX;
}
@@ -8154,10 +8206,10 @@ ix86_function_value_regno_p (const unsigned int regno)
case AX_REG:
return true;
case DX_REG:
- return (!TARGET_64BIT || ix86_abi != MS_ABI);
+ return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
case DI_REG:
case SI_REG:
- return TARGET_64BIT && ix86_abi != MS_ABI;
+ return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
case FIRST_BND_REG:
return chkp_function_instrumented_p (current_function_decl);
@@ -8168,7 +8220,7 @@ ix86_function_value_regno_p (const unsigned int regno)
/* TODO: The function should depend on current function ABI but
builtins.c would need updating then. Therefore we use the
default ABI. */
- if (TARGET_64BIT && ix86_abi == MS_ABI)
+ if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
return false;
return TARGET_FLOAT_RETURNS_IN_80387;
@@ -8230,8 +8282,15 @@ function_value_32 (machine_mode orig_mode, machine_mode mode,
if ((fn || fntype) && (mode == SFmode || mode == DFmode))
{
int sse_level = ix86_function_sseregparm (fntype, fn, false);
- if ((sse_level >= 1 && mode == SFmode)
- || (sse_level == 2 && mode == DFmode))
+ if (sse_level == -1)
+ {
+ error ("calling %qD with SSE caling convention without "
+ "SSE/SSE2 enabled", fn);
+ sorry ("this is a GCC bug that can be worked around by adding "
+ "attribute used to function called");
+ }
+ else if ((sse_level >= 1 && mode == SFmode)
+ || (sse_level == 2 && mode == DFmode))
regno = FIRST_SSE_REG;
}
@@ -22943,7 +23002,7 @@ ix86_split_long_move (rtx operands[])
Do an lea to the last part and use only one colliding move. */
else if (collisions > 1)
{
- rtx base;
+ rtx base, addr, tls_base = NULL_RTX;
collisions = 1;
@@ -22954,10 +23013,50 @@ ix86_split_long_move (rtx operands[])
if (GET_MODE (base) != Pmode)
base = gen_rtx_REG (Pmode, REGNO (base));
- emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
+ addr = XEXP (part[1][0], 0);
+ if (TARGET_TLS_DIRECT_SEG_REFS)
+ {
+ struct ix86_address parts;
+ int ok = ix86_decompose_address (addr, &parts);
+ gcc_assert (ok);
+ if (parts.seg == DEFAULT_TLS_SEG_REG)
+ {
+ /* It is not valid to use %gs: or %fs: in
+ lea though, so we need to remove it from the
+ address used for lea and add it to each individual
+ memory load instead. */
+ addr = copy_rtx (addr);
+ rtx *x = &addr;
+ while (GET_CODE (*x) == PLUS)
+ {
+ for (i = 0; i < 2; i++)
+ {
+ rtx u = XEXP (*x, i);
+ if (GET_CODE (u) == ZERO_EXTEND)
+ u = XEXP (u, 0);
+ if (GET_CODE (u) == UNSPEC
+ && XINT (u, 1) == UNSPEC_TP)
+ {
+ tls_base = XEXP (*x, i);
+ *x = XEXP (*x, 1 - i);
+ break;
+ }
+ }
+ if (tls_base)
+ break;
+ x = &XEXP (*x, 0);
+ }
+ gcc_assert (tls_base);
+ }
+ }
+ emit_insn (gen_rtx_SET (VOIDmode, base, addr));
+ if (tls_base)
+ base = gen_rtx_PLUS (GET_MODE (base), base, tls_base);
part[1][0] = replace_equiv_address (part[1][0], base);
for (i = 1; i < nparts; i++)
{
+ if (tls_base)
+ base = copy_rtx (base);
tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
part[1][i] = replace_equiv_address (part[1][i], tmp);
}
@@ -30581,6 +30680,10 @@ enum ix86_builtins
IX86_BUILTIN_CVTPS2PH,
IX86_BUILTIN_CVTPS2PH256,
+ /* MONITORX and MWAITX instructions. */
+ IX86_BUILTIN_MONITORX,
+ IX86_BUILTIN_MWAITX,
+
/* CFString built-in for darwin */
IX86_BUILTIN_CFSTRING,
@@ -34199,6 +34302,12 @@ ix86_init_mmx_sse_builtins (void)
def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
+ /* MONITORX and MWAITX. */
+ def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx",
+ VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX);
+ def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx",
+ VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX);
+
/* Add FMA4 multi-arg argument instructions */
for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
{
@@ -38971,6 +39080,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
return 0;
case IX86_BUILTIN_MONITOR:
+ case IX86_BUILTIN_MONITORX:
arg0 = CALL_EXPR_ARG (exp, 0);
arg1 = CALL_EXPR_ARG (exp, 1);
arg2 = CALL_EXPR_ARG (exp, 2);
@@ -38983,7 +39093,10 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
op1 = copy_to_mode_reg (SImode, op1);
if (!REG_P (op2))
op2 = copy_to_mode_reg (SImode, op2);
- emit_insn (ix86_gen_monitor (op0, op1, op2));
+
+ emit_insn (fcode == IX86_BUILTIN_MONITOR
+ ? ix86_gen_monitor (op0, op1, op2)
+ : ix86_gen_monitorx (op0, op1, op2));
return 0;
case IX86_BUILTIN_MWAIT:
@@ -38998,6 +39111,22 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
emit_insn (gen_sse3_mwait (op0, op1));
return 0;
+ case IX86_BUILTIN_MWAITX:
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ arg2 = CALL_EXPR_ARG (exp, 2);
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+ if (!REG_P (op0))
+ op0 = copy_to_mode_reg (SImode, op0);
+ if (!REG_P (op1))
+ op1 = copy_to_mode_reg (SImode, op1);
+ if (!REG_P (op2))
+ op2 = copy_to_mode_reg (SImode, op2);
+ emit_insn (gen_mwaitx (op0, op1, op2));
+ return 0;
+
case IX86_BUILTIN_VEC_INIT_V2SI:
case IX86_BUILTIN_VEC_INIT_V4HI:
case IX86_BUILTIN_VEC_INIT_V8QI:
@@ -44740,6 +44869,8 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{ gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
};
int i, j, n;
+ machine_mode mmode = VOIDmode;
+ rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
switch (mode)
{
@@ -44956,81 +45087,65 @@ half:
case V8DFmode:
if (TARGET_AVX512F)
{
- tmp = gen_reg_rtx (mode);
- emit_insn (gen_rtx_SET (VOIDmode, tmp,
- gen_rtx_VEC_DUPLICATE (mode, val)));
- emit_insn (gen_avx512f_blendmv8df (target, tmp, target,
- force_reg (QImode, GEN_INT (1 << elt))));
- return;
+ mmode = QImode;
+ gen_blendm = gen_avx512f_blendmv8df;
}
- else
- break;
+ break;
+
case V8DImode:
if (TARGET_AVX512F)
{
- tmp = gen_reg_rtx (mode);
- emit_insn (gen_rtx_SET (VOIDmode, tmp,
- gen_rtx_VEC_DUPLICATE (mode, val)));
- emit_insn (gen_avx512f_blendmv8di (target, tmp, target,
- force_reg (QImode, GEN_INT (1 << elt))));
- return;
+ mmode = QImode;
+ gen_blendm = gen_avx512f_blendmv8di;
}
- else
- break;
+ break;
+
case V16SFmode:
if (TARGET_AVX512F)
{
- tmp = gen_reg_rtx (mode);
- emit_insn (gen_rtx_SET (VOIDmode, tmp,
- gen_rtx_VEC_DUPLICATE (mode, val)));
- emit_insn (gen_avx512f_blendmv16sf (target, tmp, target,
- force_reg (HImode, GEN_INT (1 << elt))));
- return;
+ mmode = HImode;
+ gen_blendm = gen_avx512f_blendmv16sf;
}
- else
- break;
+ break;
+
case V16SImode:
if (TARGET_AVX512F)
{
- tmp = gen_reg_rtx (mode);
- emit_insn (gen_rtx_SET (VOIDmode, tmp,
- gen_rtx_VEC_DUPLICATE (mode, val)));
- emit_insn (gen_avx512f_blendmv16si (target, tmp, target,
- force_reg (HImode, GEN_INT (1 << elt))));
- return;
+ mmode = HImode;
+ gen_blendm = gen_avx512f_blendmv16si;
}
- else
- break;
+ break;
+
case V32HImode:
if (TARGET_AVX512F && TARGET_AVX512BW)
{
- tmp = gen_reg_rtx (mode);
- emit_insn (gen_rtx_SET (VOIDmode, tmp,
- gen_rtx_VEC_DUPLICATE (mode, val)));
- emit_insn (gen_avx512bw_blendmv32hi (target, tmp, target,
- force_reg (SImode, GEN_INT (1 << elt))));
- return;
+ mmode = SImode;
+ gen_blendm = gen_avx512bw_blendmv32hi;
}
- else
- break;
+ break;
+
case V64QImode:
if (TARGET_AVX512F && TARGET_AVX512BW)
{
- tmp = gen_reg_rtx (mode);
- emit_insn (gen_rtx_SET (VOIDmode, tmp,
- gen_rtx_VEC_DUPLICATE (mode, val)));
- emit_insn (gen_avx512bw_blendmv64qi (target, tmp, target,
- force_reg (DImode, GEN_INT (1 << elt))));
- return;
+ mmode = DImode;
+ gen_blendm = gen_avx512bw_blendmv64qi;
}
- else
- break;
+ break;
default:
break;
}
- if (use_vec_merge)
+ if (mmode != VOIDmode)
+ {
+ tmp = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp,
+ gen_rtx_VEC_DUPLICATE (mode, val)));
+ emit_insn (gen_blendm (target, tmp, target,
+ force_reg (mmode,
+ gen_int_mode (1 << elt, mmode))));
+ }
+ else if (use_vec_merge)
{
tmp = gen_rtx_VEC_DUPLICATE (mode, val);
tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
@@ -46892,15 +47007,16 @@ expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
static bool
expand_vec_perm_blend (struct expand_vec_perm_d *d)
{
- machine_mode vmode = d->vmode;
+ machine_mode mmode, vmode = d->vmode;
unsigned i, mask, nelt = d->nelt;
- rtx target, op0, op1, x;
+ rtx target, op0, op1, maskop, x;
rtx rperm[32], vperm;
if (d->one_operand_p)
return false;
if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
- && GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4)
+ && (TARGET_AVX512BW
+ || GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4))
;
else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
;
@@ -47074,8 +47190,33 @@ expand_vec_perm_blend (struct expand_vec_perm_d *d)
gcc_unreachable ();
}
+ switch (vmode)
+ {
+ case V8DFmode:
+ case V8DImode:
+ mmode = QImode;
+ break;
+ case V16SFmode:
+ case V16SImode:
+ mmode = HImode;
+ break;
+ case V32HImode:
+ mmode = SImode;
+ break;
+ case V64QImode:
+ mmode = DImode;
+ break;
+ default:
+ mmode = VOIDmode;
+ }
+
+ if (mmode != VOIDmode)
+ maskop = force_reg (mmode, gen_int_mode (mask, mmode));
+ else
+ maskop = GEN_INT (mask);
+
/* This matches five different patterns with the different modes. */
- x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
+ x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop);
x = gen_rtx_SET (VOIDmode, target, x);
emit_insn (x);
if (target != d->target)
@@ -51606,7 +51747,7 @@ ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
for (i = 0; i < loop->num_nodes; i++)
FOR_BB_INSNS (bbs[i], insn)
if (NONDEBUG_INSN_P (insn))
- FOR_EACH_SUBRTX (iter, array, insn, NONCONST)
+ FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
if (const_rtx x = *iter)
if (MEM_P (x))
{
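
The builtins registered above take (pointer, extensions, hints) for __builtin_ia32_monitorx and (extensions, hints, clock) for __builtin_ia32_mwaitx, and "mwaitx" is now accepted by the target attribute, so they can be enabled per function. A rough usage sketch, not part of the patch (wait_on is a hypothetical function; all-zero arguments simply arm the monitor and wait with no extensions):

    /* Sketch: enable MWAITX for one function via the new "mwaitx"
       attribute string and spin on a flag with MONITORX/MWAITX.  */
    __attribute__ ((target ("mwaitx")))
    static void
    wait_on (volatile int *flag)
    {
      while (*flag == 0)
        {
          __builtin_ia32_monitorx ((const void *) flag, 0, 0);
          if (*flag == 0)
            __builtin_ia32_mwaitx (0, 0, 0);
        }
    }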
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 1e755d3a35..de43f06bef 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -154,6 +154,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define TARGET_PCOMMIT_P(x) TARGET_ISA_PCOMMIT_P(x)
#define TARGET_CLWB TARGET_ISA_CLWB
#define TARGET_CLWB_P(x) TARGET_ISA_CLWB_P(x)
+#define TARGET_MWAITX TARGET_ISA_MWAITX
+#define TARGET_MWAITX_P(x) TARGET_ISA_MWAITX_P(x)
#define TARGET_LP64 TARGET_ABI_64
#define TARGET_LP64_P(x) TARGET_ABI_64_P(x)
@@ -1682,6 +1684,7 @@ typedef struct ix86_args {
int stdarg; /* Set to 1 if function is stdarg. */
enum calling_abi call_abi; /* Set to SYSV_ABI for sysv abi. Otherwise
MS_ABI for ms abi. */
+ tree decl; /* Callee decl. */
} CUMULATIVE_ARGS;
/* Initialize a variable CUM of type CUMULATIVE_ARGS
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index e1c82fefc0..6b6f44c8a1 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -261,6 +261,11 @@
;; For CLFLUSHOPT support
UNSPECV_CLFLUSHOPT
+
+ ;; For MONITORX and MWAITX support
+ UNSPECV_MONITORX
+ UNSPECV_MWAITX
+
])
;; Constants to represent rounding modes in the ROUND instruction
@@ -10843,6 +10848,7 @@
[(set (match_dup 2) (match_dup 1))
(set (match_dup 0) (zero_extend:DI (match_dup 2)))]
{
+ operands[1] = shallow_copy_rtx (operands[1]);
PUT_MODE (operands[1], QImode);
operands[2] = gen_lowpart (QImode, operands[0]);
})
@@ -10860,6 +10866,7 @@
(parallel [(set (match_dup 0) (zero_extend:SI (match_dup 2)))
(clobber (reg:CC FLAGS_REG))])]
{
+ operands[1] = shallow_copy_rtx (operands[1]);
PUT_MODE (operands[1], QImode);
operands[2] = gen_lowpart (QImode, operands[0]);
})
@@ -10875,6 +10882,7 @@
[(set (match_dup 2) (match_dup 1))
(set (match_dup 0) (zero_extend:SI (match_dup 2)))]
{
+ operands[1] = shallow_copy_rtx (operands[1]);
PUT_MODE (operands[1], QImode);
operands[2] = gen_lowpart (QImode, operands[0]);
})
@@ -10912,7 +10920,10 @@
(const_int 0)))]
""
[(set (match_dup 0) (match_dup 1))]
- "PUT_MODE (operands[1], QImode);")
+{
+ operands[1] = shallow_copy_rtx (operands[1]);
+ PUT_MODE (operands[1], QImode);
+})
(define_split
[(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand"))
@@ -10921,7 +10932,10 @@
(const_int 0)))]
""
[(set (match_dup 0) (match_dup 1))]
- "PUT_MODE (operands[1], QImode);")
+{
+ operands[1] = shallow_copy_rtx (operands[1]);
+ PUT_MODE (operands[1], QImode);
+})
(define_split
[(set (match_operand:QI 0 "nonimmediate_operand")
@@ -10931,15 +10945,15 @@
""
[(set (match_dup 0) (match_dup 1))]
{
- rtx new_op1 = copy_rtx (operands[1]);
- operands[1] = new_op1;
- PUT_MODE (new_op1, QImode);
- PUT_CODE (new_op1, ix86_reverse_condition (GET_CODE (new_op1),
- GET_MODE (XEXP (new_op1, 0))));
+ operands[1] = shallow_copy_rtx (operands[1]);
+ PUT_MODE (operands[1], QImode);
+ PUT_CODE (operands[1],
+ ix86_reverse_condition (GET_CODE (operands[1]),
+ GET_MODE (XEXP (operands[1], 0))));
/* Make sure that (a) the CCmode we have for the flags is strong
enough for the reversed compare or (b) we have a valid FP compare. */
- if (! ix86_comparison_operator (new_op1, VOIDmode))
+ if (! ix86_comparison_operator (operands[1], VOIDmode))
FAIL;
})
@@ -10951,15 +10965,15 @@
""
[(set (match_dup 0) (match_dup 1))]
{
- rtx new_op1 = copy_rtx (operands[1]);
- operands[1] = new_op1;
- PUT_MODE (new_op1, QImode);
- PUT_CODE (new_op1, ix86_reverse_condition (GET_CODE (new_op1),
- GET_MODE (XEXP (new_op1, 0))));
+ operands[1] = shallow_copy_rtx (operands[1]);
+ PUT_MODE (operands[1], QImode);
+ PUT_CODE (operands[1],
+ ix86_reverse_condition (GET_CODE (operands[1]),
+ GET_MODE (XEXP (operands[1], 0))));
/* Make sure that (a) the CCmode we have for the flags is strong
enough for the reversed compare or (b) we have a valid FP compare. */
- if (! ix86_comparison_operator (new_op1, VOIDmode))
+ if (! ix86_comparison_operator (operands[1], VOIDmode))
FAIL;
})
@@ -11078,7 +11092,10 @@
(if_then_else (match_dup 0)
(label_ref (match_dup 1))
(pc)))]
- "PUT_MODE (operands[0], VOIDmode);")
+{
+ operands[0] = shallow_copy_rtx (operands[0]);
+ PUT_MODE (operands[0], VOIDmode);
+})
(define_split
[(set (pc)
@@ -11093,15 +11110,15 @@
(label_ref (match_dup 1))
(pc)))]
{
- rtx new_op0 = copy_rtx (operands[0]);
- operands[0] = new_op0;
- PUT_MODE (new_op0, VOIDmode);
- PUT_CODE (new_op0, ix86_reverse_condition (GET_CODE (new_op0),
- GET_MODE (XEXP (new_op0, 0))));
+ operands[0] = shallow_copy_rtx (operands[0]);
+ PUT_MODE (operands[0], VOIDmode);
+ PUT_CODE (operands[0],
+ ix86_reverse_condition (GET_CODE (operands[0]),
+ GET_MODE (XEXP (operands[0], 0))));
/* Make sure that (a) the CCmode we have for the flags is strong
enough for the reversed compare or (b) we have a valid FP compare. */
- if (! ix86_comparison_operator (new_op0, VOIDmode))
+ if (! ix86_comparison_operator (operands[0], VOIDmode))
FAIL;
})
@@ -11138,7 +11155,7 @@
(pc)))]
{
operands[2] = simplify_gen_subreg (<MODE>mode, operands[2], QImode, 0);
-
+ operands[0] = shallow_copy_rtx (operands[0]);
PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
})
@@ -11171,7 +11188,7 @@
(pc)))]
{
operands[2] = simplify_gen_subreg (<MODE>mode, operands[2], SImode, 0);
-
+ operands[0] = shallow_copy_rtx (operands[0]);
PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
})
@@ -11207,7 +11224,7 @@
(pc)))]
{
operands[2] = simplify_gen_subreg (<MODE>mode, operands[2], SImode, 0);
-
+ operands[0] = shallow_copy_rtx (operands[0]);
PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
})
@@ -11239,7 +11256,7 @@
(pc)))]
{
operands[2] = simplify_gen_subreg (SImode, operands[2], QImode, 0);
-
+ operands[0] = shallow_copy_rtx (operands[0]);
PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
})
@@ -11275,7 +11292,10 @@
(if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
(label_ref (match_dup 4))
(pc)))]
- "PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));")
+{
+ operands[0] = shallow_copy_rtx (operands[0]);
+ PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
+})
;; Define combination compare-and-branch fp compare instructions to help
;; combine.
@@ -17241,6 +17261,7 @@
operands[1] = gen_lowpart (SImode, operands[1]);
if (GET_CODE (operands[3]) != ASHIFT)
operands[2] = gen_lowpart (SImode, operands[2]);
+ operands[3] = shallow_copy_rtx (operands[3]);
PUT_MODE (operands[3], SImode);
})
@@ -18847,6 +18868,32 @@
(set_attr "atom_sse_attr" "fence")
(set_attr "memory" "unknown")])
+;; MONITORX and MWAITX
+(define_insn "mwaitx"
+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")
+ (match_operand:SI 1 "register_operand" "a")
+ (match_operand:SI 2 "register_operand" "b")]
+ UNSPECV_MWAITX)]
+ "TARGET_MWAITX"
+;; 64bit version is "mwaitx %rax,%rcx,%rbx". But only lower 32bits are used.
+;; Since 32bit register operands are implicitly zero extended to 64bit,
+;; we only need to set up 32bit registers.
+ "mwaitx"
+ [(set_attr "length" "3")])
+
+(define_insn "monitorx_<mode>"
+ [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
+ (match_operand:SI 1 "register_operand" "c")
+ (match_operand:SI 2 "register_operand" "d")]
+ UNSPECV_MONITORX)]
+ "TARGET_MWAITX"
+;; 64bit version is "monitorx %rax,%rcx,%rdx". But only lower 32bits in
+;; RCX and RDX are used. Since 32bit register operands are implicitly
+;; zero extended to 64bit, we only need to set up 32bit registers.
+ "%^monitorx"
+ [(set (attr "length")
+ (symbol_ref ("(Pmode != word_mode) + 3")))])
+
;; MPX instructions
(define_expand "<mode>_mk"
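
The fixed-register constraints above ("a"/"c"/"d" for monitorx, "c"/"a"/"b" for mwaitx) are the whole contract of these patterns. As a rough inline-asm illustration of the same register placement (a sketch only, not part of the patch, and it assumes an assembler that already knows the mnemonics):

    /* Sketch: the register placement the new patterns encode.
       monitorx: address in %rax/%eax, extensions in %ecx, hints in %edx.
       mwaitx:   extensions in %ecx, hints in %eax, timeout in %ebx.  */
    static inline void
    monitorx_asm (const void *addr, unsigned ext, unsigned hints)
    {
      __asm__ __volatile__ ("monitorx" :: "a" (addr), "c" (ext), "d" (hints));
    }

    static inline void
    mwaitx_asm (unsigned ext, unsigned hints, unsigned clock)
    {
      __asm__ __volatile__ ("mwaitx" :: "c" (ext), "a" (hints), "b" (clock));
    }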
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 301430c238..dd46e26de3 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -859,6 +859,10 @@ mmpx
Target Report Mask(ISA_MPX) Var(ix86_isa_flags) Save
Support MPX code generation
+mmwaitx
+Target Report Mask(ISA_MWAITX) Var(ix86_isa_flags) Save
+Support MWAITX and MONITORX built-in functions and code generation
+
mstack-protector-guard=
Target RejectNegative Joined Enum(stack_protector_guard) Var(ix86_stack_protector_guard) Init(SSP_TLS)
Use given stack-protector guard
diff --git a/gcc/config/i386/mwaitxintrin.h b/gcc/config/i386/mwaitxintrin.h
new file mode 100644
index 0000000000..d7112dad20
--- /dev/null
+++ b/gcc/config/i386/mwaitxintrin.h
@@ -0,0 +1,50 @@
+/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _MWAITXINTRIN_H_INCLUDED
+#define _MWAITXINTRIN_H_INCLUDED
+
+#ifndef __MWAITX__
+#pragma GCC push_options
+#pragma GCC target("mwaitx")
+#define __DISABLE_MWAITX__
+#endif /* __MWAITX__ */
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_monitorx (void const * __P, unsigned int __E, unsigned int __H)
+{
+ __builtin_ia32_monitorx (__P, __E, __H);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mwaitx (unsigned int __E, unsigned int __H, unsigned int __C)
+{
+ __builtin_ia32_mwaitx (__E, __H, __C);
+}
+
+#ifdef __DISABLE_MWAITX__
+#undef __DISABLE_MWAITX__
+#pragma GCC pop_options
+#endif /* __DISABLE_MWAITX__ */
+
+#endif /* _MWAITXINTRIN_H_INCLUDED */
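
A short usage sketch of the new intrinsics follows; the argument order matches the builtins (_mm_monitorx takes address, extensions, hints; _mm_mwaitx takes extensions, hints, clock). wait_for_store is a hypothetical helper, and the timer-enable bit (ECX bit 1) plus the TSC-tick timeout come from AMD's MWAITX documentation rather than from this patch; build with -mmwaitx.

    #include <x86intrin.h>

    /* Sketch: arm a monitor on *addr and wait for a store, bounded by
       roughly `ticks` TSC cycles via the MWAITX timer extension.  */
    static void
    wait_for_store (volatile unsigned *addr, unsigned ticks)
    {
      _mm_monitorx ((void const *) addr, 0, 0);
      if (*addr == 0)   /* Re-check after arming the monitor.  */
        _mm_mwaitx (0x2 /* enable timer */, 0 /* hints */, ticks);
    }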
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 6d3b54a28c..58caf1aeb9 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -9523,7 +9523,7 @@
(mult:V4DI
(sign_extend:V4DI
(vec_select:V4SI
- (match_operand:V8SI 1 "nonimmediate_operand" "v")
+ (match_operand:V8SI 1 "nonimmediate_operand" "%v")
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)])))
(sign_extend:V4DI
@@ -13161,10 +13161,12 @@
(set_attr "atom_sse_attr" "fence")
(set_attr "memory" "unknown")])
-
+;; As per AMD and Intel ISA manuals, the first operand is extensions
+;; and it goes to %ecx. The second operand is hints and it goes
+;; to %eax.
(define_insn "sse3_mwait"
- [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
- (match_operand:SI 1 "register_operand" "c")]
+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")
+ (match_operand:SI 1 "register_operand" "a")]
UNSPECV_MWAIT)]
"TARGET_SSE3"
;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
diff --git a/gcc/config/i386/x86intrin.h b/gcc/config/i386/x86intrin.h
index a51188679e..6f7b1f66a6 100644
--- a/gcc/config/i386/x86intrin.h
+++ b/gcc/config/i386/x86intrin.h
@@ -85,4 +85,5 @@
#include <xsavecintrin.h>
+#include <mwaitxintrin.h>
#endif /* _X86INTRIN_H_INCLUDED */