summaryrefslogtreecommitdiff
path: root/gcc/config/i386
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config/i386')
-rw-r--r--gcc/config/i386/avx512fintrin.h76
-rw-r--r--gcc/config/i386/i386-expand.cc77
-rw-r--r--gcc/config/i386/i386.cc10
-rw-r--r--gcc/config/i386/smmintrin.h25
-rw-r--r--gcc/config/i386/sse.md34
5 files changed, 138 insertions, 84 deletions
diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index 29511fd2831..77d6249c2bc 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -3286,31 +3286,67 @@ _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
(__mmask8) __U, __R);
}
#else
-#define _mm512_scalef_round_pd(A, B, C) \
- (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
-
-#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
- (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C)
-
-#define _mm512_maskz_scalef_round_pd(U, A, B, C) \
- (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
+#define _mm512_scalef_round_pd(A, B, C) \
+ ((__m512d) \
+ __builtin_ia32_scalefpd512_mask((A), (B), \
+ (__v8df) _mm512_undefined_pd(), \
+ -1, (C)))
+
+#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
+ ((__m512d) __builtin_ia32_scalefpd512_mask((A), (B), (W), (U), (C)))
+
+#define _mm512_maskz_scalef_round_pd(U, A, B, C) \
+ ((__m512d) \
+ __builtin_ia32_scalefpd512_mask((A), (B), \
+ (__v8df) _mm512_setzero_pd(), \
+ (U), (C)))
+
+#define _mm512_scalef_round_ps(A, B, C) \
+ ((__m512) \
+ __builtin_ia32_scalefps512_mask((A), (B), \
+ (__v16sf) _mm512_undefined_ps(), \
+ -1, (C)))
+
+#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
+ ((__m512) __builtin_ia32_scalefps512_mask((A), (B), (W), (U), (C)))
+
+#define _mm512_maskz_scalef_round_ps(U, A, B, C) \
+ ((__m512) \
+ __builtin_ia32_scalefps512_mask((A), (B), \
+ (__v16sf) _mm512_setzero_ps(), \
+ (U), (C)))
+
+#define _mm_scalef_round_sd(A, B, C) \
+ ((__m128d) \
+ __builtin_ia32_scalefsd_mask_round ((A), (B), \
+ (__v2df) _mm_undefined_pd (), \
+ -1, (C)))
-#define _mm512_scalef_round_ps(A, B, C) \
- (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
+#define _mm_scalef_round_ss(A, B, C) \
+ ((__m128) \
+ __builtin_ia32_scalefss_mask_round ((A), (B), \
+ (__v4sf) _mm_undefined_ps (), \
+ -1, (C)))
-#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
- (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C)
+#define _mm_mask_scalef_round_sd(W, U, A, B, C) \
+ ((__m128d) \
+ __builtin_ia32_scalefsd_mask_round ((A), (B), (W), (U), (C)))
-#define _mm512_maskz_scalef_round_ps(U, A, B, C) \
- (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
+#define _mm_mask_scalef_round_ss(W, U, A, B, C) \
+ ((__m128) \
+ __builtin_ia32_scalefss_mask_round ((A), (B), (W), (U), (C)))
-#define _mm_scalef_round_sd(A, B, C) \
- (__m128d)__builtin_ia32_scalefsd_mask_round (A, B, \
- (__v2df)_mm_setzero_pd (), -1, C)
+#define _mm_maskz_scalef_round_sd(U, A, B, C) \
+ ((__m128d) \
+ __builtin_ia32_scalefsd_mask_round ((A), (B), \
+ (__v2df) _mm_setzero_pd (), \
+ (U), (C)))
-#define _mm_scalef_round_ss(A, B, C) \
- (__m128)__builtin_ia32_scalefss_mask_round (A, B, \
- (__v4sf)_mm_setzero_ps (), -1, C)
+#define _mm_maskz_scalef_round_ss(U, A, B, C) \
+ ((__m128) \
+ __builtin_ia32_scalefss_mask_round ((A), (B), \
+ (__v4sf) _mm_setzero_ps (), \
+ (U), (C)))
#endif
#define _mm_mask_scalef_sd(W, U, A, B) \
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 794315ee2f7..31780b6daf7 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -3136,6 +3136,8 @@ ix86_expand_int_movcc (rtx operands[])
bool sign_bit_compare_p = false;
rtx op0 = XEXP (operands[1], 0);
rtx op1 = XEXP (operands[1], 1);
+ rtx op2 = operands[2];
+ rtx op3 = operands[3];
if (GET_MODE (op0) == TImode
|| (GET_MODE (op0) == DImode
@@ -3153,17 +3155,29 @@ ix86_expand_int_movcc (rtx operands[])
|| (op1 == constm1_rtx && (code == GT || code == LE)))
sign_bit_compare_p = true;
+ /* op0 == op1 ? op0 : op3 is equivalent to op0 == op1 ? op1 : op3,
+ but if op1 is a constant, the latter form allows more optimizations,
+ either through the last 2 ops being constant handling, or the one
+ constant and one variable cases. On the other side, for cmov the
+ former might be better as we don't need to load the constant into
+ another register. */
+ if (code == EQ && CONST_INT_P (op1) && rtx_equal_p (op0, op2))
+ op2 = op1;
+ /* Similarly for op0 != op1 ? op2 : op0 and op0 != op1 ? op2 : op1. */
+ else if (code == NE && CONST_INT_P (op1) && rtx_equal_p (op0, op3))
+ op3 = op1;
+
/* Don't attempt mode expansion here -- if we had to expand 5 or 6
HImode insns, we'd be swallowed in word prefix ops. */
if ((mode != HImode || TARGET_FAST_PREFIX)
&& (mode != (TARGET_64BIT ? TImode : DImode))
- && CONST_INT_P (operands[2])
- && CONST_INT_P (operands[3]))
+ && CONST_INT_P (op2)
+ && CONST_INT_P (op3))
{
rtx out = operands[0];
- HOST_WIDE_INT ct = INTVAL (operands[2]);
- HOST_WIDE_INT cf = INTVAL (operands[3]);
+ HOST_WIDE_INT ct = INTVAL (op2);
+ HOST_WIDE_INT cf = INTVAL (op3);
HOST_WIDE_INT diff;
diff = ct - cf;
@@ -3559,6 +3573,9 @@ ix86_expand_int_movcc (rtx operands[])
if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
return false;
+ operands[2] = op2;
+ operands[3] = op3;
+
/* If one of the two operands is an interesting constant, load a
constant with the above and mask it in with a logical operation. */
@@ -17036,7 +17053,8 @@ ix86_emit_fp_unordered_jump (rtx label)
/* Output code to perform a sinh XFmode calculation. */
-void ix86_emit_i387_sinh (rtx op0, rtx op1)
+void
+ix86_emit_i387_sinh (rtx op0, rtx op1)
{
rtx e1 = gen_reg_rtx (XFmode);
rtx e2 = gen_reg_rtx (XFmode);
@@ -17084,7 +17102,8 @@ void ix86_emit_i387_sinh (rtx op0, rtx op1)
/* Output code to perform a cosh XFmode calculation. */
-void ix86_emit_i387_cosh (rtx op0, rtx op1)
+void
+ix86_emit_i387_cosh (rtx op0, rtx op1)
{
rtx e1 = gen_reg_rtx (XFmode);
rtx e2 = gen_reg_rtx (XFmode);
@@ -17106,7 +17125,8 @@ void ix86_emit_i387_cosh (rtx op0, rtx op1)
/* Output code to perform a tanh XFmode calculation. */
-void ix86_emit_i387_tanh (rtx op0, rtx op1)
+void
+ix86_emit_i387_tanh (rtx op0, rtx op1)
{
rtx e1 = gen_reg_rtx (XFmode);
rtx e2 = gen_reg_rtx (XFmode);
@@ -17152,7 +17172,8 @@ void ix86_emit_i387_tanh (rtx op0, rtx op1)
/* Output code to perform an asinh XFmode calculation. */
-void ix86_emit_i387_asinh (rtx op0, rtx op1)
+void
+ix86_emit_i387_asinh (rtx op0, rtx op1)
{
rtx e1 = gen_reg_rtx (XFmode);
rtx e2 = gen_reg_rtx (XFmode);
@@ -17204,7 +17225,8 @@ void ix86_emit_i387_asinh (rtx op0, rtx op1)
/* Output code to perform an acosh XFmode calculation. */
-void ix86_emit_i387_acosh (rtx op0, rtx op1)
+void
+ix86_emit_i387_acosh (rtx op0, rtx op1)
{
rtx e1 = gen_reg_rtx (XFmode);
rtx e2 = gen_reg_rtx (XFmode);
@@ -17230,7 +17252,8 @@ void ix86_emit_i387_acosh (rtx op0, rtx op1)
/* Output code to perform an atanh XFmode calculation. */
-void ix86_emit_i387_atanh (rtx op0, rtx op1)
+void
+ix86_emit_i387_atanh (rtx op0, rtx op1)
{
rtx e1 = gen_reg_rtx (XFmode);
rtx e2 = gen_reg_rtx (XFmode);
@@ -17281,7 +17304,8 @@ void ix86_emit_i387_atanh (rtx op0, rtx op1)
/* Output code to perform a log1p XFmode calculation. */
-void ix86_emit_i387_log1p (rtx op0, rtx op1)
+void
+ix86_emit_i387_log1p (rtx op0, rtx op1)
{
rtx_code_label *label1 = gen_label_rtx ();
rtx_code_label *label2 = gen_label_rtx ();
@@ -17291,6 +17315,11 @@ void ix86_emit_i387_log1p (rtx op0, rtx op1)
rtx cst, cstln2, cst1;
rtx_insn *insn;
+ /* The emit_jump call emits pending stack adjust, make sure it is emitted
+ before the conditional jump, otherwise the stack adjustment will be
+ only conditional. */
+ do_pending_stack_adjust ();
+
cst = const_double_from_real_value
(REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode), XFmode);
cstln2 = force_reg (XFmode, standard_80387_constant_rtx (4)); /* fldln2 */
@@ -17320,7 +17349,8 @@ void ix86_emit_i387_log1p (rtx op0, rtx op1)
}
/* Emit code for round calculation. */
-void ix86_emit_i387_round (rtx op0, rtx op1)
+void
+ix86_emit_i387_round (rtx op0, rtx op1)
{
machine_mode inmode = GET_MODE (op1);
machine_mode outmode = GET_MODE (op0);
@@ -17434,7 +17464,8 @@ void ix86_emit_i387_round (rtx op0, rtx op1)
/* Output code to perform a Newton-Raphson approximation of a single precision
floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
-void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
+void
+ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
{
rtx x0, x1, e0, e1;
@@ -17485,7 +17516,8 @@ void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
/* Output code to perform a Newton-Raphson approximation of a
single precision floating point [reciprocal] square root. */
-void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip)
+void
+ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip)
{
rtx x0, e0, e1, e2, e3, mthree, mhalf;
REAL_VALUE_TYPE r;
@@ -23240,9 +23272,10 @@ ix86_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
*rem_p = rem;
}
-void ix86_expand_atomic_fetch_op_loop (rtx target, rtx mem, rtx val,
- enum rtx_code code, bool after,
- bool doubleword)
+void
+ix86_expand_atomic_fetch_op_loop (rtx target, rtx mem, rtx val,
+ enum rtx_code code, bool after,
+ bool doubleword)
{
rtx old_reg, new_reg, old_mem, success;
machine_mode mode = GET_MODE (target);
@@ -23286,10 +23319,11 @@ void ix86_expand_atomic_fetch_op_loop (rtx target, rtx mem, rtx val,
it will be relaxed to an atomic load + compare, and skip
cmpxchg instruction if mem != exp_input. */
-void ix86_expand_cmpxchg_loop (rtx *ptarget_bool, rtx target_val,
- rtx mem, rtx exp_input, rtx new_input,
- rtx mem_model, bool doubleword,
- rtx_code_label *loop_label)
+void
+ix86_expand_cmpxchg_loop (rtx *ptarget_bool, rtx target_val,
+ rtx mem, rtx exp_input, rtx new_input,
+ rtx mem_model, bool doubleword,
+ rtx_code_label *loop_label)
{
rtx_code_label *cmp_label = NULL;
rtx_code_label *done_label = NULL;
@@ -23388,6 +23422,7 @@ void ix86_expand_cmpxchg_loop (rtx *ptarget_bool, rtx target_val,
/* If mem is not expected, pause and loop back. */
emit_label (cmp_label);
+ emit_move_insn (target_val, new_mem);
emit_insn (gen_pause ());
emit_jump_insn (gen_jump (loop_label));
emit_barrier ();
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index c959b7144de..b16df5b183e 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -4891,6 +4891,7 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
{
int i, prev_size = 0;
tree temp = create_tmp_var (type, "va_arg_tmp");
+ TREE_ADDRESSABLE (temp) = 1;
/* addr = &temp; */
t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
@@ -6524,7 +6525,8 @@ ix86_initial_elimination_offset (int from, int to)
}
/* Emits a warning for unsupported msabi to sysv pro/epilogues. */
-void warn_once_call_ms2sysv_xlogues (const char *feature)
+void
+warn_once_call_ms2sysv_xlogues (const char *feature)
{
static bool warned_once = false;
if (!warned_once)
@@ -18806,7 +18808,8 @@ ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
return NULL_TREE;
}
- tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
+ tree fndecl = mathfn_built_in (el_mode == DFmode
+ ? double_type_node : float_type_node, fn);
bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
@@ -18898,7 +18901,8 @@ ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
return NULL_TREE;
}
- tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
+ tree fndecl = mathfn_built_in (el_mode == DFmode
+ ? double_type_node : float_type_node, fn);
bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
sprintf (name + 7, "%s", bname+10);
diff --git a/gcc/config/i386/smmintrin.h b/gcc/config/i386/smmintrin.h
index b42b212300f..eb6a451c10a 100644
--- a/gcc/config/i386/smmintrin.h
+++ b/gcc/config/i386/smmintrin.h
@@ -810,17 +810,11 @@ _mm_cmpgt_epi64 (__m128i __X, __m128i __Y)
#include <popcntintrin.h>
-#ifndef __SSE4_1__
+#ifndef __CRC32__
#pragma GCC push_options
-#pragma GCC target("sse4.1")
-#define __DISABLE_SSE4_1__
-#endif /* __SSE4_1__ */
-
-#ifndef __SSE4_2__
-#pragma GCC push_options
-#pragma GCC target("sse4.2")
-#define __DISABLE_SSE4_2__
-#endif /* __SSE4_1__ */
+#pragma GCC target("crc32")
+#define __DISABLE_CRC32__
+#endif /* __CRC32__ */
/* Accumulate CRC32 (polynomial 0x11EDC6F41) value. */
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -849,14 +843,9 @@ _mm_crc32_u64 (unsigned long long __C, unsigned long long __V)
}
#endif
-#ifdef __DISABLE_SSE4_2__
-#undef __DISABLE_SSE4_2__
+#ifdef __DISABLE_CRC32__
+#undef __DISABLE_CRC32__
#pragma GCC pop_options
-#endif /* __DISABLE_SSE4_2__ */
-
-#ifdef __DISABLE_SSE4_1__
-#undef __DISABLE_SSE4_1__
-#pragma GCC pop_options
-#endif /* __DISABLE_SSE4_1__ */
+#endif /* __DISABLE_CRC32__ */
#endif /* _SMMINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index a852c166a58..5e93aa23b47 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -327,9 +327,7 @@
;; 128-, 256- and 512-bit float vector modes for bitwise operations
(define_mode_iterator VFB
- [(V32HF "TARGET_AVX512FP16")
- (V16HF "TARGET_AVX512FP16")
- (V8HF "TARGET_AVX512FP16")
+ [(V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") (V8HF "TARGET_SSE2")
(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
@@ -340,8 +338,7 @@
;; 128- and 256-bit float vector modes for bitwise operations
(define_mode_iterator VFB_128_256
- [(V16HF "TARGET_AVX512FP16")
- (V8HF "TARGET_AVX512FP16")
+ [(V16HF "TARGET_AVX") (V8HF "TARGET_SSE2")
(V8SF "TARGET_AVX") V4SF
(V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
@@ -399,7 +396,7 @@
;; All 512bit vector float modes for bitwise operations
(define_mode_iterator VFB_512
- [(V32HF "TARGET_AVX512FP16") V16SF V8DF])
+ [V32HF V16SF V8DF])
(define_mode_iterator VI48_AVX512VL
[V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
@@ -4581,7 +4578,8 @@
(not:VFB_128_256
(match_operand:VFB_128_256 1 "register_operand" "0,x,v,v"))
(match_operand:VFB_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
- "TARGET_SSE && <mask_avx512vl_condition>"
+ "TARGET_SSE && <mask_avx512vl_condition>
+ && (!<mask_applied> || <ssescalarmode>mode != HFmode)"
{
char buf[128];
const char *ops;
@@ -4648,7 +4646,7 @@
(not:VFB_512
(match_operand:VFB_512 1 "register_operand" "v"))
(match_operand:VFB_512 2 "nonimmediate_operand" "vm")))]
- "TARGET_AVX512F"
+ "TARGET_AVX512F && (!<mask_applied> || <ssescalarmode>mode != HFmode)"
{
char buf[128];
const char *ops;
@@ -4683,7 +4681,8 @@
(any_logic:VFB_128_256
(match_operand:VFB_128_256 1 "vector_operand")
(match_operand:VFB_128_256 2 "vector_operand")))]
- "TARGET_SSE && <mask_avx512vl_condition>"
+ "TARGET_SSE && <mask_avx512vl_condition>
+ && (!<mask_applied> || <ssescalarmode>mode != HFmode)"
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
(define_expand "<code><mode>3<mask_name>"
@@ -4691,7 +4690,7 @@
(any_logic:VFB_512
(match_operand:VFB_512 1 "nonimmediate_operand")
(match_operand:VFB_512 2 "nonimmediate_operand")))]
- "TARGET_AVX512F"
+ "TARGET_AVX512F && (!<mask_applied> || <ssescalarmode>mode != HFmode)"
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
(define_insn "*<code><mode>3<mask_name>"
@@ -4700,6 +4699,7 @@
(match_operand:VFB_128_256 1 "vector_operand" "%0,x,v,v")
(match_operand:VFB_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
"TARGET_SSE && <mask_avx512vl_condition>
+ && (!<mask_applied> || <ssescalarmode>mode != HFmode)
&& !(MEM_P (operands[1]) && MEM_P (operands[2]))"
{
char buf[128];
@@ -4766,7 +4766,8 @@
(any_logic:VFB_512
(match_operand:VFB_512 1 "nonimmediate_operand" "%v")
(match_operand:VFB_512 2 "nonimmediate_operand" "vm")))]
- "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))
+ && (!<mask_applied> || <ssescalarmode>mode != HFmode)"
{
char buf[128];
const char *ops;
@@ -16741,17 +16742,6 @@
(match_operand:<avx512fmaskmode> 4 "register_operand")))]
"TARGET_AVX512F")
-(define_expand "<sse2_avx2>_andnot<mode>3_mask"
- [(set (match_operand:VI12_AVX512VL 0 "register_operand")
- (vec_merge:VI12_AVX512VL
- (and:VI12_AVX512VL
- (not:VI12_AVX512VL
- (match_operand:VI12_AVX512VL 1 "register_operand"))
- (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
- (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand")
- (match_operand:<avx512fmaskmode> 4 "register_operand")))]
- "TARGET_AVX512BW")
-
(define_insn "*andnot<mode>3"
[(set (match_operand:VI 0 "register_operand" "=x,x,v")
(and:VI