summary | refs | log | tree | commit | diff
path: root/gcc/config/i386
diff options
context:
space:
mode:
author Yvan Roux <yvan.roux@linaro.org> 2016-04-12 13:39:01 +0200
committer Yvan Roux <yvan.roux@linaro.org> 2016-04-14 08:32:53 +0200
commit 8d7043e84c526ead366949c3c30ce74f964efe91 (patch)
tree 78fe3a1a7554a848fc30717c71105e45359bf3ef /gcc/config/i386
parent 55d3bceea8eef018564a026e615af58cca5d6273 (diff)
download gcc-8d7043e84c526ead366949c3c30ce74f964efe91.tar.gz
Merge branches/gcc-5-branch rev 234898.
Change-Id: I076a131171e689eede74dd1827406c6d3855fcbd
Diffstat (limited to 'gcc/config/i386')
-rw-r--r-- gcc/config/i386/i386.c 77
-rw-r--r-- gcc/config/i386/i386.md 25
-rw-r--r-- gcc/config/i386/sol2.h 5
-rw-r--r-- gcc/config/i386/sse.md 91
4 files changed, 106 insertions, 92 deletions
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index e61d2c55ea5..5ab999db6a7 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -24621,9 +24621,10 @@ alg_usable_p (enum stringop_alg alg, bool memset)
static enum stringop_alg
decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
- bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
+ bool memset, bool zero_memset, int *dynamic_check, bool *noalign,
+ bool recur)
{
- const struct stringop_algs * algs;
+ const struct stringop_algs *algs;
bool optimize_for_speed;
int max = 0;
const struct processor_costs *cost;
@@ -24657,7 +24658,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
any_alg_usable_p |= usable;
if (candidate != libcall && candidate && usable)
- max = algs->size[i].max;
+ max = algs->size[i].max;
}
/* If expected size is not known but max size is small enough
@@ -24667,7 +24668,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
&& expected_size == -1)
expected_size = min_size / 2 + max_size / 2;
- /* If user specified the algorithm, honnor it if possible. */
+ /* If user specified the algorithm, honor it if possible. */
if (ix86_stringop_alg != no_stringop
&& alg_usable_p (ix86_stringop_alg, memset))
return ix86_stringop_alg;
@@ -24742,21 +24743,20 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
|| !alg_usable_p (algs->unknown_size, memset)))
{
enum stringop_alg alg;
+ HOST_WIDE_INT new_expected_size = (max > 0 ? max : 4096) / 2;
- /* If there aren't any usable algorithms, then recursing on
- smaller sizes isn't going to find anything. Just return the
- simple byte-at-a-time copy loop. */
- if (!any_alg_usable_p)
- {
- /* Pick something reasonable. */
- if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
- *dynamic_check = 128;
- return loop_1_byte;
- }
- if (max <= 0)
- max = 4096;
- alg = decide_alg (count, max / 2, min_size, max_size, memset,
- zero_memset, dynamic_check, noalign);
+ /* If there aren't any usable algorithms or if recursing already,
+ then recursing on smaller sizes or same size isn't going to
+ find anything. Just return the simple byte-at-a-time copy loop. */
+ if (!any_alg_usable_p || recur)
+ {
+ /* Pick something reasonable. */
+ if (TARGET_INLINE_STRINGOPS_DYNAMICALLY && !recur)
+ *dynamic_check = 128;
+ return loop_1_byte;
+ }
+ alg = decide_alg (count, new_expected_size, min_size, max_size, memset,
+ zero_memset, dynamic_check, noalign, true);
gcc_assert (*dynamic_check == -1);
if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
*dynamic_check = max;
@@ -25012,7 +25012,7 @@ ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
alg = decide_alg (count, expected_size, min_size, probable_max_size,
issetmem,
issetmem && val_exp == const0_rtx,
- &dynamic_check, &noalign);
+ &dynamic_check, &noalign, false);
if (alg == libcall)
return false;
gcc_assert (alg != no_stringop);
@@ -30804,6 +30804,14 @@ def_builtin (HOST_WIDE_INT mask, const char *name,
{
ix86_builtins_isa[(int) code].isa = mask;
+ /* OPTION_MASK_ISA_AVX512VL has special meaning. Despite of generic case,
+ where any bit set means that built-in is enable, this bit must be *and-ed*
+ with another one. E.g.: OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL
+ means that *both* cpuid bits must be set for the built-in to be available.
+ Handle this here. */
+ if (mask & ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
+ mask &= ~OPTION_MASK_ISA_AVX512VL;
+
mask &= ~OPTION_MASK_ISA_64BIT;
if (mask == 0
|| (mask & ix86_isa_flags) != 0
@@ -32465,9 +32473,9 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
- { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
- { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
+ { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
{ OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
{ OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
@@ -33182,9 +33190,9 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
{ OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
{ OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
- { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
+ { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
{ OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
- { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
+ { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
{ OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
{ OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
};
@@ -45211,7 +45219,12 @@ half:
tmp = gen_reg_rtx (mode);
emit_insn (gen_rtx_SET (VOIDmode, tmp,
gen_rtx_VEC_DUPLICATE (mode, val)));
- emit_insn (gen_blendm (target, tmp, target,
+ /* The avx512*_blendm<mode> expanders have different operand order
+ from VEC_MERGE. In VEC_MERGE, the first input operand is used for
+ elements where the mask is set and second input operand otherwise,
+ in {sse,avx}*_*blend* the first input operand is used for elements
+ where the mask is clear and second input operand otherwise. */
+ emit_insn (gen_blendm (target, target, tmp,
force_reg (mmode,
gen_int_mode (1 << elt, mmode))));
}
@@ -50028,16 +50041,24 @@ ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
{
/* For SSE2, we used an full interleave, so the desired
results are in the even elements. */
- for (i = 0; i < 64; ++i)
+ for (i = 0; i < d.nelt; ++i)
d.perm[i] = i * 2;
}
else
{
/* For AVX, the interleave used above was not cross-lane. So the
extraction is evens but with the second and third quarter swapped.
- Happily, that is even one insn shorter than even extraction. */
- for (i = 0; i < 64; ++i)
- d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
+ Happily, that is even one insn shorter than even extraction.
+ For AVX512BW we have 4 lanes. We extract evens from within a lane,
+ always first from the first and then from the second source operand,
+ the index bits above the low 4 bits remains the same.
+ Thus, for d.nelt == 32 we want permutation
+ 0,2,4,..14, 32,34,36,..46, 16,18,20,..30, 48,50,52,..62
+ and for d.nelt == 64 we want permutation
+ 0,2,4,..14, 64,66,68,..78, 16,18,20,..30, 80,82,84,..94,
+ 32,34,36,..46, 96,98,100,..110, 48,50,52,..62, 112,114,116,..126. */
+ for (i = 0; i < d.nelt; ++i)
+ d.perm[i] = ((i * 2) & 14) + ((i & 8) ? d.nelt : 0) + (i & ~15);
}
ok = ix86_expand_vec_perm_const_1 (&d);
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 89e8161645e..87619db4dbc 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1866,7 +1866,7 @@
[(set (match_operand:XI 0 "nonimmediate_operand")
(match_operand:XI 1 "general_operand"))]
"TARGET_AVX512F"
- "ix86_expand_move (XImode, operands); DONE;")
+ "ix86_expand_vector_move (XImode, operands); DONE;")
;; Reload patterns to support multi-word load/store
;; with non-offsetable address.
@@ -1906,11 +1906,11 @@
[(set (match_operand:OI 0 "nonimmediate_operand")
(match_operand:OI 1 "general_operand"))]
"TARGET_AVX"
- "ix86_expand_move (OImode, operands); DONE;")
+ "ix86_expand_vector_move (OImode, operands); DONE;")
(define_expand "movti"
[(set (match_operand:TI 0 "nonimmediate_operand")
- (match_operand:TI 1 "nonimmediate_operand"))]
+ (match_operand:TI 1 "general_operand"))]
"TARGET_64BIT || TARGET_SSE"
{
if (TARGET_64BIT)
@@ -2416,7 +2416,7 @@
(define_insn "kmovw"
[(set (match_operand:HI 0 "nonimmediate_operand" "=k,k")
(unspec:HI
- [(match_operand:HI 1 "nonimmediate_operand" "rm,k")]
+ [(match_operand:HI 1 "nonimmediate_operand" "r,km")]
UNSPEC_KMOV))]
"!(MEM_P (operands[0]) && MEM_P (operands[1])) && TARGET_AVX512F"
"@
@@ -2428,8 +2428,8 @@
(define_insn "*movhi_internal"
- [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m ,k,k,rm")
- (match_operand:HI 1 "general_operand" "r ,rn,rm,rn,rm,k,k"))]
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r ,r ,m ,k,k, r,m")
+ (match_operand:HI 1 "general_operand" "r ,rn,rm,rn,r,km,k,k"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
switch (get_attr_type (insn))
@@ -2443,7 +2443,8 @@
switch (which_alternative)
{
case 4: return "kmovw\t{%k1, %0|%0, %k1}";
- case 5: return "kmovw\t{%1, %0|%0, %1}";
+ case 5: /* FALLTHRU */
+ case 7: return "kmovw\t{%1, %0|%0, %1}";
case 6: return "kmovw\t{%1, %k0|%k0, %1}";
default: gcc_unreachable ();
}
@@ -2456,7 +2457,7 @@
}
}
[(set (attr "type")
- (cond [(eq_attr "alternative" "4,5,6")
+ (cond [(eq_attr "alternative" "4,5,6,7")
(const_string "mskmov")
(match_test "optimize_function_for_size_p (cfun)")
(const_string "imov")
@@ -2473,7 +2474,7 @@
]
(const_string "imov")))
(set (attr "prefix")
- (if_then_else (eq_attr "alternative" "4,5,6")
+ (if_then_else (eq_attr "alternative" "4,5,6,7")
(const_string "vex")
(const_string "orig")))
(set (attr "mode")
@@ -7998,10 +7999,10 @@
(clobber (reg:CC FLAGS_REG))]
"TARGET_AVX512F && !TARGET_BMI && reload_completed"
[(set (match_dup 0)
- (not:HI (match_dup 0)))
+ (not:SWI12 (match_dup 0)))
(parallel [(set (match_dup 0)
- (and:HI (match_dup 0)
- (match_dup 1)))
+ (and:SWI12 (match_dup 0)
+ (match_dup 1)))
(clobber (reg:CC FLAGS_REG))])])
;; Turn *anddi_1 into *andsi_1_zext if possible.
diff --git a/gcc/config/i386/sol2.h b/gcc/config/i386/sol2.h
index f213388413a..0ed4b68c15b 100644
--- a/gcc/config/i386/sol2.h
+++ b/gcc/config/i386/sol2.h
@@ -137,8 +137,9 @@ along with GCC; see the file COPYING3. If not see
/* The Solaris assembler wants a .local for non-exported aliases. */
#define ASM_OUTPUT_DEF_FROM_DECLS(FILE, DECL, TARGET) \
do { \
- const char *declname = \
- IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (DECL)); \
+ tree id = DECL_ASSEMBLER_NAME (DECL); \
+ ultimate_transparent_alias_target (&id); \
+ const char *declname = IDENTIFIER_POINTER (id); \
ASM_OUTPUT_DEF ((FILE), declname, \
IDENTIFIER_POINTER (TARGET)); \
if (! TREE_PUBLIC (DECL)) \
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index fc58030003d..5692a11aa29 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -483,8 +483,9 @@
[(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
(V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
(V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
- (V32QI "i") (V16HI "u") (V16QI "i") (V8HI "i")
- (V64QI "i") (V1TI "i") (V2TI "i")])
+ (V32HI "i") (V16HI "i") (V8HI "i")
+ (V64QI "i") (V32QI "i") (V16QI "i")
+ (V4TI "i") (V2TI "i") (V1TI "i")])
(define_mode_attr ssequartermode
[(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
@@ -677,7 +678,8 @@
;; Pointer size override for scalar modes (Intel asm dialect)
(define_mode_attr iptr
- [(V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
+ [(V64QI "b") (V32HI "w") (V16SI "k") (V8DI "q")
+ (V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
(V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
(V8SF "k") (V4DF "q")
(V4SF "k") (V2DF "q")
@@ -704,7 +706,8 @@
(V64QI "8") (V32QI "8") (V16QI "8")
(V32HI "16") (V16HI "16") (V8HI "16")
(V16SI "32") (V8SI "32") (V4SI "32")
- (V16SF "32") (V8DF "64")])
+ (V16SF "32") (V8SF "32") (V4SF "32")
+ (V8DF "64") (V4DF "64") (V2DF "64")])
;; SSE prefix for integer vector modes
(define_mode_attr sseintprefix
@@ -10824,45 +10827,46 @@
case MODE_XI:
gcc_assert (TARGET_AVX512F);
case MODE_OI:
- gcc_assert (TARGET_AVX2 || TARGET_AVX512VL);
+ gcc_assert (TARGET_AVX2);
case MODE_TI:
- gcc_assert (TARGET_SSE2 || TARGET_AVX512VL);
+ gcc_assert (TARGET_SSE2);
switch (<MODE>mode)
- {
- case V16SImode:
- case V8DImode:
- if (TARGET_AVX512F)
- {
- tmp = "pandn<ssemodesuffix>";
- break;
- }
- case V8SImode:
- case V4DImode:
- case V4SImode:
- case V2DImode:
- if (TARGET_AVX512VL)
- {
- tmp = "pandn<ssemodesuffix>";
- break;
- }
- default:
- tmp = TARGET_AVX512VL ? "pandnq" : "pandn";
- }
+ {
+ case V64QImode:
+ case V32HImode:
+ /* There is no vpandnb or vpandnw instruction, nor vpandn for
+ 512-bit vectors. Use vpandnq instead. */
+ tmp = "pandnq";
+ break;
+ case V16SImode:
+ case V8DImode:
+ tmp = "pandn<ssemodesuffix>";
+ break;
+ case V8SImode:
+ case V4DImode:
+ case V4SImode:
+ case V2DImode:
+ tmp = TARGET_AVX512VL ? "pandn<ssemodesuffix>" : "pandn";
+ break;
+ default:
+ tmp = TARGET_AVX512VL ? "pandnq" : "pandn";
+ break;
+ }
break;
- case MODE_V16SF:
+ case MODE_V16SF:
gcc_assert (TARGET_AVX512F);
- case MODE_V8SF:
+ case MODE_V8SF:
gcc_assert (TARGET_AVX);
- case MODE_V4SF:
+ case MODE_V4SF:
gcc_assert (TARGET_SSE);
tmp = "andnps";
break;
- default:
+ default:
gcc_unreachable ();
- }
+ }
switch (which_alternative)
{
@@ -10870,7 +10874,7 @@
ops = "%s\t{%%2, %%0|%%0, %%2}";
break;
case 1:
- ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
+ ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
break;
default:
gcc_unreachable ();
@@ -10920,21 +10924,6 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "*andnot<mode>3_mask"
- [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
- (vec_merge:VI12_AVX512VL
- (and:VI12_AVX512VL
- (not:VI12_AVX512VL
- (match_operand:VI12_AVX512VL 1 "register_operand" "v"))
- (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
- (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C")
- (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
- "TARGET_AVX512BW"
- "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}";
- [(set_attr "type" "sselog")
- (set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
-
(define_expand "<code><mode>3"
[(set (match_operand:VI 0 "register_operand")
(any_logic:VI
@@ -16655,8 +16644,9 @@
(match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m")
(parallel [(const_int 0)]))))]
"TARGET_AVX512F"
- "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
- vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
+ "@
+ vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
+ vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %<iptr>1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
@@ -16811,7 +16801,8 @@
v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
#"
- [(set_attr "type" "ssemov")
+ [(set_attr "isa" "*,*,noavx512vl")
+ (set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "maybe_evex")
(set_attr "mode" "<sseinsnmode>")])