summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Sandiford <richard.sandiford@linaro.org>2017-11-17 10:38:05 +0000
committerRichard Sandiford <richard.sandiford@linaro.org>2017-11-20 11:42:14 +0000
commitc6bae96f6e24a3a89fd511f2f2cef57972726bc4 (patch)
treece0976ae2562c0136ef0f4eaf82c6433fe4409be
parent4e31ab7b28f374792122bdb98fd350bd024534ec (diff)
downloadgcc-c6bae96f6e24a3a89fd511f2f2cef57972726bc4.tar.gz
Add support for reductions in fully-masked loops
This patch removes the restriction that fully-masked loops cannot have reductions. The key thing here is to make sure that the reduction accumulator doesn't include any values associated with inactive lanes; the patch adds a bunch of conditional binary operations for doing that. 2017-11-16 Richard Sandiford <richard.sandiford@linaro.org> Alan Hayward <alan.hayward@arm.com> David Sherwood <david.sherwood@arm.com> gcc/ * doc/md.texi (cond_add@var{mode}, cond_sub@var{mode}) (cond_and@var{mode}, cond_ior@var{mode}, cond_xor@var{mode}) (cond_smin@var{mode}, cond_smax@var{mode}, cond_umin@var{mode}) (cond_umax@var{mode}): Document. * optabs.def (cond_add_optab, cond_sub_optab, cond_and_optab) (cond_ior_optab, cond_xor_optab, cond_smin_optab, cond_smax_optab) (cond_umin_optab, cond_umax_optab): New optabs. * internal-fn.def (COND_ADD, COND_SUB, COND_SMIN, COND_SMAX) (COND_UMIN, COND_UMAX, COND_AND, COND_IOR, COND_XOR): New internal functions. * internal-fn.h (get_conditional_internal_fn): Declare. * internal-fn.c (cond_binary_direct): New macro. (expand_cond_binary_optab_fn): Likewise. (direct_cond_binary_optab_supported_p): Likewise. (get_conditional_internal_fn): New function. * tree-vect-loop.c (vectorizable_reduction): Handle fully-masked loops. Cope with reduction statements that are vectorized as calls rather than assignments. * config/aarch64/aarch64-sve.md (cond_<optab><mode>): New insns. * config/aarch64/iterators.md (UNSPEC_COND_ADD, UNSPEC_COND_SUB) (UNSPEC_COND_SMAX, UNSPEC_COND_UMAX, UNSPEC_COND_SMIN) (UNSPEC_COND_UMIN, UNSPEC_COND_AND, UNSPEC_COND_ORR) (UNSPEC_COND_EOR): New unspecs. (optab): Add mappings for them. (SVE_COND_INT_OP, SVE_COND_FP_OP): New int iterators. (sve_int_op, sve_fp_op): New int attributes. gcc/testsuite/ * gcc.dg/vect/pr60482.c: Remove XFAIL for variable-length vectors. * gcc.target/aarch64/sve_reduc_1.c: Expect the loop operations to be predicated. * gcc.target/aarch64/sve_slp_5.c: Check for a fully-masked loop. * gcc.target/aarch64/sve_slp_7.c: Likewise. * gcc.target/aarch64/sve_reduc_5.c: New test. * gcc.target/aarch64/sve_slp_13.c: Likewise. * gcc.target/aarch64/sve_slp_13_run.c: Likewise.
-rw-r--r--gcc/config/aarch64/aarch64-sve.md24
-rw-r--r--gcc/config/aarch64/iterators.md42
-rw-r--r--gcc/doc/md.texi36
-rw-r--r--gcc/internal-fn.c36
-rw-r--r--gcc/internal-fn.def24
-rw-r--r--gcc/internal-fn.h2
-rw-r--r--gcc/optabs.def9
-rw-r--r--gcc/testsuite/gcc.dg/vect/pr60482.c4
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_reduc_1.c53
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_reduc_5.c38
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_13.c52
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_13_run.c28
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_5.c9
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve_slp_7.c9
-rw-r--r--gcc/tree-vect-loop.c88
15 files changed, 405 insertions, 49 deletions
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index e01641d15eb..78d78c34afd 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -1441,6 +1441,18 @@
"<maxmin_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
)
+;; Predicated integer operations.
+(define_insn "cond_<optab><mode>"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (match_operand:SVE_I 2 "register_operand" "0")
+ (match_operand:SVE_I 3 "register_operand" "w")]
+ SVE_COND_INT_OP))]
+ "TARGET_SVE"
+ "<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+)
+
;; Unpredicated integer add reduction.
(define_expand "reduc_plus_scal_<mode>"
[(set (match_operand:<VEL> 0 "register_operand")
@@ -2118,6 +2130,18 @@
}
)
+;; Predicated floating-point operations.
+(define_insn "cond_<optab><mode>"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (match_operand:SVE_F 2 "register_operand" "0")
+ (match_operand:SVE_F 3 "register_operand" "w")]
+ SVE_COND_FP_OP))]
+ "TARGET_SVE"
+ "<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+)
+
;; Shift an SVE vector left and insert a scalar into element 0.
(define_insn "vec_shl_insert_<mode>"
[(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index cce359f9dc3..f57c88da5aa 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -415,6 +415,15 @@
UNSPEC_ANDF ; Used in aarch64-sve.md.
UNSPEC_IORF ; Used in aarch64-sve.md.
UNSPEC_XORF ; Used in aarch64-sve.md.
+ UNSPEC_COND_ADD ; Used in aarch64-sve.md.
+ UNSPEC_COND_SUB ; Used in aarch64-sve.md.
+ UNSPEC_COND_SMAX ; Used in aarch64-sve.md.
+ UNSPEC_COND_UMAX ; Used in aarch64-sve.md.
+ UNSPEC_COND_SMIN ; Used in aarch64-sve.md.
+ UNSPEC_COND_UMIN ; Used in aarch64-sve.md.
+ UNSPEC_COND_AND ; Used in aarch64-sve.md.
+ UNSPEC_COND_ORR ; Used in aarch64-sve.md.
+ UNSPEC_COND_EOR ; Used in aarch64-sve.md.
UNSPEC_COND_LT ; Used in aarch64-sve.md.
UNSPEC_COND_LE ; Used in aarch64-sve.md.
UNSPEC_COND_EQ ; Used in aarch64-sve.md.
@@ -1413,6 +1422,15 @@
(define_int_iterator UNPACK_UNSIGNED [UNSPEC_UNPACKULO UNSPEC_UNPACKUHI])
+(define_int_iterator SVE_COND_INT_OP [UNSPEC_COND_ADD UNSPEC_COND_SUB
+ UNSPEC_COND_SMAX UNSPEC_COND_UMAX
+ UNSPEC_COND_SMIN UNSPEC_COND_UMIN
+ UNSPEC_COND_AND
+ UNSPEC_COND_ORR
+ UNSPEC_COND_EOR])
+
+(define_int_iterator SVE_COND_FP_OP [UNSPEC_COND_ADD UNSPEC_COND_SUB])
+
(define_int_iterator SVE_COND_INT_CMP [UNSPEC_COND_LT UNSPEC_COND_LE
UNSPEC_COND_EQ UNSPEC_COND_NE
UNSPEC_COND_GE UNSPEC_COND_GT
@@ -1445,7 +1463,16 @@
(UNSPEC_XORF "xor")
(UNSPEC_ANDV "and")
(UNSPEC_IORV "ior")
- (UNSPEC_XORV "xor")])
+ (UNSPEC_XORV "xor")
+ (UNSPEC_COND_ADD "add")
+ (UNSPEC_COND_SUB "sub")
+ (UNSPEC_COND_SMAX "smax")
+ (UNSPEC_COND_UMAX "umax")
+ (UNSPEC_COND_SMIN "smin")
+ (UNSPEC_COND_UMIN "umin")
+ (UNSPEC_COND_AND "and")
+ (UNSPEC_COND_ORR "ior")
+ (UNSPEC_COND_EOR "xor")])
(define_int_attr maxmin_uns [(UNSPEC_UMAXV "umax")
(UNSPEC_UMINV "umin")
@@ -1652,3 +1679,16 @@
(UNSPEC_COND_LS "vsd")
(UNSPEC_COND_HS "vsd")
(UNSPEC_COND_HI "vsd")])
+
+(define_int_attr sve_int_op [(UNSPEC_COND_ADD "add")
+ (UNSPEC_COND_SUB "sub")
+ (UNSPEC_COND_SMAX "smax")
+ (UNSPEC_COND_UMAX "umax")
+ (UNSPEC_COND_SMIN "smin")
+ (UNSPEC_COND_UMIN "umin")
+ (UNSPEC_COND_AND "and")
+ (UNSPEC_COND_ORR "orr")
+ (UNSPEC_COND_EOR "eor")])
+
+(define_int_attr sve_fp_op [(UNSPEC_COND_ADD "fadd")
+ (UNSPEC_COND_SUB "fsub")])
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 89c72e3aafc..ce562d1a555 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -6297,6 +6297,42 @@ move operand 2 or (operands 2 + operand 3) into operand 0 according to the
comparison in operand 1. If the comparison is false, operand 2 is moved into
operand 0, otherwise (operand 2 + operand 3) is moved.
+@cindex @code{cond_add@var{mode}} instruction pattern
+@cindex @code{cond_sub@var{mode}} instruction pattern
+@cindex @code{cond_and@var{mode}} instruction pattern
+@cindex @code{cond_ior@var{mode}} instruction pattern
+@cindex @code{cond_xor@var{mode}} instruction pattern
+@cindex @code{cond_smin@var{mode}} instruction pattern
+@cindex @code{cond_smax@var{mode}} instruction pattern
+@cindex @code{cond_umin@var{mode}} instruction pattern
+@cindex @code{cond_umax@var{mode}} instruction pattern
+@item @samp{cond_add@var{mode}}
+@itemx @samp{cond_sub@var{mode}}
+@itemx @samp{cond_and@var{mode}}
+@itemx @samp{cond_ior@var{mode}}
+@itemx @samp{cond_xor@var{mode}}
+@itemx @samp{cond_smin@var{mode}}
+@itemx @samp{cond_smax@var{mode}}
+@itemx @samp{cond_umin@var{mode}}
+@itemx @samp{cond_umax@var{mode}}
+Perform an elementwise operation on vector operands 2 and 3,
+under the control of the vector mask in operand 1, and store the result
+in operand 0. This is equivalent to:
+
+@smallexample
+for (i = 0; i < GET_MODE_NUNITS (@var{n}); i++)
+ op0[i] = op1[i] ? op2[i] @var{op} op3[i] : op2[i];
+@end smallexample
+
+where, for example, @var{op} is @code{+} for @samp{cond_add@var{mode}}.
+
+When defined for floating-point modes, the contents of @samp{op3[i]}
+are not interpreted if @var{op1[i]} is false, just like they would not
+be in a normal C @samp{?:} condition.
+
+Operands 0, 2 and 3 all have mode @var{m}, while operand 1 has the mode
+returned by @code{TARGET_VECTORIZE_GET_MASK_MODE}.
+
@cindex @code{neg@var{mode}cc} instruction pattern
@item @samp{neg@var{mode}cc}
Similar to @samp{mov@var{mode}cc} but for conditional negation. Conditionally
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index d8adc080b45..b3a5360caa8 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -88,6 +88,7 @@ init_internal_fns ()
#define mask_store_lanes_direct { 0, 0, false }
#define unary_direct { 0, 0, true }
#define binary_direct { 0, 0, true }
+#define cond_binary_direct { 1, 1, true }
#define while_direct { 0, 2, false }
const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = {
@@ -2802,6 +2803,9 @@ expand_while_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
#define expand_binary_optab_fn(FN, STMT, OPTAB) \
expand_direct_optab_fn (FN, STMT, OPTAB, 2)
+#define expand_cond_binary_optab_fn(FN, STMT, OPTAB) \
+ expand_direct_optab_fn (FN, STMT, OPTAB, 3)
+
/* RETURN_TYPE and ARGS are a return type and argument list that are
in principle compatible with FN (which satisfies direct_internal_fn_p).
Return the types that should be used to determine whether the
@@ -2875,6 +2879,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
#define direct_unary_optab_supported_p direct_optab_supported_p
#define direct_binary_optab_supported_p direct_optab_supported_p
+#define direct_cond_binary_optab_supported_p direct_optab_supported_p
#define direct_mask_load_optab_supported_p direct_optab_supported_p
#define direct_load_lanes_optab_supported_p multi_vector_optab_supported_p
#define direct_mask_load_lanes_optab_supported_p multi_vector_optab_supported_p
@@ -2953,6 +2958,37 @@ static void (*const internal_fn_expanders[]) (internal_fn, gcall *) = {
0
};
+/* Return a function that performs the conditional form of CODE, i.e.:
+
+ LHS = RHS1 ? RHS2 CODE RHS3 : RHS2
+
+ (operating elementwise if the operands are vectors). Return IFN_LAST
+ if no such function exists. */
+
+internal_fn
+get_conditional_internal_fn (tree_code code, tree type)
+{
+ switch (code)
+ {
+ case PLUS_EXPR:
+ return IFN_COND_ADD;
+ case MINUS_EXPR:
+ return IFN_COND_SUB;
+ case MIN_EXPR:
+ return TYPE_UNSIGNED (type) ? IFN_COND_UMIN : IFN_COND_SMIN;
+ case MAX_EXPR:
+ return TYPE_UNSIGNED (type) ? IFN_COND_UMAX : IFN_COND_SMAX;
+ case BIT_AND_EXPR:
+ return IFN_COND_AND;
+ case BIT_IOR_EXPR:
+ return IFN_COND_IOR;
+ case BIT_XOR_EXPR:
+ return IFN_COND_XOR;
+ default:
+ return IFN_LAST;
+ }
+}
+
/* Expand STMT as though it were a call to internal function FN. */
void
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 4b01e3fe8c2..576d83901ea 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -30,6 +30,7 @@ along with GCC; see the file COPYING3. If not see
DEF_INTERNAL_FN (NAME, FLAGS, FNSPEC)
DEF_INTERNAL_OPTAB_FN (NAME, FLAGS, OPTAB, TYPE)
+ DEF_INTERNAL_COND_OPTAB_FN (NAME, FLAGS, OPTAB, TYPE)
DEF_INTERNAL_FLT_FN (NAME, FLAGS, OPTAB, TYPE)
DEF_INTERNAL_INT_FN (NAME, FLAGS, OPTAB, TYPE)
@@ -51,6 +52,15 @@ along with GCC; see the file COPYING3. If not see
- store_lanes: currently just vec_store_lanes
- mask_store_lanes: currently just vec_mask_store_lanes
+ - unary: a normal unary optab, such as vec_reverse_<mode>
+ - binary: a normal binary optab, such as vec_interleave_lo_<mode>
+
+ - cond_binary: a conditional binary optab, such as add<mode>cc
+
+ DEF_INTERNAL_COND_OPTAB_FN defines a conditional function COND_<NAME>,
+ with optab cond_<OPTAB> and type cond_<TYPE>. All these functions
+ are predicated and take the predicate as the first argument.
+
DEF_INTERNAL_FLT_FN is like DEF_INTERNAL_OPTAB_FN, but in addition,
the function implements the computational part of a built-in math
function BUILT_IN_<NAME>{F,,L}. Unlike some built-in functions,
@@ -77,6 +87,9 @@ along with GCC; see the file COPYING3. If not see
DEF_INTERNAL_FN (NAME, FLAGS | ECF_LEAF, NULL)
#endif
+#define DEF_INTERNAL_COND_OPTAB_FN(NAME, FLAGS, OPTAB, TYPE) \
+ DEF_INTERNAL_OPTAB_FN (COND_##NAME, FLAGS, cond_##OPTAB, cond_##TYPE)
+
#ifndef DEF_INTERNAL_FLT_FN
#define DEF_INTERNAL_FLT_FN(NAME, FLAGS, OPTAB, TYPE) \
DEF_INTERNAL_OPTAB_FN (NAME, FLAGS, OPTAB, TYPE)
@@ -117,6 +130,16 @@ DEF_INTERNAL_OPTAB_FN (VEC_REVERSE, ECF_CONST | ECF_NOTHROW,
DEF_INTERNAL_OPTAB_FN (VEC_SHL_INSERT, ECF_CONST | ECF_NOTHROW,
vec_shl_insert, binary)
+DEF_INTERNAL_COND_OPTAB_FN (ADD, ECF_CONST, add, binary)
+DEF_INTERNAL_COND_OPTAB_FN (SUB, ECF_CONST, sub, binary)
+DEF_INTERNAL_COND_OPTAB_FN (SMIN, ECF_CONST, smin, binary)
+DEF_INTERNAL_COND_OPTAB_FN (SMAX, ECF_CONST, smax, binary)
+DEF_INTERNAL_COND_OPTAB_FN (UMIN, ECF_CONST, umin, binary)
+DEF_INTERNAL_COND_OPTAB_FN (UMAX, ECF_CONST, umax, binary)
+DEF_INTERNAL_COND_OPTAB_FN (AND, ECF_CONST | ECF_NOTHROW, and, binary)
+DEF_INTERNAL_COND_OPTAB_FN (IOR, ECF_CONST | ECF_NOTHROW, ior, binary)
+DEF_INTERNAL_COND_OPTAB_FN (XOR, ECF_CONST | ECF_NOTHROW, xor, binary)
+
DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary)
/* Unary math functions. */
@@ -257,5 +280,6 @@ DEF_INTERNAL_FN (DIVMOD, ECF_CONST | ECF_LEAF, NULL)
#undef DEF_INTERNAL_INT_FN
#undef DEF_INTERNAL_FLT_FN
#undef DEF_INTERNAL_FLT_FLOATN_FN
+#undef DEF_INTERNAL_COND_OPTAB_FN
#undef DEF_INTERNAL_OPTAB_FN
#undef DEF_INTERNAL_FN
diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
index dc731f601a2..15691a6eb1a 100644
--- a/gcc/internal-fn.h
+++ b/gcc/internal-fn.h
@@ -190,6 +190,8 @@ direct_internal_fn_supported_p (internal_fn fn, tree type0, tree type1,
extern bool set_edom_supported_p (void);
+extern internal_fn get_conditional_internal_fn (tree_code, tree);
+
extern void expand_internal_call (gcall *);
extern void expand_internal_call (internal_fn, gcall *);
extern void expand_PHI (internal_fn, gcall *);
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 96e6d63ff1a..27e98346a88 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -220,6 +220,15 @@ OPTAB_D (addcc_optab, "add$acc")
OPTAB_D (negcc_optab, "neg$acc")
OPTAB_D (notcc_optab, "not$acc")
OPTAB_D (movcc_optab, "mov$acc")
+OPTAB_D (cond_add_optab, "cond_add$a")
+OPTAB_D (cond_sub_optab, "cond_sub$a")
+OPTAB_D (cond_and_optab, "cond_and$a")
+OPTAB_D (cond_ior_optab, "cond_ior$a")
+OPTAB_D (cond_xor_optab, "cond_xor$a")
+OPTAB_D (cond_smin_optab, "cond_smin$a")
+OPTAB_D (cond_smax_optab, "cond_smax$a")
+OPTAB_D (cond_umin_optab, "cond_umin$a")
+OPTAB_D (cond_umax_optab, "cond_umax$a")
OPTAB_D (cmov_optab, "cmov$a6")
OPTAB_D (cstore_optab, "cstore$a4")
OPTAB_D (ctrap_optab, "ctrap$a4")
diff --git a/gcc/testsuite/gcc.dg/vect/pr60482.c b/gcc/testsuite/gcc.dg/vect/pr60482.c
index b07fe1ef33e..4c5c20c8109 100644
--- a/gcc/testsuite/gcc.dg/vect/pr60482.c
+++ b/gcc/testsuite/gcc.dg/vect/pr60482.c
@@ -16,6 +16,4 @@ foo (double *x, int n)
return p;
}
-/* Until fully-masked loops are supported, we always need an epilog
- loop for variable-length vectors. */
-/* { dg-final { scan-tree-dump-not "epilog loop required" "vect" { xfail vect_variable_length } } } */
+/* { dg-final { scan-tree-dump-not "epilog loop required" "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_1.c b/gcc/testsuite/gcc.target/aarch64/sve_reduc_1.c
index b975fe69fea..4c26e78fae8 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_reduc_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_1.c
@@ -105,10 +105,10 @@ reduc_##NAME##_##TYPE (TYPE *a, int n) \
TEST_BITWISE (DEF_REDUC_BITWISE)
-/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
-/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
@@ -130,9 +130,9 @@ TEST_BITWISE (DEF_REDUC_BITWISE)
/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
@@ -142,11 +142,20 @@ TEST_BITWISE (DEF_REDUC_BITWISE)
/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 8 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
-/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 8 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
-/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 8 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */
/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
@@ -180,17 +189,17 @@ TEST_BITWISE (DEF_REDUC_BITWISE)
/* { dg-final { scan-assembler-times {\tfminnmv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfminnmv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tandv\tb[0-9]+, p[0-7], z[0-9]+\.b} 2 } } */
-/* { dg-final { scan-assembler-times {\tandv\th[0-9]+, p[0-7], z[0-9]+\.h} 2 } } */
-/* { dg-final { scan-assembler-times {\tandv\ts[0-9]+, p[0-7], z[0-9]+\.s} 2 } } */
-/* { dg-final { scan-assembler-times {\tandv\td[0-9]+, p[0-7], z[0-9]+\.d} 2 } } */
+/* { dg-final { scan-assembler-times {\tandv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tandv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tandv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tandv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
-/* { dg-final { scan-assembler-times {\torv\tb[0-9]+, p[0-7], z[0-9]+\.b} 2 } } */
-/* { dg-final { scan-assembler-times {\torv\th[0-9]+, p[0-7], z[0-9]+\.h} 2 } } */
-/* { dg-final { scan-assembler-times {\torv\ts[0-9]+, p[0-7], z[0-9]+\.s} 2 } } */
-/* { dg-final { scan-assembler-times {\torv\td[0-9]+, p[0-7], z[0-9]+\.d} 2 } } */
+/* { dg-final { scan-assembler-times {\torv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\torv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\torv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\torv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
-/* { dg-final { scan-assembler-times {\teorv\tb[0-9]+, p[0-7], z[0-9]+\.b} 2 } } */
-/* { dg-final { scan-assembler-times {\teorv\th[0-9]+, p[0-7], z[0-9]+\.h} 2 } } */
-/* { dg-final { scan-assembler-times {\teorv\ts[0-9]+, p[0-7], z[0-9]+\.s} 2 } } */
-/* { dg-final { scan-assembler-times {\teorv\td[0-9]+, p[0-7], z[0-9]+\.d} 2 } } */
+/* { dg-final { scan-assembler-times {\teorv\tb[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */
+/* { dg-final { scan-assembler-times {\teorv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */
+/* { dg-final { scan-assembler-times {\teorv\ts[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\teorv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_reduc_5.c b/gcc/testsuite/gcc.target/aarch64/sve_reduc_5.c
new file mode 100644
index 00000000000..59363aac56b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_reduc_5.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=armv8-a+sve" } */
+
+#include <stdint.h>
+
+#define REDUC(TYPE) \
+ TYPE reduc_##TYPE (TYPE *x, int count) \
+ { \
+ TYPE sum = 0; \
+ for (int i = 0; i < count; ++i) \
+ sum -= x[i]; \
+ return sum; \
+ }
+
+REDUC (int8_t)
+REDUC (uint8_t)
+REDUC (int16_t)
+REDUC (uint16_t)
+REDUC (int32_t)
+REDUC (uint32_t)
+REDUC (int64_t)
+REDUC (uint64_t)
+REDUC (float)
+REDUC (double)
+
+/* XFAILed until we support sub-int reductions for signed types. */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, p[0-7]/m} 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, p[0-7]/m} 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, p[0-7]/m} 1 } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, p[0-7]/m} 1 } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, p[0-7]/m} 2 } } */
+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, p[0-7]/m} 2 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m} 1 } } */
+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m} 1 } } */
+
+/* XFAILed until we support sub-int reductions for signed types. */
+/* { dg-final { scan-assembler-times {\tsub\t} 8 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfsub\t} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_13.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_13.c
new file mode 100644
index 00000000000..11973e48415
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_13.c
@@ -0,0 +1,52 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve -msve-vector-bits=scalable" } */
+
+#include <stdint.h>
+
+#define VEC_PERM(TYPE) \
+TYPE __attribute__ ((noinline, noclone)) \
+vec_slp_##TYPE (TYPE *restrict a, int n) \
+{ \
+ TYPE res = 0; \
+ for (int i = 0; i < n; ++i) \
+ { \
+ res += a[i * 2] * 3; \
+ res += a[i * 2 + 1] * 5; \
+ } \
+ return res; \
+}
+
+#define TEST_ALL(T) \
+ T (int8_t) \
+ T (uint8_t) \
+ T (int16_t) \
+ T (uint16_t) \
+ T (int32_t) \
+ T (uint32_t) \
+ T (int64_t) \
+ T (uint64_t)
+
+TEST_ALL (VEC_PERM)
+
+/* ??? We don't treat the int8_t and int16_t loops as reductions. */
+/* ??? We don't treat the uint loops as SLP. */
+/* The loop should be fully-masked. */
+/* { dg-final { scan-assembler-times {\tld1b\t} 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tld1h\t} 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tld1w\t} 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tld1w\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tld1d\t} 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tld1d\t} 1 } } */
+/* { dg-final { scan-assembler-not {\tldr} { xfail *-*-* } } } */
+
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 4 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
+
+/* { dg-final { scan-assembler-not {\tuqdec} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_13_run.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_13_run.c
new file mode 100644
index 00000000000..2824073cf14
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_13_run.c
@@ -0,0 +1,28 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+
+#include "sve_slp_13.c"
+
+#define N1 (103 * 2)
+#define N2 (111 * 2)
+
+#define HARNESS(TYPE) \
+ { \
+ TYPE a[N2]; \
+ TYPE expected = 0; \
+ for (unsigned int i = 0; i < N2; ++i) \
+ { \
+ a[i] = i * 2 + i % 5; \
+ if (i < N1) \
+ expected += a[i] * (i & 1 ? 5 : 3); \
+ asm volatile (""); \
+ } \
+ if (vec_slp_##TYPE (a, N1 / 2) != expected) \
+ __builtin_abort (); \
+ }
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ TEST_ALL (HARNESS)
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_5.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_5.c
index 88fe213f66a..e0bacb0cad8 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_5.c
@@ -56,3 +56,12 @@ TEST_ALL (VEC_PERM)
/* { dg-final { scan-assembler-times {\tfaddv\th[0-9]+, p[0-7], z[0-9]+\.h} 2 } } */
/* { dg-final { scan-assembler-times {\tfaddv\ts[0-9]+, p[0-7], z[0-9]+\.s} 2 } } */
/* { dg-final { scan-assembler-times {\tfaddv\td[0-9]+, p[0-7], z[0-9]+\.d} 2 } } */
+
+/* Should be 4 and 6 respectively, if we used reductions for int8_t and
+ int16_t. */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 2 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */
+
+/* { dg-final { scan-assembler-not {\tuqdec} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_slp_7.c b/gcc/testsuite/gcc.target/aarch64/sve_slp_7.c
index 6613ea6dd2d..372c7575cdb 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_slp_7.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_slp_7.c
@@ -64,3 +64,12 @@ TEST_ALL (VEC_PERM)
/* { dg-final { scan-assembler-times {\tfaddv\th[0-9]+, p[0-7], z[0-9]+\.h} 4 } } */
/* { dg-final { scan-assembler-times {\tfaddv\ts[0-9]+, p[0-7], z[0-9]+\.s} 4 } } */
/* { dg-final { scan-assembler-times {\tfaddv\td[0-9]+, p[0-7], z[0-9]+\.d} 4 } } */
+
+/* Should be 4 and 6 respectively, if we used reductions for int8_t and
+ int16_t. */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 2 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */
+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */
+
+/* { dg-final { scan-assembler-not {\tuqdec} } } */
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index d2c4cc3f39a..7220351652e 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -6726,19 +6726,42 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
return false;
}
+ if (slp_node)
+ vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ else
+ vec_num = 1;
+
+ internal_fn cond_fn = get_conditional_internal_fn (code, scalar_type);
+ vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
+
if (!vec_stmt) /* transformation not required. */
{
- if (LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "can't use a fully-masked loop due to "
- "reduction operation.\n");
- LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
- }
-
if (first_p)
vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies);
+ if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
+ {
+ if (cond_fn == IFN_LAST
+ || !direct_internal_fn_supported_p (cond_fn, vectype_in,
+ OPTIMIZE_FOR_SPEED))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "can't use a fully-masked loop because no"
+ " conditional operation is available.\n");
+ LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+ }
+ else if (reduc_index == -1)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "can't use a fully-masked loop for chained"
+ " reductions.\n");
+ LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+ }
+ else
+ vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
+ vectype_in);
+ }
STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
return true;
}
@@ -6752,16 +6775,15 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
if (code == COND_EXPR)
gcc_assert (ncopies == 1);
+ bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
+
/* Create the destination vector */
vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
prev_stmt_info = NULL;
prev_phi_info = NULL;
- if (slp_node)
- vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
- else
+ if (!slp_node)
{
- vec_num = 1;
vec_oprnds0.create (1);
vec_oprnds1.create (1);
if (op_type == ternary_op)
@@ -6835,19 +6857,19 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
gcc_assert (reduc_index != -1 || ! single_defuse_cycle);
if (single_defuse_cycle && reduc_index == 0)
- vec_oprnds0[0] = gimple_assign_lhs (new_stmt);
+ vec_oprnds0[0] = gimple_get_lhs (new_stmt);
else
vec_oprnds0[0]
= vect_get_vec_def_for_stmt_copy (dts[0], vec_oprnds0[0]);
if (single_defuse_cycle && reduc_index == 1)
- vec_oprnds1[0] = gimple_assign_lhs (new_stmt);
+ vec_oprnds1[0] = gimple_get_lhs (new_stmt);
else
vec_oprnds1[0]
= vect_get_vec_def_for_stmt_copy (dts[1], vec_oprnds1[0]);
if (op_type == ternary_op)
{
if (single_defuse_cycle && reduc_index == 2)
- vec_oprnds2[0] = gimple_assign_lhs (new_stmt);
+ vec_oprnds2[0] = gimple_get_lhs (new_stmt);
else
vec_oprnds2[0]
= vect_get_vec_def_for_stmt_copy (dts[2], vec_oprnds2[0]);
@@ -6858,13 +6880,33 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
FOR_EACH_VEC_ELT (vec_oprnds0, i, def0)
{
tree vop[3] = { def0, vec_oprnds1[i], NULL_TREE };
- if (op_type == ternary_op)
- vop[2] = vec_oprnds2[i];
+ if (masked_loop_p)
+ {
+ /* Make sure that the reduction accumulator is vop[0]. */
+ if (reduc_index == 1)
+ {
+ gcc_assert (commutative_tree_code (code));
+ std::swap (vop[0], vop[1]);
+ }
+ tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
+ vectype_in, i * ncopies + j);
+ gcall *call = gimple_build_call_internal (cond_fn, 3, mask,
+ vop[0], vop[1]);
+ new_temp = make_ssa_name (vec_dest, call);
+ gimple_call_set_lhs (call, new_temp);
+ gimple_call_set_nothrow (call, true);
+ new_stmt = call;
+ }
+ else
+ {
+ if (op_type == ternary_op)
+ vop[2] = vec_oprnds2[i];
- new_temp = make_ssa_name (vec_dest, new_stmt);
- new_stmt = gimple_build_assign (new_temp, code,
- vop[0], vop[1], vop[2]);
- vect_finish_stmt_generation (stmt, new_stmt, gsi);
+ new_temp = make_ssa_name (vec_dest, new_stmt);
+ new_stmt = gimple_build_assign (new_temp, code,
+ vop[0], vop[1], vop[2]);
+ }
+ vect_finish_stmt_generation (stmt, new_stmt, gsi);
if (slp_node)
{
@@ -6889,7 +6931,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
/* Finalize the reduction-phi (set its arguments) and create the
epilog reduction code. */
if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
- vect_defs[0] = gimple_assign_lhs (*vec_stmt);
+ vect_defs[0] = gimple_get_lhs (*vec_stmt);
vect_create_epilog_for_reduction (vect_defs, stmt, reduc_def_stmt,
epilog_copies,