author     Richard Sandiford <richard.sandiford@linaro.org>  2017-05-12 13:18:01 +0100
committer  Richard Sandiford <richard.sandiford@linaro.org>  2017-11-20 11:42:50 +0000
commit     694a5e4db22d1b42f23e661cf7bb57e10da6c96e
tree       417393d1cc8704cd3dc33aadc160bbd3177acbb9
parent     f9d9d07b1ba7536ac4e24d2852e1cc0147fb8acc
download   gcc-694a5e4db22d1b42f23e661cf7bb57e10da6c96e.tar.gz
Add support for conditional reductions using SVE CLASTB
This patch uses SVE CLASTB to optimise conditional reductions.  It means
that we no longer need to maintain a separate index vector to record the
most recent valid value, and no longer need to worry about overflow cases.

2017-11-16  Richard Sandiford  <richard.sandiford@linaro.org>
	    Alan Hayward  <alan.hayward@arm.com>
	    David Sherwood  <david.sherwood@arm.com>

gcc/
	* doc/md.texi (fold_extract_last_@var{m}): Document.
	* doc/sourcebuild.texi (vect_fold_extract_last): Likewise.
	* optabs.def (fold_extract_last_optab): New optab.
	* internal-fn.def (FOLD_EXTRACT_LAST): New internal function.
	* internal-fn.c (fold_extract_direct): New macro.
	(expand_fold_extract_optab_fn): Likewise.
	(direct_fold_extract_optab_supported_p): Likewise.
	* tree-vectorizer.h (EXTRACT_LAST_REDUCTION): New vect_reduction_type.
	* tree-vect-loop.c (vect_model_reduction_cost): Handle
	EXTRACT_LAST_REDUCTION.
	(get_initial_def_for_reduction): Do not create an initial vector
	for EXTRACT_LAST_REDUCTION reductions.
	(vectorizable_reduction): Leave the scalar phi in place for
	EXTRACT_LAST_REDUCTIONs.  Try using EXTRACT_LAST_REDUCTION
	ahead of INTEGER_INDUC_COND_REDUCTION.  Do not check for an
	epilogue code for EXTRACT_LAST_REDUCTION and defer the
	transform phase to vectorizable_condition.
	* tree-vect-stmts.c (vect_finish_stmt_generation_1): New function,
	split out from...
	(vect_finish_stmt_generation): ...here.
	(vect_finish_replace_stmt): New function.
	(vectorizable_condition): Handle EXTRACT_LAST_REDUCTION.
	* config/aarch64/aarch64-sve.md (fold_extract_last_<mode>): New
	pattern.
	* config/aarch64/aarch64.md (UNSPEC_CLASTB): New unspec.

gcc/testsuite/
	* lib/target-supports.exp
	(check_effective_target_vect_fold_extract_last): New proc.
	* gcc.dg/vect/pr65947-1.c: Update dump messages.  Add markup
	for fold_extract_last.
	* gcc.dg/vect/pr65947-2.c: Likewise.
	* gcc.dg/vect/pr65947-3.c: Likewise.
	* gcc.dg/vect/pr65947-4.c: Likewise.
	* gcc.dg/vect/pr65947-5.c: Likewise.
	* gcc.dg/vect/pr65947-6.c: Likewise.
	* gcc.dg/vect/pr65947-9.c: Likewise.
	* gcc.dg/vect/pr65947-10.c: Likewise.
	* gcc.dg/vect/pr65947-12.c: Likewise.
	* gcc.dg/vect/pr65947-13.c: Likewise.
	* gcc.dg/vect/pr65947-14.c: Likewise.
	* gcc.target/aarch64/sve_clastb_1.c: New test.
	* gcc.target/aarch64/sve_clastb_1_run.c: Likewise.
	* gcc.target/aarch64/sve_clastb_2.c: Likewise.
	* gcc.target/aarch64/sve_clastb_2_run.c: Likewise.
	* gcc.target/aarch64/sve_clastb_3.c: Likewise.
	* gcc.target/aarch64/sve_clastb_3_run.c: Likewise.
	* gcc.target/aarch64/sve_clastb_4.c: Likewise.
	* gcc.target/aarch64/sve_clastb_4_run.c: Likewise.
	* gcc.target/aarch64/sve_clastb_5.c: Likewise.
	* gcc.target/aarch64/sve_clastb_5_run.c: Likewise.
	* gcc.target/aarch64/sve_clastb_6.c: Likewise.
	* gcc.target/aarch64/sve_clastb_6_run.c: Likewise.
	* gcc.target/aarch64/sve_clastb_7.c: Likewise.
	* gcc.target/aarch64/sve_clastb_7_run.c: Likewise.
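As a rough illustration of the idea (plain C only, not the GIMPLE the vectorizer actually emits; the helper name and the VF parameter below are hypothetical), each vector iteration now folds one chunk of elements into a single scalar accumulator, keeping the last element whose lane satisfies the loop condition and falling back to the previous value otherwise:

/* Hypothetical scalar model of one FOLD_EXTRACT_LAST (CLASTB) step:
   return the last lane of CHUNK whose predicate bit is set, or PREV
   if no lane is active.  */
static unsigned int
fold_chunk (unsigned int prev, const unsigned int *chunk,
            const _Bool *pred, int vf)
{
  unsigned int res = prev;
  for (int j = 0; j < vf; ++j)
    if (pred[j])
      res = chunk[j];
  return res;
}

Chaining this per-chunk fold through one scalar accumulator is what removes the separate index-vector reduction, and the overflow checks on its indices, that condition reductions previously required.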
-rw-r--r--  gcc/config/aarch64/aarch64-sve.md                    |  15
-rw-r--r--  gcc/config/aarch64/aarch64.md                        |   1
-rw-r--r--  gcc/doc/md.texi                                      |   9
-rw-r--r--  gcc/doc/sourcebuild.texi                             |   3
-rw-r--r--  gcc/internal-fn.c                                    |   5
-rw-r--r--  gcc/internal-fn.def                                  |   5
-rw-r--r--  gcc/optabs.def                                       |   1
-rw-r--r--  gcc/testsuite/gcc.dg/vect/pr65947-1.c                |   2
-rw-r--r--  gcc/testsuite/gcc.dg/vect/pr65947-10.c               |   3
-rw-r--r--  gcc/testsuite/gcc.dg/vect/pr65947-12.c               |   3
-rw-r--r--  gcc/testsuite/gcc.dg/vect/pr65947-13.c               |   3
-rw-r--r--  gcc/testsuite/gcc.dg/vect/pr65947-14.c               |   3
-rw-r--r--  gcc/testsuite/gcc.dg/vect/pr65947-2.c                |   3
-rw-r--r--  gcc/testsuite/gcc.dg/vect/pr65947-3.c                |   3
-rw-r--r--  gcc/testsuite/gcc.dg/vect/pr65947-4.c                |   2
-rw-r--r--  gcc/testsuite/gcc.dg/vect/pr65947-5.c                |   8
-rw-r--r--  gcc/testsuite/gcc.dg/vect/pr65947-6.c                |   3
-rw-r--r--  gcc/testsuite/gcc.dg/vect/pr65947-9.c                |   7
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve_clastb_1.c      |  20
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve_clastb_1_run.c  |  22
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve_clastb_2.c      |  26
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve_clastb_2_run.c  |  23
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve_clastb_3.c      |   8
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve_clastb_3_run.c  |  23
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve_clastb_4.c      |   8
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve_clastb_4_run.c  |  25
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve_clastb_5.c      |   8
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve_clastb_5_run.c  |  23
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve_clastb_6.c      |  24
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve_clastb_6_run.c  |  22
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve_clastb_7.c      |   7
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve_clastb_7_run.c  |  22
-rw-r--r--  gcc/testsuite/lib/target-supports.exp                |   6
-rw-r--r--  gcc/tree-vect-loop.c                                 | 121
-rw-r--r--  gcc/tree-vect-stmts.c                                | 112
-rw-r--r--  gcc/tree-vectorizer.h                                |   9
36 files changed, 506 insertions, 82 deletions
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 946798851f9..d08ec4a252e 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -1451,6 +1451,21 @@
"<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
)
+;; Set operand 0 to the last active element in operand 3, or to tied
+;; operand 1 if no elements are active.
+(define_insn "fold_extract_last_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand" "=r, w")
+ (unspec:<VEL>
+ [(match_operand:<VEL> 1 "register_operand" "0, 0")
+ (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
+ (match_operand:SVE_ALL 3 "register_operand" "w, w")]
+ UNSPEC_CLASTB))]
+ "TARGET_SVE"
+ "@
+ clastb\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype>
+ clastb\t%<vw>0, %2, %<vw>0, %3.<Vetype>"
+)
+
;; Unpredicated integer add reduction.
(define_expand "reduc_plus_scal_<mode>"
[(set (match_operand:<VEL> 0 "register_operand")
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 0e95a6235ea..c7bac9edf64 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -163,6 +163,7 @@
UNSPEC_LDN
UNSPEC_STN
UNSPEC_INSR
+ UNSPEC_CLASTB
])
(define_c_enum "unspecv" [
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 2958728d90f..15be86f6752 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5276,6 +5276,15 @@ has vector mode @var{m} while operand 0 has the mode appropriate for one
element of @var{m}. Operand 1 has the usual mask mode for vectors of mode
@var{m}; see @code{TARGET_VECTORIZE_GET_MASK_MODE}.
+@cindex @code{fold_extract_last_@var{m}} instruction pattern
+@item @code{fold_extract_last_@var{m}}
+If any bits of mask operand 2 are set, find the last set bit, extract
+the associated element from vector operand 3, and store the result
+in operand 0. Store operand 1 in operand 0 otherwise. Operand 3
+has mode @var{m} and operands 0 and 1 have the mode appropriate for
+one element of @var{m}. Operand 2 has the usual mask mode for vectors
+of mode @var{m}; see @code{TARGET_VECTORIZE_GET_MASK_MODE}.
+
@cindex @code{sdot_prod@var{m}} instruction pattern
@item @samp{sdot_prod@var{m}}
@cindex @code{udot_prod@var{m}} instruction pattern
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index 1ecd042f5a5..e0b1d973e51 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -1577,6 +1577,9 @@ Target supports 32- and 16-bytes vectors.
@item vect_logical_reduc
Target supports AND, IOR and XOR reduction on vectors.
+
+@item vect_fold_extract_last
+Target supports the @code{fold_extract_last} optab.
@end table
@subsubsection Thread Local Storage attributes
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 0779c3538aa..f8974c5c8ca 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -91,6 +91,7 @@ init_internal_fns ()
#define cond_unary_direct { 1, 1, true }
#define cond_binary_direct { 1, 1, true }
#define while_direct { 0, 2, false }
+#define fold_extract_direct { 2, 2, false }
const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = {
#define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) not_direct,
@@ -2833,6 +2834,9 @@ expand_while_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
#define expand_cond_binary_optab_fn(FN, STMT, OPTAB) \
expand_direct_optab_fn (FN, STMT, OPTAB, 3)
+#define expand_fold_extract_optab_fn(FN, STMT, OPTAB) \
+ expand_direct_optab_fn (FN, STMT, OPTAB, 3)
+
/* RETURN_TYPE and ARGS are a return type and argument list that are
in principle compatible with FN (which satisfies direct_internal_fn_p).
Return the types that should be used to determine whether the
@@ -2915,6 +2919,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
#define direct_store_lanes_optab_supported_p multi_vector_optab_supported_p
#define direct_mask_store_lanes_optab_supported_p multi_vector_optab_supported_p
#define direct_while_optab_supported_p convert_optab_supported_p
+#define direct_fold_extract_optab_supported_p direct_optab_supported_p
/* Return true if FN is supported for the types in TYPES when the
optimization type is OPT_TYPE. The types are those associated with
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 8d6871feb44..9c1b190f442 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -146,6 +146,11 @@ DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary)
DEF_INTERNAL_OPTAB_FN (EXTRACT_LAST, ECF_CONST | ECF_NOTHROW,
extract_last, cond_unary)
+/* Same, but return the first argument if no elements are active. */
+DEF_INTERNAL_OPTAB_FN (FOLD_EXTRACT_LAST, ECF_CONST | ECF_NOTHROW,
+ fold_extract_last, fold_extract)
+
+
/* Unary math functions. */
DEF_INTERNAL_FLT_FN (ACOS, ECF_CONST, acos, unary)
DEF_INTERNAL_FLT_FN (ASIN, ECF_CONST, asin, unary)
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 4660f50345f..db6fc2271f7 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -308,6 +308,7 @@ OPTAB_D (reduc_ior_scal_optab, "reduc_ior_scal_$a")
OPTAB_D (reduc_xor_scal_optab, "reduc_xor_scal_$a")
OPTAB_D (extract_last_optab, "extract_last_$a")
+OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a")
OPTAB_D (sdot_prod_optab, "sdot_prod$I$a")
OPTAB_D (ssum_widen_optab, "widen_ssum$I$a3")
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-1.c b/gcc/testsuite/gcc.dg/vect/pr65947-1.c
index 9072f11a104..bf6c098b3ee 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-1.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-1.c
@@ -41,4 +41,4 @@ main (void)
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction" 4 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-10.c b/gcc/testsuite/gcc.dg/vect/pr65947-10.c
index 321cb8c9211..b58b3456bd4 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-10.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-10.c
@@ -42,5 +42,6 @@ main (void)
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-not "optimizing condition reduction" "vect" { target { ! vect_fold_extract_last } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-12.c b/gcc/testsuite/gcc.dg/vect/pr65947-12.c
index 8e2c46f1a6b..1c959e16ab8 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-12.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-12.c
@@ -42,4 +42,5 @@ main (void)
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-not "optimizing condition reduction" "vect" { target { ! vect_fold_extract_last } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-13.c b/gcc/testsuite/gcc.dg/vect/pr65947-13.c
index 061777af34c..fc88cbe6227 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-13.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-13.c
@@ -42,4 +42,5 @@ main (void)
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-not "optimizing condition reduction" "vect" { target { ! vect_fold_extract_last } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-14.c b/gcc/testsuite/gcc.dg/vect/pr65947-14.c
index a28e80bb9fc..194e40f280e 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-14.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-14.c
@@ -41,4 +41,5 @@ main (void)
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction based on integer induction" 4 "vect" { target { ! vect_fold_extract_last } } } }*/
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-2.c b/gcc/testsuite/gcc.dg/vect/pr65947-2.c
index d72fffa6720..569da87ceaa 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-2.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-2.c
@@ -42,4 +42,5 @@ main (void)
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-not "optimizing condition reduction" "vect" { target { ! vect_fold_extract_last } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-3.c b/gcc/testsuite/gcc.dg/vect/pr65947-3.c
index 98945ba505d..05c266686b0 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-3.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-3.c
@@ -52,4 +52,5 @@ main (void)
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-not "optimizing condition reduction" "vect" { target { ! vect_fold_extract_last } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-4.c b/gcc/testsuite/gcc.dg/vect/pr65947-4.c
index 695889d743b..0fa50cef31f 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-4.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-4.c
@@ -41,5 +41,5 @@ main (void)
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction" 4 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-5.c b/gcc/testsuite/gcc.dg/vect/pr65947-5.c
index e577820ac3f..15f5ea8d8fa 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-5.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-5.c
@@ -50,6 +50,8 @@ main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */
-/* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" } } */
-/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" { target { ! vect_fold_extract_last } } } } */
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" { xfail vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-not "optimizing condition reduction" "vect" { target { ! vect_fold_extract_last } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-6.c b/gcc/testsuite/gcc.dg/vect/pr65947-6.c
index caa4a14120a..1c760366e71 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-6.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-6.c
@@ -41,4 +41,5 @@ main (void)
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-/* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-not "optimizing condition reduction" "vect" { target { ! vect_fold_extract_last } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-9.c b/gcc/testsuite/gcc.dg/vect/pr65947-9.c
index e8f20aabbdd..49dc7cb9ed2 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-9.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-9.c
@@ -45,5 +45,8 @@ main ()
return 0;
}
-/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */
-/* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" } } */
+/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { ! vect_fold_extract_last } } } } */
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" { target { ! vect_fold_extract_last } } } } */
+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
+/* { dg-final { scan-tree-dump-not "optimizing condition reduction" "vect" { target { ! vect_fold_extract_last } } } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_1.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_1.c
new file mode 100644
index 00000000000..4651c70afda
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_1.c
@@ -0,0 +1,20 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#define N 32
+
+/* Simple condition reduction. */
+
+int __attribute__ ((noinline, noclone))
+condition_reduction (int *a, int min_v)
+{
+ int last = 66; /* High start value. */
+
+ for (int i = 0; i < N; i++)
+ if (a[i] < min_v)
+ last = i;
+
+ return last;
+}
+
+/* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7], w[0-9]+, z[0-9]+\.s} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_1_run.c
new file mode 100644
index 00000000000..0dcba03b61c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_1_run.c
@@ -0,0 +1,22 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+
+#include "sve_clastb_1.c"
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ int a[N] = {
+ 11, -12, 13, 14, 15, 16, 17, 18, 19, 20,
+ 1, 2, -3, 4, 5, 6, 7, -8, 9, 10,
+ 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+ 31, 32
+ };
+
+ int ret = condition_reduction (a, 1);
+
+ if (ret != 17)
+ __builtin_abort ();
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_2.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_2.c
new file mode 100644
index 00000000000..381cbd17577
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_2.c
@@ -0,0 +1,26 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#include <stdint.h>
+
+#if !defined(TYPE)
+#define TYPE uint32_t
+#endif
+
+#define N 254
+
+/* Non-simple condition reduction. */
+
+TYPE __attribute__ ((noinline, noclone))
+condition_reduction (TYPE *a, TYPE min_v)
+{
+ TYPE last = 65;
+
+ for (TYPE i = 0; i < N; i++)
+ if (a[i] < min_v)
+ last = a[i];
+
+ return last;
+}
+
+/* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7]+, w[0-9]+, z[0-9]+\.s} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_2_run.c
new file mode 100644
index 00000000000..0d5187ba3ae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_2_run.c
@@ -0,0 +1,23 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+
+#include "sve_clastb_2.c"
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ unsigned int a[N] = {
+ 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+ 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+ 31, 32
+ };
+ __builtin_memset (a + 32, 43, (N - 32) * sizeof (int));
+
+ unsigned int ret = condition_reduction (a, 16);
+
+ if (ret != 10)
+ __builtin_abort ();
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_3.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_3.c
new file mode 100644
index 00000000000..90a3b938593
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_3.c
@@ -0,0 +1,8 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#define TYPE uint8_t
+
+#include "sve_clastb_2.c"
+
+/* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7]+, w[0-9]+, z[0-9]+\.b} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_3_run.c
new file mode 100644
index 00000000000..f90fbfc5e9b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_3_run.c
@@ -0,0 +1,23 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+
+#include "sve_clastb_3.c"
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ unsigned char a[N] = {
+ 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+ 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+ 31, 32
+ };
+ __builtin_memset (a + 32, 43, N - 32);
+
+ unsigned char ret = condition_reduction (a, 16);
+
+ if (ret != 10)
+ __builtin_abort ();
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_4.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_4.c
new file mode 100644
index 00000000000..dc01b21c273
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_4.c
@@ -0,0 +1,8 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#define TYPE int16_t
+
+#include "sve_clastb_2.c"
+
+/* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7], w[0-9]+, z[0-9]+\.h} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_4_run.c
new file mode 100644
index 00000000000..e17199f3672
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_4_run.c
@@ -0,0 +1,25 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -fno-inline -march=armv8-a+sve" } */
+
+#include "sve_clastb_4.c"
+
+extern void abort (void) __attribute__ ((noreturn));
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ short a[N] = {
+ 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+ 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+ 31, 32
+ };
+ __builtin_memset (a+32, 43, (N-32)*sizeof (short));
+
+ short ret = condition_reduction (a, 16);
+
+ if (ret != 10)
+ abort ();
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_5.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_5.c
new file mode 100644
index 00000000000..aef2a80c68f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_5.c
@@ -0,0 +1,8 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#define TYPE uint64_t
+
+#include "sve_clastb_2.c"
+
+/* { dg-final { scan-assembler {\tclastb\tx[0-9]+, p[0-7], x[0-9]+, z[0-9]+\.d} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_5_run.c
new file mode 100644
index 00000000000..e251db0bb76
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_5_run.c
@@ -0,0 +1,23 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+
+#include "sve_clastb_5.c"
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ long a[N] = {
+ 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+ 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+ 31, 32
+ };
+ __builtin_memset (a + 32, 43, (N - 32) * sizeof (long));
+
+ long ret = condition_reduction (a, 16);
+
+ if (ret != 10)
+ __builtin_abort ();
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_6.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_6.c
new file mode 100644
index 00000000000..93fec6396a2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_6.c
@@ -0,0 +1,24 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#define N 32
+
+#ifndef TYPE
+#define TYPE float
+#endif
+
+/* Non-integer data types. */
+
+TYPE __attribute__ ((noinline, noclone))
+condition_reduction (TYPE *a, TYPE min_v)
+{
+ TYPE last = 0;
+
+ for (int i = 0; i < N; i++)
+ if (a[i] < min_v)
+ last = a[i];
+
+ return last;
+}
+
+/* { dg-final { scan-assembler {\tclastb\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_6_run.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_6_run.c
new file mode 100644
index 00000000000..c204ed4c4f0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_6_run.c
@@ -0,0 +1,22 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+
+#include "sve_clastb_6.c"
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ float a[N] = {
+ 11.5, 12.2, 13.22, 14.1, 15.2, 16.3, 17, 18.7, 19, 20,
+ 1, 2, 3.3, 4.3333, 5.5, 6.23, 7, 8.63, 9, 10.6,
+ 21, 22.12, 23.55, 24.76, 25, 26, 27.34, 28.765, 29, 30,
+ 31.111, 32.322
+ };
+
+ float ret = condition_reduction (a, 16.7);
+
+ if (ret != (float) 10.6)
+ __builtin_abort ();
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_7.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_7.c
new file mode 100644
index 00000000000..d232a87e41d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_7.c
@@ -0,0 +1,7 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve --save-temps" } */
+
+#define TYPE double
+#include "sve_clastb_6.c"
+
+/* { dg-final { scan-assembler {\tclastb\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_clastb_7_run.c b/gcc/testsuite/gcc.target/aarch64/sve_clastb_7_run.c
new file mode 100644
index 00000000000..2f87a4766e0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve_clastb_7_run.c
@@ -0,0 +1,22 @@
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
+
+#include "sve_clastb_7.c"
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ double a[N] = {
+ 11.5, 12.2, 13.22, 14.1, 15.2, 16.3, 17, 18.7, 19, 20,
+ 1, 2, 3.3, 4.3333, 5.5, 6.23, 7, 8.63, 9, 10.6,
+ 21, 22.12, 23.55, 24.76, 25, 26, 27.34, 28.765, 29, 30,
+ 31.111, 32.322
+ };
+
+ double ret = condition_reduction (a, 16.7);
+
+ if (ret != 10.6)
+ __builtin_abort ();
+
+ return 0;
+}
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 7c7c042851f..00a6d9b5e7d 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -7174,6 +7174,12 @@ proc check_effective_target_vect_logical_reduc { } {
return [check_effective_target_aarch64_sve]
}
+# Return 1 if the target supports the fold_extract_last optab.
+
+proc check_effective_target_vect_fold_extract_last { } {
+ return [check_effective_target_aarch64_sve]
+}
+
# Return 1 if the target supports section-anchors
proc check_effective_target_section_anchors { } {
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 815dff37dad..910c2207aab 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -4026,7 +4026,7 @@ static void
vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
int ncopies)
{
- int prologue_cost = 0, epilogue_cost = 0;
+ int prologue_cost = 0, epilogue_cost = 0, inside_cost;
enum tree_code code;
optab optab;
tree vectype;
@@ -4045,13 +4045,11 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
target_cost_data = BB_VINFO_TARGET_COST_DATA (STMT_VINFO_BB_VINFO (stmt_info));
/* Condition reductions generate two reductions in the loop. */
- if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
+ vect_reduction_type reduction_type
+ = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
+ if (reduction_type == COND_REDUCTION)
ncopies *= 2;
- /* Cost of reduction op inside loop. */
- unsigned inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
- stmt_info, 0, vect_body);
-
vectype = STMT_VINFO_VECTYPE (stmt_info);
mode = TYPE_MODE (vectype);
orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
@@ -4061,14 +4059,30 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
code = gimple_assign_rhs_code (orig_stmt);
- /* Add in cost for initial definition.
- For cond reduction we have four vectors: initial index, step, initial
- result of the data reduction, initial value of the index reduction. */
- int prologue_stmts = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
- == COND_REDUCTION ? 4 : 1;
- prologue_cost += add_stmt_cost (target_cost_data, prologue_stmts,
- scalar_to_vec, stmt_info, 0,
- vect_prologue);
+ if (reduction_type == EXTRACT_LAST_REDUCTION)
+ {
+ /* No extra instructions needed in the prologue. */
+ prologue_cost = 0;
+
+ /* Count NCOPIES FOLD_EXTRACT_LAST operations. */
+ inside_cost = add_stmt_cost (target_cost_data, ncopies, vec_to_scalar,
+ stmt_info, 0, vect_body);
+ }
+ else
+ {
+ /* Add in cost for initial definition.
+ For cond reduction we have four vectors: initial index, step,
+ initial result of the data reduction, initial value of the index
+ reduction. */
+ int prologue_stmts = reduction_type == COND_REDUCTION ? 4 : 1;
+ prologue_cost += add_stmt_cost (target_cost_data, prologue_stmts,
+ scalar_to_vec, stmt_info, 0,
+ vect_prologue);
+
+ /* Cost of reduction op inside loop. */
+ inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
+ stmt_info, 0, vect_body);
+ }
/* Determine cost of epilogue code.
@@ -4079,7 +4093,7 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
{
if (reduc_code != ERROR_MARK)
{
- if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
+ if (reduction_type == COND_REDUCTION)
{
/* An EQ stmt and an COND_EXPR stmt. */
epilogue_cost += add_stmt_cost (target_cost_data, 2,
@@ -4104,7 +4118,7 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
vect_epilogue);
}
}
- else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
+ else if (reduction_type == COND_REDUCTION)
{
unsigned estimated_nunits = vect_nunits_for_cost (vectype);
/* Extraction of scalar elements. */
@@ -4118,6 +4132,9 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
scalar_stmt, stmt_info, 0,
vect_epilogue);
}
+ else if (reduction_type == EXTRACT_LAST_REDUCTION)
+ /* No extra instructions needed in the epilogue. */
+ ;
else
{
int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
@@ -4285,6 +4302,9 @@ get_initial_def_for_reduction (gimple *stmt, tree init_val,
return vect_create_destination_var (init_val, vectype);
}
+ vect_reduction_type reduction_type
+ = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_vinfo);
+
/* In case of a nested reduction do not use an adjustment def as
that case is not supported by the epilogue generation correctly
if ncopies is not one. */
@@ -4358,7 +4378,8 @@ get_initial_def_for_reduction (gimple *stmt, tree init_val,
if (adjustment_def)
{
*adjustment_def = NULL_TREE;
- if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_vinfo) != COND_REDUCTION)
+ if (reduction_type != COND_REDUCTION
+ && reduction_type != EXTRACT_LAST_REDUCTION)
{
init_def = vect_get_vec_def_for_operand (init_val, stmt);
break;
@@ -6040,6 +6061,11 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (reduc_stmt)))
reduc_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (reduc_stmt));
+ if (STMT_VINFO_VEC_REDUCTION_TYPE (vinfo_for_stmt (reduc_stmt))
+ == EXTRACT_LAST_REDUCTION)
+ /* Leave the scalar phi in place. */
+ return true;
+
gcc_assert (is_gimple_assign (reduc_stmt));
for (unsigned k = 1; k < gimple_num_ops (reduc_stmt); ++k)
{
@@ -6292,16 +6318,6 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
/* If we have a condition reduction, see if we can simplify it further. */
if (v_reduc_type == COND_REDUCTION)
{
- if (cond_reduc_dt == vect_induction_def)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "condition expression based on "
- "integer induction.\n");
- STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
- = INTEGER_INDUC_COND_REDUCTION;
- }
-
/* Loop peeling modifies initial value of reduction PHI, which
makes the reduction stmt to be transformed different to the
original stmt analyzed. We need to record reduction code for
@@ -6314,6 +6330,24 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
gcc_assert (cond_reduc_dt == vect_constant_def);
STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = CONST_COND_REDUCTION;
}
+ else if (direct_internal_fn_supported_p (IFN_FOLD_EXTRACT_LAST,
+ vectype_in, OPTIMIZE_FOR_SPEED))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "optimizing condition reduction with"
+ " FOLD_EXTRACT_LAST.\n");
+ STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = EXTRACT_LAST_REDUCTION;
+ }
+ else if (cond_reduc_dt == vect_induction_def)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "optimizing condition reduction based on "
+ "integer induction.\n");
+ STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+ = INTEGER_INDUC_COND_REDUCTION;
+ }
else if (cond_reduc_dt == vect_constant_def)
{
enum vect_def_type cond_initial_dt;
@@ -6467,12 +6501,12 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
(and also the same tree-code) when generating the epilog code and
when generating the code inside the loop. */
- if (orig_stmt)
+ vect_reduction_type reduction_type
+ = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
+ if (orig_stmt && reduction_type == TREE_CODE_REDUCTION)
{
/* This is a reduction pattern: get the vectype from the type of the
reduction variable, and get the tree-code from orig_stmt. */
- gcc_assert (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
- == TREE_CODE_REDUCTION);
orig_code = gimple_assign_rhs_code (orig_stmt);
gcc_assert (vectype_out);
vec_mode = TYPE_MODE (vectype_out);
@@ -6488,13 +6522,12 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
/* For simple condition reductions, replace with the actual expression
we want to base our reduction around. */
- if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == CONST_COND_REDUCTION)
+ if (reduction_type == CONST_COND_REDUCTION)
{
orig_code = STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info);
gcc_assert (orig_code == MAX_EXPR || orig_code == MIN_EXPR);
}
- else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
- == INTEGER_INDUC_COND_REDUCTION)
+ else if (reduction_type == INTEGER_INDUC_COND_REDUCTION)
orig_code = MAX_EXPR;
}
@@ -6516,7 +6549,9 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
epilog_reduc_code = ERROR_MARK;
- if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) != COND_REDUCTION)
+ if (reduction_type == TREE_CODE_REDUCTION
+ || reduction_type == INTEGER_INDUC_COND_REDUCTION
+ || reduction_type == CONST_COND_REDUCTION)
{
if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
{
@@ -6551,7 +6586,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
}
}
}
- else
+ else if (reduction_type == COND_REDUCTION)
{
int scalar_precision
= GET_MODE_PRECISION (SCALAR_TYPE_MODE (scalar_type));
@@ -6566,7 +6601,9 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
epilog_reduc_code = REDUC_MAX_EXPR;
}
- if (epilog_reduc_code == ERROR_MARK && !nunits_out.is_constant ())
+ if (reduction_type != EXTRACT_LAST_REDUCTION
+ && epilog_reduc_code == ERROR_MARK
+ && !nunits_out.is_constant ())
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -6575,8 +6612,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
return false;
}
- if ((double_reduc
- || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) != TREE_CODE_REDUCTION)
+ if ((double_reduc || reduction_type != TREE_CODE_REDUCTION)
&& ncopies > 1)
{
if (dump_enabled_p ())
@@ -6666,7 +6702,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
}
}
- if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
+ if (reduction_type == COND_REDUCTION)
{
widest_int ni;
@@ -6803,6 +6839,13 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
+ if (reduction_type == EXTRACT_LAST_REDUCTION)
+ {
+ gcc_assert (!slp_node);
+ return vectorizable_condition (stmt, gsi, vec_stmt,
+ NULL, reduc_index, NULL);
+ }
+
/* Create the destination vector */
vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 3c06757fcae..89ae60243b1 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -1601,6 +1601,47 @@ vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
}
}
+/* Helper function called by vect_finish_replace_stmt and
+ vect_finish_stmt_generation. Set the location of the new
+ statement and create a stmt_vec_info for it. */
+
+static void
+vect_finish_stmt_generation_1 (gimple *stmt, gimple *vec_stmt)
+{
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ vec_info *vinfo = stmt_info->vinfo;
+
+ set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
+ dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
+ }
+
+ gimple_set_location (vec_stmt, gimple_location (stmt));
+
+ /* While EH edges will generally prevent vectorization, stmt might
+ e.g. be in a must-not-throw region. Ensure newly created stmts
+ that could throw are part of the same region. */
+ int lp_nr = lookup_stmt_eh_lp (stmt);
+ if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
+ add_stmt_to_eh_lp (vec_stmt, lp_nr);
+}
+
+/* Replace the scalar statement STMT with a new vector statement VEC_STMT,
+ which sets the same scalar result as STMT did. */
+
+void
+vect_finish_replace_stmt (gimple *stmt, gimple *vec_stmt)
+{
+ gcc_assert (gimple_get_lhs (stmt) == gimple_get_lhs (vec_stmt));
+
+ gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+ gsi_replace (&gsi, vec_stmt, false);
+
+ vect_finish_stmt_generation_1 (stmt, vec_stmt);
+}
/* Function vect_finish_stmt_generation.
@@ -1610,9 +1651,6 @@ void
vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
gimple_stmt_iterator *gsi)
{
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
- vec_info *vinfo = stmt_info->vinfo;
-
gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
if (!gsi_end_p (*gsi)
@@ -1642,23 +1680,7 @@ vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
}
}
gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
-
- set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
-
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
- dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
- }
-
- gimple_set_location (vec_stmt, gimple_location (stmt));
-
- /* While EH edges will generally prevent vectorization, stmt might
- e.g. be in a must-not-throw region. Ensure newly created stmts
- that could throw are part of the same region. */
- int lp_nr = lookup_stmt_eh_lp (stmt);
- if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
- add_stmt_to_eh_lp (vec_stmt, lp_nr);
+ vect_finish_stmt_generation_1 (stmt, vec_stmt);
}
/* We want to vectorize a call to combined function CFN with function
@@ -8091,7 +8113,9 @@ vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
if (reduc_index && STMT_SLP_TYPE (stmt_info))
return false;
- if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
+ vect_reduction_type reduction_type
+ = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
+ if (reduction_type == TREE_CODE_REDUCTION)
{
if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
return false;
@@ -8250,12 +8274,13 @@ vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
/* Handle def. */
scalar_dest = gimple_assign_lhs (stmt);
- vec_dest = vect_create_destination_var (scalar_dest, vectype);
+ if (reduction_type != EXTRACT_LAST_REDUCTION)
+ vec_dest = vect_create_destination_var (scalar_dest, vectype);
/* Handle cond expr. */
for (j = 0; j < ncopies; j++)
{
- gassign *new_stmt = NULL;
+ gimple *new_stmt = NULL;
if (j == 0)
{
if (slp_node)
@@ -8389,11 +8414,42 @@ vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
}
}
}
- new_temp = make_ssa_name (vec_dest);
- new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
- vec_compare, vec_then_clause,
- vec_else_clause);
- vect_finish_stmt_generation (stmt, new_stmt, gsi);
+ if (reduction_type == EXTRACT_LAST_REDUCTION)
+ {
+ if (!is_gimple_val (vec_compare))
+ {
+ tree vec_compare_name = make_ssa_name (vec_cmp_type);
+ new_stmt = gimple_build_assign (vec_compare_name,
+ vec_compare);
+ vect_finish_stmt_generation (stmt, new_stmt, gsi);
+ vec_compare = vec_compare_name;
+ }
+ gcc_assert (reduc_index == 2);
+ new_stmt = gimple_build_call_internal
+ (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
+ vec_then_clause);
+ gimple_call_set_lhs (new_stmt, scalar_dest);
+ SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
+ if (stmt == gsi_stmt (*gsi))
+ vect_finish_replace_stmt (stmt, new_stmt);
+ else
+ {
+ /* In this case we're moving the definition to later in the
+ block. That doesn't matter because the only uses of the
+ lhs are in phi statements. */
+ gimple_stmt_iterator old_gsi = gsi_for_stmt (stmt);
+ gsi_remove (&old_gsi, true);
+ vect_finish_stmt_generation (stmt, new_stmt, gsi);
+ }
+ }
+ else
+ {
+ new_temp = make_ssa_name (vec_dest);
+ new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
+ vec_compare, vec_then_clause,
+ vec_else_clause);
+ vect_finish_stmt_generation (stmt, new_stmt, gsi);
+ }
if (slp_node)
SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
}
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 029e2b1d008..a87a9b49e14 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -67,7 +67,14 @@ enum vect_reduction_type {
TREE_CODE_REDUCTION,
COND_REDUCTION,
INTEGER_INDUC_COND_REDUCTION,
- CONST_COND_REDUCTION
+ CONST_COND_REDUCTION,
+
+ /* Retain a scalar phi and use a FOLD_EXTRACT_LAST within the loop
+ to implement:
+
+ for (int i = 0; i < VF; ++i)
+ res = cond[i] ? val[i] : res; */
+ EXTRACT_LAST_REDUCTION
};
#define VECTORIZABLE_CYCLE_DEF(D) (((D) == vect_reduction_def) \