author    Richard Biener <rguenther@suse.de>    2018-11-07 15:01:09 +0000
committer Richard Biener <rguenth@gcc.gnu.org>  2018-11-07 15:01:09 +0000
commit    5a951baaf1b789281bf62b852d24a4ab8cf3e714 (patch)
tree      0c036e96b195e6e1d15aa5359bf49a3c7372370d
parent    7d3a67d7b0c01c0370226db7840c9ef6e054b56c (diff)
download  gcc-5a951baaf1b789281bf62b852d24a4ab8cf3e714.tar.gz
re PR tree-optimization/87914 (gcc fails to vectorize bitreverse code)
2018-11-07  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/87914
	* tree-vect-loop.c (vect_is_simple_reduction): Improve detection
	of nested cycles.
	(vectorizable_reduction): Handle shifts and rotates by dispatching
	to vectorizable_shift.
	* tree-vect-stmts.c (vect_get_vec_def_for_operand_1): Handle
	in-loop uses of vect_nested_cycle defs.  Merge cycle and internal
	def cases.
	(vectorizable_shift): Export and handle being called as
	vect_nested_cycle.
	(vect_analyze_stmt): Call vectorizable_shift after
	vectorizable_reduction.
	* tree-vectorizer.h (vectorizable_shift): Declare.

	* lib/target-supports.exp (check_effective_target_vect_var_shift): New.
	(check_avx2_available): Likewise.
	* g++.dg/vect/pr87914.cc: New testcase.

From-SVN: r265876
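For context: PR87914 is about outer-loop vectorization of a bit-reversal routine whose inner loop both carries values around its backedge (a "nested cycle" in vectorizer terms) and shifts by an amount that changes every inner iteration. A minimal sketch of the problematic shape (illustrative only; it is simplified from the committed testcase below and does not itself compute a bit reversal):

    void f (unsigned *x)
    {
      for (int i = 0; i < 16; ++i)   // outer loop: the one being vectorized
        {
          unsigned v = x[i];
          unsigned s = sizeof (v) * 8;
          while ((s >>= 1) > 0)      // inner loop: v and s are nested cycles
            v = (v >> s) | (v << s); // shift amount s is a nested-cycle def
          x[i] = v;
        }
    }

Previously vect_is_simple_reduction gave up on such inner cycles, and vectorizable_reduction rejected shifts and rotates outright; with this change, nested-cycle shifts are dispatched to vectorizable_shift, which selects the vector/vector shift optabs for a nested-cycle shift operand (hence the AVX2 gating of the new testcase).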
-rw-r--r--  gcc/ChangeLog                           16
-rw-r--r--  gcc/testsuite/ChangeLog                  7
-rw-r--r--  gcc/testsuite/g++.dg/vect/pr87914.cc    49
-rw-r--r--  gcc/testsuite/lib/target-supports.exp   22
-rw-r--r--  gcc/tree-vect-loop.c                    87
-rw-r--r--  gcc/tree-vect-stmts.c                   35
-rw-r--r--  gcc/tree-vectorizer.h                    3
7 files changed, 170 insertions, 49 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c893ee253bf..21ba2ef7497 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,19 @@
+2018-11-07 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/87914
+ * tree-vect-loop.c (vect_is_simple_reduction): Improve detection
+ of nested cycles.
+ (vectorizable_reduction): Handle shifts and rotates by dispatching
+ to vectorizable_shift.
+ * tree-vect-stmts.c (vect_get_vec_def_for_operand_1): Handle
+ in-loop uses of vect_nested_cycle defs. Merge cycle and internal
+ def cases.
+ (vectorizable_shift): Export and handle being called as
+ vect_nested_cycle.
+ (vect_analyze_stmt): Call vectorizable_shift after
+ vectorizable_reduction.
+ * tree-vectorizer.h (vectorizable_shift): Declare.
+
2018-11-07 Jan Hubicka <jh@suse.cz>
* ipa-devirt.c (odr_types_equivalent_p): Expect constants
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 79e0fa3e3c9..210ad30b66d 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2018-11-07 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/87914
+ * lib/target-supports.exp (check_effective_target_vect_var_shift): New.
+ (check_avx2_available): Likewise.
+ * g++.dg/vect/pr87914.cc: New testcase.
+
2018-11-07 Chenghua Xu <paul.hua.gm@gmail.com>
* gcc.target/mips/loongson-ctz.c: Fix typo.
diff --git a/gcc/testsuite/g++.dg/vect/pr87914.cc b/gcc/testsuite/g++.dg/vect/pr87914.cc
new file mode 100644
index 00000000000..12fbba3af2f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vect/pr87914.cc
@@ -0,0 +1,49 @@
+// { dg-do run }
+// { dg-additional-options "-fopenmp-simd" }
+// { dg-additional-options "-mavx2" { target { avx2_runtime } } }
+
+extern "C" int memcmp(const void *s1, const void *s2, __SIZE_TYPE__ n);
+extern "C" void abort(void);
+
+template <typename T>
+T reverseBits(T x)
+{
+ unsigned int s = sizeof(x) * 8;
+ T mask = ~T(0);
+ while ((s >>= 1) > 0)
+ {
+ mask ^= (mask << s);
+ x = ((x >> s) & mask) | ((x << s) & ~mask); // unsupported use in stmt
+ }
+ return x;
+}
+
+void __attribute__((noinline,noipa))
+test_reverseBits(unsigned* x)
+{
+#pragma omp simd aligned(x:32)
+ for (int i = 0; i < 16; ++i)
+ x[i] = reverseBits(x[i]); // couldn't vectorize loop
+}
+
+int main()
+{
+ unsigned arr[16] __attribute__((aligned(32)))
+ = { 0x01020304, 0x05060708, 0x0a0b0c0d, 0x0e0f1011,
+ 0x11121314, 0x45065708, 0xfa0b3c0du, 0x0e0f1211,
+ 0x21222324, 0x55066708, 0xfa0b2c0du, 0x1e0f1011,
+ 0x31323334, 0x65067708, 0xfa0b5c0du, 0x0e3f1011 };
+ unsigned arr2[16]
+ = { 0x20c04080, 0x10e060a0, 0xb030d050, 0x8808f070u,
+ 0x28c84888, 0x10ea60a2, 0xb03cd05f, 0x8848f070u,
+ 0x24c44484, 0x10e660aa, 0xb034d05f, 0x8808f078u,
+ 0x2ccc4c8c, 0x10ee60a6, 0xb03ad05f, 0x8808fc70u };
+
+ test_reverseBits (arr);
+
+ if (memcmp (arr, arr2, sizeof (arr)) != 0)
+ abort ();
+ return 0;
+}
+
+// { dg-final { scan-tree-dump "OUTER LOOP VECTORIZED" "vect" { target { vect_var_shift && vect_int } } } }
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 76c393d85c9..c202a083edd 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -5329,6 +5329,15 @@ proc check_effective_target_vect_shift { } {
&& [check_effective_target_s390_vx]) }}]
}
+# Return 1 if the target supports hardware vector shifts by a variable amount.
+
+proc check_effective_target_vect_var_shift { } {
+ return [check_cached_effective_target_indexed vect_var_shift {
+ expr {(([istarget i?86-*-*] || [istarget x86_64-*-*])
+ && [check_avx2_available])
+ }}]
+}
+
proc check_effective_target_whole_vector_shift { } {
if { [istarget i?86-*-*] || [istarget x86_64-*-*]
|| [istarget ia64-*-*]
@@ -7163,6 +7172,19 @@ proc check_avx_available { } {
return 0;
}
+# Return true if we are compiling for AVX2 target.
+
+proc check_avx2_available { } {
+ if { [check_no_compiler_messages avx2_available assembly {
+ #ifndef __AVX2__
+ #error unsupported
+ #endif
+ } ""] } {
+ return 1;
+ }
+ return 0;
+}
+
# Return true if we are compiling for SSSE3 target.
proc check_ssse3_available { } {
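As a usage note, tests that require variable vector shifts can now be gated on the new effective target. A hypothetical example (the directives are the point; this file is illustrative and not part of the commit):

    // { dg-do compile }
    // { dg-require-effective-target vect_var_shift }
    // { dg-additional-options "-O3" }

    void g (unsigned *a, unsigned *s)
    {
      // each element is shifted by its own amount, so vectorizing this
      // needs a vector-by-vector shift instruction
      for (int i = 0; i < 1024; ++i)
        a[i] <<= s[i];
    }

The committed testcase instead uses the keyword in a dg-final target selector, as seen at the end of pr87914.cc above.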
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 41a46c2d234..5ce203b369d 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -2843,6 +2843,11 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
return NULL;
}
+ /* For inner loop reductions in nested vectorization there are no
+ constraints on the number of uses in the inner loop. */
+ if (loop == vect_loop->inner)
+ continue;
+
nloop_uses++;
if (nloop_uses > 1)
{
@@ -2901,13 +2906,19 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
else
/* We can have more than one loop-closed PHI. */
lcphis.safe_push (as_a <gphi *> (use_stmt));
- if (nloop_uses > 1)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "reduction used in loop.\n");
- return NULL;
- }
+ }
+
+ /* If this isn't a nested cycle or if the nested cycle reduction value
+ is used outside of the inner loop we cannot handle uses of the reduction
+ value. */
+ bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
+ if ((!nested_in_vect_loop || !lcphis.is_empty ())
+ && nloop_uses > 1)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "reduction used in loop.\n");
+ return NULL;
}
/* If DEF_STMT is a phi node itself, we expect it to have a single argument
@@ -2968,9 +2979,15 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
}
gassign *def_stmt = as_a <gassign *> (def_stmt_info->stmt);
- bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
code = orig_code = gimple_assign_rhs_code (def_stmt);
+ if (nested_in_vect_loop && !check_reduction)
+ {
+ if (dump_enabled_p ())
+ report_vect_op (MSG_NOTE, def_stmt, "detected nested cycle: ");
+ return def_stmt_info;
+ }
+
/* We can handle "res -= x[i]", which is non-associative by
simply rewriting this into "res += -x[i]". Avoid changing
gimple instruction for the first simple tests and only do this
@@ -6448,6 +6465,19 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
vec_mode = TYPE_MODE (vectype_in);
poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
+ if (nested_cycle)
+ {
+ def_bb = gimple_bb (reduc_def_phi);
+ def_stmt_loop = def_bb->loop_father;
+ def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi,
+ loop_preheader_edge (def_stmt_loop));
+ stmt_vec_info def_arg_stmt_info = loop_vinfo->lookup_def (def_arg);
+ if (def_arg_stmt_info
+ && (STMT_VINFO_DEF_TYPE (def_arg_stmt_info)
+ == vect_double_reduction_def))
+ double_reduc = true;
+ }
+
if (code == COND_EXPR)
{
/* Only call during the analysis stage, otherwise we'll lose
@@ -6462,20 +6492,26 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
return false;
}
}
- else
+ else if (code == LSHIFT_EXPR || code == RSHIFT_EXPR
+ || code == LROTATE_EXPR || code == RROTATE_EXPR)
{
- /* 4. Supportable by target? */
-
- if (code == LSHIFT_EXPR || code == RSHIFT_EXPR
- || code == LROTATE_EXPR || code == RROTATE_EXPR)
+ /* Only call during the analysis stage, otherwise we'll lose
+ STMT_VINFO_TYPE. We only support this for nested cycles
+ without double reductions at the moment. */
+ if (!nested_cycle
+ || double_reduc
+ || (!vec_stmt && !vectorizable_shift (stmt_info, gsi, NULL,
+ NULL, cost_vec)))
{
- /* Shifts and rotates are only supported by vectorizable_shifts,
- not vectorizable_reduction. */
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "unsupported shift or rotation.\n");
+ "unsupported shift or rotation in reduction\n");
return false;
}
+ }
+ else
+ {
+ /* 4. Supportable by target? */
/* 4.1. check support for the operation in the loop */
optab = optab_for_tree_code (code, vectype_in, optab_default);
@@ -6580,19 +6616,6 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
orig_code = cond_reduc_op_code;
}
- if (nested_cycle)
- {
- def_bb = gimple_bb (reduc_def_phi);
- def_stmt_loop = def_bb->loop_father;
- def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi,
- loop_preheader_edge (def_stmt_loop));
- stmt_vec_info def_arg_stmt_info = loop_vinfo->lookup_def (def_arg);
- if (def_arg_stmt_info
- && (STMT_VINFO_DEF_TYPE (def_arg_stmt_info)
- == vect_double_reduction_def))
- double_reduc = true;
- }
-
reduc_fn = IFN_LAST;
if (reduction_type == TREE_CODE_REDUCTION
@@ -6963,6 +6986,12 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
/* Multiple types are not supported for condition. */
break;
}
+ if (code == LSHIFT_EXPR
+ || code == RSHIFT_EXPR)
+ {
+ vectorizable_shift (stmt_info, gsi, vec_stmt, slp_node, NULL);
+ break;
+ }
/* Handle uses. */
if (j == 0)
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 7127c17c788..8133149b2dc 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -1461,6 +1461,16 @@ vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
/* Code should use vect_get_vec_def_for_operand. */
gcc_unreachable ();
+ /* Operand is defined by a loop header phi. In case of nested
+ cycles we also may have uses of the backedge def. */
+ case vect_reduction_def:
+ case vect_double_reduction_def:
+ case vect_nested_cycle:
+ case vect_induction_def:
+ gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
+ || dt == vect_nested_cycle);
+ /* Fallthru. */
+
/* operand is defined inside the loop. */
case vect_internal_def:
{
@@ -1480,23 +1490,6 @@ vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
return vec_oprnd;
}
- /* operand is defined by a loop header phi. */
- case vect_reduction_def:
- case vect_double_reduction_def:
- case vect_nested_cycle:
- case vect_induction_def:
- {
- gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI);
-
- /* Get the def from the vectorized stmt. */
- vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
- if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
- vec_oprnd = PHI_RESULT (phi);
- else
- vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
- return vec_oprnd;
- }
-
default:
gcc_unreachable ();
}
@@ -5363,7 +5356,7 @@ vect_supportable_shift (enum tree_code code, tree scalar_type)
stmt to replace it, put it in VEC_STMT, and insert it at GSI.
Return true if STMT_INFO is vectorizable in this way. */
-static bool
+bool
vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
stmt_vec_info *vec_stmt, slp_tree slp_node,
stmt_vector_for_cost *cost_vec)
@@ -5401,6 +5394,7 @@ vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
return false;
if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
+ && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
&& ! vec_stmt)
return false;
@@ -5480,7 +5474,8 @@ vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
shift/rotate amount is a vector, use the vector/vector shift optabs. */
if ((dt[1] == vect_internal_def
- || dt[1] == vect_induction_def)
+ || dt[1] == vect_induction_def
+ || dt[1] == vect_nested_cycle)
&& !slp_node)
scalar_shift_arg = false;
else if (dt[1] == vect_constant_def
@@ -9540,7 +9535,6 @@ vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
|| vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
cost_vec)
|| vectorizable_conversion (stmt_info, NULL, NULL, node, cost_vec)
- || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
|| vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
|| vectorizable_assignment (stmt_info, NULL, NULL, node, cost_vec)
|| vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
@@ -9549,6 +9543,7 @@ vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
|| vectorizable_reduction (stmt_info, NULL, NULL, node,
node_instance, cost_vec)
|| vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec)
+ || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
|| vectorizable_condition (stmt_info, NULL, NULL, NULL, 0, node,
cost_vec)
|| vectorizable_comparison (stmt_info, NULL, NULL, NULL, node,
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index e1292aa6eb6..e66f28b364e 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1483,6 +1483,9 @@ extern opt_result vect_analyze_stmt (stmt_vec_info, bool *, slp_tree,
extern bool vectorizable_condition (stmt_vec_info, gimple_stmt_iterator *,
stmt_vec_info *, tree, int, slp_tree,
stmt_vector_for_cost *);
+extern bool vectorizable_shift (stmt_vec_info, gimple_stmt_iterator *,
+ stmt_vec_info *, slp_tree,
+ stmt_vector_for_cost *);
extern void vect_get_load_cost (stmt_vec_info, int, bool,
unsigned int *, unsigned int *,
stmt_vector_for_cost *,