summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Richard Biener <rguenther@suse.de> 2018-11-09 10:53:31 +0000
committer Richard Biener <rguenth@gcc.gnu.org> 2018-11-09 10:53:31 +0000
commit 1dd6990226fd92adf1e3c12a96c5776ec8efcb60 (patch)
tree 0aac21b4707fd7fd0474d87f2cfc0233c4385409
parent 43b01cc12de676d5a3871f7f813c740fc32b5a1a (diff)
download gcc-1dd6990226fd92adf1e3c12a96c5776ec8efcb60.tar.gz
re PR tree-optimization/87621 (outer loop auto-vectorization fails for exponentiation code)
2018-11-09  Richard Biener  <rguenther@suse.de>

    PR tree-optimization/87621
    * tree-vect-loop.c (vectorizable_reduction): Handle reduction
    op with only phi inputs.
    * tree-ssa-loop-ch.c: Include tree-ssa-sccvn.h.
    (ch_base::copy_headers): Run CSE on copied loop headers.
    (pass_ch_vect::process_loop_p): Simplify.

    * g++.dg/vect/pr87621.cc: New testcase.

From-SVN: r265959
-rw-r--r-- gcc/ChangeLog 9
-rw-r--r-- gcc/testsuite/ChangeLog 5
-rw-r--r-- gcc/testsuite/g++.dg/vect/pr87621.cc 27
-rw-r--r-- gcc/tree-ssa-loop-ch.c 46
-rw-r--r-- gcc/tree-vect-loop.c 4
5 files changed, 75 insertions, 16 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index ec1ad174de9..ff8f8224147 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2018-11-09 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/87621
+ * tree-vect-loop.c (vectorizable_reduction): Handle reduction
+ op with only phi inputs.
+ * tree-ssa-loop-ch.c: Include tree-ssa-sccvn.h.
+ (ch_base::copy_headers): Run CSE on copied loop headers.
+ (pass_ch_vect::process_loop_p): Simplify.
+
2018-11-09 Alexandre Oliva <oliva@adacore.com>
* config/i386/mingw32.h (LINK_SPEC_LARGE_ADDR_AWARE): Adjust
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 6434be07cfa..6c9969497b6 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2018-11-09 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/87621
+ * g++.dg/vect/pr87621.cc: New testcase.
+
2018-11-09 Alexandre Oliva <aoliva@redhat.com>
PR rtl-optimization/86438
diff --git a/gcc/testsuite/g++.dg/vect/pr87621.cc b/gcc/testsuite/g++.dg/vect/pr87621.cc
new file mode 100644
index 00000000000..cfc53be4ee1
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vect/pr87621.cc
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+
+extern "C" double pow(double, double);
+template <typename T>
+T pow(T x, unsigned int n)
+{
+ if (!n)
+ return 1;
+
+ T y = 1;
+ while (n > 1)
+ {
+ if (n%2)
+ y *= x;
+ x = x*x;
+ n /= 2;
+ }
+ return x*y;
+}
+
+void testVec(int* x)
+{
+ for (int i = 0; i < 8; ++i)
+ x[i] = pow(x[i], 10);
+}
+
+/* { dg-final { scan-tree-dump "OUTER LOOP VECTORIZED" "vect" { target { vect_double && vect_hw_misalign } } } } */
diff --git a/gcc/tree-ssa-loop-ch.c b/gcc/tree-ssa-loop-ch.c
index c876d62405f..4d4813df3c8 100644
--- a/gcc/tree-ssa-loop-ch.c
+++ b/gcc/tree-ssa-loop-ch.c
@@ -33,6 +33,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-inline.h"
#include "tree-ssa-scopedtables.h"
#include "tree-ssa-threadedge.h"
+#include "tree-ssa-sccvn.h"
#include "params.h"
/* Duplicates headers of loops if they are small enough, so that the statements
@@ -297,12 +298,14 @@ ch_base::copy_headers (function *fun)
bool changed = false;
if (number_of_loops (fun) <= 1)
- return 0;
+ return 0;
bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
copied_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
bbs_size = n_basic_blocks_for_fn (fun);
+ auto_vec<std::pair<edge, loop_p> > copied;
+
FOR_EACH_LOOP (loop, 0)
{
int initial_limit = PARAM_VALUE (PARAM_MAX_LOOP_HEADER_INSNS);
@@ -371,6 +374,7 @@ ch_base::copy_headers (function *fun)
fprintf (dump_file, "Duplication failed.\n");
continue;
}
+ copied.safe_push (std::make_pair (entry, loop));
/* If the loop has the form "for (i = j; i < j + 10; i++)" then
this copying can introduce a case where we rely on undefined
@@ -422,7 +426,28 @@ ch_base::copy_headers (function *fun)
}
if (changed)
- update_ssa (TODO_update_ssa);
+ {
+ update_ssa (TODO_update_ssa);
+ /* After updating SSA form perform CSE on the loop header
+ copies. This is esp. required for the pass before
+ vectorization since nothing cleans up copied exit tests
+ that can now be simplified. CSE from the entry of the
+ region we copied till all loop exit blocks but not
+ entering the loop itself. */
+ for (unsigned i = 0; i < copied.length (); ++i)
+ {
+ edge entry = copied[i].first;
+ loop_p loop = copied[i].second;
+ vec<edge> exit_edges = get_loop_exit_edges (loop);
+ bitmap exit_bbs = BITMAP_ALLOC (NULL);
+ for (unsigned j = 0; j < exit_edges.length (); ++j)
+ bitmap_set_bit (exit_bbs, exit_edges[j]->dest->index);
+ bitmap_set_bit (exit_bbs, loop->header->index);
+ do_rpo_vn (cfun, entry, exit_bbs);
+ BITMAP_FREE (exit_bbs);
+ exit_edges.release ();
+ }
+ }
free (bbs);
free (copied_bbs);
@@ -473,24 +498,13 @@ pass_ch_vect::process_loop_p (struct loop *loop)
if (loop->dont_vectorize)
return false;
- if (!do_while_loop_p (loop))
- return true;
-
- /* The vectorizer won't handle anything with multiple exits, so skip. */
+ /* The vectorizer won't handle anything with multiple exits, so skip. */
edge exit = single_exit (loop);
if (!exit)
return false;
- /* Copy headers iff there looks to be code in the loop after the exit block,
- i.e. the exit block has an edge to another block (besides the latch,
- which should be empty). */
- edge_iterator ei;
- edge e;
- FOR_EACH_EDGE (e, ei, exit->src->succs)
- if (!loop_exit_edge_p (loop, e)
- && e->dest != loop->header
- && e->dest != loop->latch)
- return true;
+ if (!do_while_loop_p (loop))
+ return true;
return false;
}
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 5ce203b369d..7338ca5efa8 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -6075,6 +6075,10 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op));
break;
}
+ /* For a nested cycle we might end up with an operation like
+ phi_result * phi_result. */
+ if (!vectype_in)
+ vectype_in = STMT_VINFO_VECTYPE (stmt_info);
gcc_assert (vectype_in);
if (slp_node)