diff options
author | Richard Biener <rguenther@suse.de> | 2018-11-09 10:53:31 +0000 |
---|---|---|
committer | Richard Biener <rguenth@gcc.gnu.org> | 2018-11-09 10:53:31 +0000 |
commit | 1dd6990226fd92adf1e3c12a96c5776ec8efcb60 (patch) | |
tree | 0aac21b4707fd7fd0474d87f2cfc0233c4385409 | |
parent | 43b01cc12de676d5a3871f7f813c740fc32b5a1a (diff) | |
download | gcc-1dd6990226fd92adf1e3c12a96c5776ec8efcb60.tar.gz |
re PR tree-optimization/87621 (outer loop auto-vectorization fails for exponentiation code)
2018-11-09 Richard Biener <rguenther@suse.de>
PR tree-optimization/87621
* tree-vect-loop.c (vectorizable_reduction): Handle reduction
op with only phi inputs.
* tree-ssa-loop-ch.c: Include tree-ssa-sccvn.h.
(ch_base::copy_headers): Run CSE on copied loop headers.
(pass_ch_vect::process_loop_p): Simplify.
* g++.dg/vect/pr87621.cc: New testcase.
From-SVN: r265959
-rw-r--r-- | gcc/ChangeLog | 9 |
-rw-r--r-- | gcc/testsuite/ChangeLog | 5 |
-rw-r--r-- | gcc/testsuite/g++.dg/vect/pr87621.cc | 27 |
-rw-r--r-- | gcc/tree-ssa-loop-ch.c | 46 |
-rw-r--r-- | gcc/tree-vect-loop.c | 4 |
5 files changed, 75 insertions, 16 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index ec1ad174de9..ff8f8224147 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2018-11-09 Richard Biener <rguenther@suse.de> + + PR tree-optimization/87621 + * tree-vect-loop.c (vectorizable_reduction): Handle reduction + op with only phi inputs. + * tree-ssa-loop-ch.c: Include tree-ssa-sccvn.h. + (ch_base::copy_headers): Run CSE on copied loop headers. + (pass_ch_vect::process_loop_p): Simplify. + 2018-11-09 Alexandre Oliva <oliva@adacore.com> * config/i386/mingw32.h (LINK_SPEC_LARGE_ADDR_AWARE): Adjust diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 6434be07cfa..6c9969497b6 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2018-11-09 Richard Biener <rguenther@suse.de> + + PR tree-optimization/87621 + * g++.dg/vect/pr87621.cc: New testcase. + 2018-11-09 Alexandre Oliva <aoliva@redhat.com> PR rtl-optimization/86438 diff --git a/gcc/testsuite/g++.dg/vect/pr87621.cc b/gcc/testsuite/g++.dg/vect/pr87621.cc new file mode 100644 index 00000000000..cfc53be4ee1 --- /dev/null +++ b/gcc/testsuite/g++.dg/vect/pr87621.cc @@ -0,0 +1,27 @@ +/* { dg-do compile } */ + +extern "C" double pow(double, double); +template <typename T> +T pow(T x, unsigned int n) +{ + if (!n) + return 1; + + T y = 1; + while (n > 1) + { + if (n%2) + y *= x; + x = x*x; + n /= 2; + } + return x*y; +} + +void testVec(int* x) +{ + for (int i = 0; i < 8; ++i) + x[i] = pow(x[i], 10); +} + +/* { dg-final { scan-tree-dump "OUTER LOOP VECTORIZED" "vect" { target { vect_double && vect_hw_misalign } } } } */ diff --git a/gcc/tree-ssa-loop-ch.c b/gcc/tree-ssa-loop-ch.c index c876d62405f..4d4813df3c8 100644 --- a/gcc/tree-ssa-loop-ch.c +++ b/gcc/tree-ssa-loop-ch.c @@ -33,6 +33,7 @@ along with GCC; see the file COPYING3. 
If not see #include "tree-inline.h" #include "tree-ssa-scopedtables.h" #include "tree-ssa-threadedge.h" +#include "tree-ssa-sccvn.h" #include "params.h" /* Duplicates headers of loops if they are small enough, so that the statements @@ -297,12 +298,14 @@ ch_base::copy_headers (function *fun) bool changed = false; if (number_of_loops (fun) <= 1) - return 0; + return 0; bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun)); copied_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun)); bbs_size = n_basic_blocks_for_fn (fun); + auto_vec<std::pair<edge, loop_p> > copied; + FOR_EACH_LOOP (loop, 0) { int initial_limit = PARAM_VALUE (PARAM_MAX_LOOP_HEADER_INSNS); @@ -371,6 +374,7 @@ ch_base::copy_headers (function *fun) fprintf (dump_file, "Duplication failed.\n"); continue; } + copied.safe_push (std::make_pair (entry, loop)); /* If the loop has the form "for (i = j; i < j + 10; i++)" then this copying can introduce a case where we rely on undefined @@ -422,7 +426,28 @@ ch_base::copy_headers (function *fun) } if (changed) - update_ssa (TODO_update_ssa); + { + update_ssa (TODO_update_ssa); + /* After updating SSA form perform CSE on the loop header + copies. This is esp. required for the pass before + vectorization since nothing cleans up copied exit tests + that can now be simplified. CSE from the entry of the + region we copied till all loop exit blocks but not + entering the loop itself. 
*/ + for (unsigned i = 0; i < copied.length (); ++i) + { + edge entry = copied[i].first; + loop_p loop = copied[i].second; + vec<edge> exit_edges = get_loop_exit_edges (loop); + bitmap exit_bbs = BITMAP_ALLOC (NULL); + for (unsigned j = 0; j < exit_edges.length (); ++j) + bitmap_set_bit (exit_bbs, exit_edges[j]->dest->index); + bitmap_set_bit (exit_bbs, loop->header->index); + do_rpo_vn (cfun, entry, exit_bbs); + BITMAP_FREE (exit_bbs); + exit_edges.release (); + } + } free (bbs); free (copied_bbs); @@ -473,24 +498,13 @@ pass_ch_vect::process_loop_p (struct loop *loop) if (loop->dont_vectorize) return false; - if (!do_while_loop_p (loop)) - return true; - - /* The vectorizer won't handle anything with multiple exits, so skip. */ + /* The vectorizer won't handle anything with multiple exits, so skip. */ edge exit = single_exit (loop); if (!exit) return false; - /* Copy headers iff there looks to be code in the loop after the exit block, - i.e. the exit block has an edge to another block (besides the latch, - which should be empty). */ - edge_iterator ei; - edge e; - FOR_EACH_EDGE (e, ei, exit->src->succs) - if (!loop_exit_edge_p (loop, e) - && e->dest != loop->header - && e->dest != loop->latch) - return true; + if (!do_while_loop_p (loop)) + return true; return false; } diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 5ce203b369d..7338ca5efa8 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -6075,6 +6075,10 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op)); break; } + /* For a nested cycle we might end up with an operation like + phi_result * phi_result. */ + if (!vectype_in) + vectype_in = STMT_VINFO_VECTYPE (stmt_info); gcc_assert (vectype_in); if (slp_node) |