summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Sandiford <richard.sandiford@linaro.org>2017-06-24 17:27:03 +0100
committerRichard Sandiford <richard.sandiford@linaro.org>2017-11-20 16:01:23 +0000
commitd184c97130599e4fc15db38655451e380379c387 (patch)
treed046e56bfbd6a40106ae6ab96fafc954f1dfc955
parentb9dd452e9cedf0f4e384d397dca64a1f4a7ebfd4 (diff)
downloadgcc-rsandifo/sve-rebase.tar.gz
Early predcomrsandifo/sve-rebase
-rw-r--r--gcc/passes.def1
-rw-r--r--gcc/testsuite/gnat.dg/vect18.adb2
-rw-r--r--gcc/tree-pass.h1
-rw-r--r--gcc/tree-predcom.c157
4 files changed, 146 insertions, 15 deletions
diff --git a/gcc/passes.def b/gcc/passes.def
index 8a0740716fc..55226f7b751 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -289,6 +289,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_parallelize_loops, false /* oacc_kernels_p */);
NEXT_PASS (pass_expand_omp_ssa);
NEXT_PASS (pass_ch_vect);
+ NEXT_PASS (pass_early_predcom);
NEXT_PASS (pass_if_conversion);
/* pass_vectorize must immediately follow pass_if_conversion.
Please do not add any other passes in between. */
diff --git a/gcc/testsuite/gnat.dg/vect18.adb b/gcc/testsuite/gnat.dg/vect18.adb
index 91b1175248d..8739f9f1eb6 100644
--- a/gcc/testsuite/gnat.dg/vect18.adb
+++ b/gcc/testsuite/gnat.dg/vect18.adb
@@ -1,5 +1,5 @@
-- { dg-do compile { target i?86-*-* x86_64-*-* } }
--- { dg-options "-O3 -msse2 -fdump-tree-vect-details" }
+-- { dg-options "-O3 -msse2 -fdump-tree-vect-details -fno-predictive-commoning" }
package body Vect18 is
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index fbd0dbdf924..9b97c02e06d 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -370,6 +370,7 @@ extern gimple_opt_pass *make_pass_tree_loop_init (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_lim (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_tree_unswitch (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_loop_split (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_early_predcom (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_predcom (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_iv_canon (gcc::context *ctxt);
extern gimple_opt_pass *make_pass_scev_cprop (gcc::context *ctxt);
diff --git a/gcc/tree-predcom.c b/gcc/tree-predcom.c
index dbdc0acdeb4..73ebd951dba 100644
--- a/gcc/tree-predcom.c
+++ b/gcc/tree-predcom.c
@@ -234,7 +234,7 @@ along with GCC; see the file COPYING3. If not see
/* The maximum number of iterations between the considered memory
references. */
-#define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
+#define MAX_DISTANCE (target_avail_regs < 16 ? 3 : 7)
/* Data references (or phi nodes that carry data reference values across
loop iterations). */
@@ -379,6 +379,71 @@ static bitmap looparound_phis;
static hash_map<tree, name_expansion *> *name_expansions;
+/* True if we're running the early predcom pass and should only handle
+ cases that aid vectorization. Specifically this means that:
+
+ - only CT_INVARIANT and CT_STORE_LOAD chains are used
+ - the maximum distance for a CT_STORE_LOAD chain is 1 iteration,
+ and at that distance the store must come after the load
+ - there's no unrolling or detection of looparound phis.
+
+ The idea is handle inductions that go via memory, such as:
+
+ for (int i = 1; i < n; ++i)
+ x[i] = x[i - 1] + 1;
+
+ As it stands this loop could never be vectorized, because a loop body
+ that contains a read of x[j] followed by a write to x[j + 1] would
+ have its vectorization factor limited to 1. Transforming it to:
+
+ int tmp = x[0];
+ for (int i = 0; i < n; ++i)
+ {
+ tmp += 1;
+ x[i] = tmp:
+ }
+
+ exposes the fact that the stored value is a simple vectorizable
+ induction with start value x[0] and step 1.
+
+ [ Commoning is not always useful even in this situation. For example,
+ carrying over the value of x[i] won't help us to vectorize:
+
+ for (int i = 1; i < n; ++i)
+ {
+ y[i] = x[i - 1];
+ x[i] += i;
+ }
+
+ There's no real need to restrict things further though, because we're
+ unable to vectorize these load/store combinations in their current
+ form whatever happens. ]
+
+ We require the store to come after the load when the distance is 1
+ to avoid cases like:
+
+ for (int i = 1; i < n; ++i)
+ {
+ x[i] = ...;
+ ... = x[i - 1];
+ }
+
+ These accesses effectively have a distance somewhere between 1 and 2,
+ since after commoning the value stored in the previous iteration would
+ still be live at the next store. This means that the combination
+ isn't useful for exposing simple inductions.
+
+ Also, unlike the motivating case above, this combination does not
+ prevent vectorization. If a write to x[j + 1] comes before a read
+ of x[j], the vectorized write completes for all vector elements
+ before the read starts for any vector elements. */
+
+static bool only_simple_p;
+
+/* The maximum loop carry distance for this execution of the pass. */
+
+static int max_distance;
+
/* Dumps data reference REF to FILE. */
extern void dump_dref (FILE *, dref);
@@ -1067,7 +1132,13 @@ add_ref_to_chain (chain_p chain, dref ref)
gcc_assert (wi::les_p (root->offset, ref->offset));
widest_int dist = ref->offset - root->offset;
- if (wi::leu_p (MAX_DISTANCE, dist))
+ /* When running before vectorization, only allow the maximum distance
+ if the consumer comes before the producer. See the comment above
+ ONLY_SIMPLE_P for details. */
+ if (wi::ltu_p (max_distance, dist)
+ || (only_simple_p
+ && wi::eq_p (max_distance, dist)
+ && root->pos < ref->pos))
{
free (ref);
return;
@@ -1307,7 +1378,9 @@ add_looparound_copies (struct loop *loop, chain_p chain)
dref ref, root = get_chain_root (chain);
gphi *phi;
- if (chain->type == CT_STORE_STORE)
+ /* There's no point doing this when running before vectorization,
+ since we won't unroll the loop or combine chains. */
+ if (only_simple_p || chain->type == CT_STORE_STORE)
return;
FOR_EACH_VEC_ELT (chain->refs, i, ref)
@@ -1360,7 +1433,7 @@ determine_roots_comp (struct loop *loop,
{
if (!chain
|| (!comp->eliminate_store_p && DR_IS_WRITE (a->ref))
- || wi::leu_p (MAX_DISTANCE, a->offset - last_ofs))
+ || wi::ltu_p (max_distance, a->offset - last_ofs))
{
if (nontrivial_chain_p (chain))
{
@@ -1370,6 +1443,15 @@ determine_roots_comp (struct loop *loop,
else
release_chain (chain);
+ /* Only create CT_STORE_LOAD and CT_INVARIANT chains when
+ running before vectorization. */
+ if (only_simple_p && !DR_IS_WRITE (a->ref))
+ {
+ free (a);
+ chain = NULL;
+ continue;
+ }
+
if (DR_IS_READ (a->ref))
type = CT_LOAD;
else
@@ -2134,6 +2216,10 @@ determine_unroll_factor (vec<chain_p> chains)
unsigned factor = 1, af, nfactor, i;
unsigned max = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
+ /* Do not unroll when running before vectorization. */
+ if (only_simple_p)
+ return 1;
+
FOR_EACH_VEC_ELT (chains, i, chain)
{
if (chain->type == CT_INVARIANT)
@@ -3174,8 +3260,11 @@ tree_predictive_commoning_loop (struct loop *loop)
prepare_initializers (loop, chains);
loop_closed_ssa = prepare_finalizers (loop, chains);
- /* Try to combine the chains that are always worked with together. */
- try_combine_chains (loop, &chains);
+ /* During the main pass, try to combine the chains that are always
+ worked with together. For the early pass it should be better
+ to leave this to the vectorizer. */
+ if (!only_simple_p)
+ try_combine_chains (loop, &chains);
insert_init_seqs (loop, chains);
@@ -3237,14 +3326,18 @@ end: ;
return (unroll ? 1 : 0) | (loop_closed_ssa ? 2 : 0);
}
-/* Runs predictive commoning. */
+/* Runs predictive commoning. EARLY_P is true if we are running before
+ vectorization. */
unsigned
-tree_predictive_commoning (void)
+tree_predictive_commoning (bool early_p)
{
struct loop *loop;
unsigned ret = 0, changed = 0;
+ only_simple_p = early_p;
+ max_distance = early_p ? 1 : MAX_DISTANCE;
+
initialize_original_copy_tables ();
FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
if (optimize_loop_for_speed_p (loop))
@@ -3266,19 +3359,51 @@ tree_predictive_commoning (void)
return ret;
}
-/* Predictive commoning Pass. */
+/* Predictive commoning pass. EARLY_P is true if we are running before
+ vectorization. */
static unsigned
-run_tree_predictive_commoning (struct function *fun)
+run_tree_predictive_commoning (struct function *fun, bool early_p)
{
if (number_of_loops (fun) <= 1)
return 0;
- return tree_predictive_commoning ();
+ return tree_predictive_commoning (early_p);
}
namespace {
+const pass_data pass_data_early_predcom =
+{
+ GIMPLE_PASS, /* type */
+ "epcom", /* name */
+ OPTGROUP_LOOP, /* optinfo_flags */
+ TV_PREDCOM, /* tv_id */
+ PROP_cfg, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_update_ssa_only_virtuals, /* todo_flags_finish */
+};
+
+class pass_early_predcom : public gimple_opt_pass
+{
+public:
+ pass_early_predcom (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_early_predcom, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ virtual bool gate (function *)
+ {
+ return flag_predictive_commoning && flag_tree_loop_vectorize;
+ }
+ virtual unsigned int execute (function *fun)
+ {
+ return run_tree_predictive_commoning (fun, true);
+ }
+}; // class pass_early_predcom
+
const pass_data pass_data_predcom =
{
GIMPLE_PASS, /* type */
@@ -3303,7 +3428,7 @@ public:
virtual bool gate (function *) { return flag_predictive_commoning != 0; }
virtual unsigned int execute (function *fun)
{
- return run_tree_predictive_commoning (fun);
+ return run_tree_predictive_commoning (fun, false);
}
}; // class pass_predcom
@@ -3311,9 +3436,13 @@ public:
} // anon namespace
gimple_opt_pass *
+make_pass_early_predcom (gcc::context *ctxt)
+{
+ return new pass_early_predcom (ctxt);
+}
+
+gimple_opt_pass *
make_pass_predcom (gcc::context *ctxt)
{
return new pass_predcom (ctxt);
}
-
-