summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/ChangeLog35
-rw-r--r--gcc/Makefile.in11
-rw-r--r--gcc/common.opt6
-rw-r--r--gcc/doc/invoke.texi6
-rw-r--r--gcc/loop.c2
-rw-r--r--gcc/passes.c1
-rw-r--r--gcc/timevar.def1
-rw-r--r--gcc/tree-cfgcleanup.c26
-rw-r--r--gcc/tree-flow.h25
-rw-r--r--gcc/tree-outof-ssa.c48
-rw-r--r--gcc/tree-pass.h1
-rw-r--r--gcc/tree-scalar-evolution.h11
-rw-r--r--gcc/tree-ssa-loop-manip.c326
-rw-r--r--gcc/tree-ssa-loop-niter.c20
-rw-r--r--gcc/tree-ssa-loop.c34
15 files changed, 512 insertions, 41 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 2811d2e9d71..9aec203aa99 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,38 @@
+2006-02-14 Zdenek Dvorak <dvorakz@suse.cz>
+
+ * doc/invoke.texi (-fprefetch-loop-arrays, -fprefetch-loop-arrays-rtl):
+ Document.
+ * tree-ssa-loop-niter.c (number_of_iterations_ne,
+ number_of_iterations_lt, number_of_iterations_cond): Remember the shape
+ of the ending condition.
+ * tree-ssa-loop-manip.c: Include params.h.
+ (build_if_stmt, can_unroll_loop_p, determine_exit_conditions,
+ tree_unroll_loop): New functions.
+ * tree-pass.h (pass_loop_prefetch): Declare.
+ * loop.c (rest_of_handle_loop_optimize): Test for
+ -fprefetch-loop-arrays-rtl.
+ * tree-scalar-evolution.h (affine_iv): Moved to tree-flow.h.
+ * timevar.def (TV_TREE_PREFETCH): New timevar.
+ * tree-ssa-loop.c (tree_ssa_loop_prefetch, gate_tree_ssa_loop_prefetch,
+ pass_loop_prefetch): New.
+ * tree-cfgcleanup.c: Include tree-scalar-evolution.h.
+ (cleanup_tree_cfg_loop): Call scev_reset.
+ * common.opt (fprefetch-loop-arrays-rtl): Add.
+ * tree-ssa-loop-prefetch.c: New file.
+ * tree-outof-ssa.c (struct value_expr_d): Add expr_vars field.
+ (new_temp_expr_table): Initialize expr_vars.
+ (free_temp_expr_table): Cleanup expr_vars.
+ (check_replaceable, find_replaceable_in_bb): Prevent accumulating
+ expressions from being merged into one.
+ * tree-flow.h (affine_iv): Moved from tree-scalar-evolution.h.
+ (struct tree_niter_desc): Add control, bound and cmp fields.
+ (tree_ssa_prefetch_arrays, can_unroll_loop_p, tree_unroll_loop):
+ Declare.
+ * Makefile.in (tree-ssa-loop-prefetch.o): Add.
+ (tree-cfgcleanup.o): Add SCEV_H dependency.
+ (tree-ssa-loop-manip.o): Add PARAMS_H dependency.
+ * passes.c (init_optimization_passes): Add pass_loop_prefetch.
+
2006-02-14 Richard Guenther <rguenther@suse.de>
PR tree-optimization/26258
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 09786c99087..d6d79005cdd 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -963,7 +963,7 @@ OBJS-common = \
tree-vect-generic.o tree-ssa-loop.o tree-ssa-loop-niter.o \
tree-ssa-loop-manip.o tree-ssa-threadupdate.o tree-ssa-threadedge.o \
tree-vectorizer.o tree-vect-analyze.o tree-vect-transform.o \
- tree-vect-patterns.o \
+ tree-vect-patterns.o tree-ssa-loop-prefetch.o \
tree-ssa-loop-ivcanon.o tree-ssa-propagate.o tree-ssa-address.o \
tree-ssa-math-opts.o \
tree-ssa-loop-ivopts.o tree-if-conv.o tree-ssa-loop-unswitch.o \
@@ -1975,6 +1975,12 @@ tree-ssa-loop-ch.o : tree-ssa-loop-ch.c $(TREE_FLOW_H) $(CONFIG_H) \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(TREE_INLINE_H) \
output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
tree-pass.h $(FLAGS_H) $(BASIC_BLOCK_H) hard-reg-set.h
+tree-ssa-loop-prefetch.o: tree-ssa-loop-prefetch.c $(TREE_FLOW_H) $(CONFIG_H) \
+ $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(EXPR_H) \
+ output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
+ tree-pass.h $(GGC_H) $(RECOG_H) insn-config.h $(HASHTAB_H) $(SCEV_H) \
+ $(CFGLOOP_H) $(PARAMS_H) langhooks.h $(BASIC_BLOCK_H) hard-reg-set.h \
+ tree-chrec.h toplev.h langhooks.h
tree-ssa-loop-ivopts.o : tree-ssa-loop-ivopts.c $(TREE_FLOW_H) $(CONFIG_H) \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(EXPR_H) \
output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
@@ -1984,7 +1990,8 @@ tree-ssa-loop-ivopts.o : tree-ssa-loop-ivopts.c $(TREE_FLOW_H) $(CONFIG_H) \
tree-ssa-loop-manip.o : tree-ssa-loop-manip.c $(TREE_FLOW_H) $(CONFIG_H) \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) \
output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
- tree-pass.h $(CFGLAYOUT_H) $(SCEV_H) $(BASIC_BLOCK_H) hard-reg-set.h
+ tree-pass.h $(CFGLAYOUT_H) $(SCEV_H) $(BASIC_BLOCK_H) hard-reg-set.h \
+ $(PARAMS_H)
tree-ssa-loop-im.o : tree-ssa-loop-im.c $(TREE_FLOW_H) $(CONFIG_H) \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) domwalk.h \
$(PARAMS_H) output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h \
diff --git a/gcc/common.opt b/gcc/common.opt
index f8077ae3751..c7fee2c9c09 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -659,7 +659,11 @@ Common Report Var(flag_pie,1) VarExists
Generate position-independent code for executables if possible (small mode)
fprefetch-loop-arrays
-Common Report Var(flag_prefetch_loop_arrays)
+Common Report Var(flag_prefetch_loop_arrays,1)
+Generate prefetch instructions, if available, for arrays in loops
+
+fprefetch-loop-arrays-rtl
+Common Report Var(flag_prefetch_loop_arrays,2)
Generate prefetch instructions, if available, for arrays in loops
fprofile
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 6fb86e314e8..5c7ff5ebeae 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -321,7 +321,7 @@ Objective-C and Objective-C++ Dialects}.
-funsafe-math-optimizations -funsafe-loop-optimizations -ffinite-math-only @gol
-fno-toplevel-reorder -fno-trapping-math -fno-zero-initialized-in-bss @gol
-fomit-frame-pointer -foptimize-register-move @gol
--foptimize-sibling-calls -fprefetch-loop-arrays @gol
+-foptimize-sibling-calls -fprefetch-loop-arrays -fprefetch-loop-arrays-rtl @gol
-fprofile-generate -fprofile-use @gol
-fregmove -frename-registers @gol
-freorder-blocks -freorder-blocks-and-partition -freorder-functions @gol
@@ -5171,7 +5171,9 @@ With this option, the compiler will create multiple copies of some
local variables when unrolling a loop which can result in superior code.
@item -fprefetch-loop-arrays
+@itemx -fprefetch-loop-arrays-rtl
@opindex fprefetch-loop-arrays
+@opindex fprefetch-loop-arrays-rtl
If supported by the target machine, generate instructions to prefetch
memory to improve the performance of loops that access large arrays.
@@ -5709,7 +5711,9 @@ Move branches with loop invariant conditions out of the loop, with duplicates
of the loop on both branches (modified according to result of the condition).
@item -fprefetch-loop-arrays
+@itemx -fprefetch-loop-arrays-rtl
@opindex fprefetch-loop-arrays
+@opindex fprefetch-loop-arrays-rtl
If supported by the target machine, generate instructions to prefetch
memory to improve the performance of loops that access large arrays.
diff --git a/gcc/loop.c b/gcc/loop.c
index fcb7d1ab21f..1beb4dc16a6 100644
--- a/gcc/loop.c
+++ b/gcc/loop.c
@@ -11780,7 +11780,7 @@ rest_of_handle_loop_optimize (void)
free_bb_for_insn ();
profile_status = PROFILE_ABSENT;
- do_prefetch = flag_prefetch_loop_arrays ? LOOP_PREFETCH : 0;
+ do_prefetch = flag_prefetch_loop_arrays == 2 ? LOOP_PREFETCH : 0;
if (flag_rerun_loop_opt)
{
diff --git a/gcc/passes.c b/gcc/passes.c
index 5e026957307..2e7f0b53b32 100644
--- a/gcc/passes.c
+++ b/gcc/passes.c
@@ -601,6 +601,7 @@ init_optimization_passes (void)
vectorizer creates alias relations that are not supported by
pass_may_alias. */
NEXT_PASS (pass_complete_unroll);
+ NEXT_PASS (pass_loop_prefetch);
NEXT_PASS (pass_iv_optimize);
NEXT_PASS (pass_tree_loop_done);
*p = NULL;
diff --git a/gcc/timevar.def b/gcc/timevar.def
index e769cba2fe4..d6065e7ca01 100644
--- a/gcc/timevar.def
+++ b/gcc/timevar.def
@@ -107,6 +107,7 @@ DEFTIMEVAR (TV_TREE_LOOP_UNSWITCH , "tree loop unswitching")
DEFTIMEVAR (TV_COMPLETE_UNROLL , "complete unrolling")
DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree vectorization")
DEFTIMEVAR (TV_TREE_LINEAR_TRANSFORM , "tree loop linear")
+DEFTIMEVAR (TV_TREE_PREFETCH , "tree prefetching")
DEFTIMEVAR (TV_TREE_LOOP_IVOPTS , "tree iv optimization")
DEFTIMEVAR (TV_TREE_LOOP_INIT , "tree loop init")
DEFTIMEVAR (TV_TREE_LOOP_FINI , "tree loop fini")
diff --git a/gcc/tree-cfgcleanup.c b/gcc/tree-cfgcleanup.c
index 4619d1dbad0..76667a6edc1 100644
--- a/gcc/tree-cfgcleanup.c
+++ b/gcc/tree-cfgcleanup.c
@@ -45,6 +45,7 @@ Boston, MA 02110-1301, USA. */
#include "cfglayout.h"
#include "hashtab.h"
#include "tree-ssa-propagate.h"
+#include "tree-scalar-evolution.h"
/* Remove any fallthru edge from EV. Return true if an edge was removed. */
@@ -559,23 +560,26 @@ cleanup_tree_cfg (void)
void
cleanup_tree_cfg_loop (void)
{
- bitmap changed_bbs = BITMAP_ALLOC (NULL);
+ bool changed = cleanup_tree_cfg ();
- cleanup_tree_cfg ();
-
- fix_loop_structure (current_loops, changed_bbs);
- calculate_dominance_info (CDI_DOMINATORS);
+ if (changed)
+ {
+ bitmap changed_bbs = BITMAP_ALLOC (NULL);
+ fix_loop_structure (current_loops, changed_bbs);
+ calculate_dominance_info (CDI_DOMINATORS);
- /* This usually does nothing. But sometimes parts of cfg that originally
- were inside a loop get out of it due to edge removal (since they
- become unreachable by back edges from latch). */
- rewrite_into_loop_closed_ssa (changed_bbs, TODO_update_ssa);
+ /* This usually does nothing. But sometimes parts of cfg that originally
+ were inside a loop get out of it due to edge removal (since they
+ become unreachable by back edges from latch). */
+ rewrite_into_loop_closed_ssa (changed_bbs, TODO_update_ssa);
- BITMAP_FREE (changed_bbs);
+ BITMAP_FREE (changed_bbs);
#ifdef ENABLE_CHECKING
- verify_loop_structure (current_loops);
+ verify_loop_structure (current_loops);
#endif
+ scev_reset ();
+ }
}
/* Merge the PHI nodes at BB into those at BB's sole successor. */
diff --git a/gcc/tree-flow.h b/gcc/tree-flow.h
index 0e4824e9321..7774c3b42b7 100644
--- a/gcc/tree-flow.h
+++ b/gcc/tree-flow.h
@@ -667,6 +667,17 @@ extern void replace_exp (use_operand_p, tree);
extern bool may_propagate_copy (tree, tree);
extern bool may_propagate_copy_into_asm (tree);
+/* Affine iv. */
+
+typedef struct
+{
+ /* Iv = BASE + STEP * i. */
+ tree base, step;
+
+ /* True if this iv does not overflow. */
+ bool no_overflow;
+} affine_iv;
+
/* Description of number of iterations of a loop. All the expressions inside
the structure can be evaluated at the end of the loop's preheader
(and due to ssa form, also anywhere inside the body of the loop). */
@@ -697,6 +708,15 @@ struct tree_niter_desc
MAX_SIGNED_INT. However if the (n <= 0) assumption
is eliminated (by looking at the guard on entry of
the loop), then the information would be lost. */
+
+ /* The simplified shape of the exit condition. The loop exits if
+ CONTROL CMP BOUND is false, where CMP is one of NE_EXPR,
+ LT_EXPR, or GT_EXPR, and step of CONTROL is positive if CMP is
+ LE_EXPR and negative if CMP is GE_EXPR. This information is used
+ by loop unrolling. */
+ affine_iv control;
+ tree bound;
+ enum tree_code cmp;
};
/* In tree-vectorizer.c */
@@ -711,6 +731,7 @@ void tree_ssa_lim (struct loops *);
void tree_ssa_unswitch_loops (struct loops *);
void canonicalize_induction_variables (struct loops *);
void tree_unroll_loops_completely (struct loops *, bool);
+void tree_ssa_prefetch_arrays (struct loops *);
void remove_empty_loops (struct loops *);
void tree_ssa_iv_optimize (struct loops *);
@@ -748,6 +769,10 @@ struct loop *tree_ssa_loop_version (struct loops *, struct loop *, tree,
tree expand_simple_operations (tree);
void substitute_in_loop_info (struct loop *, tree, tree);
edge single_dom_exit (struct loop *);
+bool can_unroll_loop_p (struct loop *loop, unsigned factor,
+ struct tree_niter_desc *niter);
+void tree_unroll_loop (struct loops *, struct loop *, unsigned,
+ edge, struct tree_niter_desc *);
/* In tree-ssa-threadedge.c */
extern bool potentially_threadable_block (basic_block);
diff --git a/gcc/tree-outof-ssa.c b/gcc/tree-outof-ssa.c
index 78ec5e7b356..40d6c930511 100644
--- a/gcc/tree-outof-ssa.c
+++ b/gcc/tree-outof-ssa.c
@@ -1299,7 +1299,8 @@ typedef struct value_expr_d
typedef struct temp_expr_table_d
{
var_map map;
- void **version_info;
+ void **version_info;
+ bitmap *expr_vars;
value_expr_p *partition_dep_list;
bitmap replaceable;
bool saw_replaceable;
@@ -1344,6 +1345,7 @@ new_temp_expr_table (var_map map)
t->map = map;
t->version_info = XCNEWVEC (void *, num_ssa_names + 1);
+ t->expr_vars = XCNEWVEC (bitmap, num_ssa_names + 1);
t->partition_dep_list = XCNEWVEC (value_expr_p,
num_var_partitions (map) + 1);
@@ -1367,6 +1369,7 @@ free_temp_expr_table (temp_expr_table_p t)
{
value_expr_p p;
tree *ret = NULL;
+ unsigned i;
#ifdef ENABLE_CHECKING
unsigned x;
@@ -1383,6 +1386,11 @@ free_temp_expr_table (temp_expr_table_p t)
BITMAP_FREE (t->partition_in_use);
BITMAP_FREE (t->replaceable);
+ for (i = 0; i <= num_ssa_names; i++)
+ if (t->expr_vars[i])
+ BITMAP_FREE (t->expr_vars[i]);
+ free (t->expr_vars);
+
free (t->partition_dep_list);
if (t->saw_replaceable)
ret = (tree *)t->version_info;
@@ -1545,11 +1553,12 @@ add_dependance (temp_expr_table_p tab, int version, tree var)
static bool
check_replaceable (temp_expr_table_p tab, tree stmt)
{
- tree var, def;
+ tree var, def, basevar;
int version;
var_map map = tab->map;
ssa_op_iter iter;
tree call_expr;
+ bitmap def_vars = BITMAP_ALLOC (NULL), use_vars;
if (TREE_CODE (stmt) != MODIFY_EXPR)
return false;
@@ -1580,12 +1589,19 @@ check_replaceable (temp_expr_table_p tab, tree stmt)
}
version = SSA_NAME_VERSION (def);
+ basevar = SSA_NAME_VAR (def);
+ bitmap_set_bit (def_vars, DECL_UID (basevar));
/* Add this expression to the dependency list for each use partition. */
FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_USE)
{
add_dependance (tab, version, var);
+
+ use_vars = tab->expr_vars[SSA_NAME_VERSION (var)];
+ if (use_vars)
+ bitmap_ior_into (def_vars, use_vars);
}
+ tab->expr_vars[version] = def_vars;
/* If there are VUSES, add a dependence on virtual defs. */
if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VUSE))
@@ -1704,7 +1720,7 @@ static void
find_replaceable_in_bb (temp_expr_table_p tab, basic_block bb)
{
block_stmt_iterator bsi;
- tree stmt, def;
+ tree stmt, def, use;
stmt_ann_t ann;
int partition;
var_map map = tab->map;
@@ -1717,30 +1733,34 @@ find_replaceable_in_bb (temp_expr_table_p tab, basic_block bb)
ann = stmt_ann (stmt);
/* Determine if this stmt finishes an existing expression. */
- FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_USE)
+ FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
{
- if (tab->version_info[SSA_NAME_VERSION (def)])
+ unsigned ver = SSA_NAME_VERSION (use);
+
+ if (tab->version_info[ver])
{
bool same_root_var = false;
- tree def2;
ssa_op_iter iter2;
+ bitmap vars = tab->expr_vars[ver];
/* See if the root variables are the same. If they are, we
do not want to do the replacement to avoid problems with
code size, see PR tree-optimization/17549. */
- FOR_EACH_SSA_TREE_OPERAND (def2, stmt, iter2, SSA_OP_DEF)
- if (SSA_NAME_VAR (def) == SSA_NAME_VAR (def2))
- {
- same_root_var = true;
- break;
- }
+ FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter2, SSA_OP_DEF)
+ {
+ if (bitmap_bit_p (vars, DECL_UID (SSA_NAME_VAR (def))))
+ {
+ same_root_var = true;
+ break;
+ }
+ }
/* Mark expression as replaceable unless stmt is volatile
or DEF sets the same root variable as STMT. */
if (!ann->has_volatile_ops && !same_root_var)
- mark_replaceable (tab, def);
+ mark_replaceable (tab, use);
else
- finish_expr (tab, SSA_NAME_VERSION (def), false);
+ finish_expr (tab, ver, false);
}
}
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 5d49b6c7569..baa60da96e3 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -247,6 +247,7 @@ extern struct tree_opt_pass pass_record_bounds;
extern struct tree_opt_pass pass_if_conversion;
extern struct tree_opt_pass pass_vectorize;
extern struct tree_opt_pass pass_complete_unroll;
+extern struct tree_opt_pass pass_loop_prefetch;
extern struct tree_opt_pass pass_iv_optimize;
extern struct tree_opt_pass pass_tree_loop_done;
extern struct tree_opt_pass pass_ch;
diff --git a/gcc/tree-scalar-evolution.h b/gcc/tree-scalar-evolution.h
index 0fecaee2390..f7749545f9a 100644
--- a/gcc/tree-scalar-evolution.h
+++ b/gcc/tree-scalar-evolution.h
@@ -34,17 +34,6 @@ extern void gather_stats_on_scev_database (void);
extern void scev_analysis (void);
void scev_const_prop (void);
-/* Affine iv. */
-
-typedef struct
-{
- /* Iv = BASE + STEP * i. */
- tree base, step;
-
- /* True if this iv does not overflow. */
- bool no_overflow;
-} affine_iv;
-
extern bool simple_iv (struct loop *, tree, tree, affine_iv *, bool);
#endif /* GCC_TREE_SCALAR_EVOLUTION_H */
diff --git a/gcc/tree-ssa-loop-manip.c b/gcc/tree-ssa-loop-manip.c
index ab9971dfabf..21d1ea14b14 100644
--- a/gcc/tree-ssa-loop-manip.c
+++ b/gcc/tree-ssa-loop-manip.c
@@ -36,6 +36,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
#include "tree-pass.h"
#include "cfglayout.h"
#include "tree-scalar-evolution.h"
+#include "params.h"
/* Creates an induction variable with value BASE + STEP * iteration in LOOP.
It is expected that neither BASE nor STEP are shared with other expressions
@@ -618,3 +619,328 @@ tree_duplicate_loop_to_header_edge (struct loop *loop, edge e,
return true;
}
+
+/* Build if (COND) goto THEN_LABEL; else goto ELSE_LABEL; */
+
+static tree
+build_if_stmt (tree cond, tree then_label, tree else_label)
+{
+ return build3 (COND_EXPR, void_type_node,
+ cond,
+ build1 (GOTO_EXPR, void_type_node, then_label),
+ build1 (GOTO_EXPR, void_type_node, else_label));
+}
+
+/* Returns true if we can unroll LOOP FACTOR times. Number
+ of iterations of the loop is returned in NITER. */
+
+bool
+can_unroll_loop_p (struct loop *loop, unsigned factor,
+ struct tree_niter_desc *niter)
+{
+ edge exit;
+
+ /* Check whether unrolling is possible. We only want to unroll loops
+ for that we are able to determine number of iterations. We also
+ want to split the extra iterations of the loop from its end,
+ therefore we require that the loop has precisely one
+ exit. */
+
+ exit = single_dom_exit (loop);
+ if (!exit)
+ return false;
+
+ if (!number_of_iterations_exit (loop, exit, niter, false)
+ || niter->cmp == ERROR_MARK)
+ return false;
+
+ /* And of course, we must be able to duplicate the loop. */
+ if (!can_duplicate_loop_p (loop))
+ return false;
+
+ /* The final loop should be small enough. */
+ if (tree_num_loop_insns (loop) * factor
+ > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS))
+ return false;
+
+ return true;
+}
+
+/* Determines the conditions that control execution of LOOP unrolled FACTOR
+ times. DESC is number of iterations of LOOP. ENTER_COND is set to
+ condition that must be true if the main loop can be entered.
+ EXIT_BASE, EXIT_STEP, EXIT_CMP and EXIT_BOUND are set to values describing
+ how the exit from the unrolled loop should be controlled. */
+
+static void
+determine_exit_conditions (struct loop *loop, struct tree_niter_desc *desc,
+ unsigned factor, tree *enter_cond,
+ tree *exit_base, tree *exit_step,
+ enum tree_code *exit_cmp, tree *exit_bound)
+{
+ tree stmts;
+ tree base = desc->control.base;
+ tree step = desc->control.step;
+ tree bound = desc->bound;
+ tree type = TREE_TYPE (base);
+ tree bigstep, delta;
+ tree min = lower_bound_in_type (type, type);
+ tree max = upper_bound_in_type (type, type);
+ enum tree_code cmp = desc->cmp;
+ tree cond = boolean_true_node, assum;
+
+ *enter_cond = boolean_false_node;
+ *exit_base = NULL_TREE;
+ *exit_step = NULL_TREE;
+ *exit_cmp = ERROR_MARK;
+ *exit_bound = NULL_TREE;
+ gcc_assert (cmp != ERROR_MARK);
+
+ /* We only need to be correct when we answer question
+ "Do at least FACTOR more iterations remain?" in the unrolled loop.
+ Thus, transforming BASE + STEP * i <> BOUND to
+ BASE + STEP * i < BOUND is ok. */
+ if (cmp == NE_EXPR)
+ {
+ if (tree_int_cst_sign_bit (step))
+ cmp = GT_EXPR;
+ else
+ cmp = LT_EXPR;
+ }
+ else if (cmp == LT_EXPR)
+ {
+ gcc_assert (!tree_int_cst_sign_bit (step));
+ }
+ else if (cmp == GT_EXPR)
+ {
+ gcc_assert (tree_int_cst_sign_bit (step));
+ }
+ else
+ gcc_unreachable ();
+
+ /* The main body of the loop may be entered iff:
+
+ 1) desc->may_be_zero is false.
+ 2) it is possible to check that there are at least FACTOR iterations
+ of the loop, i.e., BOUND - step * FACTOR does not overflow.
+ 3) # of iterations is at least FACTOR */
+
+ if (!zero_p (desc->may_be_zero))
+ cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
+ invert_truthvalue (desc->may_be_zero),
+ cond);
+
+ bigstep = fold_build2 (MULT_EXPR, type, step,
+ build_int_cst_type (type, factor));
+ delta = fold_build2 (MINUS_EXPR, type, bigstep, step);
+ if (cmp == LT_EXPR)
+ assum = fold_build2 (GE_EXPR, boolean_type_node,
+ bound,
+ fold_build2 (PLUS_EXPR, type, min, delta));
+ else
+ assum = fold_build2 (LE_EXPR, boolean_type_node,
+ bound,
+ fold_build2 (PLUS_EXPR, type, max, delta));
+ cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, assum, cond);
+
+ bound = fold_build2 (MINUS_EXPR, type, bound, delta);
+ assum = fold_build2 (cmp, boolean_type_node, base, bound);
+ cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, assum, cond);
+
+ cond = force_gimple_operand (unshare_expr (cond), &stmts, false, NULL_TREE);
+ if (stmts)
+ bsi_insert_on_edge_immediate_loop (loop_preheader_edge (loop), stmts);
+ /* cond now may be a gimple comparison, which would be OK, but also any
+ other gimple rhs (say a && b). In this case we need to force it to
+ operand. */
+ if (!is_gimple_condexpr (cond))
+ {
+ cond = force_gimple_operand (cond, &stmts, true, NULL_TREE);
+ if (stmts)
+ bsi_insert_on_edge_immediate_loop (loop_preheader_edge (loop), stmts);
+ }
+ *enter_cond = cond;
+
+ base = force_gimple_operand (unshare_expr (base), &stmts, true, NULL_TREE);
+ if (stmts)
+ bsi_insert_on_edge_immediate_loop (loop_preheader_edge (loop), stmts);
+ bound = force_gimple_operand (unshare_expr (bound), &stmts, true, NULL_TREE);
+ if (stmts)
+ bsi_insert_on_edge_immediate_loop (loop_preheader_edge (loop), stmts);
+
+ *exit_base = base;
+ *exit_step = bigstep;
+ *exit_cmp = cmp;
+ *exit_bound = bound;
+}
+
+/* Unroll LOOP FACTOR times. LOOPS is the loops tree. DESC describes
+ number of iterations of LOOP. EXIT is the exit of the loop to that
+ DESC corresponds.
+
+ If N is number of iterations of the loop and MAY_BE_ZERO is the condition
+ under that loop exits in the first iteration even if N != 0,
+
+ while (1)
+ {
+ x = phi (init, next);
+
+ pre;
+ if (st)
+ break;
+ post;
+ }
+
+ becomes (with possibly the exit conditions formulated a bit differently,
+ avoiding the need to create a new iv):
+
+ if (MAY_BE_ZERO || N < FACTOR)
+ goto rest;
+
+ do
+ {
+ x = phi (init, next);
+
+ pre;
+ post;
+ pre;
+ post;
+ ...
+ pre;
+ post;
+ N -= FACTOR;
+
+ } while (N >= FACTOR);
+
+ rest:
+ init' = phi (init, x);
+
+ while (1)
+ {
+ x = phi (init', next);
+
+ pre;
+ if (st)
+ break;
+ post;
+ } */
+
+void
+tree_unroll_loop (struct loops *loops, struct loop *loop, unsigned factor,
+ edge exit, struct tree_niter_desc *desc)
+{
+ tree dont_exit, exit_if, ctr_before, ctr_after;
+ tree enter_main_cond, exit_base, exit_step, exit_bound;
+ enum tree_code exit_cmp;
+ tree phi_old_loop, phi_new_loop, phi_rest, init, next, new_init, var;
+ struct loop *new_loop;
+ basic_block rest, exit_bb;
+ edge old_entry, new_entry, old_latch, precond_edge, new_exit;
+ edge nonexit, new_nonexit;
+ block_stmt_iterator bsi;
+ use_operand_p op;
+ bool ok;
+ unsigned est_niter;
+ sbitmap wont_exit;
+
+ est_niter = expected_loop_iterations (loop);
+ determine_exit_conditions (loop, desc, factor,
+ &enter_main_cond, &exit_base, &exit_step,
+ &exit_cmp, &exit_bound);
+
+ new_loop = loop_version (loops, loop, enter_main_cond, NULL, true);
+ gcc_assert (new_loop != NULL);
+ update_ssa (TODO_update_ssa);
+
+ /* Unroll the loop and remove the old exits. */
+ dont_exit = ((exit->flags & EDGE_TRUE_VALUE)
+ ? boolean_false_node
+ : boolean_true_node);
+ if (exit == EDGE_SUCC (exit->src, 0))
+ nonexit = EDGE_SUCC (exit->src, 1);
+ else
+ nonexit = EDGE_SUCC (exit->src, 0);
+ nonexit->probability = REG_BR_PROB_BASE;
+ exit->probability = 0;
+ nonexit->count += exit->count;
+ exit->count = 0;
+ exit_if = last_stmt (exit->src);
+ COND_EXPR_COND (exit_if) = dont_exit;
+ update_stmt (exit_if);
+
+ wont_exit = sbitmap_alloc (factor);
+ sbitmap_ones (wont_exit);
+ ok = tree_duplicate_loop_to_header_edge
+ (loop, loop_latch_edge (loop), loops, factor - 1,
+ wont_exit, NULL, NULL, NULL, DLTHE_FLAG_UPDATE_FREQ);
+ free (wont_exit);
+ gcc_assert (ok);
+ update_ssa (TODO_update_ssa);
+
+ /* Prepare the cfg and update the phi nodes. */
+ rest = loop_preheader_edge (new_loop)->src;
+ precond_edge = single_pred_edge (rest);
+ loop_split_edge_with (loop_latch_edge (loop), NULL);
+ exit_bb = single_pred (loop->latch);
+
+ new_exit = make_edge (exit_bb, rest, EDGE_FALSE_VALUE);
+ new_exit->count = loop_preheader_edge (loop)->count;
+ est_niter = est_niter / factor + 1;
+ new_exit->probability = REG_BR_PROB_BASE / est_niter;
+
+ new_nonexit = single_pred_edge (loop->latch);
+ new_nonexit->flags = EDGE_TRUE_VALUE;
+ new_nonexit->probability = REG_BR_PROB_BASE - new_exit->probability;
+
+ old_entry = loop_preheader_edge (loop);
+ new_entry = loop_preheader_edge (new_loop);
+ old_latch = loop_latch_edge (loop);
+ for (phi_old_loop = phi_nodes (loop->header),
+ phi_new_loop = phi_nodes (new_loop->header);
+ phi_old_loop;
+ phi_old_loop = PHI_CHAIN (phi_old_loop),
+ phi_new_loop = PHI_CHAIN (phi_new_loop))
+ {
+ init = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_entry);
+ op = PHI_ARG_DEF_PTR_FROM_EDGE (phi_new_loop, new_entry);
+ gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op)));
+ next = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_latch);
+
+ /* Prefer using original variable as a base for the new ssa name.
+ This is necessary for virtual ops, and useful in order to avoid
+ losing debug info for real ops. */
+ if (TREE_CODE (next) == SSA_NAME)
+ var = SSA_NAME_VAR (next);
+ else if (TREE_CODE (init) == SSA_NAME)
+ var = SSA_NAME_VAR (init);
+ else
+ {
+ var = create_tmp_var (TREE_TYPE (init), "unrinittmp");
+ add_referenced_tmp_var (var);
+ }
+
+ new_init = make_ssa_name (var, NULL_TREE);
+ phi_rest = create_phi_node (new_init, rest);
+ SSA_NAME_DEF_STMT (new_init) = phi_rest;
+
+ add_phi_arg (phi_rest, init, precond_edge);
+ add_phi_arg (phi_rest, next, new_exit);
+ SET_USE (op, new_init);
+ }
+
+ /* Finally create the new counter for number of iterations and add the new
+ exit instruction. */
+ bsi = bsi_last (exit_bb);
+ create_iv (exit_base, exit_step, NULL_TREE, loop,
+ &bsi, true, &ctr_before, &ctr_after);
+ exit_if = build_if_stmt (build2 (exit_cmp, boolean_type_node, ctr_after,
+ exit_bound),
+ tree_block_label (loop->latch),
+ tree_block_label (rest));
+ bsi_insert_after (&bsi, exit_if, BSI_NEW_STMT);
+
+ verify_flow_info ();
+ verify_dominators (CDI_DOMINATORS);
+ verify_loop_structure (loops);
+ verify_loop_closed_ssa ();
+}
diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index 7566e7cad49..f913df3141b 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -140,6 +140,10 @@ number_of_iterations_ne (tree type, affine_iv *iv, tree final,
tree niter_type = unsigned_type_for (type);
tree s, c, d, bits, assumption, tmp, bound;
+ niter->control = *iv;
+ niter->bound = final;
+ niter->cmp = NE_EXPR;
+
/* Rearrange the terms so that we get inequality s * i <> c, with s
positive. Also cast everything to the unsigned type. */
if (tree_int_cst_sign_bit (iv->step))
@@ -410,6 +414,19 @@ number_of_iterations_lt (tree type, affine_iv *iv0, affine_iv *iv1,
tree niter_type = unsigned_type_for (type);
tree delta, step, s;
+ if (nonzero_p (iv0->step))
+ {
+ niter->control = *iv0;
+ niter->cmp = LT_EXPR;
+ niter->bound = iv1->base;
+ }
+ else
+ {
+ niter->control = *iv1;
+ niter->cmp = GT_EXPR;
+ niter->bound = iv0->base;
+ }
+
delta = fold_build2 (MINUS_EXPR, niter_type,
fold_convert (niter_type, iv1->base),
fold_convert (niter_type, iv0->base));
@@ -543,6 +560,9 @@ number_of_iterations_cond (tree type, affine_iv *iv0, enum tree_code code,
niter->niter = NULL_TREE;
niter->additional_info = boolean_true_node;
+ niter->bound = NULL_TREE;
+ niter->cmp = ERROR_MARK;
+
/* Make < comparison from > ones, and for NE_EXPR comparisons, ensure that
the control variable is on lhs. */
if (code == GE_EXPR || code == GT_EXPR
diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c
index a735084803e..60cdefcbe24 100644
--- a/gcc/tree-ssa-loop.c
+++ b/gcc/tree-ssa-loop.c
@@ -401,6 +401,40 @@ struct tree_opt_pass pass_complete_unroll =
0 /* letter */
};
+/* Prefetching. */
+
+static void
+tree_ssa_loop_prefetch (void)
+{
+ if (!current_loops)
+ return;
+
+ tree_ssa_prefetch_arrays (current_loops);
+}
+
+static bool
+gate_tree_ssa_loop_prefetch (void)
+{
+ return flag_prefetch_loop_arrays == 1;
+}
+
+struct tree_opt_pass pass_loop_prefetch =
+{
+ "prefetch", /* name */
+ gate_tree_ssa_loop_prefetch, /* gate */
+ tree_ssa_loop_prefetch, /* execute */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ TV_TREE_PREFETCH, /* tv_id */
+ PROP_cfg | PROP_ssa, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_dump_func | TODO_verify_loops, /* todo_flags_finish */
+ 0 /* letter */
+};
+
/* Induction variable optimizations. */
static void