diff options
-rw-r--r-- | gcc/ChangeLog | 35 | ||||
-rw-r--r-- | gcc/Makefile.in | 11 | ||||
-rw-r--r-- | gcc/common.opt | 6 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 6 | ||||
-rw-r--r-- | gcc/loop.c | 2 | ||||
-rw-r--r-- | gcc/passes.c | 1 | ||||
-rw-r--r-- | gcc/timevar.def | 1 | ||||
-rw-r--r-- | gcc/tree-cfgcleanup.c | 26 | ||||
-rw-r--r-- | gcc/tree-flow.h | 25 | ||||
-rw-r--r-- | gcc/tree-outof-ssa.c | 48 | ||||
-rw-r--r-- | gcc/tree-pass.h | 1 | ||||
-rw-r--r-- | gcc/tree-scalar-evolution.h | 11 | ||||
-rw-r--r-- | gcc/tree-ssa-loop-manip.c | 326 | ||||
-rw-r--r-- | gcc/tree-ssa-loop-niter.c | 20 | ||||
-rw-r--r-- | gcc/tree-ssa-loop.c | 34 |
15 files changed, 512 insertions, 41 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 2811d2e9d71..9aec203aa99 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,38 @@ +2006-02-14 Zdenek Dvorak <dvorakz@suse.cz> + + * doc/invoke.texi (-fprefetch-loop-arrays, -fprefetch-loop-arrays-rtl): + Document. + * tree-ssa-loop-niter.c (number_of_iterations_ne, + number_of_iterations_lt, number_of_iterations_cond): Remember the shape + of the ending condition. + * tree-ssa-loop-manip.c: Include params.h. + (build_if_stmt, can_unroll_loop_p, determine_exit_conditions, + tree_unroll_loop): New functions. + * tree-pass.h (pass_loop_prefetch): Declare. + * loop.c (rest_of_handle_loop_optimize): Test for + -fprefetch-loop-arrays-rtl. + * tree-scalar-evolution.h (affine_iv): Moved to tree-flow.h. + * timevar.def (TV_TREE_PREFETCH): New timevar. + * tree-ssa-loop.c (tree_ssa_loop_prefetch, gate_tree_ssa_loop_prefetch, + pass_loop_prefetch): New. + * tree-cfgcleanup.c: Include tree-scalar-evolution.h. + (cleanup_tree_cfg_loop): Call scev_reset. + * common.opt (fprefetch-loop-arrays-rtl): Add. + * tree-ssa-loop-prefetch.c: New file. + * tree-outof-ssa.c (struct value_expr_d): Add expr_vars field. + (new_temp_expr_table): Initialize expr_vars. + (free_temp_expr_table): Cleanup expr_vars. + (check_replaceable, find_replaceable_in_bb): Prevent accumulating + expressions from being merged into one. + * tree-flow.h (affine_iv): Moved from tree-scalar-evolution.h. + (struct tree_niter_desc): Add control, bound and cmp fields. + (tree_ssa_prefetch_arrays, can_unroll_loop_p, tree_unroll_loop): + Declare. + * Makefile.in (tree-ssa-loop-prefetch.o): Add. + (tree-cfgcleanup.o): Add SCEV_H dependency. + (tree-ssa-loop-manip.o): Add PARAMS_H dependency. + * passes.c (init_optimization_passes): Add pass_loop_prefetch. + 2006-02-14 Richard Guenther <rguenther@suse.de> PR tree-optimization/26258 diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 09786c99087..d6d79005cdd 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -963,7 +963,7 @@ OBJS-common = \ tree-vect-generic.o tree-ssa-loop.o tree-ssa-loop-niter.o \ tree-ssa-loop-manip.o tree-ssa-threadupdate.o tree-ssa-threadedge.o \ tree-vectorizer.o tree-vect-analyze.o tree-vect-transform.o \ - tree-vect-patterns.o \ + tree-vect-patterns.o tree-ssa-loop-prefetch.o \ tree-ssa-loop-ivcanon.o tree-ssa-propagate.o tree-ssa-address.o \ tree-ssa-math-opts.o \ tree-ssa-loop-ivopts.o tree-if-conv.o tree-ssa-loop-unswitch.o \ @@ -1975,6 +1975,12 @@ tree-ssa-loop-ch.o : tree-ssa-loop-ch.c $(TREE_FLOW_H) $(CONFIG_H) \ $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(TREE_INLINE_H) \ output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \ tree-pass.h $(FLAGS_H) $(BASIC_BLOCK_H) hard-reg-set.h +tree-ssa-loop-prefetch.o: tree-ssa-loop-prefetch.c $(TREE_FLOW_H) $(CONFIG_H) \ + $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(EXPR_H) \ + output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \ + tree-pass.h $(GGC_H) $(RECOG_H) insn-config.h $(HASHTAB_H) $(SCEV_H) \ + $(CFGLOOP_H) $(PARAMS_H) langhooks.h $(BASIC_BLOCK_H) hard-reg-set.h \ + tree-chrec.h toplev.h langhooks.h tree-ssa-loop-ivopts.o : tree-ssa-loop-ivopts.c $(TREE_FLOW_H) $(CONFIG_H) \ $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(EXPR_H) \ output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \ @@ -1984,7 +1990,8 @@ tree-ssa-loop-ivopts.o : tree-ssa-loop-ivopts.c $(TREE_FLOW_H) $(CONFIG_H) \ tree-ssa-loop-manip.o : tree-ssa-loop-manip.c $(TREE_FLOW_H) $(CONFIG_H) \ $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) \ output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \ - tree-pass.h $(CFGLAYOUT_H) $(SCEV_H) $(BASIC_BLOCK_H) hard-reg-set.h + tree-pass.h $(CFGLAYOUT_H) $(SCEV_H) $(BASIC_BLOCK_H) hard-reg-set.h \ + $(PARAMS_H) tree-ssa-loop-im.o : tree-ssa-loop-im.c $(TREE_FLOW_H) $(CONFIG_H) \ $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) domwalk.h \ $(PARAMS_H) output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h \ diff --git a/gcc/common.opt b/gcc/common.opt index f8077ae3751..c7fee2c9c09 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -659,7 +659,11 @@ Common Report Var(flag_pie,1) VarExists Generate position-independent code for executables if possible (small mode) fprefetch-loop-arrays -Common Report Var(flag_prefetch_loop_arrays) +Common Report Var(flag_prefetch_loop_arrays,1) +Generate prefetch instructions, if available, for arrays in loops + +fprefetch-loop-arrays-rtl +Common Report Var(flag_prefetch_loop_arrays,2) Generate prefetch instructions, if available, for arrays in loops fprofile diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 6fb86e314e8..5c7ff5ebeae 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -321,7 +321,7 @@ Objective-C and Objective-C++ Dialects}. -funsafe-math-optimizations -funsafe-loop-optimizations -ffinite-math-only @gol -fno-toplevel-reorder -fno-trapping-math -fno-zero-initialized-in-bss @gol -fomit-frame-pointer -foptimize-register-move @gol --foptimize-sibling-calls -fprefetch-loop-arrays @gol +-foptimize-sibling-calls -fprefetch-loop-arrays -fprefetch-loop-arrays-rtl @gol -fprofile-generate -fprofile-use @gol -fregmove -frename-registers @gol -freorder-blocks -freorder-blocks-and-partition -freorder-functions @gol @@ -5171,7 +5171,9 @@ With this option, the compiler will create multiple copies of some local variables when unrolling a loop which can result in superior code. @item -fprefetch-loop-arrays +@itemx -fprefetch-loop-arrays-rtl @opindex fprefetch-loop-arrays +@opindex fprefetch-loop-arrays-rtl If supported by the target machine, generate instructions to prefetch memory to improve the performance of loops that access large arrays. @@ -5709,7 +5711,9 @@ Move branches with loop invariant conditions out of the loop, with duplicates of the loop on both branches (modified according to result of the condition). @item -fprefetch-loop-arrays +@itemx -fprefetch-loop-arrays-rtl @opindex fprefetch-loop-arrays +@opindex fprefetch-loop-arrays-rtl If supported by the target machine, generate instructions to prefetch memory to improve the performance of loops that access large arrays. diff --git a/gcc/loop.c b/gcc/loop.c index fcb7d1ab21f..1beb4dc16a6 100644 --- a/gcc/loop.c +++ b/gcc/loop.c @@ -11780,7 +11780,7 @@ rest_of_handle_loop_optimize (void) free_bb_for_insn (); profile_status = PROFILE_ABSENT; - do_prefetch = flag_prefetch_loop_arrays ? LOOP_PREFETCH : 0; + do_prefetch = flag_prefetch_loop_arrays == 2 ? LOOP_PREFETCH : 0; if (flag_rerun_loop_opt) { diff --git a/gcc/passes.c b/gcc/passes.c index 5e026957307..2e7f0b53b32 100644 --- a/gcc/passes.c +++ b/gcc/passes.c @@ -601,6 +601,7 @@ init_optimization_passes (void) vectorizer creates alias relations that are not supported by pass_may_alias. */ NEXT_PASS (pass_complete_unroll); + NEXT_PASS (pass_loop_prefetch); NEXT_PASS (pass_iv_optimize); NEXT_PASS (pass_tree_loop_done); *p = NULL; diff --git a/gcc/timevar.def b/gcc/timevar.def index e769cba2fe4..d6065e7ca01 100644 --- a/gcc/timevar.def +++ b/gcc/timevar.def @@ -107,6 +107,7 @@ DEFTIMEVAR (TV_TREE_LOOP_UNSWITCH , "tree loop unswitching") DEFTIMEVAR (TV_COMPLETE_UNROLL , "complete unrolling") DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree vectorization") DEFTIMEVAR (TV_TREE_LINEAR_TRANSFORM , "tree loop linear") +DEFTIMEVAR (TV_TREE_PREFETCH , "tree prefetching") DEFTIMEVAR (TV_TREE_LOOP_IVOPTS , "tree iv optimization") DEFTIMEVAR (TV_TREE_LOOP_INIT , "tree loop init") DEFTIMEVAR (TV_TREE_LOOP_FINI , "tree loop fini") diff --git a/gcc/tree-cfgcleanup.c b/gcc/tree-cfgcleanup.c index 4619d1dbad0..76667a6edc1 100644 --- a/gcc/tree-cfgcleanup.c +++ b/gcc/tree-cfgcleanup.c @@ -45,6 +45,7 @@ Boston, MA 02110-1301, USA. */ #include "cfglayout.h" #include "hashtab.h" #include "tree-ssa-propagate.h" +#include "tree-scalar-evolution.h" /* Remove any fallthru edge from EV. Return true if an edge was removed. */ @@ -559,23 +560,26 @@ cleanup_tree_cfg (void) void cleanup_tree_cfg_loop (void) { - bitmap changed_bbs = BITMAP_ALLOC (NULL); + bool changed = cleanup_tree_cfg (); - cleanup_tree_cfg (); - - fix_loop_structure (current_loops, changed_bbs); - calculate_dominance_info (CDI_DOMINATORS); + if (changed) + { + bitmap changed_bbs = BITMAP_ALLOC (NULL); + fix_loop_structure (current_loops, changed_bbs); + calculate_dominance_info (CDI_DOMINATORS); - /* This usually does nothing. But sometimes parts of cfg that originally - were inside a loop get out of it due to edge removal (since they - become unreachable by back edges from latch). */ - rewrite_into_loop_closed_ssa (changed_bbs, TODO_update_ssa); + /* This usually does nothing. But sometimes parts of cfg that originally + were inside a loop get out of it due to edge removal (since they + become unreachable by back edges from latch). */ + rewrite_into_loop_closed_ssa (changed_bbs, TODO_update_ssa); - BITMAP_FREE (changed_bbs); + BITMAP_FREE (changed_bbs); #ifdef ENABLE_CHECKING - verify_loop_structure (current_loops); + verify_loop_structure (current_loops); #endif + scev_reset (); + } } /* Merge the PHI nodes at BB into those at BB's sole successor. */ diff --git a/gcc/tree-flow.h b/gcc/tree-flow.h index 0e4824e9321..7774c3b42b7 100644 --- a/gcc/tree-flow.h +++ b/gcc/tree-flow.h @@ -667,6 +667,17 @@ extern void replace_exp (use_operand_p, tree); extern bool may_propagate_copy (tree, tree); extern bool may_propagate_copy_into_asm (tree); +/* Affine iv. */ + +typedef struct +{ + /* Iv = BASE + STEP * i. */ + tree base, step; + + /* True if this iv does not overflow. */ + bool no_overflow; +} affine_iv; + /* Description of number of iterations of a loop. All the expressions inside the structure can be evaluated at the end of the loop's preheader (and due to ssa form, also anywhere inside the body of the loop). */ @@ -697,6 +708,15 @@ struct tree_niter_desc MAX_SIGNED_INT. However if the (n <= 0) assumption is eliminated (by looking at the guard on entry of the loop), then the information would be lost. */ + + /* The simplified shape of the exit condition. The loop exits if + CONTROL CMP BOUND is false, where CMP is one of NE_EXPR, + LT_EXPR, or GT_EXPR, and step of CONTROL is positive if CMP is + LE_EXPR and negative if CMP is GE_EXPR. This information is used + by loop unrolling. */ + affine_iv control; + tree bound; + enum tree_code cmp; }; /* In tree-vectorizer.c */ @@ -711,6 +731,7 @@ void tree_ssa_lim (struct loops *); void tree_ssa_unswitch_loops (struct loops *); void canonicalize_induction_variables (struct loops *); void tree_unroll_loops_completely (struct loops *, bool); +void tree_ssa_prefetch_arrays (struct loops *); void remove_empty_loops (struct loops *); void tree_ssa_iv_optimize (struct loops *); @@ -748,6 +769,10 @@ struct loop *tree_ssa_loop_version (struct loops *, struct loop *, tree, tree expand_simple_operations (tree); void substitute_in_loop_info (struct loop *, tree, tree); edge single_dom_exit (struct loop *); +bool can_unroll_loop_p (struct loop *loop, unsigned factor, + struct tree_niter_desc *niter); +void tree_unroll_loop (struct loops *, struct loop *, unsigned, + edge, struct tree_niter_desc *); /* In tree-ssa-threadedge.c */ extern bool potentially_threadable_block (basic_block); diff --git a/gcc/tree-outof-ssa.c b/gcc/tree-outof-ssa.c index 78ec5e7b356..40d6c930511 100644 --- a/gcc/tree-outof-ssa.c +++ b/gcc/tree-outof-ssa.c @@ -1299,7 +1299,8 @@ typedef struct value_expr_d typedef struct temp_expr_table_d { var_map map; - void **version_info; + void **version_info; + bitmap *expr_vars; value_expr_p *partition_dep_list; bitmap replaceable; bool saw_replaceable; @@ -1344,6 +1345,7 @@ new_temp_expr_table (var_map map) t->map = map; t->version_info = XCNEWVEC (void *, num_ssa_names + 1); + t->expr_vars = XCNEWVEC (bitmap, num_ssa_names + 1); t->partition_dep_list = XCNEWVEC (value_expr_p, num_var_partitions (map) + 1); @@ -1367,6 +1369,7 @@ free_temp_expr_table (temp_expr_table_p t) { value_expr_p p; tree *ret = NULL; + unsigned i; #ifdef ENABLE_CHECKING unsigned x; @@ -1383,6 +1386,11 @@ free_temp_expr_table (temp_expr_table_p t) BITMAP_FREE (t->partition_in_use); BITMAP_FREE (t->replaceable); + for (i = 0; i <= num_ssa_names; i++) + if (t->expr_vars[i]) + BITMAP_FREE (t->expr_vars[i]); + free (t->expr_vars); + free (t->partition_dep_list); if (t->saw_replaceable) ret = (tree *)t->version_info; @@ -1545,11 +1553,12 @@ add_dependance (temp_expr_table_p tab, int version, tree var) static bool check_replaceable (temp_expr_table_p tab, tree stmt) { - tree var, def; + tree var, def, basevar; int version; var_map map = tab->map; ssa_op_iter iter; tree call_expr; + bitmap def_vars = BITMAP_ALLOC (NULL), use_vars; if (TREE_CODE (stmt) != MODIFY_EXPR) return false; @@ -1580,12 +1589,19 @@ check_replaceable (temp_expr_table_p tab, tree stmt) } version = SSA_NAME_VERSION (def); + basevar = SSA_NAME_VAR (def); + bitmap_set_bit (def_vars, DECL_UID (basevar)); /* Add this expression to the dependency list for each use partition. */ FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_USE) { add_dependance (tab, version, var); + + use_vars = tab->expr_vars[SSA_NAME_VERSION (var)]; + if (use_vars) + bitmap_ior_into (def_vars, use_vars); } + tab->expr_vars[version] = def_vars; /* If there are VUSES, add a dependence on virtual defs. */ if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VUSE)) @@ -1704,7 +1720,7 @@ static void find_replaceable_in_bb (temp_expr_table_p tab, basic_block bb) { block_stmt_iterator bsi; - tree stmt, def; + tree stmt, def, use; stmt_ann_t ann; int partition; var_map map = tab->map; @@ -1717,30 +1733,34 @@ find_replaceable_in_bb (temp_expr_table_p tab, basic_block bb) ann = stmt_ann (stmt); /* Determine if this stmt finishes an existing expression. */ - FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_USE) + FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE) { - if (tab->version_info[SSA_NAME_VERSION (def)]) + unsigned ver = SSA_NAME_VERSION (use); + + if (tab->version_info[ver]) { bool same_root_var = false; - tree def2; ssa_op_iter iter2; + bitmap vars = tab->expr_vars[ver]; /* See if the root variables are the same. If they are, we do not want to do the replacement to avoid problems with code size, see PR tree-optimization/17549. */ - FOR_EACH_SSA_TREE_OPERAND (def2, stmt, iter2, SSA_OP_DEF) - if (SSA_NAME_VAR (def) == SSA_NAME_VAR (def2)) - { - same_root_var = true; - break; - } + FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter2, SSA_OP_DEF) + { + if (bitmap_bit_p (vars, DECL_UID (SSA_NAME_VAR (def)))) + { + same_root_var = true; + break; + } + } /* Mark expression as replaceable unless stmt is volatile or DEF sets the same root variable as STMT. */ if (!ann->has_volatile_ops && !same_root_var) - mark_replaceable (tab, def); + mark_replaceable (tab, use); else - finish_expr (tab, SSA_NAME_VERSION (def), false); + finish_expr (tab, ver, false); } } diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 5d49b6c7569..baa60da96e3 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -247,6 +247,7 @@ extern struct tree_opt_pass pass_record_bounds; extern struct tree_opt_pass pass_if_conversion; extern struct tree_opt_pass pass_vectorize; extern struct tree_opt_pass pass_complete_unroll; +extern struct tree_opt_pass pass_loop_prefetch; extern struct tree_opt_pass pass_iv_optimize; extern struct tree_opt_pass pass_tree_loop_done; extern struct tree_opt_pass pass_ch; diff --git a/gcc/tree-scalar-evolution.h b/gcc/tree-scalar-evolution.h index 0fecaee2390..f7749545f9a 100644 --- a/gcc/tree-scalar-evolution.h +++ b/gcc/tree-scalar-evolution.h @@ -34,17 +34,6 @@ extern void gather_stats_on_scev_database (void); extern void scev_analysis (void); void scev_const_prop (void); -/* Affine iv. */ - -typedef struct -{ - /* Iv = BASE + STEP * i. */ - tree base, step; - - /* True if this iv does not overflow. */ - bool no_overflow; -} affine_iv; - extern bool simple_iv (struct loop *, tree, tree, affine_iv *, bool); #endif /* GCC_TREE_SCALAR_EVOLUTION_H */ diff --git a/gcc/tree-ssa-loop-manip.c b/gcc/tree-ssa-loop-manip.c index ab9971dfabf..21d1ea14b14 100644 --- a/gcc/tree-ssa-loop-manip.c +++ b/gcc/tree-ssa-loop-manip.c @@ -36,6 +36,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA #include "tree-pass.h" #include "cfglayout.h" #include "tree-scalar-evolution.h" +#include "params.h" /* Creates an induction variable with value BASE + STEP * iteration in LOOP. It is expected that neither BASE nor STEP are shared with other expressions @@ -618,3 +619,328 @@ tree_duplicate_loop_to_header_edge (struct loop *loop, edge e, return true; } + +/* Build if (COND) goto THEN_LABEL; else goto ELSE_LABEL; */ + +static tree +build_if_stmt (tree cond, tree then_label, tree else_label) +{ + return build3 (COND_EXPR, void_type_node, + cond, + build1 (GOTO_EXPR, void_type_node, then_label), + build1 (GOTO_EXPR, void_type_node, else_label)); +} + +/* Returns true if we can unroll LOOP FACTOR times. Number + of iterations of the loop is returned in NITER. */ + +bool +can_unroll_loop_p (struct loop *loop, unsigned factor, + struct tree_niter_desc *niter) +{ + edge exit; + + /* Check whether unrolling is possible. We only want to unroll loops + for that we are able to determine number of iterations. We also + want to split the extra iterations of the loop from its end, + therefore we require that the loop has precisely one + exit. */ + + exit = single_dom_exit (loop); + if (!exit) + return false; + + if (!number_of_iterations_exit (loop, exit, niter, false) + || niter->cmp == ERROR_MARK) + return false; + + /* And of course, we must be able to duplicate the loop. */ + if (!can_duplicate_loop_p (loop)) + return false; + + /* The final loop should be small enough. */ + if (tree_num_loop_insns (loop) * factor + > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS)) + return false; + + return true; +} + +/* Determines the conditions that control execution of LOOP unrolled FACTOR + times. DESC is number of iterations of LOOP. ENTER_COND is set to + condition that must be true if the main loop can be entered. + EXIT_BASE, EXIT_STEP, EXIT_CMP and EXIT_BOUND are set to values describing + how the exit from the unrolled loop should be controlled. */ + +static void +determine_exit_conditions (struct loop *loop, struct tree_niter_desc *desc, + unsigned factor, tree *enter_cond, + tree *exit_base, tree *exit_step, + enum tree_code *exit_cmp, tree *exit_bound) +{ + tree stmts; + tree base = desc->control.base; + tree step = desc->control.step; + tree bound = desc->bound; + tree type = TREE_TYPE (base); + tree bigstep, delta; + tree min = lower_bound_in_type (type, type); + tree max = upper_bound_in_type (type, type); + enum tree_code cmp = desc->cmp; + tree cond = boolean_true_node, assum; + + *enter_cond = boolean_false_node; + *exit_base = NULL_TREE; + *exit_step = NULL_TREE; + *exit_cmp = ERROR_MARK; + *exit_bound = NULL_TREE; + gcc_assert (cmp != ERROR_MARK); + + /* We only need to be correct when we answer question + "Do at least FACTOR more iterations remain?" in the unrolled loop. + Thus, transforming BASE + STEP * i <> BOUND to + BASE + STEP * i < BOUND is ok. */ + if (cmp == NE_EXPR) + { + if (tree_int_cst_sign_bit (step)) + cmp = GT_EXPR; + else + cmp = LT_EXPR; + } + else if (cmp == LT_EXPR) + { + gcc_assert (!tree_int_cst_sign_bit (step)); + } + else if (cmp == GT_EXPR) + { + gcc_assert (tree_int_cst_sign_bit (step)); + } + else + gcc_unreachable (); + + /* The main body of the loop may be entered iff: + + 1) desc->may_be_zero is false. + 2) it is possible to check that there are at least FACTOR iterations + of the loop, i.e., BOUND - step * FACTOR does not overflow. + 3) # of iterations is at least FACTOR */ + + if (!zero_p (desc->may_be_zero)) + cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, + invert_truthvalue (desc->may_be_zero), + cond); + + bigstep = fold_build2 (MULT_EXPR, type, step, + build_int_cst_type (type, factor)); + delta = fold_build2 (MINUS_EXPR, type, bigstep, step); + if (cmp == LT_EXPR) + assum = fold_build2 (GE_EXPR, boolean_type_node, + bound, + fold_build2 (PLUS_EXPR, type, min, delta)); + else + assum = fold_build2 (LE_EXPR, boolean_type_node, + bound, + fold_build2 (PLUS_EXPR, type, max, delta)); + cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, assum, cond); + + bound = fold_build2 (MINUS_EXPR, type, bound, delta); + assum = fold_build2 (cmp, boolean_type_node, base, bound); + cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, assum, cond); + + cond = force_gimple_operand (unshare_expr (cond), &stmts, false, NULL_TREE); + if (stmts) + bsi_insert_on_edge_immediate_loop (loop_preheader_edge (loop), stmts); + /* cond now may be a gimple comparison, which would be OK, but also any + other gimple rhs (say a && b). In this case we need to force it to + operand. */ + if (!is_gimple_condexpr (cond)) + { + cond = force_gimple_operand (cond, &stmts, true, NULL_TREE); + if (stmts) + bsi_insert_on_edge_immediate_loop (loop_preheader_edge (loop), stmts); + } + *enter_cond = cond; + + base = force_gimple_operand (unshare_expr (base), &stmts, true, NULL_TREE); + if (stmts) + bsi_insert_on_edge_immediate_loop (loop_preheader_edge (loop), stmts); + bound = force_gimple_operand (unshare_expr (bound), &stmts, true, NULL_TREE); + if (stmts) + bsi_insert_on_edge_immediate_loop (loop_preheader_edge (loop), stmts); + + *exit_base = base; + *exit_step = bigstep; + *exit_cmp = cmp; + *exit_bound = bound; +} + +/* Unroll LOOP FACTOR times. LOOPS is the loops tree. DESC describes + number of iterations of LOOP. EXIT is the exit of the loop to that + DESC corresponds. + + If N is number of iterations of the loop and MAY_BE_ZERO is the condition + under that loop exits in the first iteration even if N != 0, + + while (1) + { + x = phi (init, next); + + pre; + if (st) + break; + post; + } + + becomes (with possibly the exit conditions formulated a bit differently, + avoiding the need to create a new iv): + + if (MAY_BE_ZERO || N < FACTOR) + goto rest; + + do + { + x = phi (init, next); + + pre; + post; + pre; + post; + ... + pre; + post; + N -= FACTOR; + + } while (N >= FACTOR); + + rest: + init' = phi (init, x); + + while (1) + { + x = phi (init', next); + + pre; + if (st) + break; + post; + } */ + +void +tree_unroll_loop (struct loops *loops, struct loop *loop, unsigned factor, + edge exit, struct tree_niter_desc *desc) +{ + tree dont_exit, exit_if, ctr_before, ctr_after; + tree enter_main_cond, exit_base, exit_step, exit_bound; + enum tree_code exit_cmp; + tree phi_old_loop, phi_new_loop, phi_rest, init, next, new_init, var; + struct loop *new_loop; + basic_block rest, exit_bb; + edge old_entry, new_entry, old_latch, precond_edge, new_exit; + edge nonexit, new_nonexit; + block_stmt_iterator bsi; + use_operand_p op; + bool ok; + unsigned est_niter; + sbitmap wont_exit; + + est_niter = expected_loop_iterations (loop); + determine_exit_conditions (loop, desc, factor, + &enter_main_cond, &exit_base, &exit_step, + &exit_cmp, &exit_bound); + + new_loop = loop_version (loops, loop, enter_main_cond, NULL, true); + gcc_assert (new_loop != NULL); + update_ssa (TODO_update_ssa); + + /* Unroll the loop and remove the old exits. */ + dont_exit = ((exit->flags & EDGE_TRUE_VALUE) + ? boolean_false_node + : boolean_true_node); + if (exit == EDGE_SUCC (exit->src, 0)) + nonexit = EDGE_SUCC (exit->src, 1); + else + nonexit = EDGE_SUCC (exit->src, 0); + nonexit->probability = REG_BR_PROB_BASE; + exit->probability = 0; + nonexit->count += exit->count; + exit->count = 0; + exit_if = last_stmt (exit->src); + COND_EXPR_COND (exit_if) = dont_exit; + update_stmt (exit_if); + + wont_exit = sbitmap_alloc (factor); + sbitmap_ones (wont_exit); + ok = tree_duplicate_loop_to_header_edge + (loop, loop_latch_edge (loop), loops, factor - 1, + wont_exit, NULL, NULL, NULL, DLTHE_FLAG_UPDATE_FREQ); + free (wont_exit); + gcc_assert (ok); + update_ssa (TODO_update_ssa); + + /* Prepare the cfg and update the phi nodes. */ + rest = loop_preheader_edge (new_loop)->src; + precond_edge = single_pred_edge (rest); + loop_split_edge_with (loop_latch_edge (loop), NULL); + exit_bb = single_pred (loop->latch); + + new_exit = make_edge (exit_bb, rest, EDGE_FALSE_VALUE); + new_exit->count = loop_preheader_edge (loop)->count; + est_niter = est_niter / factor + 1; + new_exit->probability = REG_BR_PROB_BASE / est_niter; + + new_nonexit = single_pred_edge (loop->latch); + new_nonexit->flags = EDGE_TRUE_VALUE; + new_nonexit->probability = REG_BR_PROB_BASE - new_exit->probability; + + old_entry = loop_preheader_edge (loop); + new_entry = loop_preheader_edge (new_loop); + old_latch = loop_latch_edge (loop); + for (phi_old_loop = phi_nodes (loop->header), + phi_new_loop = phi_nodes (new_loop->header); + phi_old_loop; + phi_old_loop = PHI_CHAIN (phi_old_loop), + phi_new_loop = PHI_CHAIN (phi_new_loop)) + { + init = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_entry); + op = PHI_ARG_DEF_PTR_FROM_EDGE (phi_new_loop, new_entry); + gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op))); + next = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_latch); + + /* Prefer using original variable as a base for the new ssa name. + This is necessary for virtual ops, and useful in order to avoid + losing debug info for real ops. */ + if (TREE_CODE (next) == SSA_NAME) + var = SSA_NAME_VAR (next); + else if (TREE_CODE (init) == SSA_NAME) + var = SSA_NAME_VAR (init); + else + { + var = create_tmp_var (TREE_TYPE (init), "unrinittmp"); + add_referenced_tmp_var (var); + } + + new_init = make_ssa_name (var, NULL_TREE); + phi_rest = create_phi_node (new_init, rest); + SSA_NAME_DEF_STMT (new_init) = phi_rest; + + add_phi_arg (phi_rest, init, precond_edge); + add_phi_arg (phi_rest, next, new_exit); + SET_USE (op, new_init); + } + + /* Finally create the new counter for number of iterations and add the new + exit instruction. */ + bsi = bsi_last (exit_bb); + create_iv (exit_base, exit_step, NULL_TREE, loop, + &bsi, true, &ctr_before, &ctr_after); + exit_if = build_if_stmt (build2 (exit_cmp, boolean_type_node, ctr_after, + exit_bound), + tree_block_label (loop->latch), + tree_block_label (rest)); + bsi_insert_after (&bsi, exit_if, BSI_NEW_STMT); + + verify_flow_info (); + verify_dominators (CDI_DOMINATORS); + verify_loop_structure (loops); + verify_loop_closed_ssa (); +} diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c index 7566e7cad49..f913df3141b 100644 --- a/gcc/tree-ssa-loop-niter.c +++ b/gcc/tree-ssa-loop-niter.c @@ -140,6 +140,10 @@ number_of_iterations_ne (tree type, affine_iv *iv, tree final, tree niter_type = unsigned_type_for (type); tree s, c, d, bits, assumption, tmp, bound; + niter->control = *iv; + niter->bound = final; + niter->cmp = NE_EXPR; + /* Rearrange the terms so that we get inequality s * i <> c, with s positive. Also cast everything to the unsigned type. */ if (tree_int_cst_sign_bit (iv->step)) @@ -410,6 +414,19 @@ number_of_iterations_lt (tree type, affine_iv *iv0, affine_iv *iv1, tree niter_type = unsigned_type_for (type); tree delta, step, s; + if (nonzero_p (iv0->step)) + { + niter->control = *iv0; + niter->cmp = LT_EXPR; + niter->bound = iv1->base; + } + else + { + niter->control = *iv1; + niter->cmp = GT_EXPR; + niter->bound = iv0->base; + } + delta = fold_build2 (MINUS_EXPR, niter_type, fold_convert (niter_type, iv1->base), fold_convert (niter_type, iv0->base)); @@ -543,6 +560,9 @@ number_of_iterations_cond (tree type, affine_iv *iv0, enum tree_code code, niter->niter = NULL_TREE; niter->additional_info = boolean_true_node; + niter->bound = NULL_TREE; + niter->cmp = ERROR_MARK; + /* Make < comparison from > ones, and for NE_EXPR comparisons, ensure that the control variable is on lhs. */ if (code == GE_EXPR || code == GT_EXPR diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c index a735084803e..60cdefcbe24 100644 --- a/gcc/tree-ssa-loop.c +++ b/gcc/tree-ssa-loop.c @@ -401,6 +401,40 @@ struct tree_opt_pass pass_complete_unroll = 0 /* letter */ }; +/* Prefetching. */ + +static void +tree_ssa_loop_prefetch (void) +{ + if (!current_loops) + return; + + tree_ssa_prefetch_arrays (current_loops); +} + +static bool +gate_tree_ssa_loop_prefetch (void) +{ + return flag_prefetch_loop_arrays == 1; +} + +struct tree_opt_pass pass_loop_prefetch = +{ + "prefetch", /* name */ + gate_tree_ssa_loop_prefetch, /* gate */ + tree_ssa_loop_prefetch, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + TV_TREE_PREFETCH, /* tv_id */ + PROP_cfg | PROP_ssa, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_dump_func | TODO_verify_loops, /* todo_flags_finish */ + 0 /* letter */ +}; + /* Induction variable optimizations. */ static void |