diff options
-rw-r--r-- | gcc/ChangeLog | 26 | ||||
-rw-r--r-- | gcc/Makefile.in | 8 | ||||
-rw-r--r-- | gcc/cfgloop.h | 6 | ||||
-rw-r--r-- | gcc/cfgloopmanip.c | 24 | ||||
-rw-r--r-- | gcc/doc/passes.texi | 8 | ||||
-rw-r--r-- | gcc/loop-unswitch.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/loop-6.c | 26 | ||||
-rw-r--r-- | gcc/timevar.def | 1 | ||||
-rw-r--r-- | gcc/tree-flow.h | 7 | ||||
-rw-r--r-- | gcc/tree-optimize.c | 1 | ||||
-rw-r--r-- | gcc/tree-pass.h | 1 | ||||
-rw-r--r-- | gcc/tree-ssa-loop-manip.c | 340 | ||||
-rw-r--r-- | gcc/tree-ssa-loop-unswitch.c | 293 | ||||
-rw-r--r-- | gcc/tree-ssa-loop.c | 34 |
15 files changed, 766 insertions, 15 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 2475038afa9..cf3f949b133 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,29 @@ +2004-09-23 Zdenek Dvorak <rakdver@atrey.karlin.mff.cuni.cz> + + * cfgloop.h (update_single_exits_after_duplication): Declare. + (loopify, split_loop_bb): Declaration changed. + * cfgloopmanip.c (split_loop_bb): Take void * as an argument instead + of rtx. + (loopify): Added redirect_all_edges argument. + (update_single_exits_after_duplication): Export. + * loop-unswitch.c (unswitch_loop): Changed due to loopify change. + * tree-flow.h (tree_duplicate_loop_to_header_edge, + tree_ssa_loop_version): Declare. + * tree-ssa-loop-manip.c (copy_phi_node_args, rename_variables, + set_phi_def_stmts, tree_duplicate_loop_to_header_edge, + lv_adjust_loop_header_phi, lv_adjust_loop_entry_edge, + lv_update_pending_stmts, tree_ssa_loop_version): New functions. + + * tree-ssa-loop-unswitch.c: New file. + * Makefile.in (tree-ssa-loop-unswitch.o): Add. + * timevar.def (TV_TREE_LOOP_UNSWITCH): New timevar. + * tree-flow.h (tree_ssa_unswitch_loops): Declare. + * tree-optimize.c (init_tree_optimization_passes): Add pass_unswitch. + * tree-pass.h (pass_unswitch): Declare. + * tree-ssa-loop.c (tree_ssa_loop_unswitch, + gate_tree_ssa_loop_unswitch, pass_unswitch): New pass. + * doc/passes.texi: Document tree level loop unswitching. + 2004-09-23 Kazu Hirata <kazu@cs.umass.edu> * cfgexpand.c, config/s390/tpf-eh.c: Fix comment typos. diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 9608bd881e2..59b1fddb6c1 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -144,7 +144,7 @@ TCFLAGS = CFLAGS = -g STAGE1_CFLAGS = -g @stage1_cflags@ STAGE1_CHECKING = -DENABLE_CHECKING -DENABLE_ASSERT_CHECKING -BOOT_CFLAGS = -g -O2 +BOOT_CFLAGS = -g -O2 -funswitch-loops # Flags to determine code coverage. 
When coverage is disabled, this will # contain the optimization flags, as you normally want code coverage @@ -897,7 +897,7 @@ OBJS-common = \ tree-phinodes.o tree-ssanames.o tree-sra.o tree-complex.o tree-ssa-loop.o \ tree-ssa-loop-niter.o tree-ssa-loop-manip.o tree-ssa-threadupdate.o \ tree-vectorizer.o tree-ssa-loop-ivcanon.o tree-ssa-propagate.o \ - tree-ssa-loop-ivopts.o tree-if-conv.o \ + tree-ssa-loop-ivopts.o tree-if-conv.o tree-ssa-loop-unswitch.o \ alias.o bb-reorder.o bitmap.o builtins.o caller-save.o calls.o \ cfg.o cfganal.o cfgbuild.o cfgcleanup.o cfglayout.o cfgloop.o \ cfgloopanal.o cfgloopmanip.o loop-init.o loop-unswitch.o loop-unroll.o \ @@ -1693,6 +1693,10 @@ tree-ssa-loop.o : tree-ssa-loop.c $(TREE_FLOW_H) $(CONFIG_H) \ $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) \ output.h diagnostic.h $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \ tree-pass.h $(FLAGS_H) tree-inline.h $(SCEV_H) +tree-ssa-loop-unswitch.o : tree-ssa-loop-unswitch.c $(TREE_FLOW_H) $(CONFIG_H) \ + $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) domwalk.h $(PARAMS_H)\ + output.h diagnostic.h $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \ + tree-pass.h tree-ssa-loop-niter.o : tree-ssa-loop-niter.c $(TREE_FLOW_H) $(CONFIG_H) \ $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) cfgloop.h $(PARAMS_H) tree-inline.h \ output.h diagnostic.h $(TM_H) coretypes.h $(TREE_DUMP_H) $(FLAGS_H) \ diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h index cfa8e100078..1b45a563bff 100644 --- a/gcc/cfgloop.h +++ b/gcc/cfgloop.h @@ -254,6 +254,8 @@ extern int flow_loop_scan (struct loop *, int); extern void flow_loop_free (struct loop *); void mark_irreducible_loops (struct loops *); void mark_single_exit_loops (struct loops *); +void update_single_exits_after_duplication (basic_block *, unsigned, + struct loop *); extern void create_loop_notes (void); /* Loop data structure manipulation/querying. 
*/ @@ -313,10 +315,10 @@ extern struct loop * duplicate_loop (struct loops *, struct loop *, extern int duplicate_loop_to_header_edge (struct loop *, edge, struct loops *, unsigned, sbitmap, edge, edge *, unsigned *, int); -extern struct loop *loopify (struct loops *, edge, edge, basic_block); +extern struct loop *loopify (struct loops *, edge, edge, basic_block, bool); extern void unloop (struct loops *, struct loop *); extern bool remove_path (struct loops *, edge); -extern edge split_loop_bb (basic_block, rtx); +extern edge split_loop_bb (basic_block, void *); /* Induction variable analysis. */ diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c index 616909700db..24b2399ddb8 100644 --- a/gcc/cfgloopmanip.c +++ b/gcc/cfgloopmanip.c @@ -53,7 +53,7 @@ static void fix_irreducible_loops (basic_block); /* Splits basic block BB after INSN, returns created edge. Updates loops and dominators. */ edge -split_loop_bb (basic_block bb, rtx insn) +split_loop_bb (basic_block bb, void *insn) { edge e; @@ -486,7 +486,7 @@ scale_loop_frequencies (struct loop *loop, int num, int den) struct loop * loopify (struct loops *loops, edge latch_edge, edge header_edge, - basic_block switch_bb) + basic_block switch_bb, bool redirect_all_edges) { basic_block succ_bb = latch_edge->dest; basic_block pred_bb = header_edge->src; @@ -513,12 +513,17 @@ loopify (struct loops *loops, edge latch_edge, edge header_edge, loop_redirect_edge (latch_edge, loop->header); loop_redirect_edge (BRANCH_EDGE (switch_bb), succ_bb); - loop_redirect_edge (header_edge, switch_bb); - loop_redirect_edge (FALLTHRU_EDGE (switch_bb), loop->header); - - /* Update dominators. */ - set_immediate_dominator (CDI_DOMINATORS, switch_bb, pred_bb); - set_immediate_dominator (CDI_DOMINATORS, loop->header, switch_bb); + /* During loop versioning, one of the switch_bb edge is already properly + set. Do not redirect it again unless redirect_all_edges is true. 
*/ + if (redirect_all_edges) + { + loop_redirect_edge (header_edge, switch_bb); + loop_redirect_edge (FALLTHRU_EDGE (switch_bb), loop->header); + + /* Update dominators. */ + set_immediate_dominator (CDI_DOMINATORS, switch_bb, pred_bb); + set_immediate_dominator (CDI_DOMINATORS, loop->header, switch_bb); + } set_immediate_dominator (CDI_DOMINATORS, succ_bb, switch_bb); @@ -812,7 +817,7 @@ can_duplicate_loop_p (struct loop *loop) /* The NBBS blocks in BBS will get duplicated and the copies will be placed to LOOP. Update the single_exit information in superloops of LOOP. */ -static void +void update_single_exits_after_duplication (basic_block *bbs, unsigned nbbs, struct loop *loop) { @@ -834,7 +839,6 @@ update_single_exits_after_duplication (basic_block *bbs, unsigned nbbs, bbs[i]->rbi->duplicated = 0; } - /* Duplicates body of LOOP to given edge E NDUPL times. Takes care of updating LOOPS structure and dominators. E's destination must be LOOP header for this to work, i.e. it must be entry or latch edge of this loop; these are diff --git a/gcc/doc/passes.texi b/gcc/doc/passes.texi index 00a53978a7d..bf0c421e9e0 100644 --- a/gcc/doc/passes.texi +++ b/gcc/doc/passes.texi @@ -380,6 +380,14 @@ variable optimizations, including strength reduction, induction variable merging and induction variable elimination. The pass is implemented in @file{tree-ssa-loop-ivopts.c}. +Loop unswitching. This pass moves the conditional jumps that are invariant +out of the loops. To achieve this, a duplicate of the loop is created for +each possible outcome of conditional jump(s). The pass is implemented in +@file{tree-ssa-loop-unswitch.c}. This pass should eventually replace the +rtl-level loop unswitching in @file{loop-unswitch.c}, but currently +the rtl-level pass is not completely redundant yet due to deficiencies +in tree level alias analysis. 
+ The optimizations also use various utility functions contained in @file{tree-ssa-loop-manip.c}, @file{cfgloop.c}, @file{cfgloopanal.c} and @file{cfgloopmanip.c}. diff --git a/gcc/loop-unswitch.c b/gcc/loop-unswitch.c index 08780f0d58f..2eb3396a759 100644 --- a/gcc/loop-unswitch.c +++ b/gcc/loop-unswitch.c @@ -475,7 +475,7 @@ unswitch_loop (struct loops *loops, struct loop *loop, basic_block unswitch_on, /* Loopify from the copy of LOOP body, constructing the new loop. */ nloop = loopify (loops, latch_edge, - loop->header->rbi->copy->pred, switch_bb); + loop->header->rbi->copy->pred, switch_bb, true); /* Remove branches that are now unreachable in new loops. */ remove_path (loops, true_edge); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 9f90b343cc6..fbd76080aec 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2004-09-23 Zdenek Dvorak <rakdver@atrey.karlin.mff.cuni.cz> + + * gcc.dg/tree-ssa/loop-6.c: New test. + 2004-09-23 Nathan Sidwell <nathan@codesourcery.com> PR c++/17620 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-6.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-6.c new file mode 100644 index 00000000000..e96f5e27c70 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-6.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -funswitch-loops -fdump-tree-unswitch-details -fdump-tree-vars" } */ + +int ch; +int a[100]; + +void xxx(void) +{ + int i; + + for (i = 0; i < 100; i++) + { + if (ch) + a[i] = ch; + else + a[i] = i; + } +} + +/* Loop should be unswitched. */ + +/* { dg-final { scan-tree-dump-times "Unswitching loop" 1 "unswitch" } } */ + +/* In effect there should be exactly three conditional jumps in the final program. 
*/ + +/* { dg-final { scan-tree-dump-times "else" 3 "vars" } } */ diff --git a/gcc/timevar.def b/gcc/timevar.def index 87108b803d8..a7ebc3e36d4 100644 --- a/gcc/timevar.def +++ b/gcc/timevar.def @@ -84,6 +84,7 @@ DEFTIMEVAR (TV_TREE_DSE , "tree DSE") DEFTIMEVAR (TV_TREE_LOOP , "tree loop optimization") DEFTIMEVAR (TV_LIM , "loop invariant motion") DEFTIMEVAR (TV_TREE_LOOP_IVCANON , "tree canonical iv creation") +DEFTIMEVAR (TV_TREE_LOOP_UNSWITCH , "tree loop unswitching") DEFTIMEVAR (TV_COMPLETE_UNROLL , "complete unrolling") DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree loop vectorization") DEFTIMEVAR (TV_TREE_LINEAR_TRANSFORM , "tree loop linear transforms") diff --git a/gcc/tree-flow.h b/gcc/tree-flow.h index bdcd4aa6879..0cf86ca60eb 100644 --- a/gcc/tree-flow.h +++ b/gcc/tree-flow.h @@ -650,6 +650,7 @@ bool empty_block_p (basic_block); /* In tree-ssa-loop*.c */ void tree_ssa_lim (struct loops *); +void tree_ssa_unswitch_loops (struct loops *); void canonicalize_induction_variables (struct loops *); void tree_unroll_loops_completely (struct loops *); void tree_ssa_iv_optimize (struct loops *); @@ -675,6 +676,12 @@ void standard_iv_increment_position (struct loop *, block_stmt_iterator *, bool *); basic_block ip_end_pos (struct loop *); basic_block ip_normal_pos (struct loop *); +bool tree_duplicate_loop_to_header_edge (struct loop *, edge, struct loops *, + unsigned int, sbitmap, + edge, edge *, + unsigned int *, int); +struct loop *tree_ssa_loop_version (struct loops *, struct loop *, tree, + basic_block *); /* In tree-ssa-loop-im.c */ /* The possibilities of statement movement. 
*/ diff --git a/gcc/tree-optimize.c b/gcc/tree-optimize.c index 971f0629b4e..3bcc1cde47b 100644 --- a/gcc/tree-optimize.c +++ b/gcc/tree-optimize.c @@ -391,6 +391,7 @@ init_tree_optimization_passes (void) p = &pass_loop.sub; NEXT_PASS (pass_loop_init); NEXT_PASS (pass_lim); + NEXT_PASS (pass_unswitch); NEXT_PASS (pass_iv_canon); NEXT_PASS (pass_if_conversion); NEXT_PASS (pass_vectorize); diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index fa9151f105b..ae19e2de517 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -125,6 +125,7 @@ extern struct tree_opt_pass pass_tail_calls; extern struct tree_opt_pass pass_loop; extern struct tree_opt_pass pass_loop_init; extern struct tree_opt_pass pass_lim; +extern struct tree_opt_pass pass_unswitch; extern struct tree_opt_pass pass_iv_canon; extern struct tree_opt_pass pass_if_conversion; extern struct tree_opt_pass pass_vectorize; diff --git a/gcc/tree-ssa-loop-manip.c b/gcc/tree-ssa-loop-manip.c index e6ff8a80559..a06b3520753 100644 --- a/gcc/tree-ssa-loop-manip.c +++ b/gcc/tree-ssa-loop-manip.c @@ -506,3 +506,343 @@ standard_iv_increment_position (struct loop *loop, block_stmt_iterator *bsi, *insert_after = false; } } + +/* Copies phi node arguments for duplicated blocks. The index of the first + duplicated block is FIRST_NEW_BLOCK. */ + +static void +copy_phi_node_args (unsigned first_new_block) +{ + unsigned i; + + for (i = first_new_block; i < (unsigned) last_basic_block; i++) + BASIC_BLOCK (i)->rbi->duplicated = 1; + + for (i = first_new_block; i < (unsigned) last_basic_block; i++) + add_phi_args_after_copy_bb (BASIC_BLOCK (i)); + + for (i = first_new_block; i < (unsigned) last_basic_block; i++) + BASIC_BLOCK (i)->rbi->duplicated = 0; +} + +/* Renames variables in the area copied by tree_duplicate_loop_to_header_edge. + FIRST_NEW_BLOCK is the first block in the copied area. DEFINITIONS is + a bitmap of all ssa names defined inside the loop. 
*/ + +static void +rename_variables (unsigned first_new_block, bitmap definitions) +{ + unsigned i, copy_number = 0; + basic_block bb; + htab_t ssa_name_map = NULL; + + for (i = first_new_block; i < (unsigned) last_basic_block; i++) + { + bb = BASIC_BLOCK (i); + + /* We assume that first come all blocks from the first copy, then all + blocks from the second copy, etc. */ + if (copy_number != (unsigned) bb->rbi->copy_number) + { + allocate_ssa_names (definitions, &ssa_name_map); + copy_number = bb->rbi->copy_number; + } + + rewrite_to_new_ssa_names_bb (bb, ssa_name_map); + } + + htab_delete (ssa_name_map); +} + +/* Sets SSA_NAME_DEF_STMT for results of all phi nodes in BB. */ + +static void +set_phi_def_stmts (basic_block bb) +{ + tree phi; + + for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi)) + SSA_NAME_DEF_STMT (PHI_RESULT (phi)) = phi; +} + +/* The same as cfgloopmanip.c:duplicate_loop_to_header_edge, but also updates + ssa. In order to achieve this, only loops whose exits all lead to the same + location are handled. + + FIXME: we create some degenerate phi nodes that could be avoided by copy + propagating them instead. Unfortunately this is not completely + straightforward due to problems with constant folding. 
*/ + +bool +tree_duplicate_loop_to_header_edge (struct loop *loop, edge e, + struct loops *loops, + unsigned int ndupl, sbitmap wont_exit, + edge orig, edge *to_remove, + unsigned int *n_to_remove, int flags) +{ + unsigned first_new_block; + basic_block bb; + unsigned i; + tree phi, arg, map, def; + bitmap definitions; + + if (!(loops->state & LOOPS_HAVE_SIMPLE_LATCHES)) + return false; + if (!(loops->state & LOOPS_HAVE_PREHEADERS)) + return false; + +#ifdef ENABLE_CHECKING + verify_loop_closed_ssa (); +#endif + + gcc_assert (!any_marked_for_rewrite_p ()); + + first_new_block = last_basic_block; + if (!duplicate_loop_to_header_edge (loop, e, loops, ndupl, wont_exit, + orig, to_remove, n_to_remove, flags)) + return false; + + /* Readd the removed phi args for e. */ + map = PENDING_STMT (e); + PENDING_STMT (e) = NULL; + + for (phi = phi_nodes (e->dest), arg = map; + phi; + phi = TREE_CHAIN (phi), arg = TREE_CHAIN (arg)) + { + def = TREE_VALUE (arg); + add_phi_arg (&phi, def, e); + } + gcc_assert (arg == NULL); + + /* Copy the phi node arguments. */ + copy_phi_node_args (first_new_block); + + /* Rename the variables. */ + definitions = marked_ssa_names (); + rename_variables (first_new_block, definitions); + unmark_all_for_rewrite (); + BITMAP_XFREE (definitions); + + /* For some time we have the identical ssa names as results in multiple phi + nodes. When phi node is resized, it sets SSA_NAME_DEF_STMT of its result + to the new copy. This means that we cannot easily ensure that the ssa + names defined in those phis are pointing to the right one -- so just + recompute SSA_NAME_DEF_STMT for them. 
*/ + + for (i = first_new_block; i < (unsigned) last_basic_block; i++) + { + bb = BASIC_BLOCK (i); + set_phi_def_stmts (bb); + if (bb->rbi->copy_number == 1) + set_phi_def_stmts (bb->rbi->original); + } + + scev_reset (); +#ifdef ENABLE_CHECKING + verify_loop_closed_ssa (); +#endif + + return true; +} + +/*--------------------------------------------------------------------------- + Loop versioning + ---------------------------------------------------------------------------*/ + +/* Adjust phi nodes for 'first' basic block. 'second' basic block is a copy + of 'first'. Both of them are dominated by 'new_head' basic block. When + 'new_head' was created by 'second's incoming edge it received phi arguments + on the edge by split_edge(). Later, additional edge 'e' was created to + connect 'new_head' and 'first'. Now this routine adds phi args on this + additional edge 'e' that new_head to second edge received as part of edge + splitting. +*/ + +static void +lv_adjust_loop_header_phi (basic_block first, basic_block second, + basic_block new_head, edge e) +{ + tree phi1, phi2; + + /* Browse all 'second' basic block phi nodes and add phi args to + edge 'e' for 'first' head. PHI args are always in correct order. */ + + for (phi2 = phi_nodes (second), phi1 = phi_nodes (first); + phi2 && phi1; + phi2 = TREE_CHAIN (phi2), phi1 = TREE_CHAIN (phi1)) + { + int i; + for (i = 0; i < PHI_NUM_ARGS (phi2); i++) + { + if (PHI_ARG_EDGE (phi2, i)->src == new_head) + { + tree def = PHI_ARG_DEF (phi2, i); + add_phi_arg (&phi1, def, e); + } + } + } +} + +/* Adjust entry edge for lv. + + e is a incoming edge. + + --- edge e ---- > [second_head] + + Split it and insert new conditional expression and adjust edges. 
+ + --- edge e ---> [cond expr] ---> [first_head] + | + +---------> [second_head] + +*/ + +static basic_block +lv_adjust_loop_entry_edge (basic_block first_head, + basic_block second_head, + edge e, + tree cond_expr) +{ + block_stmt_iterator bsi; + basic_block new_head = NULL; + tree goto1 = NULL_TREE; + tree goto2 = NULL_TREE; + tree new_cond_expr = NULL_TREE; + edge e0, e1; + + gcc_assert (e->dest == second_head); + + /* Split edge 'e'. This will create a new basic block, where we can + insert conditional expr. */ + new_head = split_edge (e); + + /* Build new conditional expr */ + goto1 = build1 (GOTO_EXPR, void_type_node, tree_block_label (first_head)); + goto2 = build1 (GOTO_EXPR, void_type_node, tree_block_label (second_head)); + new_cond_expr = build3 (COND_EXPR, void_type_node, cond_expr, goto1, goto2); + + /* Add new cond. in new head. */ + bsi = bsi_start (new_head); + bsi_insert_after (&bsi, new_cond_expr, BSI_NEW_STMT); + + /* Adjust edges appropriately to connect new head with first head + as well as second head. */ + e0 = new_head->succ; + e0->flags &= ~EDGE_FALLTHRU; + e0->flags |= EDGE_FALSE_VALUE; + e1 = make_edge (new_head, first_head, EDGE_TRUE_VALUE); + set_immediate_dominator (CDI_DOMINATORS, first_head, new_head); + set_immediate_dominator (CDI_DOMINATORS, second_head, new_head); + + /* Adjust loop header phi nodes. */ + lv_adjust_loop_header_phi (first_head, second_head, new_head, e1); + + return new_head; +} + +/* Add phi args using PENDING_STMT list. */ + +static void +lv_update_pending_stmts (edge e) +{ + basic_block dest; + tree phi, arg, def; + + if (!PENDING_STMT (e)) + return; + + dest = e->dest; + + for (phi = phi_nodes (dest), arg = PENDING_STMT (e); + phi; + phi = TREE_CHAIN (phi), arg = TREE_CHAIN (arg)) + { + def = TREE_VALUE (arg); + add_phi_arg (&phi, def, e); + } + + PENDING_STMT (e) = NULL; +} + + +/* Main entry point for Loop Versioning transformation. 
+ +This transformation given a condition and a loop, creates +-if (condition) { loop_copy1 } else { loop_copy2 }, +where loop_copy1 is the loop transformed in one way, and loop_copy2 +is the loop transformed in another way (or unchanged). 'condition' +may be a run time test for things that were not resolved by static +analysis (overlapping ranges (anti-aliasing), alignment, etc.). */ + +struct loop * +tree_ssa_loop_version (struct loops *loops, struct loop * loop, + tree cond_expr, basic_block *condition_bb) +{ + edge entry, latch_edge, exit; + basic_block first_head, second_head; + int irred_flag; + struct loop *nloop; + + /* CHECKME: Loop versioning does not handle nested loop at this point. */ + if (loop->inner) + return NULL; + + /* Record entry and latch edges for the loop */ + entry = loop_preheader_edge (loop); + + /* Note down head of loop as first_head. */ + first_head = entry->dest; + + /* Duplicate loop. */ + irred_flag = entry->flags & EDGE_IRREDUCIBLE_LOOP; + entry->flags &= ~EDGE_IRREDUCIBLE_LOOP; + if (!tree_duplicate_loop_to_header_edge (loop, entry, loops, 1, + NULL, NULL, NULL, NULL, 0)) + { + entry->flags |= irred_flag; + return NULL; + } + + /* After duplication entry edge now points to new loop head block. + Note down new head as second_head. */ + second_head = entry->dest; + + /* Split loop entry edge and insert new block with cond expr. */ + *condition_bb = lv_adjust_loop_entry_edge (first_head, second_head, entry, + cond_expr); + + latch_edge = loop->latch->rbi->copy->succ; + nloop = loopify (loops, + latch_edge, + loop->header->rbi->copy->pred, + *condition_bb, + false /* Do not redirect all edges. */); + + exit = loop->single_exit; + if (exit) + nloop->single_exit = find_edge (exit->src->rbi->copy, exit->dest); + + /* loopify redirected latch_edge. Update its PENDING_STMTS. */ + lv_update_pending_stmts (latch_edge); + + /* loopify redirected condition_bb's succ edge. Update its PENDING_STMTS. 
*/ + lv_update_pending_stmts (FALLTHRU_EDGE (*condition_bb)); + + /* Adjust irreducible flag. */ + if (irred_flag) + { + (*condition_bb)->flags |= BB_IRREDUCIBLE_LOOP; + loop_preheader_edge (loop)->flags |= EDGE_IRREDUCIBLE_LOOP; + loop_preheader_edge (nloop)->flags |= EDGE_IRREDUCIBLE_LOOP; + (*condition_bb)->pred->flags |= EDGE_IRREDUCIBLE_LOOP; + } + + /* At this point condition_bb is loop preheader with two successors, + first_head and second_head. Make sure that loop preheader has only + one successor. */ + loop_split_edge_with (loop_preheader_edge (loop), NULL); + loop_split_edge_with (loop_preheader_edge (nloop), NULL); + + return nloop; +} diff --git a/gcc/tree-ssa-loop-unswitch.c b/gcc/tree-ssa-loop-unswitch.c new file mode 100644 index 00000000000..45b7e87a229 --- /dev/null +++ b/gcc/tree-ssa-loop-unswitch.c @@ -0,0 +1,293 @@ +/* Loop unswitching. + Copyright (C) 2004 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +GCC is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING. If not, write to the Free +Software Foundation, 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. 
*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "rtl.h" +#include "tm_p.h" +#include "hard-reg-set.h" +#include "basic-block.h" +#include "output.h" +#include "diagnostic.h" +#include "tree-flow.h" +#include "tree-dump.h" +#include "timevar.h" +#include "cfgloop.h" +#include "domwalk.h" +#include "params.h" +#include "tree-pass.h" + +/* This file implements the loop unswitching, i.e. transformation of loops like + + while (A) + { + if (inv) + B; + + X; + + if (!inv) + C; + } + + where inv is the loop invariant, into + + if (inv) + { + while (A) + { + B; + X; + } + } + else + { + while (A) + { + X; + C; + } + } + + Inv is considered invariant iff the values it compares are both invariant; + tree-ssa-loop-im.c ensures that all the suitable conditions are in this + shape. */ + +static struct loop *tree_unswitch_loop (struct loops *, struct loop *, basic_block, + tree); +static bool tree_unswitch_single_loop (struct loops *, struct loop *, int); +static tree tree_may_unswitch_on (basic_block, struct loop *); + +/* Main entry point. Perform loop unswitching on all suitable LOOPS. */ + +void +tree_ssa_unswitch_loops (struct loops *loops) +{ + int i, num; + struct loop *loop; + bool changed = false; + + /* Go through inner loops (only original ones). */ + num = loops->num; + + for (i = 1; i < num; i++) + { + /* Removed loop? */ + loop = loops->parray[i]; + if (!loop) + continue; + + if (loop->inner) + continue; + + changed |= tree_unswitch_single_loop (loops, loop, 0); +#ifdef ENABLE_CHECKING + verify_dominators (CDI_DOMINATORS); + verify_loop_structure (loops); +#endif + } + +#if 0 + /* The necessary infrastructure is not in yet. */ + if (changed) + cleanup_tree_cfg_loop (); +#endif +} + +/* Checks whether we can unswitch LOOP on condition at end of BB -- one of its + basic blocks (for what it means see comments below). 
*/ + +static tree +tree_may_unswitch_on (basic_block bb, struct loop *loop) +{ + tree stmt, def, cond; + basic_block def_bb; + use_optype uses; + unsigned i; + + /* BB must end in a simple conditional jump. */ + stmt = last_stmt (bb); + if (!stmt || TREE_CODE (stmt) != COND_EXPR) + return NULL_TREE; + + /* Condition must be invariant. */ + get_stmt_operands (stmt); + uses = STMT_USE_OPS (stmt); + for (i = 0; i < NUM_USES (uses); i++) + { + def = SSA_NAME_DEF_STMT (USE_OP (uses, i)); + def_bb = bb_for_stmt (def); + if (def_bb + && flow_bb_inside_loop_p (loop, def_bb)) + return NULL_TREE; + } + + cond = COND_EXPR_COND (stmt); + /* To keep the things simple, we do not directly remove the conditions, + but just replace tests with 0/1. Prevent the infinite loop where we + would unswitch again on such a condition. */ + if (integer_zerop (cond) || integer_nonzerop (cond)) + return NULL_TREE; + + return cond; +} + +/* Simplifies COND using checks in front of the entry of the LOOP. Just very + simplish (sufficient to prevent us from duplicating loop in unswitching + unnecessarily). */ + +static tree +simplify_using_entry_checks (struct loop *loop, tree cond) +{ + edge e = loop_preheader_edge (loop); + tree stmt; + + while (1) + { + stmt = last_stmt (e->src); + if (stmt + && TREE_CODE (stmt) == COND_EXPR + && operand_equal_p (COND_EXPR_COND (stmt), cond, 0)) + return (e->flags & EDGE_TRUE_VALUE + ? boolean_true_node + : boolean_false_node); + + if (e->src->pred->pred_next) + return cond; + + e = e->src->pred; + if (e->src == ENTRY_BLOCK_PTR) + return cond; + } +} + +/* Unswitch single LOOP. NUM is number of unswitchings done; we do not allow + it to grow too much, it is too easy to create example on that the code would + grow exponentially. 
*/ + +static bool +tree_unswitch_single_loop (struct loops *loops, struct loop *loop, int num) +{ + basic_block *bbs; + struct loop *nloop; + unsigned i; + tree cond = NULL_TREE, stmt; + bool changed = false; + + /* Do not unswitch too much. */ + if (num > PARAM_VALUE (PARAM_MAX_UNSWITCH_LEVEL)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, ";; Not unswitching anymore, hit max level\n"); + return false; + } + + /* Only unswitch innermost loops. */ + if (loop->inner) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, ";; Not unswitching, not innermost loop\n"); + return false; + } + + /* The loop should not be too large, to limit code growth. */ + if (tree_num_loop_insns (loop) + > (unsigned) PARAM_VALUE (PARAM_MAX_UNSWITCH_INSNS)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, ";; Not unswitching, loop too big\n"); + return false; + } + + i = 0; + bbs = get_loop_body (loop); + + while (1) + { + /* Find a bb to unswitch on. */ + for (; i < loop->num_nodes; i++) + if ((cond = tree_may_unswitch_on (bbs[i], loop))) + break; + + if (i == loop->num_nodes) + { + free (bbs); + return changed; + } + + cond = simplify_using_entry_checks (loop, cond); + stmt = last_stmt (bbs[i]); + if (integer_nonzerop (cond)) + { + /* Remove false path. */ + COND_EXPR_COND (stmt) = boolean_true_node; + changed = true; + } + else if (integer_zerop (cond)) + { + /* Remove true path. */ + COND_EXPR_COND (stmt) = boolean_false_node; + changed = true; + } + else + break; + + modify_stmt (stmt); + i++; + } + + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, ";; Unswitching loop\n"); + + /* Unswitch the loop on this condition. */ + nloop = tree_unswitch_loop (loops, loop, bbs[i], cond); + if (!nloop) + return changed; + + /* Invoke itself on modified loops. 
*/ + tree_unswitch_single_loop (loops, nloop, num + 1); + tree_unswitch_single_loop (loops, loop, num + 1); + return true; +} + +/* Unswitch a LOOP w.r. to given basic block UNSWITCH_ON. We only support + unswitching of innermost loops. COND is the condition determining which + loop is entered -- the new loop is entered if COND is true. Returns NULL + if impossible, new loop otherwise. */ + +static struct loop * +tree_unswitch_loop (struct loops *loops, struct loop *loop, + basic_block unswitch_on, tree cond) +{ + basic_block condition_bb; + + /* Some sanity checking. */ + gcc_assert (flow_bb_inside_loop_p (loop, unswitch_on)); + gcc_assert (unswitch_on->succ != NULL); + gcc_assert (unswitch_on->succ->succ_next != NULL); + gcc_assert (unswitch_on->succ->succ_next->succ_next == NULL); + gcc_assert (loop->inner == NULL); + + return tree_ssa_loop_version (loops, loop, unshare_expr (cond), + &condition_bb); +} diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c index 2f87d87bacd..19f784d9bbf 100644 --- a/gcc/tree-ssa-loop.c +++ b/gcc/tree-ssa-loop.c @@ -159,6 +159,40 @@ struct tree_opt_pass pass_lim = 0 /* letter */ }; +/* Loop unswitching pass. */ + +static void +tree_ssa_loop_unswitch (void) +{ + if (!current_loops) + return; + + tree_ssa_unswitch_loops (current_loops); +} + +static bool +gate_tree_ssa_loop_unswitch (void) +{ + return flag_unswitch_loops != 0; +} + +struct tree_opt_pass pass_unswitch = +{ + "unswitch", /* name */ + gate_tree_ssa_loop_unswitch, /* gate */ + tree_ssa_loop_unswitch, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + TV_TREE_LOOP_UNSWITCH, /* tv_id */ + PROP_cfg, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_dump_func, /* todo_flags_finish */ + 0 /* letter */ +}; + /* Loop autovectorization. */ static void |