diff options
author | rakdver <rakdver@138bc75d-0d04-0410-961f-82ee72b054a4> | 2007-01-03 02:29:00 +0000 |
---|---|---|
committer | rakdver <rakdver@138bc75d-0d04-0410-961f-82ee72b054a4> | 2007-01-03 02:29:00 +0000 |
commit | 7cef6c97cbe9575d82af3934ba9db98706d40dbd (patch) | |
tree | 65c401413cfba6d2049e2a002755074729d4e079 | |
parent | f517b36eec084c177098f54c4755ab7222fb7e2f (diff) | |
download | gcc-7cef6c97cbe9575d82af3934ba9db98706d40dbd.tar.gz |
* loop-unswitch.c (unswitch_loop): Pass probabilities to loopify.
* tree-ssa-loop-unswitch.c (tree_unswitch_loop): Pass probabilities
to loop_version.
* cfgloopmanip.c (scale_loop_frequencies): Export.
(loopify): Scale the frequencies by prescribed coefficients.
(set_zero_probability): New function.
(duplicate_loop_to_header_edge): Improve updating of frequencies.
(lv_adjust_loop_entry_edge, loop_version): Set probabilities
and frequencies according to arguments.
* tree-ssa-loop-manip.c (tree_unroll_loop): Set probabilities
correctly.
* cfg.c (scale_bbs_frequencies_int): Allow scaling the frequencies up.
* modulo-sched.c (sms_schedule): Set probabilities for entering
versioned loop correctly.
* tree-vect-transform.c (vect_transform_loop): Ditto.
* cfgloop.h (loopify, loop_version): Declaration changed.
(scale_loop_frequencies): Declared.
* gcc.dg/tree-ssa/update-unroll-1.c: New test.
* gcc.dg/tree-ssa/update-unswitch-1.c: New test.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@120378 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- | gcc/ChangeLog | 32 | ||||
-rw-r--r-- | gcc/cfg.c | 21 | ||||
-rw-r--r-- | gcc/cfgloop.h | 6 | ||||
-rw-r--r-- | gcc/cfgloopmanip.c | 139 | ||||
-rw-r--r-- | gcc/loop-unswitch.c | 3 | ||||
-rw-r--r-- | gcc/modulo-sched.c | 12 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/update-unroll-1.c | 22 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/update-unswitch-1.c | 24 | ||||
-rw-r--r-- | gcc/tree-ssa-loop-manip.c | 71 | ||||
-rw-r--r-- | gcc/tree-ssa-loop-unswitch.c | 8 | ||||
-rw-r--r-- | gcc/tree-vect-transform.c | 4 |
12 files changed, 291 insertions, 56 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 8879773e93c..3716690037e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,23 @@ +2006-01-03 Zdenek Dvorak <dvorakz@suse.cz> + + * loop-unswitch.c (unswitch_loop): Pass probabilities to loopify. + * tree-ssa-loop-unswitch.c (tree_unswitch_loop): Pass probabilities + to loop_version. + * cfgloopmanip.c (scale_loop_frequencies): Export. + (loopify): Scale the frequencies by prescribed coefficients. + (set_zero_probability): New function. + (duplicate_loop_to_header_edge): Improve updating of frequencies. + (lv_adjust_loop_entry_edge, loop_version): Set probabilities + and frequencies according to arguments. + * tree-ssa-loop-manip.c (tree_unroll_loop): Set probabilities + correctly. + * cfg.c (scale_bbs_frequencies_int): Allow scaling the frequencies up. + * modulo-sched.c (sms_schedule): Set probabilities for entering + versioned loop correctly. + * tree-vect-transform.c (vect_transform_loop): Ditto. + * cfgloop.h (loopify, loop_version): Declaration changed. + (scale_loop_frequencies): Declared. + 2007-01-02 Jan Hubicka <jh@suse.cz> * cgraph.c: Include tree-flow.h @@ -41,12 +61,12 @@ 2007-01-02 Jan Hubicka <jh@suse.cz> - * tree-mudflap.c (mf_decl_cache_locals, mf_build_check_statement_for): - Do not add referenced vars. - * tree-cfg.c (update_modified_stmts): Do not update when SSA operands - are not active. - * passes.c (init_optimization_passes): Put mudflap_2 after - free_datastructures. + * tree-mudflap.c (mf_decl_cache_locals, mf_build_check_statement_for): + Do not add referenced vars. + * tree-cfg.c (update_modified_stmts): Do not update when SSA operands + are not active. + * passes.c (init_optimization_passes): Put mudflap_2 after + free_datastructures. 2007-01-02 Jan Hubicka <jh@suse.cz> diff --git a/gcc/cfg.c b/gcc/cfg.c index aa8eaca9eec..9f5da32bfe8 100644 --- a/gcc/cfg.c +++ b/gcc/cfg.c @@ -949,15 +949,28 @@ scale_bbs_frequencies_int (basic_block *bbs, int nbbs, int num, int den) edge e; if (num < 0) num = 0; - if (num > den) + + /* Scale NUM and DEN to avoid overflows. Frequencies are in order of + 10^4, if we make DEN <= 10^3, we can afford to upscale by 100 + and still safely fit in int during calculations. */ + if (den > 1000) + { + if (num > 1000000) + return; + + num = RDIV (1000 * num, den); + den = 1000; + } + if (num > 100 * den) return; - /* Assume that the users are producing the fraction from frequencies - that never grow far enough to risk arithmetic overflow. */ - gcc_assert (num < 65536); + for (i = 0; i < nbbs; i++) { edge_iterator ei; bbs[i]->frequency = RDIV (bbs[i]->frequency * num, den); + /* Make sure the frequencies do not grow over BB_FREQ_MAX. */ + if (bbs[i]->frequency > BB_FREQ_MAX) + bbs[i]->frequency = BB_FREQ_MAX; bbs[i]->count = RDIV (bbs[i]->count * num, den); FOR_EACH_EDGE (e, ei, bbs[i]->succs) e->count = RDIV (e->count * num, den); diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h index aa12dbf3ed0..19fc93fcc66 100644 --- a/gcc/cfgloop.h +++ b/gcc/cfgloop.h @@ -254,10 +254,12 @@ extern bool duplicate_loop_to_header_edge (struct loop *, edge, unsigned, sbitmap, edge, VEC (edge, heap) **, int); extern struct loop *loopify (edge, edge, - basic_block, edge, edge, bool); + basic_block, edge, edge, bool, + unsigned, unsigned); struct loop * loop_version (struct loop *, void *, - basic_block *, bool); + basic_block *, unsigned, unsigned, unsigned, bool); extern bool remove_path (edge); +void scale_loop_frequencies (struct loop *, int, int); /* Induction variable analysis. */ diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c index 9021fbafd4e..fd7597e9a12 100644 --- a/gcc/cfgloopmanip.c +++ b/gcc/cfgloopmanip.c @@ -44,7 +44,6 @@ static void fix_loop_placements (struct loop *, bool *); static bool fix_bb_placement (basic_block); static void fix_bb_placements (basic_block, bool *); static void place_new_loop (struct loop *); -static void scale_loop_frequencies (struct loop *, int, int); static basic_block create_preheader (struct loop *, int); static void unloop (struct loop *, bool *); @@ -396,7 +395,7 @@ add_loop (struct loop *loop, struct loop *outer) } /* Multiply all frequencies in LOOP by NUM/DEN. */ -static void +void scale_loop_frequencies (struct loop *loop, int num, int den) { basic_block *bbs; @@ -413,12 +412,13 @@ scale_loop_frequencies (struct loop *loop, int num, int den) LATCH_EDGE source. HEADER_EDGE is redirected to basic block SWITCH_BB, FALSE_EDGE of SWITCH_BB to original destination of HEADER_EDGE and TRUE_EDGE of SWITCH_BB to original destination of LATCH_EDGE. - Returns newly created loop. */ + Returns the newly created loop. Frequencies and counts in the new loop + are scaled by FALSE_SCALE and in the old one by TRUE_SCALE. */ struct loop * loopify (edge latch_edge, edge header_edge, basic_block switch_bb, edge true_edge, edge false_edge, - bool redirect_all_edges) + bool redirect_all_edges, unsigned true_scale, unsigned false_scale) { basic_block succ_bb = latch_edge->dest; basic_block pred_bb = header_edge->src; @@ -427,7 +427,7 @@ loopify (edge latch_edge, edge header_edge, sbitmap seen; struct loop *loop = XCNEW (struct loop); struct loop *outer = succ_bb->loop_father->outer; - int freq, prob, tot_prob; + int freq; gcov_type cnt; edge e; edge_iterator ei; @@ -437,10 +437,6 @@ loopify (edge latch_edge, edge header_edge, freq = EDGE_FREQUENCY (header_edge); cnt = header_edge->count; - prob = EDGE_SUCC (switch_bb, 0)->probability; - tot_prob = prob + EDGE_SUCC (switch_bb, 1)->probability; - if (tot_prob == 0) - tot_prob = 1; /* Redirect edges. */ loop_redirect_edge (latch_edge, loop->header); @@ -469,12 +465,17 @@ loopify (edge latch_edge, edge header_edge, add_bb_to_loop (switch_bb, outer); /* Fix frequencies. */ - switch_bb->frequency = freq; - switch_bb->count = cnt; - FOR_EACH_EDGE (e, ei, switch_bb->succs) - e->count = (switch_bb->count * e->probability) / REG_BR_PROB_BASE; - scale_loop_frequencies (loop, prob, tot_prob); - scale_loop_frequencies (succ_bb->loop_father, tot_prob - prob, tot_prob); + if (redirect_all_edges) + { + switch_bb->frequency = freq; + switch_bb->count = cnt; + FOR_EACH_EDGE (e, ei, switch_bb->succs) + { + e->count = (switch_bb->count * e->probability) / REG_BR_PROB_BASE; + } + } + scale_loop_frequencies (loop, false_scale, REG_BR_PROB_BASE); + scale_loop_frequencies (succ_bb->loop_father, true_scale, REG_BR_PROB_BASE); /* Update dominators of blocks outside of LOOP. */ dom_bbs = XCNEWVEC (basic_block, n_basic_blocks); @@ -804,6 +805,41 @@ update_single_exit_for_duplicated_loops (struct loop *orig_loops[], unsigned n) update_single_exit_for_duplicated_loop (orig_loops[i]); } +/* Sets probability and count of edge E to zero. The probability and count + is redistributed evenly to the remaining edges coming from E->src. */ + +static void +set_zero_probability (edge e) +{ + basic_block bb = e->src; + edge_iterator ei; + edge ae, last = NULL; + unsigned n = EDGE_COUNT (bb->succs); + gcov_type cnt = e->count, cnt1; + unsigned prob = e->probability, prob1; + + gcc_assert (n > 1); + cnt1 = cnt / (n - 1); + prob1 = prob / (n - 1); + + FOR_EACH_EDGE (ae, ei, bb->succs) + { + if (ae == e) + continue; + + ae->probability += prob1; + ae->count += cnt1; + last = ae; + } + + /* Move the rest to one of the edges. */ + last->probability += prob % (n - 1); + last->count += cnt % (n - 1); + + e->probability = 0; + e->count = 0; +} + /* Duplicates body of LOOP to given edge E NDUPL times. Takes care of updating loop structure and dominators. E's destination must be LOOP header for this to work, i.e. it must be entry or latch edge of this loop; these are @@ -834,10 +870,13 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e, unsigned i, j, n; int is_latch = (latch == e->src); int scale_act = 0, *scale_step = NULL, scale_main = 0; + int scale_after_exit = 0; int p, freq_in, freq_le, freq_out_orig; int prob_pass_thru, prob_pass_wont_exit, prob_pass_main; int add_irreducible_flag; basic_block place_after; + bitmap bbs_to_scale = NULL; + bitmap_iterator bi; gcc_assert (e->dest == loop->header); gcc_assert (ndupl > 0); @@ -887,10 +926,26 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e, prob_pass_wont_exit = RDIV (REG_BR_PROB_BASE * (freq_le + freq_out_orig), freq_in); + if (orig + && REG_BR_PROB_BASE - orig->probability != 0) + { + /* The blocks that are dominated by a removed exit edge ORIG have + frequencies scaled by this. */ + scale_after_exit = RDIV (REG_BR_PROB_BASE * REG_BR_PROB_BASE, + REG_BR_PROB_BASE - orig->probability); + bbs_to_scale = BITMAP_ALLOC (NULL); + for (i = 0; i < n; i++) + { + if (bbs[i] != orig->src + && dominated_by_p (CDI_DOMINATORS, bbs[i], orig->src)) + bitmap_set_bit (bbs_to_scale, i); + } + } + scale_step = XNEWVEC (int, ndupl); - for (i = 1; i <= ndupl; i++) - scale_step[i - 1] = TEST_BIT (wont_exit, i) + for (i = 1; i <= ndupl; i++) + scale_step[i - 1] = TEST_BIT (wont_exit, i) ? prob_pass_wont_exit : prob_pass_thru; @@ -1043,6 +1098,17 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e, { if (to_remove) VEC_safe_push (edge, heap, *to_remove, new_spec_edges[SE_ORIG]); + set_zero_probability (new_spec_edges[SE_ORIG]); + + /* Scale the frequencies of the blocks dominated by the exit. */ + if (bbs_to_scale) + { + EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi) + { + scale_bbs_frequencies_int (new_bbs + i, 1, scale_after_exit, + REG_BR_PROB_BASE); + } + } } /* Record the first copy in the control flow order if it is not @@ -1068,6 +1134,17 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e, { if (to_remove) VEC_safe_push (edge, heap, *to_remove, orig); + set_zero_probability (orig); + + /* Scale the frequencies of the blocks dominated by the exit. */ + if (bbs_to_scale) + { + EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi) + { + scale_bbs_frequencies_int (bbs + i, 1, scale_after_exit, + REG_BR_PROB_BASE); + } + } } /* Update the original loop. */ @@ -1103,6 +1180,7 @@ duplicate_loop_to_header_edge (struct loop *loop, edge e, free (first_active); free (bbs); + BITMAP_FREE (bbs_to_scale); return true; } @@ -1239,13 +1317,12 @@ force_single_succ_latches (void) --- edge e ---> [cond expr] ---> [first_head] | +---------> [second_head] -*/ + + THEN_PROB is the probability of then branch of the condition. */ static basic_block -lv_adjust_loop_entry_edge (basic_block first_head, - basic_block second_head, - edge e, - void *cond_expr) +lv_adjust_loop_entry_edge (basic_block first_head, basic_block second_head, + edge e, void *cond_expr, unsigned then_prob) { basic_block new_head = NULL; edge e1; @@ -1256,13 +1333,18 @@ lv_adjust_loop_entry_edge (basic_block first_head, insert conditional expr. */ new_head = split_edge (e); - lv_add_condition_to_bb (first_head, second_head, new_head, cond_expr); /* Don't set EDGE_TRUE_VALUE in RTL mode, as it's invalid there. */ + e = single_succ_edge (new_head); e1 = make_edge (new_head, first_head, current_ir_type () == IR_GIMPLE ? EDGE_TRUE_VALUE : 0); + e1->probability = then_prob; + e->probability = REG_BR_PROB_BASE - then_prob; + e1->count = RDIV (e->count * e1->probability, REG_BR_PROB_BASE); + e->count = RDIV (e->count * e->probability, REG_BR_PROB_BASE); + set_immediate_dominator (CDI_DOMINATORS, first_head, new_head); set_immediate_dominator (CDI_DOMINATORS, second_head, new_head); @@ -1281,12 +1363,18 @@ lv_adjust_loop_entry_edge (basic_block first_head, may be a run time test for things that were not resolved by static analysis (overlapping ranges (anti-aliasing), alignment, etc.). + THEN_PROB is the probability of the then edge of the if. THEN_SCALE + is the ratio by that the frequencies in the original loop should + be scaled. ELSE_SCALE is the ratio by that the frequencies in the + new loop should be scaled. + If PLACE_AFTER is true, we place the new loop after LOOP in the instruction stream, otherwise it is placed before LOOP. */ struct loop * loop_version (struct loop *loop, void *cond_expr, basic_block *condition_bb, + unsigned then_prob, unsigned then_scale, unsigned else_scale, bool place_after) { basic_block first_head, second_head; @@ -1318,7 +1406,7 @@ loop_version (struct loop *loop, /* Split loop entry edge and insert new block with cond expr. */ cond_bb = lv_adjust_loop_entry_edge (first_head, second_head, - entry, cond_expr); + entry, cond_expr, then_prob); if (condition_bb) *condition_bb = cond_bb; @@ -1334,7 +1422,8 @@ loop_version (struct loop *loop, nloop = loopify (latch_edge, single_pred_edge (get_bb_copy (loop->header)), cond_bb, true_edge, false_edge, - false /* Do not redirect all edges. */); + false /* Do not redirect all edges. */, + then_scale, else_scale); exit = single_exit (loop); if (exit) diff --git a/gcc/loop-unswitch.c b/gcc/loop-unswitch.c index 38706d174d8..b0e2aaa12be 100644 --- a/gcc/loop-unswitch.c +++ b/gcc/loop-unswitch.c @@ -451,7 +451,8 @@ unswitch_loop (struct loop *loop, basic_block unswitch_on, rtx cond, rtx cinsn) /* Loopify from the copy of LOOP body, constructing the new loop. */ nloop = loopify (latch_edge, single_pred_edge (get_bb_copy (loop->header)), switch_bb, - BRANCH_EDGE (switch_bb), FALLTHRU_EDGE (switch_bb), true); + BRANCH_EDGE (switch_bb), FALLTHRU_EDGE (switch_bb), true, + prob, REG_BR_PROB_BASE - prob); /* Remove branches that are now unreachable in new loops. */ remove_path (true_edge); diff --git a/gcc/modulo-sched.c b/gcc/modulo-sched.c index 3352e20adcd..93f5b9a120b 100644 --- a/gcc/modulo-sched.c +++ b/gcc/modulo-sched.c @@ -868,6 +868,10 @@ canon_loop (struct loop *loop) } } +/* Probability in % that the sms-ed loop rolls enough so that optimized + version may be entered. Just a guess. */ +#define PROB_SMS_ENOUGH_ITERATIONS 80 + /* Main entry point, perform SMS scheduling on the loops of the function that consist of single basic blocks. */ static void @@ -882,7 +886,7 @@ sms_schedule (void) partial_schedule_ptr ps; struct df *df; basic_block bb = NULL; - struct loop *loop, *nloop; + struct loop *loop; basic_block condition_bb = NULL; edge latch_edge; gcov_type trip_count = 0; @@ -1181,8 +1185,12 @@ sms_schedule (void) { rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg, GEN_INT(stage_count)); + unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS + * REG_BR_PROB_BASE) / 100; - nloop = loop_version (loop, comp_rtx, &condition_bb, true); + loop_version (loop, comp_rtx, &condition_bb, + prob, prob, REG_BR_PROB_BASE - prob, + true); } /* Set new iteration count of loop kernel. */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 135b506d332..53683622321 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2006-01-03 Zdenek Dvorak <dvorakz@suse.cz> + + * gcc.dg/tree-ssa/update-unroll-1.c: New test. + * gcc.dg/tree-ssa/update-unswitch-1.c: New test. + 2007-01-02 Jan Hubicka <jh@suse.cz> * gcc.dg/pr16194.c: We now output error on all three functions, not just diff --git a/gcc/testsuite/gcc.dg/tree-ssa/update-unroll-1.c b/gcc/testsuite/gcc.dg/tree-ssa/update-unroll-1.c new file mode 100644 index 00000000000..d911dd8e58b --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/update-unroll-1.c @@ -0,0 +1,22 @@ +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */ +/* { dg-require-effective-target ilp32 } */ +/* { dg-options "-O1 -fprefetch-loop-arrays -march=athlon -fdump-tree-aprefetch-blocks" } */ + +int a[10000]; + +int foo(unsigned n) +{ + unsigned i, s = 0; + + for (i = 0; i < n; i++) + s += a[i]; + + return s; +} + +/* We used to make the probability that the body of the loop (unrolled + to enable prefetching) is entered 0, which is not correct. */ + +/* { dg-final { scan-tree-dump-not "Invalid sum" "aprefetch"} } */ +/* { dg-final { scan-tree-dump-not "SUCC: 7 .100.0%" "aprefetch"} } */ +/* { dg-final { cleanup-tree-dump "aprefetch" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/update-unswitch-1.c b/gcc/testsuite/gcc.dg/tree-ssa/update-unswitch-1.c new file mode 100644 index 00000000000..499b78b0137 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/update-unswitch-1.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -funswitch-loops -fdump-tree-unswitch-blocks" } */ + +int bla(int p) +{ + unsigned i, s = 1; + + for (i = 4; i < 100; i++) + { + if (p) + s += i/2; + else + s *= i/2; + } + + return s; +} + +/* We used to make the probability that the first of the loops created + by unswitching is entered 100%, which is not correct. */ + +/* { dg-final { scan-tree-dump-not "Invalid sum" "unswitch"} } */ +/* { dg-final { scan-tree-dump-not "SUCC: 3 .100.0%" "unswitch"} } */ +/* { dg-final { cleanup-tree-dump "unswitch" } } */ diff --git a/gcc/tree-ssa-loop-manip.c b/gcc/tree-ssa-loop-manip.c index d27985f3291..f224aa055b8 100644 --- a/gcc/tree-ssa-loop-manip.c +++ b/gcc/tree-ssa-loop-manip.c @@ -805,6 +805,9 @@ determine_exit_conditions (struct loop *loop, struct tree_niter_desc *desc, post; } */ +/* Probability in % that the unrolled loop is entered. Just a guess. */ +#define PROB_UNROLLED_LOOP_ENTERED 90 + void tree_unroll_loop (struct loop *loop, unsigned factor, edge exit, struct tree_niter_desc *desc) @@ -816,11 +819,12 @@ tree_unroll_loop (struct loop *loop, unsigned factor, struct loop *new_loop; basic_block rest, exit_bb; edge old_entry, new_entry, old_latch, precond_edge, new_exit; - edge nonexit, new_nonexit; + edge new_nonexit; block_stmt_iterator bsi; use_operand_p op; bool ok; - unsigned est_niter; + unsigned est_niter, prob_entry, scale_unrolled, scale_rest, freq_e, freq_h; + unsigned new_est_niter; unsigned irr = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP; sbitmap wont_exit; @@ -829,7 +833,29 @@ tree_unroll_loop (struct loop *loop, unsigned factor, &enter_main_cond, &exit_base, &exit_step, &exit_cmp, &exit_bound); - new_loop = loop_version (loop, enter_main_cond, NULL, true); + /* Let us assume that the unrolled loop is quite likely to be entered. */ + if (integer_nonzerop (enter_main_cond)) + prob_entry = REG_BR_PROB_BASE; + else + prob_entry = PROB_UNROLLED_LOOP_ENTERED * REG_BR_PROB_BASE / 100; + + /* The values for scales should keep profile consistent, and somewhat close + to correct. + + TODO: The current value of SCALE_REST makes it appear that the loop that + is created by splitting the remaining iterations of the unrolled loop is + executed the same number of times as the original loop, and with the same + frequencies, which is obviously wrong. This does not appear to cause + problems, so we do not bother with fixing it for now. To make the profile + correct, we would need to change the probability of the exit edge of the + loop, and recompute the distribution of frequencies in its body because + of this change (scale the frequencies of blocks before and after the exit + by appropriate factors). */ + scale_unrolled = prob_entry; + scale_rest = REG_BR_PROB_BASE; + + new_loop = loop_version (loop, enter_main_cond, NULL, + prob_entry, scale_unrolled, scale_rest, true); gcc_assert (new_loop != NULL); update_ssa (TODO_update_ssa); @@ -837,14 +863,6 @@ tree_unroll_loop (struct loop *loop, unsigned factor, dont_exit = ((exit->flags & EDGE_TRUE_VALUE) ? boolean_false_node : boolean_true_node); - if (exit == EDGE_SUCC (exit->src, 0)) - nonexit = EDGE_SUCC (exit->src, 1); - else - nonexit = EDGE_SUCC (exit->src, 0); - nonexit->probability = REG_BR_PROB_BASE; - exit->probability = 0; - nonexit->count += exit->count; - exit->count = 0; exit_if = last_stmt (exit->src); COND_EXPR_COND (exit_if) = dont_exit; update_stmt (exit_if); @@ -858,6 +876,29 @@ tree_unroll_loop (struct loop *loop, unsigned factor, gcc_assert (ok); update_ssa (TODO_update_ssa); + /* Determine the probability of the exit edge. */ + new_est_niter = est_niter / factor; + + /* Without profile feedback, loops for that we do not know a better estimate + are assumed to roll 10 times. When we unroll such loop, it appears to + roll too little, and it may even seem to be cold. To avoid this, we + ensure that the created loop appears to roll at least 5 times (but at + most as many times as before unrolling). */ + if (new_est_niter < 5) + { + if (est_niter < 5) + new_est_niter = est_niter; + else + new_est_niter = 5; + } + + /* Ensure that the frequencies in the loop match the new estimated + number of iterations. */ + freq_h = loop->header->frequency; + freq_e = EDGE_FREQUENCY (loop_preheader_edge (loop)); + if (freq_h != 0) + scale_loop_frequencies (loop, freq_e * new_est_niter, freq_h); + /* Prepare the cfg and update the phi nodes. */ rest = loop_preheader_edge (new_loop)->src; precond_edge = single_pred_edge (rest); @@ -866,12 +907,16 @@ tree_unroll_loop (struct loop *loop, unsigned factor, new_exit = make_edge (exit_bb, rest, EDGE_FALSE_VALUE | irr); new_exit->count = loop_preheader_edge (loop)->count; - est_niter = est_niter / factor + 1; - new_exit->probability = REG_BR_PROB_BASE / est_niter; + new_exit->probability = REG_BR_PROB_BASE / new_est_niter; + + rest->count += new_exit->count; + rest->frequency += EDGE_FREQUENCY (new_exit); new_nonexit = single_pred_edge (loop->latch); new_nonexit->flags = EDGE_TRUE_VALUE; new_nonexit->probability = REG_BR_PROB_BASE - new_exit->probability; + scale_bbs_frequencies_int (&loop->latch, 1, new_nonexit->probability, + REG_BR_PROB_BASE); old_entry = loop_preheader_edge (loop); new_entry = loop_preheader_edge (new_loop); diff --git a/gcc/tree-ssa-loop-unswitch.c b/gcc/tree-ssa-loop-unswitch.c index 52465db8611..7a329c96487 100644 --- a/gcc/tree-ssa-loop-unswitch.c +++ b/gcc/tree-ssa-loop-unswitch.c @@ -269,13 +269,17 @@ static struct loop * tree_unswitch_loop (struct loop *loop, basic_block unswitch_on, tree cond) { - basic_block condition_bb; + unsigned prob_true; + edge edge_true, edge_false; /* Some sanity checking. */ gcc_assert (flow_bb_inside_loop_p (loop, unswitch_on)); gcc_assert (EDGE_COUNT (unswitch_on->succs) == 2); gcc_assert (loop->inner == NULL); + extract_true_false_edges_from_block (unswitch_on, &edge_true, &edge_false); + prob_true = edge_true->probability; return loop_version (loop, unshare_expr (cond), - &condition_bb, false); + NULL, prob_true, prob_true, + REG_BR_PROB_BASE - prob_true, false); } diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c index e767d1fabaf..ec3f511e74e 100644 --- a/gcc/tree-vect-transform.c +++ b/gcc/tree-vect-transform.c @@ -4699,11 +4699,13 @@ vect_transform_loop (loop_vec_info loop_vinfo) basic_block new_exit_bb; edge new_exit_e, e; tree orig_phi, new_phi, arg; + unsigned prob = 4 * REG_BR_PROB_BASE / 5; cond_expr = vect_create_cond_for_align_checks (loop_vinfo, &cond_expr_stmt_list); initialize_original_copy_tables (); - nloop = loop_version (loop, cond_expr, &condition_bb, true); + nloop = loop_version (loop, cond_expr, &condition_bb, + prob, prob, REG_BR_PROB_BASE - prob, true); free_original_copy_tables(); /** Loop versioning violates an assumption we try to maintain during |