diff options
author | Zdenek Dvorak <rakdver@atrey.karlin.mff.cuni.cz> | 2004-02-17 17:41:44 +0100 |
---|---|---|
committer | Zdenek Dvorak <rakdver@gcc.gnu.org> | 2004-02-17 16:41:44 +0000 |
commit | 50654f6c03917824e03777073557ac839cb56104 (patch) | |
tree | 871928dcce64f79c8e877a86be241c2ed02c9cf3 /gcc/loop-unroll.c | |
parent | cc7ce44e4c87839efaaddd07d1f03cc50a78d047 (diff) | |
download | gcc-50654f6c03917824e03777073557ac839cb56104.tar.gz |
loop-iv.c: New file.
* loop-iv.c: New file.
* Makefile.in (loop-iv.o): New.
* basic_block.h (FOR_BB_INSNS, FOR_BB_INSNS_REVERSE): New macros.
* cfgloop.c (fill_sons_in_loop, get_loop_body_in_dom_order,
num_loop_branches): New functions.
* cfgloop.h (get_loop_body_in_dom_order, num_loop_branches,
iv_analysis_loop_init, iv_get_reaching_def, iv_analyse, get_iv_value,
find_simple_exit, iv_number_of_iterations, iv_analysis_done,
get_simple_loop_desc, free_simple_loop_desc): Declare.
(simple_loop_desc): New inline function.
(struct rtx_iv, struct niter_desc): New.
* cfgloopmanip.c (loopify): Specify semantics more precisely.
* expr.c (force_operand): Handle subregs of expressions created by
loop unroller.
* loop-init.c (loop_optimizer_init, loop_optimizer_finalize): Move
parts of the initialization to toplev.c.
* loop-unroll.c (loop_exit_at_end_p): New.
(unroll_and_peel_loops): Call iv_analysis_done.
(decide_peel_once_rolling, decide_peel_completely,
decide_unroll_stupid, decide_unroll_constant_iterations,
decide_unroll_runtime_iterations, decide_peel_simple,
peel_loop_simple, unroll_loop_stupid, unroll_loop_constant_iterations,
unroll_loop_runtime_iterations): Use new simple loop analysis.
* loop-unswitch.c (compare_and_jump_seq): New.
(may_unswitch_on_p): Renamed to ...
(may_unswitch_on): Use new iv analysis.
(reversed_condition): Export.
(unswitch_single_loop, unswitch_loop): Use new iv analysis.
* predict.c (estimate_probability): Use new simple loop analysis.
* rtl.h (get_mode_bounds, reversed_condition, compare_and_jump_seq,
canon_condition, simplify_using_condition): Declare.
* stor-layout.c (get_mode_bounds): New.
* toplev.c (rest_of_handle_loop2): Some parts of
initialization/finalization moved here from loop-init.c.
From-SVN: r77951
Diffstat (limited to 'gcc/loop-unroll.c')
-rw-r--r-- | gcc/loop-unroll.c | 356 |
1 files changed, 237 insertions, 119 deletions
diff --git a/gcc/loop-unroll.c b/gcc/loop-unroll.c index 6c796af577c..b093a7de081 100644 --- a/gcc/loop-unroll.c +++ b/gcc/loop-unroll.c @@ -85,7 +85,7 @@ void unroll_and_peel_loops (struct loops *loops, int flags) { struct loop *loop, *next; - int check; + bool check; /* First perform complete loop peeling (it is almost surely a win, and affects parameters for further decision a lot). */ @@ -110,7 +110,7 @@ unroll_and_peel_loops (struct loops *loops, int flags) else next = loop->outer; - check = 1; + check = true; /* And perform the appropriate transformations. */ switch (loop->lpt_decision.decision) { @@ -130,7 +130,7 @@ unroll_and_peel_loops (struct loops *loops, int flags) unroll_loop_stupid (loops, loop); break; case LPT_NONE: - check = 0; + check = false; break; default: abort (); @@ -144,6 +144,29 @@ unroll_and_peel_loops (struct loops *loops, int flags) } loop = next; } + + iv_analysis_done (); +} + +/* Check whether exit of the LOOP is at the end of loop body. */ + +static bool +loop_exit_at_end_p (struct loop *loop) +{ + struct niter_desc *desc = get_simple_loop_desc (loop); + rtx insn; + + if (desc->in_edge->dest != loop->latch) + return false; + + /* Check that the latch is empty. */ + FOR_BB_INSNS (loop->latch, insn) + { + if (INSN_P (insn)) + return false; + } + + return true; } /* Check whether to peel LOOPS (depending on FLAGS) completely and do so. 
*/ @@ -168,10 +191,9 @@ peel_loops_completely (struct loops *loops, int flags) next = loop->outer; loop->lpt_decision.decision = LPT_NONE; - loop->has_desc = 0; if (rtl_dump_file) - fprintf (rtl_dump_file, ";; Considering loop %d for complete peeling\n", + fprintf (rtl_dump_file, "\n;; *** Considering loop %d for complete peeling ***\n", loop->num); loop->ninsns = num_loop_insns (loop); @@ -216,7 +238,7 @@ decide_unrolling_and_peeling (struct loops *loops, int flags) loop->lpt_decision.decision = LPT_NONE; if (rtl_dump_file) - fprintf (rtl_dump_file, ";; Considering loop %d\n", loop->num); + fprintf (rtl_dump_file, "\n;; *** Considering loop %d ***\n", loop->num); /* Do not peel cold areas. */ if (!maybe_hot_bb_p (loop->header)) @@ -269,8 +291,10 @@ decide_unrolling_and_peeling (struct loops *loops, int flags) static void decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED) { + struct niter_desc *desc; + if (rtl_dump_file) - fprintf (rtl_dump_file, ";; Considering peeling once rolling loop\n"); + fprintf (rtl_dump_file, "\n;; Considering peeling once rolling loop\n"); /* Is the loop small enough? */ if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns) @@ -281,11 +305,13 @@ decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED) } /* Check for simple loops. */ - loop->simple = simple_loop_p (loop, &loop->desc); - loop->has_desc = 1; + desc = get_simple_loop_desc (loop); /* Check number of iterations. 
*/ - if (!loop->simple || !loop->desc.const_iter || loop->desc.niter != 0) + if (!desc->simple_p + || desc->assumptions + || !desc->const_iter + || desc->niter != 0) { if (rtl_dump_file) fprintf (rtl_dump_file, ";; Unable to prove that the loop rolls exactly once\n"); @@ -303,9 +329,10 @@ static void decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED) { unsigned npeel; + struct niter_desc *desc; if (rtl_dump_file) - fprintf (rtl_dump_file, ";; Considering peeling completely\n"); + fprintf (rtl_dump_file, "\n;; Considering peeling completely\n"); /* Skip non-innermost loops. */ if (loop->inner) @@ -346,26 +373,24 @@ decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED) } /* Check for simple loops. */ - if (!loop->has_desc) - { - loop->simple = simple_loop_p (loop, &loop->desc); - loop->has_desc = 1; - } + desc = get_simple_loop_desc (loop); /* Check number of iterations. */ - if (!loop->simple || !loop->desc.const_iter) + if (!desc->simple_p + || desc->assumptions + || !desc->const_iter) { if (rtl_dump_file) fprintf (rtl_dump_file, ";; Unable to prove that the loop iterates constant times\n"); return; } - if (loop->desc.niter > npeel - 1) + if (desc->niter > npeel - 1) { if (rtl_dump_file) { fprintf (rtl_dump_file, ";; Not peeling loop completely, rolls too much ("); - fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC,(HOST_WIDEST_INT) loop->desc.niter); + fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter); fprintf (rtl_dump_file, " iterations > %d [maximum peelings])\n", npeel); } return; @@ -397,8 +422,8 @@ peel_loop_completely (struct loops *loops, struct loop *loop) sbitmap wont_exit; unsigned HOST_WIDE_INT npeel; unsigned n_remove_edges, i; - edge *remove_edges; - struct loop_desc *desc = &loop->desc; + edge *remove_edges, ei; + struct niter_desc *desc = get_simple_loop_desc (loop); npeel = desc->niter; @@ -407,7 +432,7 @@ peel_loop_completely (struct loops *loops, struct loop *loop) wont_exit = sbitmap_alloc 
(npeel + 1); sbitmap_ones (wont_exit); RESET_BIT (wont_exit, 0); - if (desc->may_be_zero) + if (desc->noloop_assumptions) RESET_BIT (wont_exit, 1); remove_edges = xcalloc (npeel, sizeof (edge)); @@ -427,19 +452,24 @@ peel_loop_completely (struct loops *loops, struct loop *loop) free (remove_edges); } + ei = desc->in_edge; + free_simple_loop_desc (loop); + /* Now remove the unreachable part of the last iteration and cancel the loop. */ - remove_path (loops, desc->in_edge); + remove_path (loops, ei); if (rtl_dump_file) fprintf (rtl_dump_file, ";; Peeled loop completely, %d times\n", (int) npeel); } /* Decide whether to unroll LOOP iterating constant number of times and how much. */ + static void decide_unroll_constant_iterations (struct loop *loop, int flags) { - unsigned nunroll, nunroll_by_av, best_copies, best_unroll = -1, n_copies, i; + unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i; + struct niter_desc *desc; if (!(flags & UAP_UNROLL)) { @@ -448,7 +478,8 @@ decide_unroll_constant_iterations (struct loop *loop, int flags) } if (rtl_dump_file) - fprintf (rtl_dump_file, ";; Considering unrolling loop with constant number of iterations\n"); + fprintf (rtl_dump_file, + "\n;; Considering unrolling loop with constant number of iterations\n"); /* nunroll = total number of copies of the original loop body in unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */ @@ -468,14 +499,10 @@ decide_unroll_constant_iterations (struct loop *loop, int flags) } /* Check for simple loops. */ - if (!loop->has_desc) - { - loop->simple = simple_loop_p (loop, &loop->desc); - loop->has_desc = 1; - } + desc = get_simple_loop_desc (loop); /* Check number of iterations. 
*/ - if (!loop->simple || !loop->desc.const_iter) + if (!desc->simple_p || !desc->const_iter || desc->assumptions) { if (rtl_dump_file) fprintf (rtl_dump_file, ";; Unable to prove that the loop iterates constant times\n"); @@ -483,7 +510,7 @@ decide_unroll_constant_iterations (struct loop *loop, int flags) } /* Check whether the loop rolls enough to consider. */ - if (loop->desc.niter < 2 * nunroll) + if (desc->niter < 2 * nunroll) { if (rtl_dump_file) fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n"); @@ -497,16 +524,17 @@ decide_unroll_constant_iterations (struct loop *loop, int flags) best_copies = 2 * nunroll + 10; i = 2 * nunroll + 2; - if ((unsigned) i - 1 >= loop->desc.niter) - i = loop->desc.niter - 2; + if (i - 1 >= desc->niter) + i = desc->niter - 2; for (; i >= nunroll - 1; i--) { - unsigned exit_mod = loop->desc.niter % (i + 1); + unsigned exit_mod = desc->niter % (i + 1); - if (loop->desc.postincr) + if (!loop_exit_at_end_p (loop)) n_copies = exit_mod + i + 1; - else if (exit_mod != (unsigned) i || loop->desc.may_be_zero) + else if (exit_mod != (unsigned) i + || desc->noloop_assumptions != NULL_RTX) n_copies = exit_mod + i + 2; else n_copies = i + 1; @@ -524,6 +552,11 @@ decide_unroll_constant_iterations (struct loop *loop, int flags) loop->lpt_decision.decision = LPT_UNROLL_CONSTANT; loop->lpt_decision.times = best_unroll; + + if (rtl_dump_file) + fprintf (rtl_dump_file, + ";; Decided to unroll the constant times rolling loop, %d times.\n", + loop->lpt_decision.times); } /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1 @@ -554,11 +587,12 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop) unsigned n_remove_edges, i; edge *remove_edges; unsigned max_unroll = loop->lpt_decision.times; - struct loop_desc *desc = &loop->desc; + struct niter_desc *desc = get_simple_loop_desc (loop); + bool exit_at_end = loop_exit_at_end_p (loop); niter = desc->niter; - if (niter <= (unsigned) max_unroll + 
1) + if (niter <= max_unroll + 1) abort (); /* Should not get here (such loop should be peeled instead). */ exit_mod = niter % (max_unroll + 1); @@ -569,9 +603,9 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop) remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge)); n_remove_edges = 0; - if (desc->postincr) + if (!exit_at_end) { - /* Counter is incremented after the exit test; leave exit test + /* The exit is not at the end of the loop; leave exit test in the first copy, so that the loops that start with test of exit condition have continuous body after unrolling. */ @@ -580,15 +614,22 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop) /* Peel exit_mod iterations. */ RESET_BIT (wont_exit, 0); - if (desc->may_be_zero) + if (desc->noloop_assumptions) RESET_BIT (wont_exit, 1); - if (exit_mod - && !duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), - loops, exit_mod, - wont_exit, desc->out_edge, remove_edges, &n_remove_edges, - DLTHE_FLAG_UPDATE_FREQ)) - abort (); + if (exit_mod) + { + if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), + loops, exit_mod, + wont_exit, desc->out_edge, + remove_edges, &n_remove_edges, + DLTHE_FLAG_UPDATE_FREQ)) + abort (); + + desc->noloop_assumptions = NULL_RTX; + desc->niter -= exit_mod; + desc->niter_max -= exit_mod; + } SET_BIT (wont_exit, 1); } @@ -602,12 +643,12 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop) /* We know that niter >= max_unroll + 2; so we do not need to care of case when we would exit before reaching the loop. So just peel - exit_mod + 1 iterations. - */ - if (exit_mod != (unsigned) max_unroll || desc->may_be_zero) + exit_mod + 1 iterations. 
*/ + if (exit_mod != max_unroll + || desc->noloop_assumptions) { RESET_BIT (wont_exit, 0); - if (desc->may_be_zero) + if (desc->noloop_assumptions) RESET_BIT (wont_exit, 1); if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), @@ -616,6 +657,10 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop) DLTHE_FLAG_UPDATE_FREQ)) abort (); + desc->niter -= exit_mod + 1; + desc->niter_max -= exit_mod + 1; + desc->noloop_assumptions = NULL_RTX; + SET_BIT (wont_exit, 0); SET_BIT (wont_exit, 1); } @@ -632,6 +677,27 @@ unroll_loop_constant_iterations (struct loops *loops, struct loop *loop) free (wont_exit); + if (exit_at_end) + { + basic_block exit_block = desc->in_edge->src->rbi->copy; + /* Find a new in and out edge; they are in the last copy we have made. */ + + if (exit_block->succ->dest == desc->out_edge->dest) + { + desc->out_edge = exit_block->succ; + desc->in_edge = exit_block->succ->succ_next; + } + else + { + desc->out_edge = exit_block->succ->succ_next; + desc->in_edge = exit_block->succ; + } + } + + desc->niter /= max_unroll + 1; + desc->niter_max /= max_unroll + 1; + desc->niter_expr = GEN_INT (desc->niter); + /* Remove the edges. */ for (i = 0; i < n_remove_edges; i++) remove_path (loops, remove_edges[i]); @@ -647,6 +713,7 @@ static void decide_unroll_runtime_iterations (struct loop *loop, int flags) { unsigned nunroll, nunroll_by_av, i; + struct niter_desc *desc; if (!(flags & UAP_UNROLL)) { @@ -655,7 +722,8 @@ decide_unroll_runtime_iterations (struct loop *loop, int flags) } if (rtl_dump_file) - fprintf (rtl_dump_file, ";; Considering unrolling loop with runtime computable number of iterations\n"); + fprintf (rtl_dump_file, + "\n;; Considering unrolling loop with runtime computable number of iterations\n"); /* nunroll = total number of copies of the original loop body in unrolled loop (i.e. if it is 2, we have to duplicate loop body once. 
*/ @@ -675,21 +743,18 @@ decide_unroll_runtime_iterations (struct loop *loop, int flags) } /* Check for simple loops. */ - if (!loop->has_desc) - { - loop->simple = simple_loop_p (loop, &loop->desc); - loop->has_desc = 1; - } + desc = get_simple_loop_desc (loop); /* Check simpleness. */ - if (!loop->simple) + if (!desc->simple_p || desc->assumptions) { if (rtl_dump_file) - fprintf (rtl_dump_file, ";; Unable to prove that the number of iterations can be counted in runtime\n"); + fprintf (rtl_dump_file, + ";; Unable to prove that the number of iterations can be counted in runtime\n"); return; } - if (loop->desc.const_iter) + if (desc->const_iter) { if (rtl_dump_file) fprintf (rtl_dump_file, ";; Loop iterates constant times\n"); @@ -706,10 +771,16 @@ decide_unroll_runtime_iterations (struct loop *loop, int flags) /* Success; now force nunroll to be power of 2, as we are unable to cope with overflows in computation of number of iterations. */ - for (i = 1; 2 * i <= nunroll; i *= 2); + for (i = 1; 2 * i <= nunroll; i *= 2) + continue; loop->lpt_decision.decision = LPT_UNROLL_RUNTIME; loop->lpt_decision.times = i - 1; + + if (rtl_dump_file) + fprintf (rtl_dump_file, + ";; Decided to unroll the runtime computable times rolling loop, %d times.\n", + loop->lpt_decision.times); } /* Unroll LOOP for that we are able to count number of iterations in runtime @@ -746,7 +817,7 @@ decide_unroll_runtime_iterations (struct loop *loop, int flags) static void unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop) { - rtx niter, init_code, branch_code, jump, label; + rtx old_niter, niter, init_code, branch_code, tmp; unsigned i, j, p; basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch; unsigned n_dom_bbs; @@ -756,7 +827,8 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop) edge *remove_edges, e; bool extra_zero_check, last_may_exit; unsigned max_unroll = loop->lpt_decision.times; - struct loop_desc *desc = &loop->desc; + struct niter_desc 
*desc = get_simple_loop_desc (loop); + bool exit_at_end = loop_exit_at_end_p (loop); /* Remember blocks whose dominators will have to be updated. */ dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block)); @@ -777,7 +849,7 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop) } free (body); - if (desc->postincr) + if (!exit_at_end) { /* Leave exit in first copy (for explanation why see comment in unroll_loop_constant_iterations). */ @@ -798,15 +870,15 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop) /* Get expression for number of iterations. */ start_sequence (); - niter = count_loop_iterations (desc, NULL, NULL); - if (!niter) - abort (); - niter = force_operand (niter, NULL); + old_niter = niter = gen_reg_rtx (desc->mode); + tmp = force_operand (copy_rtx (desc->niter_expr), niter); + if (tmp != niter) + emit_move_insn (niter, tmp); /* Count modulo by ANDing it with max_unroll; we use the fact that the number of unrollings is a power of two, and thus this is correct even if there is overflow in the computation. */ - niter = expand_simple_binop (GET_MODE (desc->var), AND, + niter = expand_simple_binop (desc->mode, AND, niter, GEN_INT (max_unroll), NULL_RTX, 0, OPTAB_LIB_WIDEN); @@ -824,10 +896,11 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop) /* Peel the first copy of loop body (almost always we must leave exit test here; the only exception is when we have extra zero check and the number - of iterations is reliable (i.e. comes out of NE condition). Also record - the place of (possible) extra zero check. */ + of iterations is reliable. Also record the place of (possible) extra + zero check. 
*/ sbitmap_zero (wont_exit); - if (extra_zero_check && desc->cond == NE) + if (extra_zero_check + && !desc->noloop_assumptions) SET_BIT (wont_exit, 1); ezc_swtch = loop_preheader_edge (loop)->src; if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), @@ -857,20 +930,8 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop) p = REG_BR_PROB_BASE / (i + 2); preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX); - label = block_label (preheader); - start_sequence (); - do_compare_rtx_and_jump (copy_rtx (niter), GEN_INT (j), EQ, 0, - GET_MODE (desc->var), NULL_RTX, NULL_RTX, - label); - jump = get_last_insn (); - JUMP_LABEL (jump) = label; - REG_NOTES (jump) - = gen_rtx_EXPR_LIST (REG_BR_PROB, - GEN_INT (p), REG_NOTES (jump)); - - LABEL_NUSES (label)++; - branch_code = get_insns (); - end_sequence (); + branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ, + block_label (preheader), p, NULL_RTX); swtch = loop_split_edge_with (swtch->pred, branch_code); set_immediate_dominator (CDI_DOMINATORS, preheader, swtch); @@ -886,20 +947,8 @@ unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop) p = REG_BR_PROB_BASE / (max_unroll + 1); swtch = ezc_swtch; preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX); - label = block_label (preheader); - start_sequence (); - do_compare_rtx_and_jump (copy_rtx (niter), const0_rtx, EQ, 0, - GET_MODE (desc->var), NULL_RTX, NULL_RTX, - label); - jump = get_last_insn (); - JUMP_LABEL (jump) = label; - REG_NOTES (jump) - = gen_rtx_EXPR_LIST (REG_BR_PROB, - GEN_INT (p), REG_NOTES (jump)); - - LABEL_NUSES (label)++; - branch_code = get_insns (); - end_sequence (); + branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ, + block_label (preheader), p, NULL_RTX); swtch = loop_split_edge_with (swtch->succ, branch_code); set_immediate_dominator (CDI_DOMINATORS, preheader, swtch); @@ -925,11 +974,45 @@ unroll_loop_runtime_iterations 
(struct loops *loops, struct loop *loop) free (wont_exit); + if (exit_at_end) + { + basic_block exit_block = desc->in_edge->src->rbi->copy; + /* Find a new in and out edge; they are in the last copy we have made. */ + + if (exit_block->succ->dest == desc->out_edge->dest) + { + desc->out_edge = exit_block->succ; + desc->in_edge = exit_block->succ->succ_next; + } + else + { + desc->out_edge = exit_block->succ->succ_next; + desc->in_edge = exit_block->succ; + } + } + /* Remove the edges. */ for (i = 0; i < n_remove_edges; i++) remove_path (loops, remove_edges[i]); free (remove_edges); + /* We must be careful when updating the number of iterations due to + preconditioning and the fact that the value must be valid at entry + of the loop. After passing through the above code, we see that + the correct new number of iterations is this: */ + if (desc->const_iter) + abort (); + desc->niter_expr = + simplify_gen_binary (UDIV, desc->mode, old_niter, GEN_INT (max_unroll + 1)); + desc->niter_max /= max_unroll + 1; + if (exit_at_end) + { + desc->niter_expr = + simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx); + desc->noloop_assumptions = NULL_RTX; + desc->niter_max--; + } + if (rtl_dump_file) fprintf (rtl_dump_file, ";; Unrolled loop %d times, counting # of iterations in runtime, %i insns\n", @@ -941,6 +1024,7 @@ static void decide_peel_simple (struct loop *loop, int flags) { unsigned npeel; + struct niter_desc *desc; if (!(flags & UAP_PEEL)) { @@ -949,7 +1033,7 @@ decide_peel_simple (struct loop *loop, int flags) } if (rtl_dump_file) - fprintf (rtl_dump_file, ";; Considering simply peeling loop\n"); + fprintf (rtl_dump_file, "\n;; Considering simply peeling loop\n"); /* npeel = number of iterations to peel. */ npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns; @@ -965,14 +1049,10 @@ decide_peel_simple (struct loop *loop, int flags) } /* Check for simple loops. 
*/ - if (!loop->has_desc) - { - loop->simple = simple_loop_p (loop, &loop->desc); - loop->has_desc = 1; - } + desc = get_simple_loop_desc (loop); /* Check number of iterations. */ - if (loop->simple && loop->desc.const_iter) + if (desc->simple_p && !desc->assumptions && desc->const_iter) { if (rtl_dump_file) fprintf (rtl_dump_file, ";; Loop iterates constant times\n"); @@ -981,7 +1061,7 @@ decide_peel_simple (struct loop *loop, int flags) /* Do not simply peel loops with branches inside -- it increases number of mispredicts. */ - if (loop->desc.n_branches > 1) + if (num_loop_branches (loop) > 1) { if (rtl_dump_file) fprintf (rtl_dump_file, ";; Not peeling, contains branches\n"); @@ -1016,6 +1096,10 @@ decide_peel_simple (struct loop *loop, int flags) /* Success. */ loop->lpt_decision.decision = LPT_PEEL_SIMPLE; loop->lpt_decision.times = npeel; + + if (rtl_dump_file) + fprintf (rtl_dump_file, ";; Decided to simply peel the loop, %d times.\n", + loop->lpt_decision.times); } /* Peel a LOOP LOOP->LPT_DECISION.TIMES times. The transformation: @@ -1037,6 +1121,7 @@ peel_loop_simple (struct loops *loops, struct loop *loop) { sbitmap wont_exit; unsigned npeel = loop->lpt_decision.times; + struct niter_desc *desc = get_simple_loop_desc (loop); wont_exit = sbitmap_alloc (npeel + 1); sbitmap_zero (wont_exit); @@ -1048,6 +1133,23 @@ peel_loop_simple (struct loops *loops, struct loop *loop) free (wont_exit); + if (desc->simple_p) + { + if (desc->const_iter) + { + desc->niter -= npeel; + desc->niter_expr = GEN_INT (desc->niter); + desc->noloop_assumptions = NULL_RTX; + } + else + { + /* We cannot just update niter_expr, as its value might be clobbered + inside loop. We could handle this by counting the number into + temporary just like we do in runtime unrolling, but it does not + seem worthwhile. 
*/ + free_simple_loop_desc (loop); + } + } if (rtl_dump_file) fprintf (rtl_dump_file, ";; Peeling loop %d times\n", npeel); } @@ -1057,6 +1159,7 @@ static void decide_unroll_stupid (struct loop *loop, int flags) { unsigned nunroll, nunroll_by_av, i; + struct niter_desc *desc; if (!(flags & UAP_UNROLL_ALL)) { @@ -1065,7 +1168,7 @@ decide_unroll_stupid (struct loop *loop, int flags) } if (rtl_dump_file) - fprintf (rtl_dump_file, ";; Considering unrolling loop stupidly\n"); + fprintf (rtl_dump_file, "\n;; Considering unrolling loop stupidly\n"); /* nunroll = total number of copies of the original loop body in unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */ @@ -1085,14 +1188,10 @@ decide_unroll_stupid (struct loop *loop, int flags) } /* Check for simple loops. */ - if (!loop->has_desc) - { - loop->simple = simple_loop_p (loop, &loop->desc); - loop->has_desc = 1; - } + desc = get_simple_loop_desc (loop); /* Check simpleness. */ - if (loop->simple) + if (desc->simple_p && !desc->assumptions) { if (rtl_dump_file) fprintf (rtl_dump_file, ";; The loop is simple\n"); @@ -1101,7 +1200,7 @@ decide_unroll_stupid (struct loop *loop, int flags) /* Do not unroll loops with branches inside -- it increases number of mispredicts. */ - if (loop->desc.n_branches > 1) + if (num_loop_branches (loop) > 1) { if (rtl_dump_file) fprintf (rtl_dump_file, ";; Not unrolling, contains branches\n"); @@ -1109,7 +1208,8 @@ decide_unroll_stupid (struct loop *loop, int flags) } /* If we have profile feedback, check whether the loop rolls. */ - if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll) + if (loop->header->count + && expected_loop_iterations (loop) < 2 * nunroll) { if (rtl_dump_file) fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n"); @@ -1119,10 +1219,16 @@ decide_unroll_stupid (struct loop *loop, int flags) /* Success. 
Now force nunroll to be power of 2, as it seems that this improves results (partially because of better alignments, partially because of some dark magic). */ - for (i = 1; 2 * i <= nunroll; i *= 2); + for (i = 1; 2 * i <= nunroll; i *= 2) + continue; loop->lpt_decision.decision = LPT_UNROLL_STUPID; loop->lpt_decision.times = i - 1; + + if (rtl_dump_file) + fprintf (rtl_dump_file, + ";; Decided to unroll the loop stupidly, %d times.\n", + loop->lpt_decision.times); } /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation: @@ -1147,6 +1253,7 @@ unroll_loop_stupid (struct loops *loops, struct loop *loop) { sbitmap wont_exit; unsigned nunroll = loop->lpt_decision.times; + struct niter_desc *desc = get_simple_loop_desc (loop); wont_exit = sbitmap_alloc (nunroll + 1); sbitmap_zero (wont_exit); @@ -1158,6 +1265,17 @@ unroll_loop_stupid (struct loops *loops, struct loop *loop) free (wont_exit); + if (desc->simple_p) + { + /* We indeed may get here provided that there are nontrivial assumptions + for a loop to be really simple. We could update the counts, but the + problem is that we are unable to decide which exit will be taken + (not really true in case the number of iterations is constant, + but noone will do anything with this information, so we do not + worry about it). */ + desc->simple_p = false; + } + if (rtl_dump_file) fprintf (rtl_dump_file, ";; Unrolled loop %d times, %i insns\n", nunroll, num_loop_insns (loop)); |