diff options
author | hubicka <hubicka@138bc75d-0d04-0410-961f-82ee72b054a4> | 2014-10-15 08:02:06 +0000 |
---|---|---|
committer | hubicka <hubicka@138bc75d-0d04-0410-961f-82ee72b054a4> | 2014-10-15 08:02:06 +0000 |
commit | c836de3fcd856e4a526169d0c7eec8ab65b0eb8f (patch) | |
tree | e4fe1dea885c050d43cd384f86a97b01f26934e9 /gcc/tree-ssa-loop-ivcanon.c | |
parent | 0601876c5e29df9cfc1dd6380bd3df6b00cf3f67 (diff) | |
download | gcc-c836de3fcd856e4a526169d0c7eec8ab65b0eb8f.tar.gz |
* loop-unroll.c (decide_unrolling_and_peeling): Rename to
(decide_unrolling): ... this one.
(peel_loops_completely): Remove.
(decide_peel_simple): Remove.
(decide_peel_once_rolling): Remove.
(decide_peel_completely): Remove.
(peel_loop_simple): Remove.
(peel_loop_completely): Remove.
(unroll_and_peel_loops): Rename to ...
(unroll_loops): ... this one; handle only unrolling.
* cfgloop.h (lpt_dec): Remove LPT_PEEL_COMPLETELY and
LPT_PEEL_SIMPLE.
(UAP_PEEL): Remove.
(unroll_and_peel_loops): Remove.
(unroll_loops): New.
* passes.def: Replace
pass_rtl_unroll_and_peel_loops by pass_rtl_unroll_loops.
* loop-init.c (gate_rtl_unroll_and_peel_loops,
rtl_unroll_and_peel_loops): Rename to ...
(gate_rtl_unroll_loops, rtl_unroll_loops): ... these; update.
(pass_rtl_unroll_and_peel_loops): Rename to ...
(pass_rtl_unroll_loops): ... this one.
* tree-pass.h (make_pass_rtl_unroll_and_peel_loops): Remove.
(make_pass_rtl_unroll_loops): New.
* tree-ssa-loop-ivcanon.c (estimated_peeled_sequence_size, try_peel_loop): New.
(canonicalize_loop_induction_variables): Update.
* gcc.dg/tree-prof/peel-1.c: Update.
* gcc.dg/tree-prof/unroll-1.c: Update.
* gcc.dg/unroll_1.c: Update.
* gcc.dg/unroll_2.c: Update.
* gcc.dg/unroll_3.c: Update.
* gcc.dg/unroll_4.c: Update.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@216238 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/tree-ssa-loop-ivcanon.c')
-rw-r--r-- | gcc/tree-ssa-loop-ivcanon.c | 144 |
1 files changed, 140 insertions, 4 deletions
diff --git a/gcc/tree-ssa-loop-ivcanon.c b/gcc/tree-ssa-loop-ivcanon.c index 0a5ca593379..136b235a205 100644 --- a/gcc/tree-ssa-loop-ivcanon.c +++ b/gcc/tree-ssa-loop-ivcanon.c @@ -28,9 +28,12 @@ along with GCC; see the file COPYING3. If not see variables. In that case the created optimization possibilities are likely to pay up. - Additionally in case we detect that it is beneficial to unroll the - loop completely, we do it right here to expose the optimization - possibilities to the following passes. */ + We also perform + - complette unrolling (or peeling) when the loops is rolling few enough + times + - simple peeling (i.e. copying few initial iterations prior the loop) + when number of iteration estimate is known (typically by the profile + info). */ #include "config.h" #include "system.h" @@ -657,11 +660,12 @@ try_unroll_loop_completely (struct loop *loop, HOST_WIDE_INT maxiter, location_t locus) { - unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns; + unsigned HOST_WIDE_INT n_unroll = 0, ninsns, max_unroll, unr_insns; gimple cond; struct loop_size size; bool n_unroll_found = false; edge edge_to_cancel = NULL; + int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS; /* See if we proved number of iterations to be low constant. @@ -821,6 +825,8 @@ try_unroll_loop_completely (struct loop *loop, loop->num); return false; } + dump_printf_loc (report_flags, locus, + "loop turned into non-loop; it never loops.\n"); initialize_original_copy_tables (); wont_exit = sbitmap_alloc (n_unroll + 1); @@ -902,6 +908,133 @@ try_unroll_loop_completely (struct loop *loop, return true; } +/* Return number of instructions after peeling. 
*/ +static unsigned HOST_WIDE_INT +estimated_peeled_sequence_size (struct loop_size *size, + unsigned HOST_WIDE_INT npeel) +{ + return MAX (npeel * (HOST_WIDE_INT) (size->overall + - size->eliminated_by_peeling), 1); +} + +/* If the loop is expected to iterate N times and is + small enough, duplicate the loop body N+1 times before + the loop itself. This way the hot path will never + enter the loop. + Parameters are the same as for try_unroll_loops_completely */ + +static bool +try_peel_loop (struct loop *loop, + edge exit, tree niter, + HOST_WIDE_INT maxiter) +{ + int npeel; + struct loop_size size; + int peeled_size; + sbitmap wont_exit; + unsigned i; + vec<edge> to_remove = vNULL; + edge e; + + /* If the iteration bound is known and large, then we can safely eliminate + the check in peeled copies. */ + if (TREE_CODE (niter) != INTEGER_CST) + exit = NULL; + + if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0) + return false; + + /* Peel only innermost loops. */ + if (loop->inner) + { + if (dump_file) + fprintf (dump_file, "Not peeling: outer loop\n"); + return false; + } + + if (!optimize_loop_for_speed_p (loop)) + { + if (dump_file) + fprintf (dump_file, "Not peeling: cold loop\n"); + return false; + } + + /* Check if there is an estimate on the number of iterations. */ + npeel = estimated_loop_iterations_int (loop); + if (npeel < 0) + { + if (dump_file) + fprintf (dump_file, "Not peeling: number of iterations is not " + "estimated\n"); + return false; + } + if (maxiter >= 0 && maxiter <= npeel) + { + if (dump_file) + fprintf (dump_file, "Not peeling: upper bound is known so can " + "unroll complettely\n"); + return false; + } + + /* We want to peel estimated number of iterations + 1 (so we never + enter the loop on quick path). Check against PARAM_MAX_PEEL_TIMES + and be sure to avoid overflows. 
*/ + if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1) + { + if (dump_file) + fprintf (dump_file, "Not peeling: rolls too much " + "(%i + 1 > --param max-peel-times)\n", npeel); + return false; + } + npeel++; + + /* Check peeled loops size. */ + tree_estimate_loop_size (loop, exit, NULL, &size, + PARAM_VALUE (PARAM_MAX_PEELED_INSNS)); + if ((peeled_size = estimated_peeled_sequence_size (&size, npeel)) + > PARAM_VALUE (PARAM_MAX_PEELED_INSNS)) + { + if (dump_file) + fprintf (dump_file, "Not peeling: peeled sequence size is too large " + "(%i insns > --param max-peel-insns)", peeled_size); + return false; + } + + /* Duplicate possibly eliminating the exits. */ + initialize_original_copy_tables (); + wont_exit = sbitmap_alloc (npeel + 1); + bitmap_ones (wont_exit); + bitmap_clear_bit (wont_exit, 0); + if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), + npeel, wont_exit, + exit, &to_remove, + DLTHE_FLAG_UPDATE_FREQ + | DLTHE_FLAG_COMPLETTE_PEEL)) + { + free_original_copy_tables (); + free (wont_exit); + return false; + } + FOR_EACH_VEC_ELT (to_remove, i, e) + { + bool ok = remove_path (e); + gcc_assert (ok); + } + free (wont_exit); + free_original_copy_tables (); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Peeled loop %d, %i times.\n", + loop->num, npeel); + } + if (loop->any_upper_bound) + loop->nb_iterations_upper_bound -= npeel; + loop->nb_iterations_estimate = 0; + /* Make sure to mark loop cold so we do not try to peel it more. */ + scale_loop_profile (loop, 1, 0); + loop->header->count = 0; + return true; +} /* Adds a canonical induction variable to LOOP if suitable. CREATE_IV is true if we may create a new iv. UL determines which loops we are allowed to completely unroll. 
If TRY_EVAL is true, we try @@ -981,6 +1114,9 @@ canonicalize_loop_induction_variables (struct loop *loop, && exit && just_once_each_iteration_p (loop, exit->src)) create_canonical_iv (loop, exit, niter); + if (ul == UL_ALL) + modified |= try_peel_loop (loop, exit, niter, maxiter); + return modified; } |