summaryrefslogtreecommitdiff
path: root/gcc/tree-ssa-loop-ivcanon.c
diff options
context:
space:
mode:
authorhubicka <hubicka@138bc75d-0d04-0410-961f-82ee72b054a4>2014-10-15 08:02:06 +0000
committerhubicka <hubicka@138bc75d-0d04-0410-961f-82ee72b054a4>2014-10-15 08:02:06 +0000
commitc836de3fcd856e4a526169d0c7eec8ab65b0eb8f (patch)
treee4fe1dea885c050d43cd384f86a97b01f26934e9 /gcc/tree-ssa-loop-ivcanon.c
parent0601876c5e29df9cfc1dd6380bd3df6b00cf3f67 (diff)
downloadgcc-c836de3fcd856e4a526169d0c7eec8ab65b0eb8f.tar.gz
* loop-unroll.c: (decide_unrolling_and_peeling): Rename to
(decide_unrolling): ... this one. (peel_loops_completely): Remove. (decide_peel_simple): Remove. (decide_peel_once_rolling): Remove. (decide_peel_completely): Remove. (peel_loop_simple): Remove. (peel_loop_completely): Remove. (unroll_and_peel_loops): Rename to ... (unroll_loops): ... this one; handle only unrolling. * cfgloop.h (lpt_dec): Remove LPT_PEEL_COMPLETELY and LPT_PEEL_SIMPLE. (UAP_PEEL): Remove. (unroll_and_peel_loops): Remove. (unroll_loops): New. * passes.def: Replace pass_rtl_unroll_and_peel_loops by pass_rtl_unroll_loops. * loop-init.c (gate_rtl_unroll_and_peel_loops, rtl_unroll_and_peel_loops): Rename to ... (gate_rtl_unroll_loops, rtl_unroll_loops): ... these; update. (pass_rtl_unroll_and_peel_loops): Rename to ... (pass_rtl_unroll_loops): ... this one. * tree-pass.h (make_pass_rtl_unroll_and_peel_loops): Remove. (make_pass_rtl_unroll_loops): New. * tree-ssa-loop-ivcanon.c: (estimated_peeled_sequence_size, try_peel_loop): New. (canonicalize_loop_induction_variables): Update. * gcc.dg/tree-prof/peel-1.c: Update. * gcc.dg/tree-prof/unroll-1.c: Update. * gcc.dg/gcc.dg/unroll_1.c: Update. * gcc.dg/gcc.dg/unroll_2.c: Update. * gcc.dg/gcc.dg/unroll_3.c: Update. * gcc.dg/gcc.dg/unroll_4.c: Update. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@216238 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/tree-ssa-loop-ivcanon.c')
-rw-r--r--gcc/tree-ssa-loop-ivcanon.c144
1 files changed, 140 insertions, 4 deletions
diff --git a/gcc/tree-ssa-loop-ivcanon.c b/gcc/tree-ssa-loop-ivcanon.c
index 0a5ca593379..136b235a205 100644
--- a/gcc/tree-ssa-loop-ivcanon.c
+++ b/gcc/tree-ssa-loop-ivcanon.c
@@ -28,9 +28,12 @@ along with GCC; see the file COPYING3. If not see
variables. In that case the created optimization possibilities are likely
to pay up.
- Additionally in case we detect that it is beneficial to unroll the
- loop completely, we do it right here to expose the optimization
- possibilities to the following passes. */
+ We also perform
+ - complette unrolling (or peeling) when the loops is rolling few enough
+ times
+ - simple peeling (i.e. copying few initial iterations prior the loop)
+ when number of iteration estimate is known (typically by the profile
+ info). */
#include "config.h"
#include "system.h"
@@ -657,11 +660,12 @@ try_unroll_loop_completely (struct loop *loop,
HOST_WIDE_INT maxiter,
location_t locus)
{
- unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns;
+ unsigned HOST_WIDE_INT n_unroll = 0, ninsns, max_unroll, unr_insns;
gimple cond;
struct loop_size size;
bool n_unroll_found = false;
edge edge_to_cancel = NULL;
+ int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;
/* See if we proved number of iterations to be low constant.
@@ -821,6 +825,8 @@ try_unroll_loop_completely (struct loop *loop,
loop->num);
return false;
}
+ dump_printf_loc (report_flags, locus,
+ "loop turned into non-loop; it never loops.\n");
initialize_original_copy_tables ();
wont_exit = sbitmap_alloc (n_unroll + 1);
@@ -902,6 +908,133 @@ try_unroll_loop_completely (struct loop *loop,
return true;
}
+/* Return number of instructions after peeling. */
+static unsigned HOST_WIDE_INT
+estimated_peeled_sequence_size (struct loop_size *size,
+ unsigned HOST_WIDE_INT npeel)
+{
+ return MAX (npeel * (HOST_WIDE_INT) (size->overall
+ - size->eliminated_by_peeling), 1);
+}
+
+/* If the loop is expected to iterate N times and is
+ small enough, duplicate the loop body N+1 times before
+ the loop itself. This way the hot path will never
+ enter the loop.
+ Parameters are the same as for try_unroll_loops_completely */
+
+static bool
+try_peel_loop (struct loop *loop,
+ edge exit, tree niter,
+ HOST_WIDE_INT maxiter)
+{
+ int npeel;
+ struct loop_size size;
+ int peeled_size;
+ sbitmap wont_exit;
+ unsigned i;
+ vec<edge> to_remove = vNULL;
+ edge e;
+
+ /* If the iteration bound is known and large, then we can safely eliminate
+ the check in peeled copies. */
+ if (TREE_CODE (niter) != INTEGER_CST)
+ exit = NULL;
+
+ if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0)
+ return false;
+
+ /* Peel only innermost loops. */
+ if (loop->inner)
+ {
+ if (dump_file)
+ fprintf (dump_file, "Not peeling: outer loop\n");
+ return false;
+ }
+
+ if (!optimize_loop_for_speed_p (loop))
+ {
+ if (dump_file)
+ fprintf (dump_file, "Not peeling: cold loop\n");
+ return false;
+ }
+
+ /* Check if there is an estimate on the number of iterations. */
+ npeel = estimated_loop_iterations_int (loop);
+ if (npeel < 0)
+ {
+ if (dump_file)
+ fprintf (dump_file, "Not peeling: number of iterations is not "
+ "estimated\n");
+ return false;
+ }
+ if (maxiter >= 0 && maxiter <= npeel)
+ {
+ if (dump_file)
+ fprintf (dump_file, "Not peeling: upper bound is known so can "
+ "unroll complettely\n");
+ return false;
+ }
+
+ /* We want to peel estimated number of iterations + 1 (so we never
+ enter the loop on quick path). Check against PARAM_MAX_PEEL_TIMES
+ and be sure to avoid overflows. */
+ if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1)
+ {
+ if (dump_file)
+ fprintf (dump_file, "Not peeling: rolls too much "
+ "(%i + 1 > --param max-peel-times)\n", npeel);
+ return false;
+ }
+ npeel++;
+
+ /* Check peeled loops size. */
+ tree_estimate_loop_size (loop, exit, NULL, &size,
+ PARAM_VALUE (PARAM_MAX_PEELED_INSNS));
+ if ((peeled_size = estimated_peeled_sequence_size (&size, npeel))
+ > PARAM_VALUE (PARAM_MAX_PEELED_INSNS))
+ {
+ if (dump_file)
+ fprintf (dump_file, "Not peeling: peeled sequence size is too large "
+ "(%i insns > --param max-peel-insns)", peeled_size);
+ return false;
+ }
+
+ /* Duplicate possibly eliminating the exits. */
+ initialize_original_copy_tables ();
+ wont_exit = sbitmap_alloc (npeel + 1);
+ bitmap_ones (wont_exit);
+ bitmap_clear_bit (wont_exit, 0);
+ if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
+ npeel, wont_exit,
+ exit, &to_remove,
+ DLTHE_FLAG_UPDATE_FREQ
+ | DLTHE_FLAG_COMPLETTE_PEEL))
+ {
+ free_original_copy_tables ();
+ free (wont_exit);
+ return false;
+ }
+ FOR_EACH_VEC_ELT (to_remove, i, e)
+ {
+ bool ok = remove_path (e);
+ gcc_assert (ok);
+ }
+ free (wont_exit);
+ free_original_copy_tables ();
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Peeled loop %d, %i times.\n",
+ loop->num, npeel);
+ }
+ if (loop->any_upper_bound)
+ loop->nb_iterations_upper_bound -= npeel;
+ loop->nb_iterations_estimate = 0;
+ /* Make sure to mark loop cold so we do not try to peel it more. */
+ scale_loop_profile (loop, 1, 0);
+ loop->header->count = 0;
+ return true;
+}
/* Adds a canonical induction variable to LOOP if suitable.
CREATE_IV is true if we may create a new iv. UL determines
which loops we are allowed to completely unroll. If TRY_EVAL is true, we try
@@ -981,6 +1114,9 @@ canonicalize_loop_induction_variables (struct loop *loop,
&& exit && just_once_each_iteration_p (loop, exit->src))
create_canonical_iv (loop, exit, niter);
+ if (ul == UL_ALL)
+ modified |= try_peel_loop (loop, exit, niter, maxiter);
+
return modified;
}