summaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorrevitale <revitale@138bc75d-0d04-0410-961f-82ee72b054a4>2011-08-03 07:59:23 +0000
committerrevitale <revitale@138bc75d-0d04-0410-961f-82ee72b054a4>2011-08-03 07:59:23 +0000
commita5ea7ace8312a19bff4dd99c73595634ab5aaa77 (patch)
treec4f836d44c70da07b802965eaf0ed9739970f979 /gcc
parent6bd80865bc5ba567e0c554657bb6daad80123cdb (diff)
downloadgcc-a5ea7ace8312a19bff4dd99c73595634ab5aaa77.tar.gz
Optimize stage count
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@177235 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog15
-rw-r--r--gcc/modulo-sched.c344
2 files changed, 302 insertions, 57 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 89eb733c916..8f865c6ccf7 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,18 @@
+2011-08-03 Revital Eres <revital.eres@linaro.org>
+
+ * modulo-sched.c (calculate_stage_count,
+ calculate_must_precede_follow, get_sched_window,
+ try_scheduling_node_in_cycle, remove_node_from_ps): Add
+ declaration.
+ (update_node_sched_params, set_must_precede_follow, optimize_sc):
+ New functions.
+ (reset_sched_times): Call update_node_sched_params.
+ (sms_schedule): Call optimize_sc.
+ (get_sched_window): Change function arguments.
+ (sms_schedule_by_order): Update call to get_sched_window.
+ Call set_must_precede_follow.
+ (calculate_stage_count): Add function argument.
+
2011-08-02 Richard Henderson <rth@redhat.com>
PR target/49864
diff --git a/gcc/modulo-sched.c b/gcc/modulo-sched.c
index 8c349d39ed5..a6d941019fb 100644
--- a/gcc/modulo-sched.c
+++ b/gcc/modulo-sched.c
@@ -203,7 +203,16 @@ static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
rtx, rtx);
static void duplicate_insns_of_cycles (partial_schedule_ptr,
int, int, int, rtx);
-static int calculate_stage_count (partial_schedule_ptr ps);
+static int calculate_stage_count (partial_schedule_ptr, int);
+static void calculate_must_precede_follow (ddg_node_ptr, int, int,
+ int, int, sbitmap, sbitmap, sbitmap);
+static int get_sched_window (partial_schedule_ptr, ddg_node_ptr,
+ sbitmap, int, int *, int *, int *);
+static bool try_scheduling_node_in_cycle (partial_schedule_ptr, ddg_node_ptr,
+ int, int, sbitmap, int *, sbitmap,
+ sbitmap);
+static bool remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr);
+
#define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
#define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
#define SCHED_FIRST_REG_MOVE(x) \
@@ -576,6 +585,36 @@ free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces)
}
}
+/* Update the sched_params (time, row and stage) for node U using the II,
+ the CYCLE of U and MIN_CYCLE.
+ We're not simply taking the following
+ SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii);
+ because the stages may not be aligned on cycle 0. */
+static void
+update_node_sched_params (ddg_node_ptr u, int ii, int cycle, int min_cycle)
+{
+ int sc_until_cycle_zero;
+ int stage;
+
+ SCHED_TIME (u) = cycle;
+ SCHED_ROW (u) = SMODULO (cycle, ii);
+
+ /* The calculation of stage count is done adding the number
+ of stages before cycle zero and after cycle zero. */
+ sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii);
+
+ if (SCHED_TIME (u) < 0)
+ {
+ stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
+ SCHED_STAGE (u) = sc_until_cycle_zero - stage;
+ }
+ else
+ {
+ stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
+ SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
+ }
+}
+
/* Bump the SCHED_TIMEs of all nodes by AMOUNT. Set the values of
SCHED_ROW and SCHED_STAGE. Instruction scheduled on cycle AMOUNT
will move to cycle zero. */
@@ -592,7 +631,6 @@ reset_sched_times (partial_schedule_ptr ps, int amount)
ddg_node_ptr u = crr_insn->node;
int normalized_time = SCHED_TIME (u) - amount;
int new_min_cycle = PS_MIN_CYCLE (ps) - amount;
- int sc_until_cycle_zero, stage;
if (dump_file)
{
@@ -608,23 +646,9 @@ reset_sched_times (partial_schedule_ptr ps, int amount)
gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
- SCHED_TIME (u) = normalized_time;
- SCHED_ROW (u) = SMODULO (normalized_time, ii);
-
- /* The calculation of stage count is done adding the number
- of stages before cycle zero and after cycle zero. */
- sc_until_cycle_zero = CALC_STAGE_COUNT (-1, new_min_cycle, ii);
-
- if (SCHED_TIME (u) < 0)
- {
- stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
- SCHED_STAGE (u) = sc_until_cycle_zero - stage;
- }
- else
- {
- stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
- SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
- }
+
+ crr_insn->cycle = normalized_time;
+ update_node_sched_params (u, ii, normalized_time, new_min_cycle);
}
}
@@ -661,6 +685,206 @@ permute_partial_schedule (partial_schedule_ptr ps, rtx last)
PREV_INSN (last));
}
+/* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE
+ respectively only if cycle C falls on the border of the scheduling
+ window boundaries marked by START and END cycles. STEP is the
+ direction of the window. */
+static inline void
+set_must_precede_follow (sbitmap *tmp_follow, sbitmap must_follow,
+ sbitmap *tmp_precede, sbitmap must_precede, int c,
+ int start, int end, int step)
+{
+ *tmp_precede = NULL;
+ *tmp_follow = NULL;
+
+ if (c == start)
+ {
+ if (step == 1)
+ *tmp_precede = must_precede;
+ else /* step == -1. */
+ *tmp_follow = must_follow;
+ }
+ if (c == end - step)
+ {
+ if (step == 1)
+ *tmp_follow = must_follow;
+ else /* step == -1. */
+ *tmp_precede = must_precede;
+ }
+
+}
+
+/* Return True if the branch can be moved to row ii-1 while
+ normalizing the partial schedule PS to start from cycle zero and thus
+ optimize the SC. Otherwise return False. */
+static bool
+optimize_sc (partial_schedule_ptr ps, ddg_ptr g)
+{
+ int amount = PS_MIN_CYCLE (ps);
+ sbitmap sched_nodes = sbitmap_alloc (g->num_nodes);
+ int start, end, step;
+ int ii = ps->ii;
+ bool ok = false;
+ int stage_count, stage_count_curr;
+
+ /* Compare the SC after normalization and SC after bringing the branch
+ to row ii-1. If they are equal just bail out. */
+ stage_count = calculate_stage_count (ps, amount);
+ stage_count_curr =
+ calculate_stage_count (ps, SCHED_TIME (g->closing_branch) - (ii - 1));
+
+ if (stage_count == stage_count_curr)
+ {
+ if (dump_file)
+ fprintf (dump_file, "SMS SC already optimized.\n");
+
+ ok = false;
+ goto clear;
+ }
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "SMS Trying to optimize branch location\n");
+ fprintf (dump_file, "SMS partial schedule before trial:\n");
+ print_partial_schedule (ps, dump_file);
+ }
+
+ /* First, normalize the partial scheduling. */
+ reset_sched_times (ps, amount);
+ rotate_partial_schedule (ps, amount);
+ if (dump_file)
+ {
+ fprintf (dump_file,
+ "SMS partial schedule after normalization (ii, %d, SC %d):\n",
+ ii, stage_count);
+ print_partial_schedule (ps, dump_file);
+ }
+
+ if (SMODULO (SCHED_TIME (g->closing_branch), ii) == ii - 1)
+ {
+ ok = true;
+ goto clear;
+ }
+
+ sbitmap_ones (sched_nodes);
+
+ /* Calculate the new placement of the branch. It should be in row
+ ii-1 and fall into it's scheduling window. */
+ if (get_sched_window (ps, g->closing_branch, sched_nodes, ii, &start,
+ &step, &end) == 0)
+ {
+ bool success;
+ ps_insn_ptr next_ps_i;
+ int branch_cycle = SCHED_TIME (g->closing_branch);
+ int row = SMODULO (branch_cycle, ps->ii);
+ int num_splits = 0;
+ sbitmap must_precede, must_follow, tmp_precede, tmp_follow;
+ int c;
+
+ if (dump_file)
+ fprintf (dump_file, "\nTrying to schedule node %d "
+ "INSN = %d in (%d .. %d) step %d\n",
+ g->closing_branch->cuid,
+ (INSN_UID (g->closing_branch->insn)), start, end, step);
+
+ gcc_assert ((step > 0 && start < end) || (step < 0 && start > end));
+ if (step == 1)
+ {
+ c = start + ii - SMODULO (start, ii) - 1;
+ gcc_assert (c >= start);
+ if (c >= end)
+ {
+ ok = false;
+ if (dump_file)
+ fprintf (dump_file,
+ "SMS failed to schedule branch at cycle: %d\n", c);
+ goto clear;
+ }
+ }
+ else
+ {
+ c = start - SMODULO (start, ii) - 1;
+ gcc_assert (c <= start);
+
+ if (c <= end)
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ "SMS failed to schedule branch at cycle: %d\n", c);
+ ok = false;
+ goto clear;
+ }
+ }
+
+ must_precede = sbitmap_alloc (g->num_nodes);
+ must_follow = sbitmap_alloc (g->num_nodes);
+
+ /* Try to schedule the branch is it's new cycle. */
+ calculate_must_precede_follow (g->closing_branch, start, end,
+ step, ii, sched_nodes,
+ must_precede, must_follow);
+
+ set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
+ must_precede, c, start, end, step);
+
+ /* Find the element in the partial schedule related to the closing
+ branch so we can remove it from it's current cycle. */
+ for (next_ps_i = ps->rows[row];
+ next_ps_i; next_ps_i = next_ps_i->next_in_row)
+ if (next_ps_i->node->cuid == g->closing_branch->cuid)
+ break;
+
+ gcc_assert (next_ps_i);
+ gcc_assert (remove_node_from_ps (ps, next_ps_i));
+ success =
+ try_scheduling_node_in_cycle (ps, g->closing_branch,
+ g->closing_branch->cuid, c,
+ sched_nodes, &num_splits,
+ tmp_precede, tmp_follow);
+ gcc_assert (num_splits == 0);
+ if (!success)
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ "SMS failed to schedule branch at cycle: %d, "
+ "bringing it back to cycle %d\n", c, branch_cycle);
+
+ /* The branch was failed to be placed in row ii - 1.
+ Put it back in it's original place in the partial
+ schedualing. */
+ set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
+ must_precede, branch_cycle, start, end,
+ step);
+ success =
+ try_scheduling_node_in_cycle (ps, g->closing_branch,
+ g->closing_branch->cuid,
+ branch_cycle, sched_nodes,
+ &num_splits, tmp_precede,
+ tmp_follow);
+ gcc_assert (success && (num_splits == 0));
+ ok = false;
+ }
+ else
+ {
+ /* The branch is placed in row ii - 1. */
+ if (dump_file)
+ fprintf (dump_file,
+ "SMS success in moving branch to cycle %d\n", c);
+
+ update_node_sched_params (g->closing_branch, ii, c,
+ PS_MIN_CYCLE (ps));
+ ok = true;
+ }
+
+ free (must_precede);
+ free (must_follow);
+ }
+
+clear:
+ free (sched_nodes);
+ return ok;
+}
+
static void
duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
int to_stage, int for_prolog, rtx count_reg)
@@ -1116,6 +1340,7 @@ sms_schedule (void)
int mii, rec_mii;
unsigned stage_count = 0;
HOST_WIDEST_INT loop_count = 0;
+ bool opt_sc_p = false;
if (! (g = g_arr[loop->num]))
continue;
@@ -1197,14 +1422,32 @@ sms_schedule (void)
set_node_sched_params (g);
ps = sms_schedule_by_order (g, mii, maxii, node_order);
-
- if (ps)
- {
- stage_count = calculate_stage_count (ps);
- gcc_assert(stage_count >= 1);
- PS_STAGE_COUNT(ps) = stage_count;
- }
-
+
+ if (ps)
+ {
+ /* Try to achieve optimized SC by normalizing the partial
+ schedule (having the cycles start from cycle zero).
+ The branch location must be placed in row ii-1 in the
+ final scheduling. If failed, shift all instructions to
+ position the branch in row ii-1. */
+ opt_sc_p = optimize_sc (ps, g);
+ if (opt_sc_p)
+ stage_count = calculate_stage_count (ps, 0);
+ else
+ {
+ /* Bring the branch to cycle ii-1. */
+ int amount = SCHED_TIME (g->closing_branch) - (ps->ii - 1);
+
+ if (dump_file)
+ fprintf (dump_file, "SMS schedule branch at cycle ii-1\n");
+
+ stage_count = calculate_stage_count (ps, amount);
+ }
+
+ gcc_assert (stage_count >= 1);
+ PS_STAGE_COUNT (ps) = stage_count;
+ }
+
/* The default value of PARAM_SMS_MIN_SC is 2 as stage count of
1 means that there is no interleaving between iterations thus
we let the scheduling passes do the job in this case. */
@@ -1225,12 +1468,16 @@ sms_schedule (void)
else
{
struct undo_replace_buff_elem *reg_move_replaces;
- int amount = SCHED_TIME (g->closing_branch) + 1;
+
+ if (!opt_sc_p)
+ {
+ /* Rotate the partial schedule to have the branch in row ii-1. */
+ int amount = SCHED_TIME (g->closing_branch) - (ps->ii - 1);
+
+ reset_sched_times (ps, amount);
+ rotate_partial_schedule (ps, amount);
+ }
- /* Set the stage boundaries. The closing_branch was scheduled
- and should appear in the last (ii-1) row. */
- reset_sched_times (ps, amount);
- rotate_partial_schedule (ps, amount);
set_columns_for_ps (ps);
canon_loop (loop);
@@ -1382,13 +1629,11 @@ sms_schedule (void)
scheduling window is empty and zero otherwise. */
static int
-get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i,
+get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node,
sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p)
{
int start, step, end;
ddg_edge_ptr e;
- int u = nodes_order [i];
- ddg_node_ptr u_node = &ps->g->nodes[u];
sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
@@ -1800,7 +2045,7 @@ sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
/* Try to get non-empty scheduling window. */
success = 0;
- if (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start,
+ if (get_sched_window (ps, u_node, sched_nodes, ii, &start,
&step, &end) == 0)
{
if (dump_file)
@@ -1817,24 +2062,11 @@ sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
for (c = start; c != end; c += step)
{
- sbitmap tmp_precede = NULL;
- sbitmap tmp_follow = NULL;
-
- if (c == start)
- {
- if (step == 1)
- tmp_precede = must_precede;
- else /* step == -1. */
- tmp_follow = must_follow;
- }
- if (c == end - step)
- {
- if (step == 1)
- tmp_follow = must_follow;
- else /* step == -1. */
- tmp_precede = must_precede;
- }
+ sbitmap tmp_precede, tmp_follow;
+ set_must_precede_follow (&tmp_follow, must_follow,
+ &tmp_precede, must_precede,
+ c, start, end, step);
success =
try_scheduling_node_in_cycle (ps, u_node, u, c,
sched_nodes,
@@ -2899,12 +3131,10 @@ ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n,
}
/* Calculate the stage count of the partial schedule PS. The calculation
- takes into account the rotation to bring the closing branch to row
- ii-1. */
+ takes into account the rotation amount passed in ROTATION_AMOUNT. */
int
-calculate_stage_count (partial_schedule_ptr ps)
+calculate_stage_count (partial_schedule_ptr ps, int rotation_amount)
{
- int rotation_amount = (SCHED_TIME (ps->g->closing_branch)) + 1;
int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount;
int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount;
int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii);