summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorhubicka <hubicka@138bc75d-0d04-0410-961f-82ee72b054a4>2001-08-19 23:46:10 +0000
committerhubicka <hubicka@138bc75d-0d04-0410-961f-82ee72b054a4>2001-08-19 23:46:10 +0000
commit312866af43c7d34fa7b50f97f20058108888e844 (patch)
tree41105a8a5becd86725e0db5742de2ed66c51ad55
parent1a980d18e7227175be0c7720b6c329c1eb25fdaf (diff)
downloadgcc-312866af43c7d34fa7b50f97f20058108888e844.tar.gz
* final.c (compute_alignments): New function.
(init_insn_lengths): Do not care label_align. (LABEL_ALIGN_AFTER_BARRIER): Default to 1. (LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP): Default to 0. (JUMP_ALIGN, JUMP_ALIGN_MAX_SKIP): New. (shorted_branches): Realloc label_align array; do not call init_insn_lengths; Do not care about loop alignments. * output.h (compute_alignments): Declare. * toplev.c (rest_of_compilation): Call compute_alignments. * tm.texi (JUMP_ALIGN, JUMP_ALIGN_MAX_SKIP): Document. * predict.c (block_info_def): Add npredecesors, remove nvisited; change visited to tovisit. (propagate_freq): Use faster traversing algorithm. (estimate_loops_at_level, estimate_bb_frequencies): Change visited to tovisit; reverse meaning. * predict.c (struct block_info_def): Remove nvisited. (propagate_freq): Use EDGE_DFS_BACK to detect irreducible regions. (estimate_bb_frequencies): Call mark_dfs_back_edges. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@45042 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r--gcc/ChangeLog23
-rw-r--r--gcc/doc/tm.texi20
-rw-r--r--gcc/final.c175
-rw-r--r--gcc/output.h3
-rw-r--r--gcc/predict.c88
-rw-r--r--gcc/toplev.c1
6 files changed, 214 insertions, 96 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 54b09bcce9d..d37e8f4d98d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,26 @@
+Mon Aug 20 01:44:50 CEST 2001 Jan Hubicka <jh@suse.cz>
+
+ * final.c (compute_alignments): New function.
+ (init_insn_lengths): Do not care label_align.
+ (LABEL_ALIGN_AFTER_BARRIER): Default to 1.
+ (LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP): Default to 0.
+ (JUMP_ALIGN, JUMP_ALIGN_MAX_SKIP): New.
+ (shorted_branches): Realloc label_align array; do
+ not call init_insn_lengths; Do not care about loop alignments.
+ * output.h (compute_alignments): Declare.
+ * toplev.c (rest_of_compilation): Call compute_alignments.
+ * tm.texi (JUMP_ALIGN, JUMP_ALIGN_MAX_SKIP): Document.
+
+ * predict.c (block_info_def): Add npredecesors, remove nvisited;
+ change visited to tovisit.
+ (propagate_freq): Use faster traversing algorithm.
+ (estimate_loops_at_level, estimate_bb_frequencies): Change visited
+ to tovisit; reverse meaning.
+
+ * predict.c (struct block_info_def): Remove nvisited.
+ (propagate_freq): Use EDGE_DFS_BACK to detect irreducible regions.
+ (estimate_bb_frequencies): Call mark_dfs_back_edges.
+
2001-08-19 Geoffrey Keating <geoffk@redhat.com>
* doc/invoke.texi (MIPS Options): Document -mfused-madd.
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 8f567dbda8f..80e8e75209d 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -7226,10 +7226,10 @@ the target supports DWARF 2 frame unwind information.
This describes commands for alignment.
@table @code
-@findex LABEL_ALIGN_AFTER_BARRIER
-@item LABEL_ALIGN_AFTER_BARRIER (@var{label})
-The alignment (log base 2) to put in front of @var{label}, which follows
-a @code{BARRIER}.
+@findex JUMP_ALIGN
+@item JUMP_ALIGN (@var{label})
+The alignment (log base 2) to put in front of @var{label}, which is
+a common destination of jumps and has no fallthru incomming edge.
This macro need not be defined if you don't want any special alignment
to be done at such a time. Most machine descriptions do not currently
@@ -7238,8 +7238,16 @@ define the macro.
Unless it's necessary to inspect the @var{label} parameter, it is better
to set the variable @var{align_jumps} in the target's
@code{OVERRIDE_OPTIONS}. Otherwise, you should try to honour the user's
-selection in @var{align_jumps} in a @code{LABEL_ALIGN_AFTER_BARRIER}
-implementation.
+selection in @var{align_jumps} in a @code{JUMP_ALIGN} implementation.
+
+@findex LABEL_ALIGN_AFTER_BARRIER
+@item LABEL_ALIGN_AFTER_BARRIER (@var{label})
+The alignment (log base 2) to put in front of @var{label}, which follows
+a @code{BARRIER}.
+
+This macro need not be defined if you don't want any special alignment
+to be done at such a time. Most machine descriptions do not currently
+define the macro.
@findex LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP
@item LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP
diff --git a/gcc/final.c b/gcc/final.c
index ec57842e079..ed4b6fff3d3 100644
--- a/gcc/final.c
+++ b/gcc/final.c
@@ -643,11 +643,6 @@ static struct label_alignment *label_align;
void
init_insn_lengths ()
{
- if (label_align)
- {
- free (label_align);
- label_align = 0;
- }
if (uid_shuid)
{
free (uid_shuid);
@@ -791,11 +786,19 @@ get_attr_length (insn)
#endif
#ifndef LABEL_ALIGN_AFTER_BARRIER
-#define LABEL_ALIGN_AFTER_BARRIER(LABEL) align_jumps_log
+#define LABEL_ALIGN_AFTER_BARRIER(LABEL) 1
#endif
#ifndef LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP
-#define LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP (align_jumps-1)
+#define LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP 0
+#endif
+
+#ifndef JUMP_ALIGN
+#define JUMP_ALIGN(LABEL) align_jumps_log
+#endif
+
+#ifndef JUMP_ALIGN_MAX_SKIP
+#define JUMP_ALIGN_MAX_SKIP (align_jumps-1)
#endif
#ifndef ADDR_VEC_ALIGN
@@ -946,6 +949,88 @@ insn_current_reference_address (branch)
}
#endif /* HAVE_ATTR_length */
+void
+compute_alignments ()
+{
+ int i;
+ int log, max_skip, max_log;
+
+ if (label_align)
+ {
+ free (label_align);
+ label_align = 0;
+ }
+
+ max_labelno = max_label_num ();
+ min_labelno = get_first_label_num ();
+ label_align = (struct label_alignment *)
+ xcalloc (max_labelno - min_labelno + 1, sizeof (struct label_alignment));
+
+ /* If not optimizing or optimizing for size, don't assign any alignments. */
+ if (optimize || optimize_size)
+ return;
+
+ for (i = 0; i < n_basic_blocks; i++)
+ {
+ basic_block bb = BASIC_BLOCK (i);
+ rtx label = bb->head;
+ int fallthru_frequency = 0, branch_frequency = 0, has_fallthru = 0;
+ edge e;
+
+ if (GET_CODE (label) != CODE_LABEL)
+ continue;
+ max_log = LABEL_ALIGN (label);
+ max_skip = LABEL_ALIGN_MAX_SKIP;
+
+ for (e = bb->pred; e; e = e->pred_next)
+ {
+ if (e->flags & EDGE_FALLTHRU)
+ has_fallthru = 1, fallthru_frequency += EDGE_FREQUENCY (e);
+ else
+ branch_frequency += EDGE_FREQUENCY (e);
+ }
+
+ /* There are two purposes to align block with no fallthru incomming edge:
+ 1) to avoid fetch stalls when branch destination is near cache boundary
+ 2) to improve cache effciency in case the previous block is not executed
+ (so it does not need to be in the cache).
+
+ We to catch first case, we align frequently executed blocks.
+ To catch the second, we align blocks that are executed more frequently
+ than the predecesor and the predecesor is likely to not be executed
+ when function is called. */
+
+ if (!has_fallthru
+ && (branch_frequency > BB_FREQ_MAX / 10
+ || (bb->frequency > BASIC_BLOCK (i - 1)->frequency * 10
+ && (BASIC_BLOCK (i - 1)->frequency
+ <= ENTRY_BLOCK_PTR->frequency / 2))))
+ {
+ log = JUMP_ALIGN (label);
+ if (max_log < log)
+ {
+ max_log = log;
+ max_skip = JUMP_ALIGN_MAX_SKIP;
+ }
+ }
+ /* In case block is frequent and reached mostly by non-fallthru edge,
+ align it. It is most likely an first block of loop. */
+ if (has_fallthru
+ && branch_frequency + fallthru_frequency > BB_FREQ_MAX / 10
+ && branch_frequency > fallthru_frequency * 5)
+ {
+ log = LOOP_ALIGN (label);
+ if (max_log < log)
+ {
+ max_log = log;
+ max_skip = LOOP_ALIGN_MAX_SKIP;
+ }
+ }
+ LABEL_TO_ALIGNMENT (label) = max_log;
+ LABEL_TO_MAX_SKIP (label) = max_skip;
+ }
+}
+
/* Make a pass over all insns and compute their actual lengths by shortening
any branches of variable length if possible. */
@@ -983,21 +1068,34 @@ shorten_branches (first)
#endif
- /* We must do some computations even when not actually shortening, in
- order to get the alignment information for the labels. */
-
- init_insn_lengths ();
-
/* Compute maximum UID and allocate label_align / uid_shuid. */
max_uid = get_max_uid ();
- max_labelno = max_label_num ();
- min_labelno = get_first_label_num ();
- label_align = (struct label_alignment *)
- xcalloc ((max_labelno - min_labelno + 1), sizeof (struct label_alignment));
-
uid_shuid = (int *) xmalloc (max_uid * sizeof *uid_shuid);
+ if (max_labelno != max_label_num ())
+ {
+ int old = max_labelno;
+ int n_labels;
+ int n_old_labels;
+
+ max_labelno = max_label_num ();
+
+ n_labels = max_labelno - min_labelno + 1;
+ n_old_labels = old - min_labelno + 1;
+
+ label_align = (struct label_alignment *) xrealloc
+ (label_align, n_labels * sizeof (struct label_alignment));
+
+ /* Range of labels grows monotonically in the function. Abort here
+ means that the initialization of array got lost. */
+ if (n_old_labels > n_labels)
+ abort ();
+
+ memset (label_align + n_old_labels, 0,
+ (n_labels - n_old_labels) * sizeof (struct label_alignment));
+ }
+
/* Initialize label_align and set up uid_shuid to be strictly
monotonically rising with insn order. */
/* We use max_log here to keep track of the maximum alignment we want to
@@ -1023,6 +1121,14 @@ shorten_branches (first)
else if (GET_CODE (insn) == CODE_LABEL)
{
rtx next;
+
+ /* Merge in alignments computed by compute_alignments. */
+ log = LABEL_TO_ALIGNMENT (insn);
+ if (max_log < log)
+ {
+ max_log = log;
+ max_skip = LABEL_TO_MAX_SKIP (insn);
+ }
log = LABEL_ALIGN (insn);
if (max_log < log)
@@ -1074,41 +1180,6 @@ shorten_branches (first)
break;
}
}
- /* Again, we allow NOTE_INSN_LOOP_BEG - INSN - CODE_LABEL
- sequences in order to handle reorg output efficiently. */
- else if (GET_CODE (insn) == NOTE
- && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
- {
- rtx label;
- int nest = 0;
-
- /* Search for the label that starts the loop.
- Don't skip past the end of the loop, since that could
- lead to putting an alignment where it does not belong.
- However, a label after a nested (non-)loop would be OK. */
- for (label = insn; label; label = NEXT_INSN (label))
- {
- if (GET_CODE (label) == NOTE
- && NOTE_LINE_NUMBER (label) == NOTE_INSN_LOOP_BEG)
- nest++;
- else if (GET_CODE (label) == NOTE
- && NOTE_LINE_NUMBER (label) == NOTE_INSN_LOOP_END
- && --nest == 0)
- break;
- else if (GET_CODE (label) == CODE_LABEL)
- {
- log = LOOP_ALIGN (label);
- if (max_log < log)
- {
- max_log = log;
- max_skip = LOOP_ALIGN_MAX_SKIP;
- }
- break;
- }
- }
- }
- else
- continue;
}
#ifdef HAVE_ATTR_length
diff --git a/gcc/output.h b/gcc/output.h
index 4c4c5aebcc1..f1426ec26dd 100644
--- a/gcc/output.h
+++ b/gcc/output.h
@@ -20,6 +20,9 @@ along with GNU CC; see the file COPYING. If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
+/* Compute branch alignments based on frequency information in the CFG. */
+extern void compute_alignments PARAMS ((void));
+
/* Initialize data in final at the beginning of a compilation. */
extern void init_final PARAMS ((const char *));
diff --git a/gcc/predict.c b/gcc/predict.c
index 63819e3276b..14dfc7dc8bf 100644
--- a/gcc/predict.c
+++ b/gcc/predict.c
@@ -606,8 +606,11 @@ typedef struct block_info_def
/* To keep queue of basic blocks to process. */
basic_block next;
- /* True if block already converted. */
- int visited:1;
+ /* True if block needs to be visited in prop_freqency. */
+ int tovisit:1;
+
+ /* Number of predecesors we need to visit first. */
+ int npredecesors;
} *block_info;
/* Similar information for edges. */
@@ -634,6 +637,27 @@ propagate_freq (head)
basic_block last = bb;
edge e;
basic_block nextbb;
+ int n;
+
+ /* For each basic block we need to visit count number of his predecesors
+ we need to visit first. */
+ for (n = 0; n < n_basic_blocks; n++)
+ {
+ basic_block bb = BASIC_BLOCK (n);
+ if (BLOCK_INFO (bb)->tovisit)
+ {
+ int count = 0;
+ for (e = bb->pred; e; e = e->pred_next)
+ if (BLOCK_INFO (e->src)->tovisit && !(e->flags & EDGE_DFS_BACK))
+ count++;
+ else if (BLOCK_INFO (e->src)->tovisit
+ && rtl_dump_file && !EDGE_INFO (e)->back_edge)
+ fprintf (rtl_dump_file,
+ "Irreducible region hit, ignoring edge to %i->%i\n",
+ e->src->index, bb->index);
+ BLOCK_INFO (bb)->npredecesors = count;
+ }
+ }
BLOCK_INFO (head)->frequency = 1;
for (; bb; bb = nextbb)
@@ -646,31 +670,16 @@ propagate_freq (head)
/* Compute frequency of basic block. */
if (bb != head)
{
+#ifdef ENABLE_CHECKING
for (e = bb->pred; e; e = e->pred_next)
- if (!BLOCK_INFO (e->src)->visited && !(e->flags & EDGE_DFS_BACK))
- break;
-
- /* We haven't proceeded all predecessors of edge e yet. */
- if (e)
- {
- if (!nextbb)
- nextbb = e->dest;
- else
- BLOCK_INFO (last)->next = e->dest;
- last = e->dest;
- continue;
- }
- if (rtl_dump_file)
- for (e = bb->pred; e; e = e->pred_next)
- if (!BLOCK_INFO (e->src)->visited && !EDGE_INFO (e)->back_edge)
- fprintf (rtl_dump_file,
- "Irreducible region hit, ignoring edge to %i->%i\n",
- e->src->index, bb->index);
+ if (BLOCK_INFO (e->src)->tovisit && !(e->flags & EDGE_DFS_BACK))
+ abort ();
+#endif
for (e = bb->pred; e; e = e->pred_next)
if (EDGE_INFO (e)->back_edge)
cyclic_probability += EDGE_INFO (e)->back_edge_prob;
- else if (BLOCK_INFO (e->src)->visited)
+ else if (!(e->flags & EDGE_DFS_BACK))
frequency += (e->probability
* BLOCK_INFO (e->src)->frequency /
REG_BR_PROB_BASE);
@@ -681,7 +690,7 @@ propagate_freq (head)
BLOCK_INFO (bb)->frequency = frequency / (1 - cyclic_probability);
}
- BLOCK_INFO (bb)->visited = 1;
+ BLOCK_INFO (bb)->tovisit = 0;
/* Compute back edge frequencies. */
for (e = bb->succ; e; e = e->succ_next)
@@ -692,16 +701,19 @@ propagate_freq (head)
/* Propagate to successor blocks. */
for (e = bb->succ; e; e = e->succ_next)
- if (!EDGE_INFO (e)->back_edge
- && !BLOCK_INFO (e->dest)->visited
- && !BLOCK_INFO (e->dest)->next && e->dest != last)
+ if (!(e->flags & EDGE_DFS_BACK)
+ && BLOCK_INFO (e->dest)->npredecesors)
{
- if (!nextbb)
- nextbb = e->dest;
- else
- BLOCK_INFO (last)->next = e->dest;
- last = e->dest;
- }
+ BLOCK_INFO (e->dest)->npredecesors--;
+ if (!BLOCK_INFO (e->dest)->npredecesors)
+ {
+ if (!nextbb)
+ nextbb = e->dest;
+ else
+ BLOCK_INFO (last)->next = e->dest;
+ last = e->dest;
+ }
+ }
}
}
@@ -739,8 +751,8 @@ estimate_loops_at_level (first_loop)
for (l = loop->shared ? first_loop : loop; l != loop->next; l = l->next)
if (loop->header == l->header)
EXECUTE_IF_SET_IN_SBITMAP (l->nodes, 0, n,
- BLOCK_INFO (BASIC_BLOCK (n))->visited =
- 0);
+ BLOCK_INFO (BASIC_BLOCK (n))->tovisit = 1
+ );
propagate_freq (loop->header);
}
}
@@ -848,7 +860,7 @@ estimate_bb_frequencies (loops)
else
bb = BASIC_BLOCK (i);
bb->aux = bi + i + 2;
- BLOCK_INFO (bb)->visited = 1;
+ BLOCK_INFO (bb)->tovisit = 0;
for (e = bb->succ; e; e = e->succ_next)
{
e->aux = ei + edgenum, edgenum++;
@@ -862,9 +874,9 @@ estimate_bb_frequencies (loops)
/* Now fake loop around whole function to finalize probabilities. */
for (i = 0; i < n_basic_blocks; i++)
- BLOCK_INFO (BASIC_BLOCK (i))->visited = 0;
- BLOCK_INFO (ENTRY_BLOCK_PTR)->visited = 0;
- BLOCK_INFO (EXIT_BLOCK_PTR)->visited = 0;
+ BLOCK_INFO (BASIC_BLOCK (i))->tovisit = 1;
+ BLOCK_INFO (ENTRY_BLOCK_PTR)->tovisit = 1;
+ BLOCK_INFO (EXIT_BLOCK_PTR)->tovisit = 1;
propagate_freq (ENTRY_BLOCK_PTR);
for (i = 0; i < n_basic_blocks; i++)
diff --git a/gcc/toplev.c b/gcc/toplev.c
index 8f2e8f571c9..4898926d6f4 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -3608,6 +3608,7 @@ rest_of_compilation (decl)
close_dump_file (DFI_bbro, print_rtl_with_bb, insns);
timevar_pop (TV_REORDER_BLOCKS);
}
+ compute_alignments ();
/* If a machine dependent reorganization is needed, call it. */
#ifdef MACHINE_DEPENDENT_REORG