summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/ChangeLog20
-rw-r--r--gcc/ddg.c5
-rw-r--r--gcc/df-problems.c138
-rw-r--r--gcc/df.h59
-rw-r--r--gcc/loop-invariant.c1
-rw-r--r--gcc/loop-iv.c8
6 files changed, 160 insertions, 71 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 6340525846b..28a4a549973 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,23 @@
+2008-01-19 Kenneth Zadeck <zadeck@naturalbridge.com>
+
+ PR rtl-optimization/26854
+ PR rtl-optimization/34400
+ * ddg.c (create_ddg_dep_from_intra_loop_link): Do not use
+ DF_RD->gen.
+ * df.h (df_changeable_flags.DF_RD_NO_TRIM): New.
+ (df_rd_bb_info.expanded_lr_out): New.
+ * loop_invariant.c (find_defs): Added DF_RD_NO_TRIM flag.
+ * loop_iv.c (iv_analysis_loop_init): Ditto.
+ * df-problems.c (df_rd_free_bb_info, df_rd_alloc, df_rd_confluence_n,
+ df_rd_bb_local_compute, df_rd_transfer_function, df_rd_free):
+ Added code to allocate, initialize or free expanded_lr_out.
+ (df_rd_bb_local_compute_process_def): Restructured to make
+ more understandable.
+ (df_rd_confluence_n): Add code to do nothing with fake edges and
+ code to no apply invalidate_by_call sets if the sets are being trimmed.
+ (df_lr_local_finalize): Renamed to df_lr_finalize.
+ (df_live_local_finalize): Renamed to df_live_finalize.
+
2008-01-20 Richard Sandiford <rsandifo@nildram.co.uk>
PR target/34831
diff --git a/gcc/ddg.c b/gcc/ddg.c
index 14b18745823..c67b6c24869 100644
--- a/gcc/ddg.c
+++ b/gcc/ddg.c
@@ -184,12 +184,13 @@ create_ddg_dep_from_intra_loop_link (ddg_ptr g, ddg_node_ptr src_node,
{
int regno = REGNO (SET_DEST (set));
struct df_ref *first_def;
- struct df_rd_bb_info *bb_info = DF_RD_BB_INFO (g->bb);
+ struct df_ref *last_def;
first_def = df_bb_regno_first_def_find (g->bb, regno);
gcc_assert (first_def);
- if (bitmap_bit_p (bb_info->gen, first_def->id))
+ last_def = df_bb_regno_last_def_find (g->bb, regno);
+ if (first_def == last_def)
return;
}
}
diff --git a/gcc/df-problems.c b/gcc/df-problems.c
index ff5a4ad03da..f45c6d2ff36 100644
--- a/gcc/df-problems.c
+++ b/gcc/df-problems.c
@@ -245,6 +245,8 @@ df_rd_free_bb_info (basic_block bb ATTRIBUTE_UNUSED,
struct df_rd_bb_info *bb_info = (struct df_rd_bb_info *) vbb_info;
if (bb_info)
{
+ if (bb_info->expanded_lr_out)
+ BITMAP_FREE (bb_info->expanded_lr_out);
BITMAP_FREE (bb_info->kill);
BITMAP_FREE (bb_info->sparse_kill);
BITMAP_FREE (bb_info->gen);
@@ -298,6 +300,8 @@ df_rd_alloc (bitmap all_blocks)
struct df_rd_bb_info *bb_info = df_rd_get_bb_info (bb_index);
if (bb_info)
{
+ if (bb_info->expanded_lr_out)
+ bitmap_clear (bb_info->expanded_lr_out);
bitmap_clear (bb_info->kill);
bitmap_clear (bb_info->sparse_kill);
bitmap_clear (bb_info->gen);
@@ -306,6 +310,10 @@ df_rd_alloc (bitmap all_blocks)
{
bb_info = (struct df_rd_bb_info *) pool_alloc (df_rd->block_pool);
df_rd_set_bb_info (bb_index, bb_info);
+ if (df->changeable_flags & DF_RD_NO_TRIM)
+ bb_info->expanded_lr_out = NULL;
+ else
+ bb_info->expanded_lr_out = BITMAP_ALLOC (&problem_data->rd_bitmaps);
bb_info->kill = BITMAP_ALLOC (&problem_data->rd_bitmaps);
bb_info->sparse_kill = BITMAP_ALLOC (&problem_data->rd_bitmaps);
bb_info->gen = BITMAP_ALLOC (&problem_data->rd_bitmaps);
@@ -320,56 +328,53 @@ df_rd_alloc (bitmap all_blocks)
/* Process a list of DEFs for df_rd_bb_local_compute. */
static void
-df_rd_bb_local_compute_process_def (struct df_rd_bb_info *bb_info,
+df_rd_bb_local_compute_process_def (struct df_rd_bb_info *bb_info,
struct df_ref **def_rec,
enum df_ref_flags top_flag)
{
- while (*def_rec)
+ for (; *def_rec; def_rec++)
{
struct df_ref *def = *def_rec;
- if (top_flag == (DF_REF_FLAGS (def) & DF_REF_AT_TOP))
+ unsigned int regno = DF_REF_REGNO (def);
+
+ /* This makes sure we do the artificial defs in the right order
+ since they are all in the same list. */
+ if (top_flag != (DF_REF_FLAGS (def) & DF_REF_AT_TOP))
+ continue;
+
+ /* Skip over the hard regs if we do not care about them. */
+ if ((df->changeable_flags & DF_NO_HARD_REGS) &&
+ (regno < FIRST_PSEUDO_REGISTER))
+ continue;
+
+ /* Only the last def(s) for a regno in the block has any
+ effect. */
+ if (bitmap_bit_p (seen_in_block, regno))
+ continue;
+
+ /* The first def for regno in insn gets to knock out the
+ defs from other instructions. */
+ if ((!bitmap_bit_p (seen_in_insn, regno))
+ /* If the def is to only part of the reg, it does
+ not kill the other defs that reach here. */
+ && (!(DF_REF_FLAGS (def) &
+ (DF_REF_PARTIAL | DF_REF_CONDITIONAL | DF_REF_MAY_CLOBBER))))
{
- unsigned int regno = DF_REF_REGNO (def);
unsigned int begin = DF_DEFS_BEGIN (regno);
unsigned int n_defs = DF_DEFS_COUNT (regno);
-
- if ((!(df->changeable_flags & DF_NO_HARD_REGS))
- || (regno >= FIRST_PSEUDO_REGISTER))
- {
- /* Only the last def(s) for a regno in the block has any
- effect. */
- if (!bitmap_bit_p (seen_in_block, regno))
- {
- /* The first def for regno in insn gets to knock out the
- defs from other instructions. */
- if ((!bitmap_bit_p (seen_in_insn, regno))
- /* If the def is to only part of the reg, it does
- not kill the other defs that reach here. */
- && (!(DF_REF_FLAGS (def) &
- (DF_REF_PARTIAL | DF_REF_CONDITIONAL | DF_REF_MAY_CLOBBER))))
- {
- if (n_defs > DF_SPARSE_THRESHOLD)
- {
- bitmap_set_bit (bb_info->sparse_kill, regno);
- bitmap_clear_range(bb_info->gen, begin, n_defs);
- }
- else
- {
- bitmap_set_range (bb_info->kill, begin, n_defs);
- bitmap_clear_range (bb_info->gen, begin, n_defs);
- }
- }
-
- bitmap_set_bit (seen_in_insn, regno);
- /* All defs for regno in the instruction may be put into
- the gen set. */
- if (!(DF_REF_FLAGS (def)
- & (DF_REF_MUST_CLOBBER | DF_REF_MAY_CLOBBER)))
- bitmap_set_bit (bb_info->gen, DF_REF_ID (def));
- }
- }
+ if (n_defs > DF_SPARSE_THRESHOLD)
+ bitmap_set_bit (bb_info->sparse_kill, regno);
+ else
+ bitmap_set_range (bb_info->kill, begin, n_defs);
+ bitmap_clear_range(bb_info->gen, begin, n_defs);
}
- def_rec++;
+
+ bitmap_set_bit (seen_in_insn, regno);
+ /* All defs for regno in the instruction may be put into
+ the gen set. */
+ if (!(DF_REF_FLAGS (def)
+ & (DF_REF_MUST_CLOBBER | DF_REF_MAY_CLOBBER)))
+ bitmap_set_bit (bb_info->gen, DF_REF_ID (def));
}
}
@@ -380,14 +385,28 @@ df_rd_bb_local_compute (unsigned int bb_index)
{
basic_block bb = BASIC_BLOCK (bb_index);
struct df_rd_bb_info *bb_info = df_rd_get_bb_info (bb_index);
+ struct df_lr_bb_info *lr_bb_info = df_lr_get_bb_info (bb_index);
rtx insn;
bitmap_clear (seen_in_block);
bitmap_clear (seen_in_insn);
+ if (!(df->changeable_flags & DF_RD_NO_TRIM))
+ {
+ unsigned int regno;
+ bitmap_iterator bi;
+ int first_reg = (df->changeable_flags & DF_NO_HARD_REGS) ? FIRST_PSEUDO_REGISTER : 0;
+ EXECUTE_IF_SET_IN_BITMAP (lr_bb_info->out, first_reg, regno, bi)
+ {
+ unsigned int begin = DF_DEFS_BEGIN (regno);
+ unsigned int n_defs = DF_DEFS_COUNT (regno);
+ bitmap_set_range (bb_info->expanded_lr_out, begin, n_defs);
+ }
+ }
+
/* Artificials are only hard regs. */
if (!(df->changeable_flags & DF_NO_HARD_REGS))
- df_rd_bb_local_compute_process_def (bb_info,
+ df_rd_bb_local_compute_process_def (bb_info,
df_get_artificial_defs (bb_index),
0);
@@ -482,7 +501,13 @@ df_rd_confluence_n (edge e)
bitmap op1 = df_rd_get_bb_info (e->dest->index)->in;
bitmap op2 = df_rd_get_bb_info (e->src->index)->out;
- if (e->flags & EDGE_EH)
+ if (e->flags & EDGE_FAKE)
+ return;
+
+ /* If we are trimming the solution, the invalidated_by_call code in
+ the lr problem makes this unnecessary. However, if we do not
+ trim, we must take this into account. */
+ if ((df->changeable_flags & DF_RD_NO_TRIM) && e->flags & EDGE_EH)
{
struct df_rd_problem_data *problem_data
= (struct df_rd_problem_data *) df_rd->problem_data;
@@ -490,7 +515,7 @@ df_rd_confluence_n (edge e)
bitmap dense_invalidated = problem_data->dense_invalidated_by_call;
bitmap_iterator bi;
unsigned int regno;
- bitmap tmp = BITMAP_ALLOC (&df_bitmap_obstack);
+ bitmap tmp = BITMAP_ALLOC (&problem_data->rd_bitmaps);
bitmap_copy (tmp, op2);
bitmap_and_compl_into (tmp, dense_invalidated);
@@ -522,13 +547,13 @@ df_rd_transfer_function (int bb_index)
bitmap gen = bb_info->gen;
bitmap kill = bb_info->kill;
bitmap sparse_kill = bb_info->sparse_kill;
+ bool changed = false;
- if (bitmap_empty_p (sparse_kill))
- return bitmap_ior_and_compl (out, gen, in, kill);
+ if ((df->changeable_flags & DF_RD_NO_TRIM) && bitmap_empty_p (sparse_kill))
+ changed = bitmap_ior_and_compl (out, gen, in, kill);
else
{
struct df_rd_problem_data *problem_data;
- bool changed = false;
bitmap tmp;
/* Note that TMP is _not_ a temporary bitmap if we end up replacing
@@ -545,6 +570,8 @@ df_rd_transfer_function (int bb_index)
}
bitmap_and_compl_into (tmp, kill);
bitmap_ior_into (tmp, gen);
+ if (!(df->changeable_flags & DF_RD_NO_TRIM))
+ bitmap_and_into (tmp, bb_info->expanded_lr_out);
changed = !bitmap_equal_p (tmp, out);
if (changed)
{
@@ -552,9 +579,10 @@ df_rd_transfer_function (int bb_index)
bb_info->out = tmp;
}
else
- BITMAP_FREE (tmp);
- return changed;
+ BITMAP_FREE (tmp);
}
+
+ return changed;
}
@@ -574,6 +602,8 @@ df_rd_free (void)
struct df_rd_bb_info *bb_info = df_rd_get_bb_info (i);
if (bb_info)
{
+ if (bb_info->expanded_lr_out)
+ BITMAP_FREE (bb_info->expanded_lr_out);
BITMAP_FREE (bb_info->kill);
BITMAP_FREE (bb_info->sparse_kill);
BITMAP_FREE (bb_info->gen);
@@ -1015,7 +1045,7 @@ df_lr_transfer_function (int bb_index)
/* Run the fast dce as a side effect of building LR. */
static void
-df_lr_local_finalize (bitmap all_blocks ATTRIBUTE_UNUSED)
+df_lr_finalize (bitmap all_blocks ATTRIBUTE_UNUSED)
{
if (df->changeable_flags & DF_LR_RUN_DCE)
{
@@ -1161,7 +1191,7 @@ df_lr_verify_solution_end (void)
if (df_lr->solutions_dirty)
/* Do not check if the solution is still dirty. See the comment
- in df_lr_local_finalize for details. */
+ in df_lr_finalize for details. */
df_lr->solutions_dirty = false;
else
FOR_ALL_BB (bb)
@@ -1204,7 +1234,7 @@ static struct df_problem problem_LR =
df_lr_confluence_0, /* Confluence operator 0. */
df_lr_confluence_n, /* Confluence operator n. */
df_lr_transfer_function, /* Transfer function. */
- df_lr_local_finalize, /* Finalize function. */
+ df_lr_finalize, /* Finalize function. */
df_lr_free, /* Free all of the problem information. */
NULL, /* Remove this problem from the stack of dataflow problems. */
NULL, /* Debugging. */
@@ -1560,7 +1590,7 @@ df_live_transfer_function (int bb_index)
/* And the LR info with the must-initialized registers, to produce the LIVE info. */
static void
-df_live_local_finalize (bitmap all_blocks)
+df_live_finalize (bitmap all_blocks)
{
if (df_live->solutions_dirty)
@@ -1751,7 +1781,7 @@ static struct df_problem problem_LIVE =
NULL, /* Confluence operator 0. */
df_live_confluence_n, /* Confluence operator n. */
df_live_transfer_function, /* Transfer function. */
- df_live_local_finalize, /* Finalize function. */
+ df_live_finalize, /* Finalize function. */
df_live_free, /* Free all of the problem information. */
df_live_free, /* Remove this problem from the stack of dataflow problems. */
NULL, /* Debugging. */
diff --git a/gcc/df.h b/gcc/df.h
index 04bac49bf0a..e5c6870cac3 100644
--- a/gcc/df.h
+++ b/gcc/df.h
@@ -402,22 +402,30 @@ enum df_changeable_flags
{
/* Scanning flags. */
/* Flag to control the running of dce as a side effect of building LR. */
- DF_LR_RUN_DCE = 1, /* Run DCE. */
- DF_NO_HARD_REGS = 2, /* Skip hard registers in RD and CHAIN Building. */
- DF_EQ_NOTES = 4, /* Build chains with uses present in EQUIV/EQUAL notes. */
- DF_NO_REGS_EVER_LIVE = 8, /* Do not compute the regs_ever_live. */
+ DF_LR_RUN_DCE = 1 << 0, /* Run DCE. */
+ DF_NO_HARD_REGS = 1 << 1, /* Skip hard registers in RD and CHAIN Building. */
+
+ /* Do not trim the solution using the LR result. This can make the
+ solution take much longer and take more memory. This is
+ necessary for the loop optimizations, but has a very small time
+ and space penalty because the loop optimizations process only a
+ single loop at a time. Any pass that looks at the entire
+ function should not set this flag. */
+ DF_RD_NO_TRIM = 1 << 2,
+ DF_EQ_NOTES = 1 << 3, /* Build chains with uses present in EQUIV/EQUAL notes. */
+ DF_NO_REGS_EVER_LIVE = 1 << 4, /* Do not compute the regs_ever_live. */
/* Cause df_insn_rescan df_notes_rescan and df_insn_delete, to
return immediately. This is used by passes that know how to update
the scanning them selves. */
- DF_NO_INSN_RESCAN = 16,
+ DF_NO_INSN_RESCAN = 1 << 5,
/* Cause df_insn_rescan df_notes_rescan and df_insn_delete, to
return after marking the insn for later processing. This allows all
rescans to be batched. */
- DF_DEFER_INSN_RESCAN = 32,
+ DF_DEFER_INSN_RESCAN = 1 << 6,
- DF_VERIFY_SCHEDULED = 64
+ DF_VERIFY_SCHEDULED = 1 << 7
};
/* Two of these structures are inline in df, one for the uses and one
@@ -705,16 +713,43 @@ struct df_scan_bb_info
/* Reaching definitions. All bitmaps are indexed by the id field of
- the ref except sparse_kill (see above). */
+ the ref except sparse_kill which is indexed by regno. */
struct df_rd_bb_info
{
- /* Local sets to describe the basic blocks. See the note in the RU
- datastructures for kill and sparse_kill. */
+ /* Local sets to describe the basic blocks. */
bitmap kill;
bitmap sparse_kill;
- bitmap gen; /* The set of defs generated in this block. */
- /* The results of the dataflow problem. */
+ /* Expanded version of the DF_LT->out bitmap to match the positions
+ of gen, in and out here. Only allocated if DF_RD_NO_TRIM is
+ false. */
+ bitmap expanded_lr_out;
+
+ /* The set of defs generated in this block. This is not set unless
+ the def reaches the end of the block. */
+ bitmap gen;
+
+ /* The results of the dataflow problem.
+
+ If DF_RD_NO_TRIM is not set, these sets are SOMEWHAT trimmed by
+ the output of the DF_LR problem. The out set is precisely
+ trimmed during propagation which means that the result is also
+ trimmed when the propagation terminates. The in set is not
+ explicitly trimmed, because this is expensive (adding about 5% to
+ the cost of a bootstrap). However since the out sets are trimmed
+ and the in sets are built from the out of the pred, the in set is
+ MOSTLY trimmed.
+
+ The counter case happens at a branch where the variable V is in
+ DF_LR->in the true branch but not the false branch. If V is
+ defined before the branch, RD will propagate that into the
+ DF_RD_in sets of both branches. When the block is processed, the
+ DF_RD->out set will have V trimmed out of it but it will still be
+ left in DF_RD->in.
+
+ If this not a problem for the current optimizers since they were
+ designed before any trimming was available. This can be fixed by
+ checking the DF_LR->in set directly. */
bitmap in; /* At the top of the block. */
bitmap out; /* At the bottom of the block. */
};
diff --git a/gcc/loop-invariant.c b/gcc/loop-invariant.c
index ba1f2888b00..f056a776958 100644
--- a/gcc/loop-invariant.c
+++ b/gcc/loop-invariant.c
@@ -639,6 +639,7 @@ find_defs (struct loop *loop, basic_block *body)
df_remove_problem (df_chain);
df_process_deferred_rescans ();
df_chain_add_problem (DF_UD_CHAIN);
+ df_set_flags (DF_RD_NO_TRIM);
df_set_blocks (blocks);
df_analyze ();
diff --git a/gcc/loop-iv.c b/gcc/loop-iv.c
index 3e1b5db5af7..824629c0f6c 100644
--- a/gcc/loop-iv.c
+++ b/gcc/loop-iv.c
@@ -22,9 +22,10 @@ along with GCC; see the file COPYING3. If not see
doloop optimization and branch prediction. The iv information is computed
on demand.
- Induction variable is analyzed by walking the use-def chains. When a biv
- is found, it is cached in the bivs hash table. When register is proved
- to be a giv, its description is stored to DF_REF_DATA of the def reference.
+ Induction variables are analyzed by walking the use-def chains. When
+ a basic induction variable (biv) is found, it is cached in the bivs
+ hash table. When register is proved to be a biv, its description
+ is stored to DF_REF_DATA of the def reference.
The analysis works always with one loop -- you must call
iv_analysis_loop_init (loop) for it. All the other functions then work with
@@ -277,6 +278,7 @@ iv_analysis_loop_init (struct loop *loop)
df_remove_problem (df_chain);
df_process_deferred_rescans ();
df_chain_add_problem (DF_UD_CHAIN);
+ df_set_flags (DF_RD_NO_TRIM);
df_set_blocks (blocks);
df_analyze ();
if (dump_file)