diff options
-rw-r--r-- | gcc/ChangeLog | 20 | ||||
-rw-r--r-- | gcc/ddg.c | 5 | ||||
-rw-r--r-- | gcc/df-problems.c | 138 | ||||
-rw-r--r-- | gcc/df.h | 59 | ||||
-rw-r--r-- | gcc/loop-invariant.c | 1 | ||||
-rw-r--r-- | gcc/loop-iv.c | 8 |
6 files changed, 160 insertions, 71 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6340525846b..28a4a549973 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,23 @@ +2008-01-19 Kenneth Zadeck <zadeck@naturalbridge.com> + + PR rtl-optimization/26854 + PR rtl-optimization/34400 + * ddg.c (create_ddg_dep_from_intra_loop_link): Do not use + DF_RD->gen. + * df.h (df_changeable_flags.DF_RD_NO_TRIM): New. + (df_rd_bb_info.expanded_lr_out): New. + * loop_invariant.c (find_defs): Added DF_RD_NO_TRIM flag. + * loop_iv.c (iv_analysis_loop_init): Ditto. + * df-problems.c (df_rd_free_bb_info, df_rd_alloc, df_rd_confluence_n, + df_rd_bb_local_compute, df_rd_transfer_function, df_rd_free): + Added code to allocate, initialize or free expanded_lr_out. + (df_rd_bb_local_compute_process_def): Restructured to make + more understandable. + (df_rd_confluence_n): Add code to do nothing with fake edges and + code to no apply invalidate_by_call sets if the sets are being trimmed. + (df_lr_local_finalize): Renamed to df_lr_finalize. + (df_live_local_finalize): Renamed to df_live_finalize. + 2008-01-20 Richard Sandiford <rsandifo@nildram.co.uk> PR target/34831 diff --git a/gcc/ddg.c b/gcc/ddg.c index 14b18745823..c67b6c24869 100644 --- a/gcc/ddg.c +++ b/gcc/ddg.c @@ -184,12 +184,13 @@ create_ddg_dep_from_intra_loop_link (ddg_ptr g, ddg_node_ptr src_node, { int regno = REGNO (SET_DEST (set)); struct df_ref *first_def; - struct df_rd_bb_info *bb_info = DF_RD_BB_INFO (g->bb); + struct df_ref *last_def; first_def = df_bb_regno_first_def_find (g->bb, regno); gcc_assert (first_def); - if (bitmap_bit_p (bb_info->gen, first_def->id)) + last_def = df_bb_regno_last_def_find (g->bb, regno); + if (first_def == last_def) return; } } diff --git a/gcc/df-problems.c b/gcc/df-problems.c index ff5a4ad03da..f45c6d2ff36 100644 --- a/gcc/df-problems.c +++ b/gcc/df-problems.c @@ -245,6 +245,8 @@ df_rd_free_bb_info (basic_block bb ATTRIBUTE_UNUSED, struct df_rd_bb_info *bb_info = (struct df_rd_bb_info *) vbb_info; if (bb_info) { + if (bb_info->expanded_lr_out) + BITMAP_FREE (bb_info->expanded_lr_out); BITMAP_FREE (bb_info->kill); BITMAP_FREE (bb_info->sparse_kill); BITMAP_FREE (bb_info->gen); @@ -298,6 +300,8 @@ df_rd_alloc (bitmap all_blocks) struct df_rd_bb_info *bb_info = df_rd_get_bb_info (bb_index); if (bb_info) { + if (bb_info->expanded_lr_out) + bitmap_clear (bb_info->expanded_lr_out); bitmap_clear (bb_info->kill); bitmap_clear (bb_info->sparse_kill); bitmap_clear (bb_info->gen); @@ -306,6 +310,10 @@ df_rd_alloc (bitmap all_blocks) { bb_info = (struct df_rd_bb_info *) pool_alloc (df_rd->block_pool); df_rd_set_bb_info (bb_index, bb_info); + if (df->changeable_flags & DF_RD_NO_TRIM) + bb_info->expanded_lr_out = NULL; + else + bb_info->expanded_lr_out = BITMAP_ALLOC (&problem_data->rd_bitmaps); bb_info->kill = BITMAP_ALLOC (&problem_data->rd_bitmaps); bb_info->sparse_kill = BITMAP_ALLOC (&problem_data->rd_bitmaps); bb_info->gen = BITMAP_ALLOC (&problem_data->rd_bitmaps); @@ -320,56 +328,53 @@ df_rd_alloc (bitmap all_blocks) /* Process a list of DEFs for df_rd_bb_local_compute. */ static void -df_rd_bb_local_compute_process_def (struct df_rd_bb_info *bb_info, +df_rd_bb_local_compute_process_def (struct df_rd_bb_info *bb_info, struct df_ref **def_rec, enum df_ref_flags top_flag) { - while (*def_rec) + for (; *def_rec; def_rec++) { struct df_ref *def = *def_rec; - if (top_flag == (DF_REF_FLAGS (def) & DF_REF_AT_TOP)) + unsigned int regno = DF_REF_REGNO (def); + + /* This makes sure we do the artificial defs in the right order + since they are all in the same list. */ + if (top_flag != (DF_REF_FLAGS (def) & DF_REF_AT_TOP)) + continue; + + /* Skip over the hard regs if we do not care about them. */ + if ((df->changeable_flags & DF_NO_HARD_REGS) && + (regno < FIRST_PSEUDO_REGISTER)) + continue; + + /* Only the last def(s) for a regno in the block has any + effect. */ + if (bitmap_bit_p (seen_in_block, regno)) + continue; + + /* The first def for regno in insn gets to knock out the + defs from other instructions. */ + if ((!bitmap_bit_p (seen_in_insn, regno)) + /* If the def is to only part of the reg, it does + not kill the other defs that reach here. */ + && (!(DF_REF_FLAGS (def) & + (DF_REF_PARTIAL | DF_REF_CONDITIONAL | DF_REF_MAY_CLOBBER)))) { - unsigned int regno = DF_REF_REGNO (def); unsigned int begin = DF_DEFS_BEGIN (regno); unsigned int n_defs = DF_DEFS_COUNT (regno); - - if ((!(df->changeable_flags & DF_NO_HARD_REGS)) - || (regno >= FIRST_PSEUDO_REGISTER)) - { - /* Only the last def(s) for a regno in the block has any - effect. */ - if (!bitmap_bit_p (seen_in_block, regno)) - { - /* The first def for regno in insn gets to knock out the - defs from other instructions. */ - if ((!bitmap_bit_p (seen_in_insn, regno)) - /* If the def is to only part of the reg, it does - not kill the other defs that reach here. */ - && (!(DF_REF_FLAGS (def) & - (DF_REF_PARTIAL | DF_REF_CONDITIONAL | DF_REF_MAY_CLOBBER)))) - { - if (n_defs > DF_SPARSE_THRESHOLD) - { - bitmap_set_bit (bb_info->sparse_kill, regno); - bitmap_clear_range(bb_info->gen, begin, n_defs); - } - else - { - bitmap_set_range (bb_info->kill, begin, n_defs); - bitmap_clear_range (bb_info->gen, begin, n_defs); - } - } - - bitmap_set_bit (seen_in_insn, regno); - /* All defs for regno in the instruction may be put into - the gen set. */ - if (!(DF_REF_FLAGS (def) - & (DF_REF_MUST_CLOBBER | DF_REF_MAY_CLOBBER))) - bitmap_set_bit (bb_info->gen, DF_REF_ID (def)); - } - } + if (n_defs > DF_SPARSE_THRESHOLD) + bitmap_set_bit (bb_info->sparse_kill, regno); + else + bitmap_set_range (bb_info->kill, begin, n_defs); + bitmap_clear_range(bb_info->gen, begin, n_defs); } - def_rec++; + + bitmap_set_bit (seen_in_insn, regno); + /* All defs for regno in the instruction may be put into + the gen set. */ + if (!(DF_REF_FLAGS (def) + & (DF_REF_MUST_CLOBBER | DF_REF_MAY_CLOBBER))) + bitmap_set_bit (bb_info->gen, DF_REF_ID (def)); } } @@ -380,14 +385,28 @@ df_rd_bb_local_compute (unsigned int bb_index) { basic_block bb = BASIC_BLOCK (bb_index); struct df_rd_bb_info *bb_info = df_rd_get_bb_info (bb_index); + struct df_lr_bb_info *lr_bb_info = df_lr_get_bb_info (bb_index); rtx insn; bitmap_clear (seen_in_block); bitmap_clear (seen_in_insn); + if (!(df->changeable_flags & DF_RD_NO_TRIM)) + { + unsigned int regno; + bitmap_iterator bi; + int first_reg = (df->changeable_flags & DF_NO_HARD_REGS) ? FIRST_PSEUDO_REGISTER : 0; + EXECUTE_IF_SET_IN_BITMAP (lr_bb_info->out, first_reg, regno, bi) + { + unsigned int begin = DF_DEFS_BEGIN (regno); + unsigned int n_defs = DF_DEFS_COUNT (regno); + bitmap_set_range (bb_info->expanded_lr_out, begin, n_defs); + } + } + /* Artificials are only hard regs. */ if (!(df->changeable_flags & DF_NO_HARD_REGS)) - df_rd_bb_local_compute_process_def (bb_info, + df_rd_bb_local_compute_process_def (bb_info, df_get_artificial_defs (bb_index), 0); @@ -482,7 +501,13 @@ df_rd_confluence_n (edge e) bitmap op1 = df_rd_get_bb_info (e->dest->index)->in; bitmap op2 = df_rd_get_bb_info (e->src->index)->out; - if (e->flags & EDGE_EH) + if (e->flags & EDGE_FAKE) + return; + + /* If we are trimming the solution, the invalidated_by_call code in + the lr problem makes this unnecessary. However, if we do not + trim, we must take this into account. */ + if ((df->changeable_flags & DF_RD_NO_TRIM) && e->flags & EDGE_EH) { struct df_rd_problem_data *problem_data = (struct df_rd_problem_data *) df_rd->problem_data; @@ -490,7 +515,7 @@ df_rd_confluence_n (edge e) bitmap dense_invalidated = problem_data->dense_invalidated_by_call; bitmap_iterator bi; unsigned int regno; - bitmap tmp = BITMAP_ALLOC (&df_bitmap_obstack); + bitmap tmp = BITMAP_ALLOC (&problem_data->rd_bitmaps); bitmap_copy (tmp, op2); bitmap_and_compl_into (tmp, dense_invalidated); @@ -522,13 +547,13 @@ df_rd_transfer_function (int bb_index) bitmap gen = bb_info->gen; bitmap kill = bb_info->kill; bitmap sparse_kill = bb_info->sparse_kill; + bool changed = false; - if (bitmap_empty_p (sparse_kill)) - return bitmap_ior_and_compl (out, gen, in, kill); + if ((df->changeable_flags & DF_RD_NO_TRIM) && bitmap_empty_p (sparse_kill)) + changed = bitmap_ior_and_compl (out, gen, in, kill); else { struct df_rd_problem_data *problem_data; - bool changed = false; bitmap tmp; /* Note that TMP is _not_ a temporary bitmap if we end up replacing @@ -545,6 +570,8 @@ df_rd_transfer_function (int bb_index) } bitmap_and_compl_into (tmp, kill); bitmap_ior_into (tmp, gen); + if (!(df->changeable_flags & DF_RD_NO_TRIM)) + bitmap_and_into (tmp, bb_info->expanded_lr_out); changed = !bitmap_equal_p (tmp, out); if (changed) { @@ -552,9 +579,10 @@ df_rd_transfer_function (int bb_index) bb_info->out = tmp; } else - BITMAP_FREE (tmp); - return changed; + BITMAP_FREE (tmp); } + + return changed; } @@ -574,6 +602,8 @@ df_rd_free (void) struct df_rd_bb_info *bb_info = df_rd_get_bb_info (i); if (bb_info) { + if (bb_info->expanded_lr_out) + BITMAP_FREE (bb_info->expanded_lr_out); BITMAP_FREE (bb_info->kill); BITMAP_FREE (bb_info->sparse_kill); BITMAP_FREE (bb_info->gen); @@ -1015,7 +1045,7 @@ df_lr_transfer_function (int bb_index) /* Run the fast dce as a side effect of building LR. */ static void -df_lr_local_finalize (bitmap all_blocks ATTRIBUTE_UNUSED) +df_lr_finalize (bitmap all_blocks ATTRIBUTE_UNUSED) { if (df->changeable_flags & DF_LR_RUN_DCE) { @@ -1161,7 +1191,7 @@ df_lr_verify_solution_end (void) if (df_lr->solutions_dirty) /* Do not check if the solution is still dirty. See the comment - in df_lr_local_finalize for details. */ + in df_lr_finalize for details. */ df_lr->solutions_dirty = false; else FOR_ALL_BB (bb) @@ -1204,7 +1234,7 @@ static struct df_problem problem_LR = df_lr_confluence_0, /* Confluence operator 0. */ df_lr_confluence_n, /* Confluence operator n. */ df_lr_transfer_function, /* Transfer function. */ - df_lr_local_finalize, /* Finalize function. */ + df_lr_finalize, /* Finalize function. */ df_lr_free, /* Free all of the problem information. */ NULL, /* Remove this problem from the stack of dataflow problems. */ NULL, /* Debugging. */ @@ -1560,7 +1590,7 @@ df_live_transfer_function (int bb_index) /* And the LR info with the must-initialized registers, to produce the LIVE info. */ static void -df_live_local_finalize (bitmap all_blocks) +df_live_finalize (bitmap all_blocks) { if (df_live->solutions_dirty) @@ -1751,7 +1781,7 @@ static struct df_problem problem_LIVE = NULL, /* Confluence operator 0. */ df_live_confluence_n, /* Confluence operator n. */ df_live_transfer_function, /* Transfer function. */ - df_live_local_finalize, /* Finalize function. */ + df_live_finalize, /* Finalize function. */ df_live_free, /* Free all of the problem information. */ df_live_free, /* Remove this problem from the stack of dataflow problems. */ NULL, /* Debugging. */ @@ -402,22 +402,30 @@ enum df_changeable_flags { /* Scanning flags. */ /* Flag to control the running of dce as a side effect of building LR. */ - DF_LR_RUN_DCE = 1, /* Run DCE. */ - DF_NO_HARD_REGS = 2, /* Skip hard registers in RD and CHAIN Building. */ - DF_EQ_NOTES = 4, /* Build chains with uses present in EQUIV/EQUAL notes. */ - DF_NO_REGS_EVER_LIVE = 8, /* Do not compute the regs_ever_live. */ + DF_LR_RUN_DCE = 1 << 0, /* Run DCE. */ + DF_NO_HARD_REGS = 1 << 1, /* Skip hard registers in RD and CHAIN Building. */ + + /* Do not trim the solution using the LR result. This can make the + solution take much longer and take more memory. This is + necessary for the loop optimizations, but has a very small time + and space penalty because the loop optimizations process only a + single loop at a time. Any pass that looks at the entire + function should not set this flag. */ + DF_RD_NO_TRIM = 1 << 2, + DF_EQ_NOTES = 1 << 3, /* Build chains with uses present in EQUIV/EQUAL notes. */ + DF_NO_REGS_EVER_LIVE = 1 << 4, /* Do not compute the regs_ever_live. */ /* Cause df_insn_rescan df_notes_rescan and df_insn_delete, to return immediately. This is used by passes that know how to update the scanning them selves. */ - DF_NO_INSN_RESCAN = 16, + DF_NO_INSN_RESCAN = 1 << 5, /* Cause df_insn_rescan df_notes_rescan and df_insn_delete, to return after marking the insn for later processing. This allows all rescans to be batched. */ - DF_DEFER_INSN_RESCAN = 32, + DF_DEFER_INSN_RESCAN = 1 << 6, - DF_VERIFY_SCHEDULED = 64 + DF_VERIFY_SCHEDULED = 1 << 7 }; /* Two of these structures are inline in df, one for the uses and one @@ -705,16 +713,43 @@ struct df_scan_bb_info /* Reaching definitions. All bitmaps are indexed by the id field of - the ref except sparse_kill (see above). */ + the ref except sparse_kill which is indexed by regno. */ struct df_rd_bb_info { - /* Local sets to describe the basic blocks. See the note in the RU - datastructures for kill and sparse_kill. */ + /* Local sets to describe the basic blocks. */ bitmap kill; bitmap sparse_kill; - bitmap gen; /* The set of defs generated in this block. */ - /* The results of the dataflow problem. */ + /* Expanded version of the DF_LT->out bitmap to match the positions + of gen, in and out here. Only allocated if DF_RD_NO_TRIM is + false. */ + bitmap expanded_lr_out; + + /* The set of defs generated in this block. This is not set unless + the def reaches the end of the block. */ + bitmap gen; + + /* The results of the dataflow problem. + + If DF_RD_NO_TRIM is not set, these sets are SOMEWHAT trimmed by + the output of the DF_LR problem. The out set is precisely + trimmed during propagation which means that the result is also + trimmed when the propagation terminates. The in set is not + explicitly trimmed, because this is expensive (adding about 5% to + the cost of a bootstrap). However since the out sets are trimmed + and the in sets are built from the out of the pred, the in set is + MOSTLY trimmed. + + The counter case happens at a branch where the variable V is in + DF_LR->in the true branch but not the false branch. If V is + defined before the branch, RD will propagate that into the + DF_RD_in sets of both branches. When the block is processed, the + DF_RD->out set will have V trimmed out of it but it will still be + left in DF_RD->in. + + If this not a problem for the current optimizers since they were + designed before any trimming was available. This can be fixed by + checking the DF_LR->in set directly. */ bitmap in; /* At the top of the block. */ bitmap out; /* At the bottom of the block. */ }; diff --git a/gcc/loop-invariant.c b/gcc/loop-invariant.c index ba1f2888b00..f056a776958 100644 --- a/gcc/loop-invariant.c +++ b/gcc/loop-invariant.c @@ -639,6 +639,7 @@ find_defs (struct loop *loop, basic_block *body) df_remove_problem (df_chain); df_process_deferred_rescans (); df_chain_add_problem (DF_UD_CHAIN); + df_set_flags (DF_RD_NO_TRIM); df_set_blocks (blocks); df_analyze (); diff --git a/gcc/loop-iv.c b/gcc/loop-iv.c index 3e1b5db5af7..824629c0f6c 100644 --- a/gcc/loop-iv.c +++ b/gcc/loop-iv.c @@ -22,9 +22,10 @@ along with GCC; see the file COPYING3. If not see doloop optimization and branch prediction. The iv information is computed on demand. - Induction variable is analyzed by walking the use-def chains. When a biv - is found, it is cached in the bivs hash table. When register is proved - to be a giv, its description is stored to DF_REF_DATA of the def reference. + Induction variables are analyzed by walking the use-def chains. When + a basic induction variable (biv) is found, it is cached in the bivs + hash table. When register is proved to be a biv, its description + is stored to DF_REF_DATA of the def reference. The analysis works always with one loop -- you must call iv_analysis_loop_init (loop) for it. All the other functions then work with @@ -277,6 +278,7 @@ iv_analysis_loop_init (struct loop *loop) df_remove_problem (df_chain); df_process_deferred_rescans (); df_chain_add_problem (DF_UD_CHAIN); + df_set_flags (DF_RD_NO_TRIM); df_set_blocks (blocks); df_analyze (); if (dump_file) |