author		law <law@138bc75d-0d04-0410-961f-82ee72b054a4>	1999-09-20 14:53:51 +0000
committer	law <law@138bc75d-0d04-0410-961f-82ee72b054a4>	1999-09-20 14:53:51 +0000
commit		6627f3ed51b90f1171ee46f98dc3554244790d3b (patch)
tree		077a01b5e5554bcfc46f5636896f0c1d59398553 /gcc/gcse.c
parent		5b722c78ff0f7fdca9fdd9b6771bd998e5eb5b40 (diff)
	* basic-block.h (compute_flow_dominators): Declare.

	* gcse.c (alloc_code_hoist_mem): New function.
	(free_code_hoist_mem, compute_code_hoist_vbeinout): Likewise.
	(compute_code_hoist_data, hoist_expr_reaches_here_p): Likewise.
	(hoist_code, one_code_hoisting_pass): Likewise.
	(gcse_main): If optimizing for size, then hoist expressions
	computed in multiple dominated basic blocks.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@29523 138bc75d-0d04-0410-961f-82ee72b054a4
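
For illustration only (hypothetical source, not part of this patch; all names below are invented), the transformation the new pass performs: with -Os, an expression computed in several blocks dominated by a common block is evaluated once at the end of the dominator.

/* Before hoisting: a * b is computed in both arms of the branch.  */
int
before (int a, int b, int p)
{
  int x;
  if (p)
    x = a * b + 1;
  else
    x = a * b - 1;
  return x;
}

/* After hoisting: the common subexpression is evaluated once in the
   dominating block.  This shrinks the code but lengthens the live
   range of t, which is why the pass only runs when optimizing for
   size.  */
int
after (int a, int b, int p)
{
  int x, t;
  t = a * b;
  if (p)
    x = t + 1;
  else
    x = t - 1;
  return x;
}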
Diffstat (limited to 'gcc/gcse.c')
-rw-r--r--	gcc/gcse.c	382
1 file changed, 382 insertions, 0 deletions
diff --git a/gcc/gcse.c b/gcc/gcse.c
index a02320ec5be..def34898cd7 100644
--- a/gcc/gcse.c
+++ b/gcc/gcse.c
@@ -600,6 +600,14 @@ static int one_pre_gcse_pass PROTO ((int));
static void add_label_notes PROTO ((rtx, rtx));
+static void alloc_code_hoist_mem PROTO ((int, int));
+static void free_code_hoist_mem PROTO ((void));
+static void compute_code_hoist_vbeinout PROTO ((void));
+static void compute_code_hoist_data PROTO ((void));
+static int hoist_expr_reaches_here_p PROTO ((int, int, int, char *));
+static void hoist_code PROTO ((void));
+static int one_code_hoisting_pass PROTO ((void));
+
static void alloc_rd_mem PROTO ((int, int));
static void free_rd_mem PROTO ((void));
static void handle_rd_kill_set PROTO ((rtx, int, int));
@@ -730,8 +738,28 @@ gcse_main (f, file)
if (max_pass_bytes < bytes_used)
max_pass_bytes = bytes_used;
+ /* Free up memory, then reallocate for code hoisting. We cannot
+ reuse the existing allocated memory because the tables will not
+ have info for the insns or registers created by partial
+ redundancy elimination. */
free_gcse_mem ();
+ /* It does not make sense to run code hoisting unless we are
+ optimizing for code size -- it rarely makes programs faster, and
+ can make them bigger if we did partial redundancy elimination
+ (when optimizing for space, we use a classic gcse algorithm
+ instead of partial redundancy algorithms). */
+ if (optimize_size)
+ {
+ max_gcse_regno = max_reg_num ();
+ alloc_gcse_mem (f);
+ changed |= one_code_hoisting_pass ();
+ free_gcse_mem ();
+
+ if (max_pass_bytes < bytes_used)
+ max_pass_bytes = bytes_used;
+ }
+
if (file)
{
fprintf (file, "\n");
@@ -5044,3 +5072,357 @@ delete_null_pointer_checks (f)
free (nonnull_avin);
free (nonnull_avout);
}
+
+/* Code Hoisting variables and subroutines. */
+
+/* Very busy expressions. */
+static sbitmap *hoist_vbein;
+static sbitmap *hoist_vbeout;
+
+/* Hoistable expressions. */
+static sbitmap *hoist_exprs;
+
+/* Dominator bitmaps. */
+static sbitmap *dominators;
+static sbitmap *post_dominators;
+
+/* ??? We could compute post dominators and run this algorithm in
+ reverse to perform tail merging; doing so would probably be
+ more effective than the tail merging code in jump.c.
+
+ It's unclear if tail merging could be run in parallel with
+ code hoisting. It would be nice. */
+
+/* Allocate vars used for code hoisting analysis. */
+
+static void
+alloc_code_hoist_mem (n_blocks, n_exprs)
+ int n_blocks, n_exprs;
+{
+ antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
+ transp = sbitmap_vector_alloc (n_blocks, n_exprs);
+ comp = sbitmap_vector_alloc (n_blocks, n_exprs);
+
+ hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
+ hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
+ hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
+ transpout = sbitmap_vector_alloc (n_blocks, n_exprs);
+
+ dominators = sbitmap_vector_alloc (n_blocks, n_blocks);
+ post_dominators = sbitmap_vector_alloc (n_blocks, n_blocks);
+}
+
+/* Free vars used for code hoisting analysis. */
+
+static void
+free_code_hoist_mem ()
+{
+ free (antloc);
+ free (transp);
+ free (comp);
+
+ free (hoist_vbein);
+ free (hoist_vbeout);
+ free (hoist_exprs);
+ free (transpout);
+
+ free (dominators);
+ free (post_dominators);
+}
+
+/* Compute the very busy expressions at entry/exit from each block.
+
+ An expression is very busy if all paths from a given point
+ compute the expression. */
+
+static void
+compute_code_hoist_vbeinout ()
+{
+ int bb, changed, passes;
+
+ sbitmap_vector_zero (hoist_vbeout, n_basic_blocks);
+ sbitmap_vector_zero (hoist_vbein, n_basic_blocks);
+
+ passes = 0;
+ changed = 1;
+ while (changed)
+ {
+ changed = 0;
+ /* We scan the blocks in reverse order to speed up
+ convergence. */
+ for (bb = n_basic_blocks - 1; bb >= 0; bb--)
+ {
+ changed |= sbitmap_a_or_b_and_c (hoist_vbein[bb], antloc[bb],
+ hoist_vbeout[bb], transp[bb]);
+ if (bb != n_basic_blocks - 1)
+ sbitmap_intersect_of_successors (hoist_vbeout[bb], hoist_vbein,
+ bb, s_succs);
+ }
+ passes++;
+ }
+
+ if (gcse_file)
+ fprintf (gcse_file, "hoisting vbeinout computation: %d passes\n", passes);
+}
+
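As a reference for the function above, here is a minimal standalone sketch (not GCC code; plain unsigned bitmaps stand in for sbitmaps, and the CFG, block numbering, and ANTLOC/TRANSP values are invented) of the same fixed point on a diamond-shaped CFG:

#include <stdio.h>

#define N 4

/* Diamond CFG: block 0 branches to 1 and 2, which both fall into 3.
   One expression is tracked in bit 0 of each bitmap:

     vbein[b]  = antloc[b] | (vbeout[b] & transp[b])
     vbeout[b] = AND of vbein[s] over all successors s of b  */

int
main (void)
{
  static const int succs[N][3] = {{1, 2, -1}, {3, -1}, {3, -1}, {-1}};
  unsigned antloc[N] = {0, 1, 1, 0};   /* computed in blocks 1 and 2 */
  unsigned transp[N] = {1, 1, 1, 1};   /* killed nowhere */
  unsigned vbein[N] = {0}, vbeout[N] = {0};
  int bb, i, changed = 1, passes = 0;

  while (changed)
    {
      changed = 0;
      /* Reverse order speeds convergence, as in the patch.  */
      for (bb = N - 1; bb >= 0; bb--)
        {
          unsigned in, out = ~0u;

          for (i = 0; succs[bb][i] >= 0; i++)
            out &= vbein[succs[bb][i]];
          if (succs[bb][0] < 0)
            out = 0;                   /* exit block: nothing is busy */

          in = antloc[bb] | (out & transp[bb]);
          changed |= (in != vbein[bb] || out != vbeout[bb]);
          vbein[bb] = in;
          vbeout[bb] = out;
        }
      passes++;
    }

  /* Prints "passes=2 vbeout[0]=1": the expression is very busy at the
     exit of block 0 and is therefore a hoisting candidate there.  */
  printf ("passes=%d vbeout[0]=%u\n", passes, vbeout[0] & 1);
  return 0;
}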
+/* Top level routine to do the dataflow analysis needed by code hoisting. */
+
+static void
+compute_code_hoist_data ()
+{
+ compute_local_properties (transp, comp, antloc, 0);
+ compute_transpout ();
+ compute_code_hoist_vbeinout ();
+ compute_flow_dominators (dominators, post_dominators);
+ if (gcse_file)
+ fprintf (gcse_file, "\n");
+}
+
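The dominator bitmaps that compute_flow_dominators fills in encode "bit D of dominators[B] is set iff block D dominates block B", which hoist_code tests below. A minimal standalone sketch (not the actual basic-block.h implementation; the CFG is invented) of how such sets can be computed:

#include <stdio.h>

#define N 4

int
main (void)
{
  /* The same diamond CFG, described by predecessors this time.  */
  static const int preds[N][3] = {{-1}, {0, -1}, {0, -1}, {1, 2, -1}};
  unsigned dom[N];
  int bb, i, changed = 1;

  dom[0] = 1u << 0;                    /* entry dominates only itself */
  for (bb = 1; bb < N; bb++)
    dom[bb] = (1u << N) - 1;           /* start from "all blocks" */

  /* Fixed point: dom[b] = {b} | AND of dom[p] over predecessors p.  */
  while (changed)
    {
      changed = 0;
      for (bb = 1; bb < N; bb++)
        {
          unsigned d = (1u << N) - 1;

          for (i = 0; preds[bb][i] >= 0; i++)
            d &= dom[preds[bb][i]];
          d |= 1u << bb;

          if (d != dom[bb])
            {
              dom[bb] = d;
              changed = 1;
            }
        }
    }

  /* Prints 0x1 0x3 0x5 0x9: only the entry block (and the join block
     itself) dominates the join block 3; neither arm does.  */
  for (bb = 0; bb < N; bb++)
    printf ("dom[%d] = 0x%x\n", bb, dom[bb]);
  return 0;
}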
+/* Determine if the expression identified by EXPR_INDEX would
+ reach BB unimpaired if it was placed at the end of EXPR_BB.
+
+ It's unclear exactly what Muchnick meant by "unimpaired". It seems
+ to me that the expression must either be computed or transparent in
+ *every* block in the path(s) from EXPR_BB to BB. Any other definition
+ would allow the expression to be hoisted out of loops, even if
+ the expression wasn't a loop invariant.
+
+ Contrast this to reachability for PRE, where an expression is
+ considered reachable if *any* path reaches it instead of *all*
+ paths. */
+
+static int
+hoist_expr_reaches_here_p (expr_bb, expr_index, bb, visited)
+ int expr_bb;
+ int expr_index;
+ int bb;
+ char *visited;
+{
+ edge pred;
+
+ if (visited == NULL)
+ {
+ visited = (char *) alloca (n_basic_blocks);
+ bzero (visited, n_basic_blocks);
+ }
+
+ visited[expr_bb] = 1;
+ for (pred = BASIC_BLOCK (bb)->pred; pred != NULL; pred = pred->pred_next)
+ {
+ int pred_bb = pred->src->index;
+
+ if (pred->src == ENTRY_BLOCK_PTR)
+ break;
+ else if (visited[pred_bb])
+ continue;
+ /* Does this predecessor generate this expression? */
+ else if (TEST_BIT (comp[pred_bb], expr_index))
+ break;
+ else if (! TEST_BIT (transp[pred_bb], expr_index))
+ break;
+ /* Not killed. */
+ else
+ {
+ visited[pred_bb] = 1;
+ if (! hoist_expr_reaches_here_p (expr_bb, expr_index,
+ pred_bb, visited))
+ break;
+ }
+ }
+
+ return (pred == NULL);
+}
+
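To make the all-paths requirement concrete, here is a hypothetical fragment (names invented, not from this patch) that hoist_expr_reaches_here_p must reject: a copy of a + b placed ahead of the branch would be stale on the path that assigns to a, because the expression is not transparent there.

int
no_hoist (int a, int b, int p)
{
  /* t = a + b;     <- hoisting here would be wrong */
  if (p)
    a = 5;          /* a + b is not transparent on this path */
  return a + b;     /* must see the post-branch value of a */
}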
+/* Actually perform code hoisting. */
+static void
+hoist_code ()
+{
+ int bb, dominated, i;
+ struct expr **index_map;
+
+ sbitmap_vector_zero (hoist_exprs, n_basic_blocks);
+
+ /* Compute a mapping from expression number (`bitmap_index') to
+ hash table entry. */
+
+ index_map = (struct expr **) alloca (n_exprs * sizeof (struct expr *));
+ bzero ((char *) index_map, n_exprs * sizeof (struct expr *));
+ for (i = 0; i < expr_hash_table_size; i++)
+ {
+ struct expr *expr;
+
+ for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
+ index_map[expr->bitmap_index] = expr;
+ }
+
+ /* Walk over each basic block looking for potentially hoistable
+ expressions; nothing gets hoisted from the entry block. */
+ for (bb = 0; bb < n_basic_blocks; bb++)
+ {
+ int found = 0;
+ int insn_inserted_p;
+
+ /* Examine each expression that is very busy at the exit of this
+ block. These are the potentially hoistable expressions. */
+ for (i = 0; i < hoist_vbeout[bb]->n_bits; i++)
+ {
+ int hoistable = 0;
+ if (TEST_BIT (hoist_vbeout[bb], i)
+ && TEST_BIT (transpout[bb], i))
+ {
+ /* We've found a potentially hoistable expression, now
+ we look at every block BB dominates to see if it
+ computes the expression. */
+ for (dominated = 0; dominated < n_basic_blocks; dominated++)
+ {
+ /* Ignore self dominance. */
+ if (bb == dominated
+ || ! TEST_BIT (dominators[dominated], bb))
+ continue;
+
+ /* We've found a dominated block, now see if it computes
+ the busy expression and whether or not moving that
+ expression to the "beginning" of that block is safe. */
+ if (!TEST_BIT (antloc[dominated], i))
+ continue;
+
+ /* Note if the expression would reach the dominated block
+ unimpaired if it was placed at the end of BB.
+
+ Keep track of how many times this expression is hoistable
+ from a dominated block into BB. */
+ if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
+ hoistable++;
+ }
+
+ /* If we found more than one hoistable occurrence of this
+ expression, then note it in the bitmap of expressions to
+ hoist. It makes no sense to hoist things which are computed
+ in only one BB, and doing so tends to pessimize register
+ allocation. One could increase this value to try harder
+ to avoid any possible code expansion due to register
+ allocation issues; however, experiments have shown that
+ the vast majority of hoistable expressions are only movable
+ from two dominated blocks, so raising this threshold is likely
+ to nullify any benefit we get from code hoisting. */
+ if (hoistable > 1)
+ {
+ SET_BIT (hoist_exprs[bb], i);
+ found = 1;
+ }
+ }
+ }
+
+ /* If we found nothing to hoist, then quit now. */
+ if (! found)
+ continue;
+
+ /* Loop over all the hoistable expressions. */
+ for (i = 0; i < hoist_exprs[bb]->n_bits; i++)
+ {
+ /* We want to insert the expression into BB only once, so
+ note when we've inserted it. */
+ insn_inserted_p = 0;
+
+ /* These tests should be the same as the tests above. */
+ if (TEST_BIT (hoist_vbeout[bb], i))
+ {
+ /* We've found a potentially hoistable expression, now
+ we look at every block BB dominates to see if it
+ computes the expression. */
+ for (dominated = 0; dominated < n_basic_blocks; dominated++)
+ {
+ /* Ignore self dominance. */
+ if (bb == dominated
+ || ! TEST_BIT (dominators[dominated], bb))
+ continue;
+
+ /* We've found a dominated block, now see if it computes
+ the busy expression and whether or not moving that
+ expression to the "beginning" of that block is safe. */
+ if (!TEST_BIT (antloc[dominated], i))
+ continue;
+
+ /* The expression is computed in the dominated block and
+ it would be safe to compute it at the start of the
+ dominated block. Now we have to determine if the
+ expression would reach the dominated block if it was
+ placed at the end of BB. */
+ if (hoist_expr_reaches_here_p (bb, i, dominated, NULL))
+ {
+ struct expr *expr = index_map[i];
+ struct occr *occr = expr->antic_occr;
+ rtx insn;
+ rtx set;
+
+ /* Find the right occurrence of this expression. */
+ while (occr && BLOCK_NUM (occr->insn) != dominated)
+ occr = occr->next;
+
+ /* Should never happen. */
+ if (!occr)
+ abort ();
+
+ insn = occr->insn;
+
+ set = single_set (insn);
+ if (! set)
+ abort ();
+
+ /* Create a pseudo-reg to store the result of reaching
+ expressions into. Get the mode for the new pseudo
+ from the mode of the original destination pseudo. */
+ if (expr->reaching_reg == NULL)
+ expr->reaching_reg
+ = gen_reg_rtx (GET_MODE (SET_DEST (set)));
+
+ /* In theory this should never fail since we're creating
+ a reg->reg copy.
+
+ However, on the x86 some of the movXX patterns actually
+ contain clobbers of scratch regs. This may cause the
+ insn created by validate_change to not match any
+ pattern and thus cause validate_change to fail. */
+ if (validate_change (insn, &SET_SRC (set),
+ expr->reaching_reg, 0))
+ {
+ occr->deleted_p = 1;
+ if (!insn_inserted_p)
+ {
+ insert_insn_end_bb (index_map[i], bb, 0);
+ insn_inserted_p = 1;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+/* Top level routine to perform one code hoisting (aka unification) pass.
+
+ Return non-zero if a change was made. */
+
+static int
+one_code_hoisting_pass ()
+{
+ int changed = 0;
+
+ alloc_expr_hash_table (max_cuid);
+ compute_expr_hash_table ();
+ if (gcse_file)
+ dump_hash_table (gcse_file, "Code Hoisting Expressions", expr_hash_table,
+ expr_hash_table_size, n_exprs);
+ if (n_exprs > 0)
+ {
+ alloc_code_hoist_mem (n_basic_blocks, n_exprs);
+ compute_code_hoist_data ();
+ hoist_code ();
+ free_code_hoist_mem ();
+ }
+ free_expr_hash_table ();
+
+ return changed;
+}