summaryrefslogtreecommitdiff
path: root/gcc/gcse.c
diff options
context:
space:
mode:
authormmitchel <mmitchel@138bc75d-0d04-0410-961f-82ee72b054a4>1999-11-03 22:45:45 +0000
committermmitchel <mmitchel@138bc75d-0d04-0410-961f-82ee72b054a4>1999-11-03 22:45:45 +0000
commit1459acf14a65b29cf31a0b13d960d906ad7ec82b (patch)
tree1fea2ecb5a5e9533654a8466ce98b970d99793af /gcc/gcse.c
parentaff5aef33ce732f37b867180c878df5b7e8723e5 (diff)
downloadgcc-1459acf14a65b29cf31a0b13d960d906ad7ec82b.tar.gz
* gcse.c (struct null_pointer_info): New type.
(get_bitmap_width): New function. (current_block): Remove. (nonnull_local): Likewise. (nonnull_killed): Likewise. (invalidate_nonnull_info): Take a null_pointer_info as input. (delete_null_pointer_checks_1): New function. (delete_null_pointer_checks): Use it. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@30384 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/gcse.c')
-rw-r--r--gcc/gcse.c353
1 files changed, 242 insertions, 111 deletions
diff --git a/gcc/gcse.c b/gcc/gcse.c
index e2d7cf7df70..067cfbfdd91 100644
--- a/gcc/gcse.c
+++ b/gcc/gcse.c
@@ -510,6 +510,18 @@ static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out;
/* for available exprs */
static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out;
+/* Objects of this type are passed around by the null-pointer check
+ removal routines. */
+struct null_pointer_info {
+ /* The basic block being processed. */
+ int current_block;
+ /* The first register to be handled in this pass. */
+ int min_reg;
+ /* One greater than the last register to be handled in this pass. */
+ int max_reg;
+ sbitmap *nonnull_local;
+ sbitmap *nonnull_killed;
+};
static void compute_can_copy PROTO ((void));
@@ -520,6 +532,7 @@ static void alloc_gcse_mem PROTO ((rtx));
static void free_gcse_mem PROTO ((void));
static void alloc_reg_set_mem PROTO ((int));
static void free_reg_set_mem PROTO ((void));
+static int get_bitmap_width PROTO ((int, int, int));
static void record_one_set PROTO ((int, rtx));
static void record_set_info PROTO ((rtx, rtx, void *));
static void compute_sets PROTO ((rtx));
@@ -622,6 +635,9 @@ static int handle_avail_expr PROTO ((rtx, struct expr *));
static int classic_gcse PROTO ((void));
static int one_classic_gcse_pass PROTO ((int));
static void invalidate_nonnull_info PROTO ((rtx, rtx, void *));
+static void delete_null_pointer_checks_1 PROTO ((int_list_ptr *, int *,
+ sbitmap *, sbitmap *,
+ struct null_pointer_info *));
static rtx process_insert_insn PROTO ((struct expr *));
static int pre_edge_insert PROTO ((struct edge_list *, struct expr **));
static int expr_reaches_here_p_work PROTO ((struct occr *, struct expr *, int, int, char *));
@@ -948,6 +964,50 @@ free_gcse_mem ()
free (mem_set_in_block);
}
+/* Many of the global optimization algorithms work by solving dataflow
+ equations for various expressions. Initially, some local value is
+ computed for each expression in each block. Then, the values
+ across the various blocks are combined (by following flow graph
+ edges) to arrive at global values. Conceptually, each set of
+ equations is independent. We may therefore solve all the equations
+ in parallel, solve them one at a time, or pick any intermediate
+ approach.
+
+ When you're going to need N two-dimensional bitmaps, each X (say,
+ the number of blocks) by Y (say, the number of expressions), call
+ this function. It's not important what X and Y represent; only
+ that Y correspond to the things that can be done in parallel. This
+ function will return an appropriate chunking factor C; you should
+ solve C sets of equations in parallel. By going through this
+ function, we can easily trade space against time; by solving fewer
+ equations in parallel we use less space. */
+
+static int
+get_bitmap_width (n, x, y)
+ int n;
+ int x;
+ int y;
+{
+ /* It's not really worth figuring out *exactly* how much memory will
+ be used by a particular choice. The important thing is to get
+ something approximately right. */
+ size_t max_bitmap_memory = 10 * 1024 * 1024;
+
+ /* The number of bytes we'd use for a single column of minimum
+ width. */
+ size_t column_size = n * x * sizeof (SBITMAP_ELT_TYPE);
+
+ /* Often, it's reasonable just to solve all the equations in
+ parallel. */
+ if (column_size * SBITMAP_SET_SIZE (y) <= max_bitmap_memory)
+ return y;
+
+ /* Otherwise, pick the largest width we can, without going over the
+ limit. */
+ return SBITMAP_ELT_BITS * ((max_bitmap_memory + column_size - 1)
+ / column_size);
+}
+
/* Compute the local properties of each recorded expression.
Local properties are those that are defined by the block, irrespective
@@ -4923,23 +4983,19 @@ compute_transpout ()
/* Removal of useless null pointer checks */
-/* These need to be file static for communication between
- invalidate_nonnull_info and delete_null_pointer_checks. */
-static int current_block;
-static sbitmap *nonnull_local;
-static sbitmap *nonnull_killed;
-
/* Called via note_stores. X is set by SETTER. If X is a register we must
- invalidate nonnull_local and set nonnull_killed.
+ invalidate nonnull_local and set nonnull_killed. DATA is really a
+ `null_pointer_info *'.
We ignore hard registers. */
static void
invalidate_nonnull_info (x, setter, data)
rtx x;
rtx setter ATTRIBUTE_UNUSED;
- void *data ATTRIBUTE_UNUSED;
+ void *data;
{
int offset, regno;
+ struct null_pointer_info* npi = (struct null_pointer_info *) data;
offset = 0;
while (GET_CODE (x) == SUBREG)
@@ -4947,89 +5003,34 @@ invalidate_nonnull_info (x, setter, data)
/* Ignore anything that is not a register or is a hard register. */
if (GET_CODE (x) != REG
- || REGNO (x) < FIRST_PSEUDO_REGISTER)
+ || REGNO (x) < npi->min_reg
+ || REGNO (x) >= npi->max_reg)
return;
- regno = REGNO (x);
+ regno = REGNO (x) - npi->min_reg;
- RESET_BIT (nonnull_local[current_block], regno);
- SET_BIT (nonnull_killed[current_block], regno);
-
+ RESET_BIT (npi->nonnull_local[npi->current_block], regno);
+ SET_BIT (npi->nonnull_killed[npi->current_block], regno);
}
-/* Find EQ/NE comparisons against zero which can be (indirectly) evaluated
- at compile time.
-
- This is conceptually similar to global constant/copy propagation and
- classic global CSE (it even uses the same dataflow equations as cprop).
-
- If a register is used as memory address with the form (mem (reg)), then we
- know that REG can not be zero at that point in the program. Any instruction
- which sets REG "kills" this property.
-
- So, if every path leading to a conditional branch has an available memory
- reference of that form, then we know the register can not have the value
- zero at the conditional branch.
-
- So we merely need to compute the local properies and propagate that data
- around the cfg, then optimize where possible.
+/* Do null-pointer check elimination for the registers indicated in
+ NPI. NONNULL_AVIN and NONNULL_AVOUT are pre-allocated sbitmaps;
+ they are not our responsibility to free. */
- We run this pass two times. Once before CSE, then again after CSE. This
- has proven to be the most profitable approach. It is rare for new
- optimization opportunities of this nature to appear after the first CSE
- pass.
-
- This could probably be integrated with global cprop with a little work. */
-
-void
-delete_null_pointer_checks (f)
- rtx f;
+static void
+delete_null_pointer_checks_1 (s_preds, block_reg, nonnull_avin,
+ nonnull_avout, npi)
+ int_list_ptr *s_preds;
+ int *block_reg;
+ sbitmap *nonnull_avin;
+ sbitmap *nonnull_avout;
+ struct null_pointer_info *npi;
{
- int_list_ptr *s_preds, *s_succs;
- int *num_preds, *num_succs;
int changed, bb;
- sbitmap *nonnull_avin, *nonnull_avout;
+ int current_block;
+ sbitmap *nonnull_local = npi->nonnull_local;
+ sbitmap *nonnull_killed = npi->nonnull_killed;
- /* First break the program into basic blocks. */
- find_basic_blocks (f, max_reg_num (), NULL, 1);
-
- /* If we have only a single block, then there's nothing to do. */
- if (n_basic_blocks <= 1)
- {
- /* Free storage allocated by find_basic_blocks. */
- free_basic_block_vars (0);
- return;
- }
-
- /* Trying to perform global optimizations on flow graphs which have
- a high connectivity will take a long time and is unlikely to be
- particularly useful.
-
- In normal circumstances a cfg should have about twice has many edges
- as blocks. But we do not want to punish small functions which have
- a couple switch statements. So we require a relatively large number
- of basic blocks and the ratio of edges to blocks to be high. */
- if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
- {
- /* Free storage allocated by find_basic_blocks. */
- free_basic_block_vars (0);
- return;
- }
-
- /* We need predecessor/successor lists as well as pred/succ counts for
- each basic block. */
- s_preds = (int_list_ptr *) gmalloc (n_basic_blocks * sizeof (int_list_ptr));
- s_succs = (int_list_ptr *) gmalloc (n_basic_blocks * sizeof (int_list_ptr));
- num_preds = (int *) gmalloc (n_basic_blocks * sizeof (int));
- num_succs = (int *) gmalloc (n_basic_blocks * sizeof (int));
- compute_preds_succs (s_preds, s_succs, num_preds, num_succs);
-
- /* Allocate bitmaps to hold local and global properties. */
- nonnull_local = sbitmap_vector_alloc (n_basic_blocks, max_reg_num ());
- nonnull_killed = sbitmap_vector_alloc (n_basic_blocks, max_reg_num ());
- nonnull_avin = sbitmap_vector_alloc (n_basic_blocks, max_reg_num ());
- nonnull_avout = sbitmap_vector_alloc (n_basic_blocks, max_reg_num ());
-
/* Compute local properties, nonnull and killed. A register will have
the nonnull property if at the end of the current block its value is
known to be nonnull. The killed property indicates that somewhere in
@@ -5044,6 +5045,9 @@ delete_null_pointer_checks (f)
{
rtx insn, stop_insn;
+ /* Set the current block for invalidate_nonnull_info. */
+ npi->current_block = current_block;
+
/* Scan each insn in the basic block looking for memory references and
register sets. */
stop_insn = NEXT_INSN (BLOCK_END (current_block));
@@ -5052,6 +5056,7 @@ delete_null_pointer_checks (f)
insn = NEXT_INSN (insn))
{
rtx set;
+ rtx reg;
/* Ignore anything that is not a normal insn. */
if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
@@ -5063,7 +5068,7 @@ delete_null_pointer_checks (f)
set = single_set (insn);
if (!set)
{
- note_stores (PATTERN (insn), invalidate_nonnull_info, NULL);
+ note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
continue;
}
@@ -5071,22 +5076,24 @@ delete_null_pointer_checks (f)
in case it uses its address register as a dest (which kills
the nonnull property). */
if (GET_CODE (SET_SRC (set)) == MEM
- && GET_CODE (XEXP (SET_SRC (set), 0)) == REG
- && REGNO (XEXP (SET_SRC (set), 0)) >= FIRST_PSEUDO_REGISTER)
+ && GET_CODE ((reg = XEXP (SET_SRC (set), 0))) == REG
+ && REGNO (reg) >= npi->min_reg
+ && REGNO (reg) < npi->max_reg)
SET_BIT (nonnull_local[current_block],
- REGNO (XEXP (SET_SRC (set), 0)));
+ REGNO (reg) - npi->min_reg);
/* Now invalidate stuff clobbered by this insn. */
- note_stores (PATTERN (insn), invalidate_nonnull_info, NULL);
+ note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
/* And handle stores, we do these last since any sets in INSN can
not kill the nonnull property if it is derived from a MEM
appearing in a SET_DEST. */
if (GET_CODE (SET_DEST (set)) == MEM
- && GET_CODE (XEXP (SET_DEST (set), 0)) == REG
- && REGNO (XEXP (SET_DEST (set), 0)) >= FIRST_PSEUDO_REGISTER)
+ && GET_CODE ((reg = XEXP (SET_DEST (set), 0))) == REG
+ && REGNO (reg) >= npi->min_reg
+ && REGNO (reg) < npi->max_reg)
SET_BIT (nonnull_local[current_block],
- REGNO (XEXP (SET_DEST (set), 0)));
+ REGNO (reg) - npi->min_reg);
}
}
@@ -5117,33 +5124,21 @@ delete_null_pointer_checks (f)
for (bb = 0; bb < n_basic_blocks; bb++)
{
rtx last_insn = BLOCK_END (bb);
- rtx condition, earliest, reg;
+ rtx condition, earliest;
int compare_and_branch;
- /* We only want conditional branches. */
- if (GET_CODE (last_insn) != JUMP_INSN
- || !condjump_p (last_insn)
- || simplejump_p (last_insn))
+ /* Since MIN_REG is always at least FIRST_PSEUDO_REGISTER, and
+ since BLOCK_REG[BB] is zero if this block did not end with a
+ comparison against zero, this condition works. */
+ if (block_reg[bb] < npi->min_reg
+ || block_reg[bb] >= npi->max_reg)
continue;
/* LAST_INSN is a conditional jump. Get its condition. */
condition = get_condition (last_insn, &earliest);
- /* If we were unable to get the condition, or it is not a equality
- comparison against zero then there's nothing we can do. */
- if (!condition
- || (GET_CODE (condition) != NE && GET_CODE (condition) != EQ)
- || GET_CODE (XEXP (condition, 1)) != CONST_INT
- || XEXP (condition, 1) != CONST0_RTX (GET_MODE (XEXP (condition, 0))))
- continue;
-
- /* We must be checking a register against zero. */
- reg = XEXP (condition, 0);
- if (GET_CODE (reg) != REG)
- continue;
-
/* Is the register known to have a nonzero value? */
- if (!TEST_BIT (nonnull_avout[bb], REGNO (reg)))
+ if (!TEST_BIT (nonnull_avout[bb], block_reg[bb] - npi->min_reg))
continue;
/* Try to compute whether the compare/branch at the loop end is one or
@@ -5170,6 +5165,139 @@ delete_null_pointer_checks (f)
delete_insn (last_insn);
if (compare_and_branch == 2)
delete_insn (earliest);
+
+ /* Don't check this block again. (Note that BLOCK_END is
+ invalid here; we deleted the last instruction in the
+ block.) */
+ block_reg[bb] = 0;
+ }
+}
+
+/* Find EQ/NE comparisons against zero which can be (indirectly) evaluated
+ at compile time.
+
+ This is conceptually similar to global constant/copy propagation and
+ classic global CSE (it even uses the same dataflow equations as cprop).
+
+ If a register is used as memory address with the form (mem (reg)), then we
+ know that REG can not be zero at that point in the program. Any instruction
+ which sets REG "kills" this property.
+
+ So, if every path leading to a conditional branch has an available memory
+ reference of that form, then we know the register can not have the value
+ zero at the conditional branch.
+
+ So we merely need to compute the local properties and propagate that data
+ around the cfg, then optimize where possible.
+
+ We run this pass two times. Once before CSE, then again after CSE. This
+ has proven to be the most profitable approach. It is rare for new
+ optimization opportunities of this nature to appear after the first CSE
+ pass.
+
+ This could probably be integrated with global cprop with a little work. */
+
+void
+delete_null_pointer_checks (f)
+ rtx f;
+{
+ int_list_ptr *s_preds, *s_succs;
+ int *num_preds, *num_succs;
+ sbitmap *nonnull_avin, *nonnull_avout;
+ int *block_reg;
+ int bb;
+ int reg;
+ int regs_per_pass;
+ int max_reg;
+ struct null_pointer_info npi;
+
+ /* First break the program into basic blocks. */
+ find_basic_blocks (f, max_reg_num (), NULL, 1);
+
+ /* If we have only a single block, then there's nothing to do. */
+ if (n_basic_blocks <= 1)
+ {
+ /* Free storage allocated by find_basic_blocks. */
+ free_basic_block_vars (0);
+ return;
+ }
+
+ /* Trying to perform global optimizations on flow graphs which have
+ a high connectivity will take a long time and is unlikely to be
+ particularly useful.
+
+ In normal circumstances a cfg should have about twice as many edges
+ as blocks. But we do not want to punish small functions which have
+ a couple switch statements. So we require a relatively large number
+ of basic blocks and the ratio of edges to blocks to be high. */
+ if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
+ {
+ /* Free storage allocated by find_basic_blocks. */
+ free_basic_block_vars (0);
+ return;
+ }
+
+ /* We need predecessor/successor lists as well as pred/succ counts for
+ each basic block. */
+ s_preds = (int_list_ptr *) gmalloc (n_basic_blocks * sizeof (int_list_ptr));
+ s_succs = (int_list_ptr *) gmalloc (n_basic_blocks * sizeof (int_list_ptr));
+ num_preds = (int *) gmalloc (n_basic_blocks * sizeof (int));
+ num_succs = (int *) gmalloc (n_basic_blocks * sizeof (int));
+ compute_preds_succs (s_preds, s_succs, num_preds, num_succs);
+
+ /* We need four bitmaps, each with a bit for each register in each
+ basic block. */
+ max_reg = max_reg_num ();
+ regs_per_pass = get_bitmap_width (4, n_basic_blocks, max_reg);
+
+ /* Allocate bitmaps to hold local and global properties. */
+ npi.nonnull_local = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
+ npi.nonnull_killed = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
+ nonnull_avin = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
+ nonnull_avout = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
+
+ /* Go through the basic blocks, seeing whether or not each block
+ ends with a conditional branch whose condition is a comparison
+ against zero. Record the register compared in BLOCK_REG. */
+ block_reg = (int *) xcalloc (n_basic_blocks, sizeof (int));
+ for (bb = 0; bb < n_basic_blocks; bb++)
+ {
+ rtx last_insn = BLOCK_END (bb);
+ rtx condition, earliest, reg;
+
+ /* We only want conditional branches. */
+ if (GET_CODE (last_insn) != JUMP_INSN
+ || !condjump_p (last_insn)
+ || simplejump_p (last_insn))
+ continue;
+
+ /* LAST_INSN is a conditional jump. Get its condition. */
+ condition = get_condition (last_insn, &earliest);
+
+ /* If we were unable to get the condition, or it is not an equality
+ comparison against zero then there's nothing we can do. */
+ if (!condition
+ || (GET_CODE (condition) != NE && GET_CODE (condition) != EQ)
+ || GET_CODE (XEXP (condition, 1)) != CONST_INT
+ || (XEXP (condition, 1)
+ != CONST0_RTX (GET_MODE (XEXP (condition, 0)))))
+ continue;
+
+ /* We must be checking a register against zero. */
+ reg = XEXP (condition, 0);
+ if (GET_CODE (reg) != REG)
+ continue;
+
+ block_reg[bb] = REGNO (reg);
+ }
+
+ /* Go through the algorithm for each block of registers. */
+ for (reg = FIRST_PSEUDO_REGISTER; reg < max_reg; reg += regs_per_pass)
+ {
+ npi.min_reg = reg;
+ npi.max_reg = MIN (reg + regs_per_pass, max_reg);
+ delete_null_pointer_checks_1 (s_preds, block_reg, nonnull_avin,
+ nonnull_avout, &npi);
}
/* Free storage allocated by find_basic_blocks. */
@@ -5181,9 +5309,12 @@ delete_null_pointer_checks (f)
free (num_preds);
free (num_succs);
+ /* Free the table of registers compared at the end of every block. */
+ free (block_reg);
+
/* Free bitmaps. */
- free (nonnull_local);
- free (nonnull_killed);
+ free (npi.nonnull_local);
+ free (npi.nonnull_killed);
free (nonnull_avin);
free (nonnull_avout);
}