diff options
-rw-r--r-- | gcc/ChangeLog | 22 | ||||
-rw-r--r-- | gcc/Makefile.in | 9 | ||||
-rw-r--r-- | gcc/flow.c | 16 | ||||
-rw-r--r-- | gcc/rtl.def | 15 | ||||
-rw-r--r-- | gcc/rtl.h | 5 | ||||
-rw-r--r-- | gcc/ssa.c | 1503 | ||||
-rw-r--r-- | gcc/toplev.c | 39 | ||||
-rw-r--r-- | include/ChangeLog | 4 | ||||
-rw-r--r-- | include/partition.h | 81 | ||||
-rw-r--r-- | libiberty/ChangeLog | 7 | ||||
-rw-r--r-- | libiberty/Makefile.in | 22 | ||||
-rw-r--r-- | libiberty/partition.c | 185 |
12 files changed, 1893 insertions, 15 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 3c0d560f0f7..a5bfcda917c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,25 @@ +2000-03-09 Richard Henderson <rth@cygnus.com> + Alex Samuel <samuel@codesourcery.com> and others + + * Makefile.in (ssa.o): New rule. + (OBJS): Add ssa.o. + (STAGESTUFF): Add *.ssa and *.ussa. + (mostlyclean): Delete *.ssa, *.ussa, */*.ssa, */*.ussa. + * rtl.def (PHI): New RTL expression. + * rtl.h (clear_log_links): New declaration. + (convert_to_ssa): Likewise. + (convert_from_ssa): Likewise. + * flow.c (split_edge): If the entry node falls through to the + split edge's source block, split the entry edge. + (clear_log_links): New function. + * toplev.c (ssa_dump): New variable. + (flag_ssa): Likewise. + (f_options): Add "ssa". + (compile_file): Create SSA dump files. + (rest_of_compilation): Go to and from SSA if enabled. + (decide_d_option): Handle -de for SSA dump files. + * ssa.c: New file. + Thu Mar 9 20:01:38 2000 Jim Wilson <wilson@cygnus.com> * expr.c (expand_assignment): For a CALL_EXPR, special case PARM_DECL diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 89d9e556260..88f965ff24b 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -674,7 +674,7 @@ OBJS = diagnostic.o \ insn-opinit.o insn-recog.o insn-extract.o insn-output.o insn-emit.o lcm.o \ profile.o insn-attrtab.o $(out_object_file) $(EXTRA_OBJS) convert.o \ mbchar.o dyn-string.o splay-tree.o graph.o sbitmap.o resource.o hash.o \ - predict.o lists.o ggc-common.o $(GGC) simplify-rtx.o + predict.o lists.o ggc-common.o $(GGC) simplify-rtx.o ssa.o # GEN files are listed separately, so they can be built before doing parallel # makes for cc1 or cc1plus. Otherwise sequent parallel make attempts to load @@ -704,6 +704,7 @@ STAGESTUFF = *$(objext) insn-flags.h insn-config.h insn-codes.h \ gcov$(exeext) *.bp \ *.greg *.lreg *.combine *.flow *.cse *.jump *.rtl *.tree *.loop \ *.dbr *.jump2 *.sched *.cse2 *.sched2 *.stack *.gcse *.flow2 *.peephole2 \ + *.ssa *.ussa \ *.[si] libcpp.a \ $(LANG_STAGESTUFF) @@ -1565,6 +1566,8 @@ resource.o : resource.c $(CONFIG_H) $(RTL_H) hard-reg-set.h system.h \ insn-attr.h lcm.o : lcm.c $(CONFIG_H) system.h $(RTL_H) $(REGS_H) hard-reg-set.h flags.h \ real.h insn-config.h insn-attr.h $(RECOG_H) $(EXPR_H) $(BASIC_BLOCK_H) +ssa.o : ssa.c $(CONFIG_H) system.h $(RTL_H) $(REGS_H) $(BASIC_BLOCK_H) \ + output.h insn-config.h profile.o : profile.c $(CONFIG_H) system.h $(RTL_H) flags.h insn-flags.h \ gcov-io.h $(TREE_H) output.h $(REGS_H) toplev.h function.h insn-config.h \ ggc.h @@ -2355,11 +2358,11 @@ mostlyclean: $(INTL_MOSTLYCLEAN) lang.mostlyclean # Delete debugging dump files. -rm -f *.greg *.lreg *.combine *.flow *.cse *.jump *.rtl *.tree *.loop -rm -f *.dbr *.jump2 *.sched *.cse2 *.sched2 *.stack *.addressof - -rm -f *.regmove *.mach *.bp *.gcse *.flow2 *.peephole2 + -rm -f *.regmove *.mach *.bp *.gcse *.flow2 *.peephole2 *.ssa *.ussa -rm -f */*.greg */*.lreg */*.combine */*.flow */*.cse */*.jump */*.rtl -rm -f */*.tree */*.loop */*.dbr */*.jump2 */*.sched */*.cse2 -rm -f */*.sched2 */*.stack */*.regmove */*.gcse */*.flow2 - -rm -f */*.peephole2 + -rm -f */*.peephole2 */*.ssa */*.ussa # Delete some files made during installation. -rm -f specs float.h-* enquire SYSCALLS.c.X SYSCALLS.c -rm -f collect collect2 mips-tfile mips-tdump alloca.s diff --git a/gcc/flow.c b/gcc/flow.c index 570483567a3..799114e7363 100644 --- a/gcc/flow.c +++ b/gcc/flow.c @@ -362,6 +362,7 @@ static void fixup_reorder_chain PARAMS ((void)); it being unused. */ void verify_flow_info PARAMS ((void)); int flow_loop_outside_edge_p PARAMS ((const struct loop *, edge)); +void clear_log_links PARAMS ((rtx)); /* Find basic blocks of the current function. F is the first insn of the function and NREGS the number of register @@ -1369,7 +1370,8 @@ split_edge (edge_in) basic_block jump_block; rtx pos; - if ((e->flags & EDGE_CRITICAL) == 0) + if ((e->flags & EDGE_CRITICAL) == 0 + && e->src != ENTRY_BLOCK_PTR) { /* Non critical -- we can simply add a jump to the end of the existing predecessor. */ @@ -7047,7 +7049,6 @@ flow_loop_outside_edge_p (loop, e) } - typedef struct reorder_block_def { int flags; int index; @@ -7769,3 +7770,14 @@ reorder_basic_blocks () flow_loops_free (&loops_info); } + +/* Clear LOG_LINKS fields of insns in a chain. */ +void +clear_log_links (insns) + rtx insns; +{ + rtx i; + for (i = insns; i; i = NEXT_INSN (i)) + if (GET_RTX_CLASS (GET_CODE (i)) == 'i') + LOG_LINKS (i) = 0; +} diff --git a/gcc/rtl.def b/gcc/rtl.def index 996a38e9e9e..fd2f13d63d4 100644 --- a/gcc/rtl.def +++ b/gcc/rtl.def @@ -880,6 +880,21 @@ DEF_RTL_EXPR(CONSTANT_P_RTX, "constant_p_rtx", "e", 'x') after we select a call method. */ DEF_RTL_EXPR(CALL_PLACEHOLDER, "call_placeholder", "uuuu", 'x') +/* The SSA phi operator. + + The argument is a vector of 2N rtxes. Element 2N+1 is a CONST_INT + containing the block number of the predecessor through which control + has passed when the register at element 2N is used. + + Note that PHI may only appear at the beginning of a basic block. + + ??? There may be multiple PHI insns, but they are all evaluated + in parallel. This probably ought to be changed to use a real + PARALLEL, as that would be less confusing and more in the spirit + of canonical RTL. It is, however, easier to manipulate this way. */ +DEF_RTL_EXPR(PHI, "phi", "E", 'x') + + /* Local variables: mode:c diff --git a/gcc/rtl.h b/gcc/rtl.h index 7fc1a1c751f..cdabfdba058 100644 --- a/gcc/rtl.h +++ b/gcc/rtl.h @@ -1149,6 +1149,7 @@ void free_EXPR_LIST_node PARAMS ((rtx)); void free_INSN_LIST_node PARAMS ((rtx)); rtx alloc_INSN_LIST PARAMS ((rtx, rtx)); rtx alloc_EXPR_LIST PARAMS ((int, rtx, rtx)); +void clear_log_links PARAMS ((rtx)); /* regclass.c */ @@ -1710,6 +1711,10 @@ extern rtx addr_side_effect_eval PARAMS ((rtx, int, int)); extern int stack_regs_mentioned PARAMS ((rtx insn)); #endif +/* In ssa.c */ +extern void convert_to_ssa PARAMS ((void)); +extern void convert_from_ssa PARAMS ((void)); + /* In toplev.c */ extern rtx stack_limit_rtx; diff --git a/gcc/ssa.c b/gcc/ssa.c new file mode 100644 index 00000000000..7320f8ec7f0 --- /dev/null +++ b/gcc/ssa.c @@ -0,0 +1,1503 @@ +/* Static Single Assignment conversion routines for the GNU compiler. + Copyright (C) 2000 Free Software Foundation, Inc. + + This file is part of GNU CC. + + GNU CC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + GNU CC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GNU CC; see the file COPYING. If not, write to + the Free Software Foundation, 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* References: + + Building an Optimizing Compiler + Robert Morgan + Butterworth-Heinemann, 1998 + + Static Single Assignment Construction + Preston Briggs, Tim Harvey, Taylor Simpson + Technical Report, Rice University, 1995 + ftp://ftp.cs.rice.edu/public/preston/optimizer/SSA.ps.gz +*/ + +#include "config.h" +#include "system.h" + +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "flags.h" +#include "function.h" +#include "real.h" +#include "insn-config.h" +#include "recog.h" +#include "basic-block.h" +#include "output.h" +#include "partition.h" + + +/* TODO: + + ??? What to do about strict_low_part. Probably I'll have to split + them out of their current instructions first thing. + + Actually the best solution may be to have a kind of "mid-level rtl" + in which the RTL encodes exactly what we want, without exposing a + lot of niggling processor details. At some later point we lower + the representation, calling back into optabs to finish any necessary + expansion. +*/ + + +/* Element I is the single instruction that sets register I+PSEUDO. */ +varray_type ssa_definition; + +/* Element I is an INSN_LIST of instructions that use register I+PSEUDO. */ +varray_type ssa_uses; + +/* Element I-PSEUDO is the normal register that originated the ssa + register in question. */ +varray_type ssa_rename_from; + +/* The running target ssa register for a given normal register. */ +static rtx *ssa_rename_to; + +/* The number of registers that were live on entry to the SSA routines. */ +static int ssa_max_reg_num; + +/* Local function prototypes. */ + +static inline rtx * phi_alternative + PARAMS ((rtx, int)); + +static int remove_phi_alternative + PARAMS ((rtx, int)); +static void simplify_to_immediate_dominators + PARAMS ((int *idom, sbitmap *dominators)); +static void compute_dominance_frontiers_1 + PARAMS ((sbitmap *frontiers, int *idom, int bb, sbitmap done)); +static void compute_dominance_frontiers + PARAMS ((sbitmap *frontiers, int *idom)); +static void find_evaluations_1 + PARAMS ((rtx dest, rtx set, void *data)); +static void find_evaluations + PARAMS ((sbitmap *evals, int nregs)); +static void compute_iterated_dominance_frontiers + PARAMS ((sbitmap *idfs, sbitmap *frontiers, sbitmap *evals, int nregs)); +static void insert_phi_node + PARAMS ((int regno, int b)); +static void insert_phi_nodes + PARAMS ((sbitmap *idfs, sbitmap *evals, int nregs)); +static int rename_insn_1 + PARAMS ((rtx *ptr, void *data)); +static void rename_block + PARAMS ((int b, int *idom)); +static void rename_registers + PARAMS ((int nregs, int *idom)); + +static inline int ephi_add_node + PARAMS ((rtx reg, rtx *nodes, int *n_nodes)); +static int * ephi_forward + PARAMS ((int t, sbitmap visited, sbitmap *succ, int *tstack)); +static void ephi_backward + PARAMS ((int t, sbitmap visited, sbitmap *pred, rtx *nodes)); +static void ephi_create + PARAMS ((int t, sbitmap visited, sbitmap *pred, sbitmap *succ, rtx *nodes)); +static void eliminate_phi + PARAMS ((edge e, partition reg_partition)); + +static int make_regs_equivalent_over_bad_edges + PARAMS ((int bb, partition reg_partition)); +static int make_equivalent_phi_alternatives_equivalent + PARAMS ((int bb, partition reg_partition)); +static partition compute_conservative_reg_partition + PARAMS (()); +static int rename_equivalent_regs_in_insn + PARAMS ((rtx *ptr, void *data)); +static void rename_equivalent_regs + PARAMS ((partition reg_partition)); + + +/* Determine if the insn is a PHI node. */ +#define PHI_NODE_P(X) \ + (X && GET_CODE (X) == INSN \ + && GET_CODE (PATTERN (X)) == SET \ + && GET_CODE (SET_SRC (PATTERN (X))) == PHI) + +/* Given the SET of a PHI node, return the address of the alternative + for predecessor block C. */ + +static inline rtx * +phi_alternative (set, c) + rtx set; + int c; +{ + rtvec phi_vec = XVEC (SET_SRC (set), 0); + int v; + + for (v = GET_NUM_ELEM (phi_vec) - 2; v >= 0; v -= 2) + if (INTVAL (RTVEC_ELT (phi_vec, v + 1)) == c) + return &RTVEC_ELT (phi_vec, v); + + return NULL; +} + +/* Given the SET of a phi node, remove the alternative for predecessor + block C. Return non-zero on success, or zero if no alternative is + found for C. */ + +static int +remove_phi_alternative (set, c) + rtx set; + int c; +{ + rtvec phi_vec = XVEC (SET_SRC (set), 0); + int num_elem = GET_NUM_ELEM (phi_vec); + int v; + + for (v = num_elem - 2; v >= 0; v -= 2) + if (INTVAL (RTVEC_ELT (phi_vec, v + 1)) == c) + { + if (v < num_elem - 2) + { + RTVEC_ELT (phi_vec, v) = RTVEC_ELT (phi_vec, num_elem - 2); + RTVEC_ELT (phi_vec, v + 1) = RTVEC_ELT (phi_vec, num_elem - 1); + } + PUT_NUM_ELEM (phi_vec, num_elem - 2); + return 1; + } + + return 0; +} + +/* Computing the Immediate Dominators: + + Throughout, we don't actually want the full dominators set as + calculated by flow, but rather the immediate dominators. +*/ + +static void +simplify_to_immediate_dominators (idom, dominators) + int *idom; + sbitmap *dominators; +{ + sbitmap *tmp; + int b; + + tmp = sbitmap_vector_alloc (n_basic_blocks, n_basic_blocks); + + /* Begin with tmp(n) = dom(n) - { n }. */ + for (b = n_basic_blocks; --b >= 0; ) + { + sbitmap_copy (tmp[b], dominators[b]); + RESET_BIT (tmp[b], b); + } + + /* Subtract out all of our dominator's dominators. */ + for (b = n_basic_blocks; --b >= 0; ) + { + sbitmap tmp_b = tmp[b]; + int s; + + for (s = n_basic_blocks; --s >= 0; ) + if (TEST_BIT (tmp_b, s)) + sbitmap_difference (tmp_b, tmp_b, tmp[s]); + } + + /* Find the one bit set in the bitmap and put it in the output array. */ + for (b = n_basic_blocks; --b >= 0; ) + { + int t; + EXECUTE_IF_SET_IN_SBITMAP (tmp[b], 0, t, { idom[b] = t; }); + } + + sbitmap_vector_free (tmp); +} + + +/* For all registers, find all blocks in which they are set. + + This is the transform of what would be local kill information that + we ought to be getting from flow. */ + +static sbitmap *fe_evals; +static int fe_current_bb; + +static void +find_evaluations_1 (dest, set, data) + rtx dest; + rtx set ATTRIBUTE_UNUSED; + void *data ATTRIBUTE_UNUSED; +{ + if (GET_CODE (dest) == REG + && REGNO (dest) >= FIRST_PSEUDO_REGISTER) + SET_BIT (fe_evals[REGNO (dest) - FIRST_PSEUDO_REGISTER], fe_current_bb); +} + +static void +find_evaluations (evals, nregs) + sbitmap *evals; + int nregs; +{ + int bb; + + sbitmap_vector_zero (evals, nregs); + fe_evals = evals; + + for (bb = n_basic_blocks; --bb >= 0; ) + { + rtx p, last; + + fe_current_bb = bb; + p = BLOCK_HEAD (bb); + last = BLOCK_END (bb); + while (1) + { + if (GET_RTX_CLASS (GET_CODE (p)) == 'i') + note_stores (PATTERN (p), find_evaluations_1, NULL); + + if (p == last) + break; + p = NEXT_INSN (p); + } + } +} + + +/* Computing the Dominance Frontier: + + As decribed in Morgan, section 3.5, this may be done simply by + walking the dominator tree bottom-up, computing the frontier for + the children before the parent. When considering a block B, + there are two cases: + + (1) A flow graph edge leaving B that does not lead to a child + of B in the dominator tree must be a block that is either equal + to B or not dominated by B. Such blocks belong in the frontier + of B. + + (2) Consider a block X in the frontier of one of the children C + of B. If X is not equal to B and is not dominated by B, it + is in the frontier of B. +*/ + +static void +compute_dominance_frontiers_1 (frontiers, idom, bb, done) + sbitmap *frontiers; + int *idom; + int bb; + sbitmap done; +{ + basic_block b = BASIC_BLOCK (bb); + edge e; + int c; + + SET_BIT (done, bb); + sbitmap_zero (frontiers[bb]); + + /* Do the frontier of the children first. Not all children in the + dominator tree (blocks dominated by this one) are children in the + CFG, so check all blocks. */ + for (c = 0; c < n_basic_blocks; ++c) + if (idom[c] == bb && ! TEST_BIT (done, c)) + compute_dominance_frontiers_1 (frontiers, idom, c, done); + + /* Find blocks conforming to rule (1) above. */ + for (e = b->succ; e; e = e->succ_next) + { + if (e->dest == EXIT_BLOCK_PTR) + continue; + if (idom[e->dest->index] != bb) + SET_BIT (frontiers[bb], e->dest->index); + } + + /* Find blocks conforming to rule (2). */ + for (c = 0; c < n_basic_blocks; ++c) + if (idom[c] == bb) + { + int x; + EXECUTE_IF_SET_IN_SBITMAP (frontiers[c], 0, x, + { + if (idom[x] != bb) + SET_BIT (frontiers[bb], x); + }); + } +} + +static void +compute_dominance_frontiers (frontiers, idom) + sbitmap *frontiers; + int *idom; +{ + sbitmap done = sbitmap_alloc (n_basic_blocks); + sbitmap_zero (done); + + compute_dominance_frontiers_1 (frontiers, idom, 0, done); + + sbitmap_free (done); +} + + +/* Computing the Iterated Dominance Frontier: + + This is the set of merge points for a given register. + + This is not particularly intuitive. See section 7.1 of Morgan, in + particular figures 7.3 and 7.4 and the immediately surrounding text. +*/ + +static void +compute_iterated_dominance_frontiers (idfs, frontiers, evals, nregs) + sbitmap *idfs; + sbitmap *frontiers; + sbitmap *evals; + int nregs; +{ + sbitmap worklist; + int reg, passes = 0; + + worklist = sbitmap_alloc (n_basic_blocks); + + for (reg = 0; reg < nregs; ++reg) + { + sbitmap idf = idfs[reg]; + int b, changed; + + /* Start the iterative process by considering those blocks that + evaluate REG. We'll add their dominance frontiers to the + IDF, and then consider the blocks we just added. */ + sbitmap_copy (worklist, evals[reg]); + + /* Morgan's algorithm is incorrect here. Blocks that evaluate + REG aren't necessarily in REG's IDF. Start with an empty IDF. */ + sbitmap_zero (idf); + + /* Iterate until the worklist is empty. */ + do + { + changed = 0; + passes++; + EXECUTE_IF_SET_IN_SBITMAP (worklist, 0, b, + { + RESET_BIT (worklist, b); + /* For each block on the worklist, add to the IDF all + blocks on its dominance frontier that aren't already + on the IDF. Every block that's added is also added + to the worklist. */ + sbitmap_union_of_diff (worklist, worklist, frontiers[b], idf); + sbitmap_a_or_b (idf, idf, frontiers[b]); + changed = 1; + }); + } + while (changed); + } + + sbitmap_free (worklist); + + if (rtl_dump_file) + { + fprintf(rtl_dump_file, + "Iterated dominance frontier: %d passes on %d regs.\n", + passes, nregs); + } +} + + +/* Insert the phi nodes. */ + +static void +insert_phi_node (regno, bb) + int regno, bb; +{ + basic_block b = BASIC_BLOCK (bb); + edge e; + int npred, i; + rtvec vec; + rtx phi, reg; + + /* Find out how many predecessors there are. */ + for (e = b->pred, npred = 0; e; e = e->pred_next) + if (e->src != ENTRY_BLOCK_PTR) + npred++; + + /* If this block has no "interesting" preds, then there is nothing to + do. Consider a block that only has the entry block as a pred. */ + if (npred == 0) + return; + + /* This is the register to which the phi function will be assinged. */ + reg = regno_reg_rtx[regno + FIRST_PSEUDO_REGISTER]; + + /* Construct the arguments to the PHI node. The use of pc_rtx is just + a placeholder; we'll insert the proper value in rename_registers. */ + vec = rtvec_alloc (npred * 2); + for (e = b->pred, i = 0; e ; e = e->pred_next, i += 2) + if (e->src != ENTRY_BLOCK_PTR) + { + RTVEC_ELT (vec, i + 0) = pc_rtx; + RTVEC_ELT (vec, i + 1) = GEN_INT (e->src->index); + } + + phi = gen_rtx_PHI (VOIDmode, vec); + phi = gen_rtx_SET (VOIDmode, reg, phi); + + if (GET_CODE (b->head) == CODE_LABEL) + emit_insn_after (phi, b->head); + else + b->head = emit_insn_before (phi, b->head); +} + + +static void +insert_phi_nodes (idfs, evals, nregs) + sbitmap *idfs; + sbitmap *evals ATTRIBUTE_UNUSED; + int nregs; +{ + int reg; + + for (reg = 0; reg < nregs; ++reg) + { + int b; + EXECUTE_IF_SET_IN_SBITMAP (idfs[reg], 0, b, + { + if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start, + reg + FIRST_PSEUDO_REGISTER)) + insert_phi_node (reg, b); + }); + } +} + +/* Rename the registers to conform to SSA. + + This is essentially the algorithm presented in Figure 7.8 of Morgan, + with a few changes to reduce pattern search time in favour of a bit + more memory usage. */ + + +/* One of these is created for each set. It will live in a list local + to its basic block for the duration of that block's processing. */ +struct rename_set_data +{ + struct rename_set_data *next; + rtx *reg_loc; + rtx set_dest; + rtx new_reg; + rtx prev_reg; +}; + +static void new_registers_for_updates + PARAMS ((struct rename_set_data *set_data, + struct rename_set_data *old_set_data, rtx insn)); + +/* This is part of a rather ugly hack to allow the pre-ssa regno to be + reused. If, during processing, a register has not yet been touched, + ssa_rename_to[regno] will be NULL. Now, in the course of pushing + and popping values from ssa_rename_to, when we would ordinarily + pop NULL back in, we pop RENAME_NO_RTX. We treat this exactly the + same as NULL, except that it signals that the original regno has + already been reused. */ +#define RENAME_NO_RTX pc_rtx + +/* Part one of the first step of rename_block, called through for_each_rtx. + Mark pseudos that are set for later update. Transform uses of pseudos. */ + +static int +rename_insn_1 (ptr, data) + rtx *ptr; + void *data; +{ + rtx x = *ptr; + struct rename_set_data **set_datap = data; + + if (x == NULL_RTX) + return 0; + + switch (GET_CODE (x)) + { + case SET: + { + rtx *destp = &SET_DEST (x); + rtx dest = SET_DEST (x); + + /* Subregs at word 0 are interesting. Subregs at word != 0 are + presumed to be part of a contiguous multi-word set sequence. */ + while (GET_CODE (dest) == SUBREG + && SUBREG_WORD (dest) == 0) + { + destp = &SUBREG_REG (dest); + dest = SUBREG_REG (dest); + } + + if (GET_CODE (dest) == REG + && REGNO (dest) >= FIRST_PSEUDO_REGISTER) + { + /* We found a genuine set of an interesting register. Tag + it so that we can create a new name for it after we finish + processing this insn. */ + + struct rename_set_data *r; + r = (struct rename_set_data *) xmalloc (sizeof(*r)); + + r->reg_loc = destp; + r->set_dest = SET_DEST (x); + r->next = *set_datap; + *set_datap = r; + + /* Since we do not wish to (directly) traverse the + SET_DEST, recurse through for_each_rtx for the SET_SRC + and return. */ + for_each_rtx (&SET_SRC (x), rename_insn_1, data); + return -1; + } + + /* Otherwise, this was not an interesting destination. Continue + on, marking uses as normal. */ + return 0; + } + + case REG: + if (REGNO (x) >= FIRST_PSEUDO_REGISTER + && REGNO (x) < ssa_max_reg_num) + { + rtx new_reg = ssa_rename_to[REGNO(x) - FIRST_PSEUDO_REGISTER]; + + if (new_reg != NULL_RTX && new_reg != RENAME_NO_RTX) + { + if (GET_MODE (x) != GET_MODE (new_reg)) + abort (); + *ptr = new_reg; + /* ??? Mark for a new ssa_uses entry. */ + } + /* Else this is a use before a set. Warn? */ + } + return -1; + + case PHI: + /* Never muck with the phi. We do that elsewhere, special-like. */ + return -1; + + default: + /* Anything else, continue traversing. */ + return 0; + } +} + +/* Second part of the first step of rename_block. The portion of the list + beginning at SET_DATA through OLD_SET_DATA contain the sets present in + INSN. Update data structures accordingly. */ + +static void +new_registers_for_updates (set_data, old_set_data, insn) + struct rename_set_data *set_data, *old_set_data; + rtx insn; +{ + while (set_data != old_set_data) + { + int regno, new_regno; + rtx old_reg, new_reg, prev_reg; + + old_reg = *set_data->reg_loc; + regno = REGNO (*set_data->reg_loc); + + /* For the first set we come across, reuse the original regno. */ + if (ssa_rename_to[regno - FIRST_PSEUDO_REGISTER] == NULL_RTX) + { + new_reg = old_reg; + prev_reg = RENAME_NO_RTX; + } + else + { + prev_reg = ssa_rename_to[regno - FIRST_PSEUDO_REGISTER]; + new_reg = gen_reg_rtx (GET_MODE (old_reg)); + } + + set_data->new_reg = new_reg; + set_data->prev_reg = prev_reg; + new_regno = REGNO (new_reg); + ssa_rename_to[regno - FIRST_PSEUDO_REGISTER] = new_reg; + + if (new_regno >= (int) ssa_definition->num_elements) + { + int new_limit = new_regno * 5 / 4; + ssa_definition = VARRAY_GROW (ssa_definition, new_limit); + ssa_uses = VARRAY_GROW (ssa_uses, new_limit); + ssa_rename_from = VARRAY_GROW (ssa_rename_from, new_limit); + } + + VARRAY_RTX (ssa_definition, new_regno) = insn; + VARRAY_RTX (ssa_rename_from, new_regno) = old_reg; + + set_data = set_data->next; + } +} + +static void +rename_block (bb, idom) + int bb; + int *idom; +{ + basic_block b = BASIC_BLOCK (bb); + edge e; + rtx insn, next, last; + struct rename_set_data *set_data = NULL; + int c; + + /* Step One: Walk the basic block, adding new names for sets and + replacing uses. */ + + next = b->head; + last = b->end; + do + { + insn = next; + if (GET_RTX_CLASS (GET_CODE (insn)) == 'i') + { + struct rename_set_data *old_set_data = set_data; + + for_each_rtx (&PATTERN (insn), rename_insn_1, &set_data); + for_each_rtx (®_NOTES (insn), rename_insn_1, &set_data); + + new_registers_for_updates (set_data, old_set_data, insn); + } + + next = NEXT_INSN (insn); + } + while (insn != last); + + /* Step Two: Update the phi nodes of this block's successors. */ + + for (e = b->succ; e; e = e->succ_next) + { + if (e->dest == EXIT_BLOCK_PTR) + continue; + + insn = e->dest->head; + if (GET_CODE (insn) == CODE_LABEL) + insn = NEXT_INSN (insn); + + while (PHI_NODE_P (insn)) + { + rtx phi = PATTERN (insn); + int regno; + rtx reg; + + /* Find out which of our outgoing registers this node is + indended to replace. Note that if this not the first PHI + node to have been created for this register, we have to + jump through rename links to figure out which register + we're talking about. This can easily be recognized by + noting that the regno is new to this pass. */ + regno = REGNO (SET_DEST (phi)); + if (regno >= ssa_max_reg_num) + regno = REGNO (VARRAY_RTX (ssa_rename_from, regno)); + reg = ssa_rename_to[regno - FIRST_PSEUDO_REGISTER]; + + /* It is possible for the variable to be uninitialized on + edges in. Reduce the arity of the PHI so that we don't + consider those edges. */ + if (reg == NULL || reg == RENAME_NO_RTX) + { + if (! remove_phi_alternative (phi, bb)) + abort (); + } + else + { + /* When we created the PHI nodes, we did not know what mode + the register should be. Now that we've found an original, + we can fill that in. */ + if (GET_MODE (SET_DEST (phi)) == VOIDmode) + PUT_MODE (SET_DEST (phi), GET_MODE (reg)); + else if (GET_MODE (SET_DEST (phi)) != GET_MODE (reg)) + abort(); + + *phi_alternative (phi, bb) = reg; + /* ??? Mark for a new ssa_uses entry. */ + } + + insn = NEXT_INSN (insn); + } + } + + /* Step Three: Do the same to the children of this block in + dominator order. */ + + for (c = 0; c < n_basic_blocks; ++c) + if (idom[c] == bb) + rename_block (c, idom); + + /* Step Four: Update the sets to refer to their new register. */ + + while (set_data) + { + struct rename_set_data *next; + rtx old_reg; + + old_reg = *set_data->reg_loc; + *set_data->reg_loc = set_data->new_reg; + ssa_rename_to[REGNO (old_reg)-FIRST_PSEUDO_REGISTER] + = set_data->prev_reg; + + next = set_data->next; + free (set_data); + set_data = next; + } +} + +static void +rename_registers (nregs, idom) + int nregs; + int *idom; +{ + VARRAY_RTX_INIT (ssa_definition, nregs * 3, "ssa_definition"); + VARRAY_RTX_INIT (ssa_uses, nregs * 3, "ssa_uses"); + VARRAY_RTX_INIT (ssa_rename_from, nregs * 3, "ssa_rename_from"); + + ssa_rename_to = (rtx *) alloca (nregs * sizeof(rtx)); + bzero (ssa_rename_to, nregs * sizeof(rtx)); + + rename_block (0, idom); + + /* ??? Update basic_block_live_at_start, and other flow info + as needed. */ + + ssa_rename_to = NULL; +} + + +/* The main entry point for moving to SSA. */ + +void +convert_to_ssa() +{ + /* Element I is the set of blocks that set register I. */ + sbitmap *evals; + + /* Dominator bitmaps. */ + sbitmap *dominators; + sbitmap *dfs; + sbitmap *idfs; + + /* Element I is the immediate dominator of block I. */ + int *idom; + + int nregs; + + find_basic_blocks (get_insns (), max_reg_num(), NULL); + /* The dominator algorithms assume all blocks are reachable, clean + up first. */ + cleanup_cfg (get_insns ()); + life_analysis (get_insns (), max_reg_num (), NULL, 1); + + /* Compute dominators. */ + dominators = sbitmap_vector_alloc (n_basic_blocks, n_basic_blocks); + compute_flow_dominators (dominators, NULL); + + idom = (int *) alloca (n_basic_blocks * sizeof (int)); + memset ((void *)idom, -1, (size_t)n_basic_blocks * sizeof (int)); + simplify_to_immediate_dominators (idom, dominators); + + sbitmap_vector_free (dominators); + + if (rtl_dump_file) + { + int i; + fputs (";; Immediate Dominators:\n", rtl_dump_file); + for (i = 0; i < n_basic_blocks; ++i) + fprintf (rtl_dump_file, ";\t%3d = %3d\n", i, idom[i]); + fflush (rtl_dump_file); + } + + /* Compute dominance frontiers. */ + + dfs = sbitmap_vector_alloc (n_basic_blocks, n_basic_blocks); + compute_dominance_frontiers (dfs, idom); + + if (rtl_dump_file) + { + dump_sbitmap_vector (rtl_dump_file, ";; Dominance Frontiers:", + "; Basic Block", dfs, n_basic_blocks); + fflush (rtl_dump_file); + } + + /* Compute register evaluations. */ + + ssa_max_reg_num = max_reg_num(); + nregs = ssa_max_reg_num - FIRST_PSEUDO_REGISTER; + evals = sbitmap_vector_alloc (nregs, n_basic_blocks); + find_evaluations (evals, nregs); + + /* Compute the iterated dominance frontier for each register. */ + + idfs = sbitmap_vector_alloc (nregs, n_basic_blocks); + compute_iterated_dominance_frontiers (idfs, dfs, evals, nregs); + + if (rtl_dump_file) + { + dump_sbitmap_vector (rtl_dump_file, ";; Iterated Dominance Frontiers:", + "; Register-FIRST_PSEUDO_REGISTER", idfs, nregs); + fflush (rtl_dump_file); + } + + /* Insert the phi nodes. */ + + insert_phi_nodes (idfs, evals, nregs); + + /* Rename the registers to satisfy SSA. */ + + rename_registers (nregs, idom); + + /* All done! Clean up and go home. */ + + sbitmap_vector_free (dfs); + sbitmap_vector_free (evals); + sbitmap_vector_free (idfs); +} + + +/* This is intended to be the FIND of a UNION/FIND algorithm managing + the partitioning of the pseudos. Glancing through the rest of the + global optimizations, it seems things will work out best if the + partition is set up just before convert_from_ssa is called. See + section 11.4 of Morgan. + + ??? Morgan's algorithm, perhaps with some care, may allow copy + propagation to happen concurrently with the conversion from SSA. + + However, it presents potential problems with critical edges -- to + split or not to split. He mentions beginning the partitioning by + unioning registers associated by a PHI across abnormal critical + edges. This is the approache taken here. It is unclear to me how + we are able to do that arbitrarily, though. + + Alternately, Briggs presents an algorithm in which critical edges + need not be split, at the expense of the creation of new pseudos, + and the need for some concurrent register renaming. Moreover, it + is ameanable for modification such that the instructions can be + placed anywhere in the target block, which solves the before-call + placement problem. However, I don't immediately see how we could + do that concurrently with copy propoagation. + + More study is required. */ + + +/* + * Eliminate the PHI across the edge from C to B. + */ + +/* REG is the representative temporary of its partition. Add it to the + set of nodes to be processed, if it hasn't been already. Return the + index of this register in the node set. */ + +static inline int +ephi_add_node (reg, nodes, n_nodes) + rtx reg, *nodes; + int *n_nodes; +{ + int i; + for (i = *n_nodes - 1; i >= 0; --i) + if (REGNO (reg) == REGNO (nodes[i])) + return i; + + nodes[i = (*n_nodes)++] = reg; + return i; +} + +/* Part one of the topological sort. This is a forward (downward) search + through the graph collecting a stack of nodes to process. Assuming no + cycles, the nodes at top of the stack when we are finished will have + no other dependancies. */ + +static int * +ephi_forward (t, visited, succ, tstack) + int t; + sbitmap visited; + sbitmap *succ; + int *tstack; +{ + int s; + + SET_BIT (visited, t); + + EXECUTE_IF_SET_IN_SBITMAP (succ[t], 0, s, + { + if (! TEST_BIT (visited, s)) + tstack = ephi_forward (s, visited, succ, tstack); + }); + + *tstack++ = t; + return tstack; +} + +/* Part two of the topological sort. The is a backward search through + a cycle in the graph, copying the data forward as we go. */ + +static void +ephi_backward (t, visited, pred, nodes) + int t; + sbitmap visited, *pred; + rtx *nodes; +{ + int p; + + SET_BIT (visited, t); + + EXECUTE_IF_SET_IN_SBITMAP (pred[t], 0, p, + { + if (! TEST_BIT (visited, p)) + { + ephi_backward (p, visited, pred, nodes); + emit_move_insn (nodes[p], nodes[t]); + } + }); +} + +/* Part two of the topological sort. Create the copy for a register + and any cycle of which it is a member. */ + +static void +ephi_create (t, visited, pred, succ, nodes) + int t; + sbitmap visited, *pred, *succ; + rtx *nodes; +{ + rtx reg_u = NULL_RTX; + int unvisited_predecessors = 0; + int p; + + /* Iterate through the predecessor list looking for unvisited nodes. + If there are any, we have a cycle, and must deal with that. At + the same time, look for a visited predecessor. If there is one, + we won't need to create a temporary. */ + + EXECUTE_IF_SET_IN_SBITMAP (pred[t], 0, p, + { + if (! TEST_BIT (visited, p)) + unvisited_predecessors = 1; + else if (!reg_u) + reg_u = nodes[p]; + }); + + if (unvisited_predecessors) + { + /* We found a cycle. Copy out one element of the ring (if necessary), + then traverse the ring copying as we go. */ + + if (!reg_u) + { + reg_u = gen_reg_rtx (GET_MODE (nodes[t])); + emit_move_insn (reg_u, nodes[t]); + } + + EXECUTE_IF_SET_IN_SBITMAP (pred[t], 0, p, + { + if (! TEST_BIT (visited, p)) + { + ephi_backward (p, visited, pred, nodes); + emit_move_insn (nodes[p], reg_u); + } + }); + } + else + { + /* No cycle. Just copy the value from a successor. */ + + int s; + EXECUTE_IF_SET_IN_SBITMAP (succ[t], 0, s, + { + SET_BIT (visited, t); + emit_move_insn (nodes[t], nodes[s]); + return; + }); + } +} + +/* Convert the edge to normal form. */ + +static void +eliminate_phi (e, reg_partition) + edge e; + partition reg_partition; +{ + int n_nodes; + sbitmap *pred, *succ; + sbitmap visited; + rtx *nodes; + int *stack, *tstack; + rtx insn; + int i; + + /* Collect an upper bound on the number of registers needing processing. */ + + insn = e->dest->head; + if (GET_CODE (insn) == CODE_LABEL) + insn = next_nonnote_insn (insn); + + n_nodes = 0; + while (PHI_NODE_P (insn)) + { + insn = next_nonnote_insn (insn); + n_nodes += 2; + } + + if (n_nodes == 0) + return; + + /* Build the auxilliary graph R(B). + + The nodes of the graph are the members of the register partition + present in Phi(B). There is an edge from FIND(T0)->FIND(T1) for + each T0 = PHI(...,T1,...), where T1 is for the edge from block C. */ + + nodes = (rtx *) alloca (n_nodes * sizeof(rtx)); + pred = sbitmap_vector_alloc (n_nodes, n_nodes); + succ = sbitmap_vector_alloc (n_nodes, n_nodes); + sbitmap_vector_zero (pred, n_nodes); + sbitmap_vector_zero (succ, n_nodes); + + insn = e->dest->head; + if (GET_CODE (insn) == CODE_LABEL) + insn = next_nonnote_insn (insn); + + n_nodes = 0; + for (; PHI_NODE_P (insn); insn = next_nonnote_insn (insn)) + { + rtx* preg = phi_alternative (PATTERN (insn), e->src->index); + rtx tgt = SET_DEST (PATTERN (insn)); + rtx reg; + + /* There may be no phi alternative corresponding to this edge. + This indicates that the phi variable is undefined along this + edge. */ + if (preg == NULL) + continue; + reg = *preg; + + if (GET_CODE (reg) != REG || GET_CODE (tgt) != REG) + abort(); + + /* If the two registers are already in the same partition, + nothing will need to be done. */ + if (partition_find (reg_partition, REGNO (reg)) + != partition_find (reg_partition, REGNO (tgt))) + { + int ireg, itgt; + + ireg = ephi_add_node (reg, nodes, &n_nodes); + itgt = ephi_add_node (tgt, nodes, &n_nodes); + + SET_BIT (pred[ireg], itgt); + SET_BIT (succ[itgt], ireg); + } + } + + if (n_nodes == 0) + goto out; + + /* Begin a topological sort of the graph. */ + + visited = sbitmap_alloc (n_nodes); + sbitmap_zero (visited); + + tstack = stack = (int *) alloca (n_nodes * sizeof (int)); + + for (i = 0; i < n_nodes; ++i) + if (! TEST_BIT (visited, i)) + tstack = ephi_forward (i, visited, succ, tstack); + + sbitmap_zero (visited); + + /* As we find a solution to the tsort, collect the implementation + insns in a sequence. */ + start_sequence (); + + while (tstack != stack) + { + i = *--tstack; + if (! TEST_BIT (visited, i)) + ephi_create (i, visited, pred, succ, nodes); + } + + insn = gen_sequence (); + end_sequence (); + insert_insn_on_edge (insn, e); + if (rtl_dump_file) + fprintf (rtl_dump_file, "Emitting copy on edge (%d,%d)\n", + e->src->index, e->dest->index); + + sbitmap_free (visited); +out: + sbitmap_vector_free (pred); + sbitmap_vector_free (succ); +} + + +/* For basic block B, consider all phi insns which provide an + alternative corresponding to an incoming abnormal critical edge. + Place the phi alternative corresponding to that abnormal critical + edge in the same register class as the destination of the set. + + From Morgan, p. 178: + + For each abnormal critical edge (C, B), + if T0 = phi (T1, ..., Ti, ..., Tm) is a phi node in B, + and C is the ith predecessor of B, + then T0 and Ti must be equivalent. + + Return non-zero iff any such cases were found for which the two + regs were not already in the same class. */ + +static int +make_regs_equivalent_over_bad_edges (bb, reg_partition) + int bb; + partition reg_partition; +{ + int changed = 0; + basic_block b = BASIC_BLOCK (bb); + rtx phi = b->head; + + /* Advance to the first phi node. */ + if (GET_CODE (phi) == CODE_LABEL) + phi = next_nonnote_insn (phi); + + /* Scan all the phi nodes. */ + for (; + PHI_NODE_P (phi); + phi = next_nonnote_insn (phi)) + { + edge e; + int tgt_regno; + rtx set = PATTERN (phi); + rtx tgt = SET_DEST (set); + + /* The set target is expected to be a pseudo. */ + if (GET_CODE (tgt) != REG + || REGNO (tgt) < FIRST_PSEUDO_REGISTER) + abort (); + tgt_regno = REGNO (tgt); + + /* Scan incoming abnormal critical edges. */ + for (e = b->pred; e; e = e->pred_next) + if (e->flags & (EDGE_ABNORMAL | EDGE_CRITICAL)) + { + rtx *alt = phi_alternative (set, e->src->index); + int alt_regno; + + /* If there is no alternative corresponding to this edge, + the value is undefined along the edge, so just go on. */ + if (alt == 0) + continue; + + /* The phi alternative is expected to be a pseudo. */ + if (GET_CODE (*alt) != REG + || REGNO (*alt) < FIRST_PSEUDO_REGISTER) + abort (); + alt_regno = REGNO (*alt); + + /* If the set destination and the phi alternative aren't + already in the same class... */ + if (partition_find (reg_partition, tgt_regno) + != partition_find (reg_partition, alt_regno)) + { + /* ... make them such. */ + partition_union (reg_partition, + tgt_regno, alt_regno); + ++changed; + } + } + } + + return changed; +} + + +/* Consider phi insns in basic block BB pairwise. If the set target + of both isns are equivalent pseudos, make the corresponding phi + alternatives in each phi corresponding equivalent. + + Return nonzero if any new register classes were unioned. */ + +static int +make_equivalent_phi_alternatives_equivalent (bb, reg_partition) + int bb; + partition reg_partition; +{ + int changed = 0; + rtx phi = BLOCK_HEAD (bb); + basic_block b = BASIC_BLOCK (bb); + + /* Advance to the first phi node. */ + if (GET_CODE (phi) == CODE_LABEL) + phi = next_nonnote_insn (phi); + + /* Scan all the phi nodes. */ + for (; + PHI_NODE_P (phi); + phi = next_nonnote_insn (phi)) + { + rtx set = PATTERN (phi); + /* The regno of the destination of the set. */ + int tgt_regno = REGNO (SET_DEST (PATTERN (phi))); + + rtx phi2 = next_nonnote_insn (phi); + + /* Scan all phi nodes following this one. */ + for (; + PHI_NODE_P (phi2); + phi2 = next_nonnote_insn (phi2)) + { + rtx set2 = PATTERN (phi2); + /* The regno of the destination of the set. */ + int tgt2_regno = REGNO (SET_DEST (set2)); + + /* Are the set destinations equivalent regs? */ + if (partition_find (reg_partition, tgt_regno) == + partition_find (reg_partition, tgt2_regno)) + { + edge e; + /* Scan over edges. */ + for (e = b->pred; e; e = e->pred_next) + { + int pred_block = e->src->index; + /* Identify the phi altnernatives from both phi + nodes corresponding to this edge. */ + rtx *alt = phi_alternative (set, pred_block); + rtx *alt2 = phi_alternative (set2, pred_block); + + /* If one of the phi nodes doesn't have a + corresponding alternative, just skip it. */ + if (alt == 0 || alt2 == 0) + continue; + + /* Both alternatives should be pseudos. */ + if (GET_CODE (*alt) != REG + || REGNO (*alt) < FIRST_PSEUDO_REGISTER) + abort (); + if (GET_CODE (*alt2) != REG + || REGNO (*alt2) < FIRST_PSEUDO_REGISTER) + abort (); + + /* If the altneratives aren't already in the same + class ... */ + if (partition_find (reg_partition, REGNO (*alt)) + != partition_find (reg_partition, REGNO (*alt2))) + { + /* ... make them so. */ + partition_union (reg_partition, + REGNO (*alt), REGNO (*alt2)); + ++changed; + } + } + } + } + } + + return changed; +} + + +/* Compute a conservative partition of outstanding pseudo registers. + See Morgan 7.3.1. */ + +static partition +compute_conservative_reg_partition () +{ + int bb; + int changed = 0; + + /* We don't actually work with hard registers, but it's easier to + carry them around anyway rather than constantly doing register + number arithmetic. */ + partition p = + partition_new (ssa_definition->num_elements + FIRST_PSEUDO_REGISTER); + + /* The first priority is to make sure registers that might have to + be copied on abnormal critical edges are placed in the same + partition. This saves us from having to split abnormal critical + edges. */ + for (bb = n_basic_blocks; --bb >= 0; ) + changed += make_regs_equivalent_over_bad_edges (bb, p); + + /* Now we have to insure that corresponding arguments of phi nodes + assigning to corresponding regs are equivalent. Iterate until + nothing changes. */ + while (changed > 0) + { + changed = 0; + for (bb = n_basic_blocks; --bb >= 0; ) + changed += make_equivalent_phi_alternatives_equivalent (bb, p); + } + + return p; +} + + +/* Rename regs in insn PTR that are equivalent. DATA is the register + partition which specifies equivalences. */ + +static int +rename_equivalent_regs_in_insn (ptr, data) + rtx *ptr; + void* data; +{ + rtx x = *ptr; + partition reg_partition = (partition) data; + + if (x == NULL_RTX) + return 0; + + switch (GET_CODE (x)) + { + case SET: + { + rtx *destp = &SET_DEST (x); + rtx dest = SET_DEST (x); + + /* Subregs at word 0 are interesting. Subregs at word != 0 are + presumed to be part of a contiguous multi-word set sequence. */ + while (GET_CODE (dest) == SUBREG + && SUBREG_WORD (dest) == 0) + { + destp = &SUBREG_REG (dest); + dest = SUBREG_REG (dest); + } + + if (GET_CODE (dest) == REG + && REGNO (dest) >= FIRST_PSEUDO_REGISTER) + { + /* Got a pseudo; replace it. */ + int regno = REGNO (dest); + int new_regno = partition_find (reg_partition, regno); + if (regno != new_regno) + *destp = regno_reg_rtx [new_regno]; + + for_each_rtx (&SET_SRC (x), + rename_equivalent_regs_in_insn, + data); + return -1; + } + + /* Otherwise, this was not an interesting destination. Continue + on, marking uses as normal. */ + return 0; + } + + case REG: + if (REGNO (x) >= FIRST_PSEUDO_REGISTER) + { + int regno = REGNO (x); + int new_regno = partition_find (reg_partition, regno); + if (regno != new_regno) + { + rtx new_reg = regno_reg_rtx[new_regno]; + if (GET_MODE (x) != GET_MODE (new_reg)) + abort (); + *ptr = new_reg; + } + } + return -1; + + case PHI: + /* No need to rename the phi nodes. We'll check equivalence + when inserting copies. */ + return -1; + + default: + /* Anything else, continue traversing. */ + return 0; + } +} + + +/* Rename regs that are equivalent in REG_PARTITION. */ + +static void +rename_equivalent_regs (reg_partition) + partition reg_partition; +{ + int bb; + + for (bb = n_basic_blocks; --bb >= 0; ) + { + basic_block b = BASIC_BLOCK (bb); + rtx next = b->head; + rtx last = b->end; + rtx insn; + + do + { + insn = next; + if (GET_RTX_CLASS (GET_CODE (insn)) == 'i') + { + for_each_rtx (&PATTERN (insn), + rename_equivalent_regs_in_insn, + reg_partition); + for_each_rtx (®_NOTES (insn), + rename_equivalent_regs_in_insn, + reg_partition); + } + + next = NEXT_INSN (insn); + } + while (insn != last); + } +} + + +/* The main entry point for moving from SSA. */ + +void +convert_from_ssa() +{ + int bb; + partition reg_partition; + + reg_partition = compute_conservative_reg_partition (); + rename_equivalent_regs (reg_partition); + + /* Eliminate the PHI nodes. */ + for (bb = n_basic_blocks; --bb >= 0; ) + { + basic_block b = BASIC_BLOCK (bb); + edge e; + + for (e = b->pred; e; e = e->pred_next) + if (e->src != ENTRY_BLOCK_PTR) + eliminate_phi (e, reg_partition); + } + + partition_delete (reg_partition); + + /* Actually delete the PHI nodes. */ + for (bb = n_basic_blocks; --bb >= 0; ) + { + rtx insn = BLOCK_HEAD (bb); + int start = (GET_CODE (insn) != CODE_LABEL); + + if (! start) + insn = next_nonnote_insn (insn); + while (PHI_NODE_P (insn)) + { + insn = delete_insn (insn); + if (GET_CODE (insn) == NOTE) + insn = next_nonnote_insn (insn); + } + if (start) + BLOCK_HEAD (bb) = insn; + } + + /* Commit all the copy nodes needed to convert out of SSA form. */ + commit_edge_insertions (); + + count_or_remove_death_notes (NULL, 1); +} diff --git a/gcc/toplev.c b/gcc/toplev.c index 55b03a42a87..b1a0694336f 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -257,6 +257,7 @@ int stack_reg_dump = 0; #ifdef MACHINE_DEPENDENT_REORG int mach_dep_reorg_dump = 0; #endif +int ssa_dump = 0; static int flag_print_mem = 0; static int version_flag = 0; static char * filename = 0; @@ -695,6 +696,9 @@ int flag_gnu_linker = 0; int flag_gnu_linker = 1; #endif +/* Enable SSA. */ +int flag_ssa = 0; + /* Tag all structures with __attribute__(packed) */ int flag_pack_struct = 0; @@ -997,6 +1001,8 @@ lang_independent_options f_options[] = "Suppress output of instruction numbers and line number notes in debugging dumps"}, {"instrument-functions", &flag_instrument_function_entry_exit, 1, "Instrument function entry/exit with profiling calls"}, + {"ssa", &flag_ssa, 1, + "Enable SSA optimizations" }, {"leading-underscore", &flag_leading_underscore, 1, "External symbols have a leading underscore" }, {"ident", &flag_no_ident, 0, @@ -2152,6 +2158,11 @@ compile_file (name) if (graph_dump_format != no_graph) clean_graph_dump_file (dump_base_name, ".03.addressof"); } + if (ssa_dump) + { + clean_dump_file (".033.ssa"); + clean_dump_file (".037.ussa"); + } if (gcse_dump) { clean_dump_file (".04.gcse"); @@ -3125,6 +3136,30 @@ rest_of_compilation (decl) if (ggc_p) ggc_collect (); + if (flag_ssa) + { + if (ssa_dump) + open_dump_file (".033.ssa", decl_printable_name (decl, 2)); + convert_to_ssa (); + if (ssa_dump) + close_dump_file (print_rtl_with_bb, insns); + + if (ssa_dump) + open_dump_file (".037.ussa", decl_printable_name (decl, 2)); + convert_from_ssa (); + /* New registers have been created. Rescan their usage. */ + reg_scan (insns, max_reg_num (), 1); + if (ssa_dump) + close_dump_file (print_rtl_with_bb, insns); + + /* Life analysis used in SSA adds log_links but these shouldn't + be there until the flow stage, so clear them away. */ + clear_log_links (insns); + + if (ggc_p) + ggc_collect (); + } + /* Perform global cse. */ if (optimize > 0 && flag_gcse) @@ -4021,6 +4056,7 @@ decode_d_option (arg) mach_dep_reorg_dump = 1; #endif peephole2_dump = 1; + ssa_dump = 1; break; case 'A': flag_debug_asm = 1; @@ -4039,6 +4075,9 @@ decode_d_option (arg) dbr_sched_dump = 1; break; #endif + case 'e': + ssa_dump = 1; + break; case 'f': flow_dump = 1; break; diff --git a/include/ChangeLog b/include/ChangeLog index a73e5fefb06..be7061c6394 100644 --- a/include/ChangeLog +++ b/include/ChangeLog @@ -1,3 +1,7 @@ +2000-03-09 Alex Samuel <samuel@codesourcery.com> + + * partition.h: New file. + 2000-03-09 Zack Weinberg <zack@wolery.cumb.org> * hashtab.h (struct htab): Add del_f. diff --git a/include/partition.h b/include/partition.h new file mode 100644 index 00000000000..f49d67a8cad --- /dev/null +++ b/include/partition.h @@ -0,0 +1,81 @@ +/* List implentation of a partition of consecutive integers. + Copyright (C) 2000 Free Software Foundation, Inc. + Contributed by CodeSourcery, LLC. + + This file is part of GNU CC. + + GNU CC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + GNU CC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GNU CC; see the file COPYING. If not, write to + the Free Software Foundation, 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* This package implements a partition of consecutive integers. The + elements are partitioned into classes. Each class is represented + by one of its elements, the canonical element, which is chosen + arbitrarily from elements in the class. The principal operations + on a partition are FIND, which takes an element, determines its + class, and returns the canonical element for that class, and UNION, + which unites the two classes that contain two given elements into a + single class. + + The list implementation used here provides constant-time finds. By + storing the size of each class with the class's canonical element, + it is able to perform unions over all the classes in the partition + in O (N log N) time. */ + +#ifndef _PARTITION_H +#define _PARTITION_H + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +#include <ansidecl.h> +#include <stdio.h> + +struct partition_elem +{ + /* The canonical element that represents the class containing this + element. */ + int class_element; + /* The next element in this class. Elements in each class form a + circular list. */ + struct partition_elem* next; + /* The number of elements in this class. Valid only if this is the + canonical element for its class. */ + unsigned class_count; +}; + +typedef struct partition_def +{ + /* The number of elements in this partition. */ + int num_elements; + /* The elements in the partition. */ + struct partition_elem elements[1]; +} *partition; + +extern partition partition_new PARAMS((int)); +extern void partition_delete PARAMS((partition)); +extern int partition_union PARAMS((partition, + int, + int)); +extern void partition_print PARAMS((partition, + FILE*)); + +/* Returns the canonical element corresponding to the class containing + ELEMENT__ in PARTITION__. */ + +#define partition_find(partition__, element__) \ + ((partition__)->elements[(element__)].class_element) + +#endif /* _PARTITION_H */ diff --git a/libiberty/ChangeLog b/libiberty/ChangeLog index a9d849c3970..de8ed944ad0 100644 --- a/libiberty/ChangeLog +++ b/libiberty/ChangeLog @@ -1,3 +1,10 @@ +2000-03-09 Alex Samuel <samuel@codesourcery.com> + + * Makefile.in (CFILES): Add partition.c. + (REQUIRED_OFILES): Add partition.o. + (partition.o): New rule. + * partition.c: New file. + 2000-03-09 Zack Weinberg <zack@wolery.cumb.org> * hashtab.c (htab_create): Set del_f. diff --git a/libiberty/Makefile.in b/libiberty/Makefile.in index c6eb4668e20..1039d5922c6 100644 --- a/libiberty/Makefile.in +++ b/libiberty/Makefile.in @@ -123,21 +123,22 @@ HFILES = alloca-conf.h # NOTE: If you add new files to the library, add them to this list # (alphabetical), and add them to REQUIRED_OFILES or funcs in # configure.in. -CFILES = asprintf.c alloca.c argv.c atexit.c basename.c bcmp.c bcopy.c \ +CFILES = asprintf.c alloca.c argv.c atexit.c basename.c bcmp.c bcopy.c \ bzero.c calloc.c choose-temp.c clock.c concat.c cplus-dem.c fdmatch.c \ - fnmatch.c getcwd.c getpwd.c getopt.c getopt1.c getpagesize.c \ - getruntime.c floatformat.c hashtab.c hex.c index.c insque.c memchr.c \ - memcmp.c memcpy.c memmove.c memset.c mkstemps.c objalloc.c obstack.c \ - pexecute.c putenv.c random.c rename.c rindex.c setenv.c sigsetmask.c \ - spaces.c splay-tree.c strcasecmp.c strncasecmp.c strchr.c strdup.c \ - strerror.c strrchr.c strsignal.c strstr.c strtod.c strtol.c strtoul.c \ - tmpnam.c vasprintf.c vfork.c vfprintf.c vprintf.c vsprintf.c \ - waitpid.c xatexit.c xexit.c xmalloc.c xmemdup.c xstrdup.c xstrerror.c + fnmatch.c getcwd.c getpwd.c getopt.c getopt1.c getpagesize.c \ + getruntime.c floatformat.c hashtab.c hex.c index.c insque.c memchr.c \ + memcmp.c memcpy.c memmove.c memset.c mkstemps.c objalloc.c obstack.c \ + partition.c pexecute.c putenv.c random.c rename.c rindex.c \ + setenv.c sigsetmask.c spaces.c splay-tree.c strcasecmp.c \ + strncasecmp.c strchr.c strdup.c strerror.c strrchr.c \ + strsignal.c strstr.c strtod.c strtol.c strtoul.c tmpnam.c \ + vasprintf.c vfork.c vfprintf.c vprintf.c vsprintf.c waitpid.c \ + xatexit.c xexit.c xmalloc.c xmemdup.c xstrdup.c xstrerror.c # These are always included in the library. REQUIRED_OFILES = argv.o choose-temp.o concat.o cplus-dem.o \ fdmatch.o fnmatch.o getopt.o getopt1.o getpwd.o getruntime.o hashtab.o \ - hex.o floatformat.o objalloc.o obstack.o pexecute.o spaces.o \ + hex.o floatformat.o objalloc.o obstack.o partition.o pexecute.o spaces.o \ splay-tree.o strerror.o strsignal.o xatexit.o xexit.o xmalloc.o \ xmemdup.o xstrdup.o xstrerror.o @@ -271,6 +272,7 @@ floatformat.o: $(INCDIR)/floatformat.h mkstemps.o: config.h objalloc.o: $(INCDIR)/objalloc.h obstack.o: config.h $(INCDIR)/obstack.h +partition.o: $(INCDIR)/partition.h pexecute.o: config.h $(INCDIR)/libiberty.h setenv.o: config.h spaces.o: $(INCDIR)/libiberty.h diff --git a/libiberty/partition.c b/libiberty/partition.c new file mode 100644 index 00000000000..c1d584774bf --- /dev/null +++ b/libiberty/partition.c @@ -0,0 +1,185 @@ +/* List implentation of a partition of consecutive integers. + Copyright (C) 2000 Free Software Foundation, Inc. + Contributed by CodeSourcery, LLC. + + This file is part of GNU CC. + + GNU CC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + GNU CC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GNU CC; see the file COPYING. If not, write to + the Free Software Foundation, 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +#include "libiberty.h" +#include "partition.h" + +/* Creates a partition of NUM_ELEMENTS elements. Initially each + element is in a class by itself. */ + +partition +partition_new (num_elements) + int num_elements; +{ + int e; + + partition part = (partition) + xmalloc (sizeof (struct partition_def) + + (num_elements - 1) * sizeof (struct partition_elem)); + part->num_elements = num_elements; + for (e = 0; e < num_elements; ++e) + { + part->elements[e].class_element = e; + part->elements[e].next = &(part->elements[e]); + part->elements[e].class_count = 1; + } + + return part; +} + +/* Freeds a partition. */ + +void +partition_delete (part) + partition part; +{ + free (part); +} + +/* Unites the classes containing ELEM1 and ELEM2 into a single class + of partition PART. If ELEM1 and ELEM2 are already in the same + class, does nothing. Returns the canonical element of the + resulting union class. */ + +int +partition_union (part, elem1, elem2) + partition part; + int elem1; + int elem2; +{ + struct partition_elem *elements = part->elements; + struct partition_elem *e1; + struct partition_elem *e2; + struct partition_elem *p; + struct partition_elem *old_next; + /* The canonical element of the resulting union class. */ + int class_element = elements[elem1].class_element; + + /* If they're already in the same class, do nothing. */ + if (class_element == elements[elem2].class_element) + return class_element; + + /* Make sure ELEM1 is in the larger class of the two. If not, swap + them. This way we always scan the shorter list. */ + if (elements[elem1].class_count < elements[elem2].class_count) + { + int temp = elem1; + elem1 = elem2; + elem2 = temp; + class_element = elements[elem1].class_element; + } + + e1 = &(elements[elem1]); + e2 = &(elements[elem2]); + + /* Keep a count of the number of elements in the list. */ + elements[class_element].class_count + += elements[e2->class_element].class_count; + + /* Update the class fields in elem2's class list. */ + e2->class_element = class_element; + for (p = e2->next; p != e2; p = p->next) + p->class_element = class_element; + + /* Splice ELEM2's class list into ELEM1's. These are circular + lists. */ + old_next = e1->next; + e1->next = e2->next; + e2->next = old_next; + + return class_element; +} + +/* Compare elements ELEM1 and ELEM2 from array of integers, given a + pointer to each. Used to qsort such an array. */ + +static int +elem_compare (elem1, elem2) + const void *elem1; + const void *elem2; +{ + int e1 = * (int *) elem1; + int e2 = * (int *) elem2; + if (e1 < e2) + return -1; + else if (e1 > e2) + return 1; + else + return 0; +} + +/* Prints PART to the file pointer FP. The elements of each + class are sorted. */ + +void +partition_print (part, fp) + partition part; + FILE *fp; +{ + char *done; + int num_elements = part->num_elements; + struct partition_elem *elements = part->elements; + int *class_elements; + int e; + + /* Flag the elements we've already printed. */ + done = (char *) xmalloc (num_elements); + memset (done, 0, num_elements); + + /* A buffer used to sort elements in a class. */ + class_elements = (int *) xmalloc (num_elements * sizeof (int)); + + fputc ('[', fp); + for (e = 0; e < num_elements; ++e) + /* If we haven't printed this element, print its entire class. */ + if (! done[e]) + { + int c = e; + int count = elements[elements[e].class_element].class_count; + int i; + + /* Collect the elements in this class. */ + for (i = 0; i < count; ++i) { + class_elements[i] = c; + done[c] = 1; + c = elements[c].next - elements; + } + /* Sort them. */ + qsort ((void *) class_elements, count, sizeof (int), &elem_compare); + /* Print them. */ + fputc ('(', fp); + for (i = 0; i < count; ++i) + fprintf (fp, i == 0 ? "%d" : " %d", class_elements[i]); + fputc (')', fp); + } + fputc (']', fp); + + free (done); +} + |