-rw-r--r--  gcc/ChangeLog      |   43
-rw-r--r--  gcc/basic-block.h  |  179
-rw-r--r--  gcc/cfgcleanup.c   |  205
-rw-r--r--  gcc/flow.c         |   43
-rw-r--r--  gcc/recog.c        |    2
-rw-r--r--  gcc/recog.h        |    1
-rw-r--r--  gcc/struct-equiv.c | 1208
7 files changed, 1491 insertions, 190 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 345f7d6bec7..69bbe6d1b08 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,48 @@ 2005-12-13 J"orn Rennecke <joern.rennecke@st.com> + PR rtl-optimization/20070 / part1 + * flow.c (update_life_info): If PROP_POST_REGSTACK is set, call + count_or_remove_death_notes with kill == -1. + (mark_set_1): Don't add REG_DEAD / REG_UNUSED notes for stack + registers if PROP_POST_REGSTACK is set. + (mark_used_reg): Likewise. + (count_or_remove_death_notes): If kill is -1, don't remove REG_DEAD / + REG_UNUSED notes for stack regs. + * cfgcleanup.c (condjump_equiv_p): Change parameters and processing + to match rtx_equiv_p machinery. Change caller. + (outgoing_edges_match): Likewise. + (try_crossjump_to_edge): Use struct_equiv_block_eq + instead of flow_find_cross_jump. + * basic-block.h (PROP_POST_REGSTACK, STRUCT_EQUIV_START): Define. + (STRUCT_EQUIV_RERUN, STRUCT_EQUIV_FINAL): Likewise. + (STRUCT_EQUIV_NEED_FULL_BLOCK, STRUCT_EQUIV_MATCH_JUMPS): Likewise. + (STRUCT_EQUIV_MAX_LOCAL): Likewise. + (struct struct_equiv_checkpoint, struct equiv_info): Likewise. + (insns_match_p): Update prototype. + (flow_find_cross_jump): Remove prototype. + (struct_equiv_block_eq, struct_equiv_init): Declare. + (rtx_equiv_p, condjump_equiv_p): Likewise. + * struct-equiv.c: Include reload.h. + (IMPOSSIBLE_MOVE_FACTOR): Define. + (assign_reg_reg_set, struct_equiv_make_checkpoint): New functions. + (struct_equiv_improve_checkpoint): Likewise. + (struct_equiv_restore_checkpoint, rtx_equiv_p): Likewise. + (set_dest_equiv_p, set_dest_addr_equiv_p, struct_equiv_init): Likewise. + (struct_equiv_merge, find_dying_input): Likewise. + (resolve_input_conflict, note_local_live): Likewise. + (death_notes_match_p): Change parameters and processing + to match rtx_equiv_p machinery. Change caller. + (insns_match_p): Likewise. + (flow_find_cross_jump): Replace with: + (struct_equiv_block_eq). + + Back out this change: + 2005-03-07 Kazu Hirata <kazu@cs.umass.edu> + * recog.c (verify_changes): Make it static. + * recog.h: Remove the corresponding prototype. + +2005-12-13 J"orn Rennecke <joern.rennecke@st.com> + * rtlhooks.c (gen_lowpart_general): Handle SUBREGs of floating point values. diff --git a/gcc/basic-block.h b/gcc/basic-block.h index 9a88e8d3c0c..a3cecae8b6f 100644 --- a/gcc/basic-block.h +++ b/gcc/basic-block.h @@ -807,6 +807,9 @@ enum update_life_extent to flag analysis of asms. */ #define PROP_DEAD_INSN 1024 /* Internal flag used within flow.c to flag analysis of dead insn. */ +#define PROP_POST_REGSTACK 2048 /* We run after reg-stack and need + to preserve REG_DEAD notes for + stack regs. */ #define PROP_FINAL (PROP_DEATH_NOTES | PROP_LOG_LINKS \ | PROP_REG_INFO | PROP_KILL_DEAD_CODE \ | PROP_SCAN_DEAD_CODE | PROP_AUTOINC \ @@ -831,6 +834,17 @@ enum update_life_extent #define CLEANUP_CFGLAYOUT 128 /* Do cleanup in cfglayout mode. */ #define CLEANUP_LOG_LINKS 256 /* Update log links. */ +/* The following are ORed in on top of the CLEANUP* flags in calls to + struct_equiv_block_eq. */ +#define STRUCT_EQUIV_START 512 /* Initializes the search range. */ +#define STRUCT_EQUIV_RERUN 1024 /* Rerun to find register use in + found equivalence. */ +#define STRUCT_EQUIV_FINAL 2048 /* Make any changes necessary to get + actual equivalence. */ +#define STRUCT_EQUIV_NEED_FULL_BLOCK 4096 /* struct_equiv_block_eq is required + to match only full blocks */ +#define STRUCT_EQUIV_MATCH_JUMPS 8192 /* Also include the jumps at the end of the block in the comparison. 
*/ + extern void life_analysis (FILE *, int); extern int update_life_info (sbitmap, enum update_life_extent, int); extern int update_life_info_in_dirty_blocks (enum update_life_extent, int); @@ -992,7 +1006,168 @@ extern basic_block get_bb_copy (basic_block); #include "cfghooks.h" /* In struct-equiv.c */ -extern bool insns_match_p (int, rtx, rtx); -extern int flow_find_cross_jump (int, basic_block, basic_block, rtx *, rtx *); + +/* Constants used to size arrays in struct equiv_info (currently only one). + When these limits are exceeded, struct_equiv returns zero. + The maximum number of pseudo registers that are different in the two blocks, + but appear in equivalent places and are dead at the end (or where one of + a pair is dead at the end). */ +#define STRUCT_EQUIV_MAX_LOCAL 16 +/* The maximum number of references to an input register that struct_equiv + can handle. */ + +/* Structure used to track state during struct_equiv that can be rolled + back when we find we can't match an insn, or if we want to match part + of it in a different way. + This information pertains to the pair of partial blocks that has been + matched so far. Since this pair is structurally equivalent, this is + conceptually just one partial block expressed in two potentially + different ways. */ +struct struct_equiv_checkpoint +{ + int ninsns; /* Insns are matched so far. */ + int local_count; /* Number of block-local registers. */ + int input_count; /* Number of inputs to the block. */ + + /* X_START and Y_START are the first insns (in insn stream order) + of the partial blocks that have been considered for matching so far. + Since we are scanning backwards, they are also the instructions that + are currently considered - or the last ones that have been considered - + for matching (Unless we tracked back to these because a preceding + instruction failed to match). */ + rtx x_start, y_start; + + /* INPUT_VALID indicates if we have actually set up X_INPUT / Y_INPUT + during the current pass; we keep X_INPUT / Y_INPUT around between passes + so that we can match REG_EQUAL / REG_EQUIV notes referring to these. */ + bool input_valid; + + /* Some information would be expensive to exactly checkpoint, so we + merely increment VERSION any time information about local + registers, inputs and/or register liveness changes. When backtracking, + it is decremented for changes that can be undone, and if a discrepancy + remains, NEED_RERUN in the relevant struct equiv_info is set to indicate + that a new pass should be made over the entire block match to get + accurate register information. */ + int version; +}; + +/* A struct equiv_info is used to pass information to struct_equiv and + to gather state while two basic blocks are checked for structural + equivalence. */ + +struct equiv_info +{ + /* Fields set up by the caller to struct_equiv_block_eq */ + + basic_block x_block, y_block; /* The two blocks being matched. */ + + /* MODE carries the mode bits from cleanup_cfg if we are called from + try_crossjump_to_edge, and additionally it carries the + STRUCT_EQUIV_* bits described above. */ + int mode; + + /* INPUT_COST is the cost that adding an extra input to the matched blocks + is supposed to have, and is taken into account when considering if the + matched sequence should be extended backwards. input_cost < 0 means + don't accept any inputs at all. */ + int input_cost; + + + /* Fields to track state inside of struct_equiv_block_eq. Some of these + are also outputs. 
*/ + + /* X_INPUT and Y_INPUT are used by struct_equiv to record a register that + is used as an input parameter, i.e. where different registers are used + as sources. This is only used for a register that is live at the end + of the blocks, or in some identical code at the end of the blocks; + Inputs that are dead at the end go into X_LOCAL / Y_LOCAL. */ + rtx x_input, y_input; + /* When a previous pass has identified a valid input, INPUT_REG is set + by struct_equiv_block_eq, and it is henceforth replaced in X_BLOCK + for the input. */ + rtx input_reg; + + /* COMMON_LIVE keeps track of the registers which are currently live + (as we scan backwards from the end) and have the same numbers in both + blocks. N.B. a register that is in common_live is unsuitable to become + a local reg. */ + regset common_live; + /* Likewise, X_LOCAL_LIVE / Y_LOCAL_LIVE keep track of registers that are + local to one of the blocks; these registers must not be accepted as + identical when encountered in both blocks. */ + regset x_local_live, y_local_live; + + /* EQUIV_USED indicates for which insns a REG_EQUAL or REG_EQUIV note is + being used, to avoid having to backtrack in the next pass, so that we + get accurate life info for this insn then. For each such insn, + the bit with the number corresponding to the CUR.NINSNS value at the + time of scanning is set. */ + bitmap equiv_used; + + /* Current state that can be saved & restored easily. */ + struct struct_equiv_checkpoint cur; + /* BEST_MATCH is used to store the best match so far, weighing the + cost of matched insns COSTS_N_INSNS (CUR.NINSNS) against the cost + CUR.INPUT_COUNT * INPUT_COST of setting up the inputs. */ + struct struct_equiv_checkpoint best_match; + /* If a checkpoint restore failed, or an input conflict newly arises, + NEED_RERUN is set. This has to be tested by the caller to re-run + the comparison if the match appears otherwise sound. The state kept in + x_start, y_start, equiv_used and check_input_conflict ensures that + we won't loop indefinetly. */ + bool need_rerun; + /* If there is indication of an input conflict at the end, + CHECK_INPUT_CONFLICT is set so that we'll check for input conflicts + for each insn in the next pass. This is needed so that we won't discard + a partial match if there is a longer match that has to be abandoned due + to an input conflict. */ + bool check_input_conflict; + /* HAD_INPUT_CONFLICT is set if CHECK_INPUT_CONFLICT was already set and we + have passed a point where there were multiple dying inputs. This helps + us decide if we should set check_input_conflict for the next pass. */ + bool had_input_conflict; + + /* LIVE_UPDATE controls if we want to change any life info at all. We + set it to false during REG_EQUAL / REG_EUQIV note comparison of the final + pass so that we don't introduce new registers just for the note; if we + can't match the notes without the current register information, we drop + them. */ + bool live_update; + + /* X_LOCAL and Y_LOCAL are used to gather register numbers of register pairs + that are local to X_BLOCK and Y_BLOCK, with CUR.LOCAL_COUNT being the index + to the next free entry. */ + rtx x_local[STRUCT_EQUIV_MAX_LOCAL], y_local[STRUCT_EQUIV_MAX_LOCAL]; + /* LOCAL_RVALUE is nonzero if the corresponding X_LOCAL / Y_LOCAL entry + was a source operand (including STRICT_LOW_PART) for the last invocation + of struct_equiv mentioning it, zero if it was a destination-only operand. 
+ Since we are scanning backwards, this means the register is input/local + for the (partial) block scanned so far. */ + bool local_rvalue[STRUCT_EQUIV_MAX_LOCAL]; + + + /* Additional fields that are computed for the convenience of the caller. */ + + /* DYING_INPUTS is set to the number of local registers that turn out + to be inputs to the (possibly partial) block. */ + int dying_inputs; + /* X_END and Y_END are the last insns in X_BLOCK and Y_BLOCK, respectively, + that are being compared. A final jump insn will not be included. */ + rtx x_end, y_end; + + /* If we are matching tablejumps, X_LABEL in X_BLOCK coresponds to + Y_LABEL in Y_BLOCK. */ + rtx x_label, y_label; + +}; + +extern bool insns_match_p (rtx, rtx, struct equiv_info *); +extern int struct_equiv_block_eq (int, struct equiv_info *); +extern bool struct_equiv_init (int, struct equiv_info *); +extern bool rtx_equiv_p (rtx *, rtx, int, struct equiv_info *); + +/* In cfgrtl.c */ +extern bool condjump_equiv_p (struct equiv_info *, bool); #endif /* GCC_BASIC_BLOCK_H */ diff --git a/gcc/cfgcleanup.c b/gcc/cfgcleanup.c index 80d68de7d93..c4939d07ceb 100644 --- a/gcc/cfgcleanup.c +++ b/gcc/cfgcleanup.c @@ -60,7 +60,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA static bool first_pass; static bool try_crossjump_to_edge (int, edge, edge); static bool try_crossjump_bb (int, basic_block); -static bool outgoing_edges_match (int, basic_block, basic_block); +static bool outgoing_edges_match (int *, struct equiv_info *); static void merge_blocks_move_predecessor_nojumps (basic_block, basic_block); static void merge_blocks_move_successor_nojumps (basic_block, basic_block); @@ -879,20 +879,20 @@ merge_blocks_move (edge e, basic_block b, basic_block c, int mode) } /* Return true iff the condbranches at the end of BB1 and BB2 match. */ -static bool -condjump_equiv_p (basic_block bb1, basic_block bb2) +bool +condjump_equiv_p (struct equiv_info *info, bool call_init) { - edge b1, f1, b2, f2; + basic_block bb1 = info->x_block; + basic_block bb2 = info->y_block; + edge b1 = BRANCH_EDGE (bb1); + edge b2 = BRANCH_EDGE (bb2); + edge f1 = FALLTHRU_EDGE (bb1); + edge f2 = FALLTHRU_EDGE (bb2); bool reverse, match; rtx set1, set2, cond1, cond2; + rtx src1, src2; enum rtx_code code1, code2; - - b1 = BRANCH_EDGE (bb1); - b2 = BRANCH_EDGE (bb2); - f1 = FALLTHRU_EDGE (bb1); - f2 = FALLTHRU_EDGE (bb2); - /* Get around possible forwarders on fallthru edges. Other cases should be optimized out already. */ if (FORWARDER_BLOCK_P (f1->dest)) @@ -923,8 +923,10 @@ condjump_equiv_p (basic_block bb1, basic_block bb2) != (XEXP (SET_SRC (set2), 1) == pc_rtx)) reverse = !reverse; - cond1 = XEXP (SET_SRC (set1), 0); - cond2 = XEXP (SET_SRC (set2), 0); + src1 = SET_SRC (set1); + src2 = SET_SRC (set2); + cond1 = XEXP (src1, 0); + cond2 = XEXP (src2, 0); code1 = GET_CODE (cond1); if (reverse) code2 = reversed_comparison_code (cond2, BB_END (bb2)); @@ -934,15 +936,35 @@ condjump_equiv_p (basic_block bb1, basic_block bb2) if (code2 == UNKNOWN) return false; + if (call_init && !struct_equiv_init (STRUCT_EQUIV_START | info->mode, info)) + gcc_unreachable (); + /* Make the sources of the pc sets unreadable so that when we call + insns_match_p it won't process them. + The death_notes_match_p from insns_match_p won't see the local registers + used for the pc set, but that could only cause missed optimizations when + there are actually condjumps that use stack registers. 
*/ + SET_SRC (set1) = pc_rtx; + SET_SRC (set2) = pc_rtx; /* Verify codes and operands match. */ - match = ((code1 == code2 - && rtx_renumbered_equal_p (XEXP (cond1, 0), XEXP (cond2, 0)) - && rtx_renumbered_equal_p (XEXP (cond1, 1), XEXP (cond2, 1))) - || (code1 == swap_condition (code2) - && rtx_renumbered_equal_p (XEXP (cond1, 1), - XEXP (cond2, 0)) - && rtx_renumbered_equal_p (XEXP (cond1, 0), - XEXP (cond2, 1)))); + if (code1 == code2) + { + match = (insns_match_p (BB_END (bb1), BB_END (bb2), info) + && rtx_equiv_p (&XEXP (cond1, 0), XEXP (cond2, 0), 1, info) + && rtx_equiv_p (&XEXP (cond1, 1), XEXP (cond2, 1), 1, info)); + + } + else if (code1 == swap_condition (code2)) + { + match = (insns_match_p (BB_END (bb1), BB_END (bb2), info) + && rtx_equiv_p (&XEXP (cond1, 1), XEXP (cond2, 0), 1, info) + && rtx_equiv_p (&XEXP (cond1, 0), XEXP (cond2, 1), 1, info)); + + } + else + match = false; + SET_SRC (set1) = src1; + SET_SRC (set2) = src2; + match &= verify_changes (0); /* If we return true, we will join the blocks. Which means that we will only have one branch prediction bit to work with. Thus @@ -971,7 +993,7 @@ condjump_equiv_p (basic_block bb1, basic_block bb2) "Outcomes of branch in bb %i and %i differ too much (%i %i)\n", bb1->index, bb2->index, b1->probability, prob2); - return false; + match = false; } } @@ -979,17 +1001,25 @@ condjump_equiv_p (basic_block bb1, basic_block bb2) fprintf (dump_file, "Conditionals in bb %i and %i match.\n", bb1->index, bb2->index); + if (!match) + cancel_changes (0); return match; } -/* Return true iff outgoing edges of BB1 and BB2 match, together with - the branch instruction. This means that if we commonize the control - flow before end of the basic block, the semantic remains unchanged. + +/* Return true iff outgoing edges of INFO->y_block and INFO->x_block match, + together with the branch instruction. This means that if we commonize the + control flow before end of the basic block, the semantic remains unchanged. + If we need to compare jumps, we set STRUCT_EQUIV_MATCH_JUMPS in *MODE, + and pass *MODE to struct_equiv_init or assign it to INFO->mode, as + appropriate. We may assume that there exists one edge with a common destination. */ static bool -outgoing_edges_match (int mode, basic_block bb1, basic_block bb2) +outgoing_edges_match (int *mode, struct equiv_info *info) { + basic_block bb1 = info->y_block; + basic_block bb2 = info->x_block; int nehedges1 = 0, nehedges2 = 0; edge fallthru1 = 0, fallthru2 = 0; edge e1, e2; @@ -1005,6 +1035,7 @@ outgoing_edges_match (int mode, basic_block bb1, basic_block bb2) & (EDGE_COMPLEX | EDGE_FAKE)) == 0 && (!JUMP_P (BB_END (bb2)) || simplejump_p (BB_END (bb2)))); + *mode |= STRUCT_EQUIV_MATCH_JUMPS; /* Match conditional jumps - this may get tricky when fallthru and branch edges are crossed. 
*/ if (EDGE_COUNT (bb1->succs) == 2 @@ -1015,7 +1046,8 @@ outgoing_edges_match (int mode, basic_block bb1, basic_block bb2) || !any_condjump_p (BB_END (bb2)) || !onlyjump_p (BB_END (bb2))) return false; - return condjump_equiv_p (bb1, bb2); + info->mode = *mode; + return condjump_equiv_p (info, true); } /* Generic case - we are seeing a computed jump, table jump or trapping @@ -1063,31 +1095,22 @@ outgoing_edges_match (int mode, basic_block bb1, basic_block bb2) identical = false; } - if (identical) + if (identical + && struct_equiv_init (STRUCT_EQUIV_START | *mode, info)) { - replace_label_data rr; bool match; - /* Temporarily replace references to LABEL1 with LABEL2 + /* Indicate that LABEL1 is to be replaced with LABEL2 in BB1->END so that we could compare the instructions. */ - rr.r1 = label1; - rr.r2 = label2; - rr.update_label_nuses = false; - for_each_rtx (&BB_END (bb1), replace_label, &rr); + info->y_label = label1; + info->x_label = label2; - match = insns_match_p (mode, BB_END (bb1), BB_END (bb2)); + match = insns_match_p (BB_END (bb1), BB_END (bb2), info); if (dump_file && match) fprintf (dump_file, "Tablejumps in bb %i and %i match.\n", bb1->index, bb2->index); - /* Set the original label in BB1->END because when deleting - a block whose end is a tablejump, the tablejump referenced - from the instruction is deleted too. */ - rr.r1 = label2; - rr.r2 = label1; - for_each_rtx (&BB_END (bb1), replace_label, &rr); - return match; } } @@ -1097,7 +1120,8 @@ outgoing_edges_match (int mode, basic_block bb1, basic_block bb2) /* First ensure that the instructions match. There may be many outgoing edges so this test is generally cheaper. */ - if (!insns_match_p (mode, BB_END (bb1), BB_END (bb2))) + if (!struct_equiv_init (STRUCT_EQUIV_START | *mode, info) + || !insns_match_p (BB_END (bb1), BB_END (bb2), info)) return false; /* Search the outgoing edges, ensure that the counts do match, find possible @@ -1163,14 +1187,13 @@ outgoing_edges_match (int mode, basic_block bb1, basic_block bb2) static bool try_crossjump_to_edge (int mode, edge e1, edge e2) { - int nmatch; + int nmatch, i; basic_block src1 = e1->src, src2 = e2->src; basic_block redirect_to, redirect_from, to_remove; - rtx newpos1, newpos2; edge s; edge_iterator ei; - - newpos1 = newpos2 = NULL_RTX; + struct equiv_info info; + rtx x_active, y_active; /* If we have partitioned hot/cold basic blocks, it is a bad idea to try this optimization. @@ -1217,19 +1240,66 @@ try_crossjump_to_edge (int mode, edge e1, edge e2) return false; /* Look for the common insn sequence, part the first ... */ - if (!outgoing_edges_match (mode, src1, src2)) + info.x_block = src2; + info.y_block = src1; + if (!outgoing_edges_match (&mode, &info)) return false; /* ... and part the second. */ - nmatch = flow_find_cross_jump (mode, src1, src2, &newpos1, &newpos2); + info.input_cost = optimize_size ? COSTS_N_INSNS (1) : -1; + nmatch = struct_equiv_block_eq (STRUCT_EQUIV_START | mode, &info); /* Don't proceed with the crossjump unless we found a sufficient number of matching instructions or the 'from' block was totally matched (such that its predecessors will hopefully be redirected and the block removed). 
*/ - if ((nmatch < PARAM_VALUE (PARAM_MIN_CROSSJUMP_INSNS)) - && (newpos1 != BB_HEAD (src1))) + if (!nmatch) return false; + if ((nmatch -info.cur.input_count < PARAM_VALUE (PARAM_MIN_CROSSJUMP_INSNS)) + && (info.cur.y_start != BB_HEAD (src1))) + return false; + while (info.need_rerun) + { + nmatch = struct_equiv_block_eq (STRUCT_EQUIV_RERUN | mode, &info); + if (!nmatch) + return false; + if ((nmatch -info.cur.input_count < PARAM_VALUE (PARAM_MIN_CROSSJUMP_INSNS)) + && (info.cur.y_start != BB_HEAD (src1))) + return false; + } + nmatch = struct_equiv_block_eq (STRUCT_EQUIV_FINAL | mode, &info); + if ((nmatch -info.cur.input_count < PARAM_VALUE (PARAM_MIN_CROSSJUMP_INSNS)) + && (info.cur.y_start != BB_HEAD (src1))) + return false; + + /* Skip possible basic block header. */ + x_active = info.cur.x_start; + if (LABEL_P (x_active)) + x_active = NEXT_INSN (x_active); + if (NOTE_P (x_active)) + x_active = NEXT_INSN (x_active); + + y_active = info.cur.y_start; + if (LABEL_P (y_active)) + y_active = NEXT_INSN (y_active); + if (NOTE_P (y_active)) + y_active = NEXT_INSN (y_active); + + /* In order for this code to become active, either we have to be called + before reload, or struct_equiv_block_eq needs to add register scavenging + code to allocate input_reg after reload. */ + if (info.input_reg) + { + emit_insn_before (gen_move_insn (info.input_reg, info.x_input), + x_active); + emit_insn_before (gen_move_insn (info.input_reg, info.y_input), + y_active); + } + + for (i = 0; i < info.cur.local_count; i++) + if (info.local_rvalue[i]) + emit_insn_before (gen_move_insn (info.x_local[i], info.y_local[i]), + y_active); /* Here we know that the insns in the end of SRC1 which are common with SRC2 will be deleted. @@ -1265,30 +1335,36 @@ try_crossjump_to_edge (int mode, edge e1, edge e2) /* Avoid splitting if possible. We must always split when SRC2 has EH predecessor edges, or we may end up with basic blocks with both normal and EH predecessor edges. */ - if (newpos2 == BB_HEAD (src2) + if (info.cur.x_start == BB_HEAD (src2) && !(EDGE_PRED (src2, 0)->flags & EDGE_EH)) redirect_to = src2; else { - if (newpos2 == BB_HEAD (src2)) + if (info.cur.x_start == BB_HEAD (src2)) { /* Skip possible basic block header. */ - if (LABEL_P (newpos2)) - newpos2 = NEXT_INSN (newpos2); - if (NOTE_P (newpos2)) - newpos2 = NEXT_INSN (newpos2); + if (LABEL_P (info.cur.x_start)) + info.cur.x_start = NEXT_INSN (info.cur.x_start); + if (NOTE_P (info.cur.x_start)) + info.cur.x_start = NEXT_INSN (info.cur.x_start); } if (dump_file) fprintf (dump_file, "Splitting bb %i before %i insns\n", src2->index, nmatch); - redirect_to = split_block (src2, PREV_INSN (newpos2))->dest; + redirect_to = split_block (src2, PREV_INSN (info.cur.x_start))->dest; + COPY_REG_SET (info.y_block->il.rtl->global_live_at_end, + info.x_block->il.rtl->global_live_at_end); } if (dump_file) - fprintf (dump_file, - "Cross jumping from bb %i to bb %i; %i common insns\n", - src1->index, src2->index, nmatch); + { + fprintf (dump_file, "Cross jumping from bb %i to bb %i; %i common insns", + src1->index, src2->index, nmatch); + if (info.cur.local_count) + fprintf (dump_file, ", %i local registers", info.cur.local_count); + fprintf (dump_file, "\n"); + } redirect_to->count += src1->count; redirect_to->frequency += src1->frequency; @@ -1352,14 +1428,7 @@ try_crossjump_to_edge (int mode, edge e1, edge e2) /* Edit SRC1 to go to REDIRECT_TO at NEWPOS1. */ - /* Skip possible basic block header. 
*/ - if (LABEL_P (newpos1)) - newpos1 = NEXT_INSN (newpos1); - - if (NOTE_P (newpos1)) - newpos1 = NEXT_INSN (newpos1); - - redirect_from = split_block (src1, PREV_INSN (newpos1))->src; + redirect_from = split_block (src1, PREV_INSN (y_active))->src; to_remove = single_succ (redirect_from); redirect_edge_and_branch_force (single_succ_edge (redirect_from), redirect_to); diff --git a/gcc/flow.c b/gcc/flow.c index bdb40323b02..f958bfdb66e 100644 --- a/gcc/flow.c +++ b/gcc/flow.c @@ -643,7 +643,8 @@ update_life_info (sbitmap blocks, enum update_life_extent extent, /* If asked, remove notes from the blocks we'll update. */ if (extent == UPDATE_LIFE_GLOBAL_RM_NOTES) - count_or_remove_death_notes (blocks, 1); + count_or_remove_death_notes (blocks, + prop_flags & PROP_POST_REGSTACK ? -1 : 1); } /* Clear log links in case we are asked to (re)compute them. */ @@ -2926,7 +2927,13 @@ mark_set_1 (struct propagate_block_info *pbi, enum rtx_code code, rtx reg, rtx c if (flags & PROP_REG_INFO) REG_N_DEATHS (regno_first) += 1; - if (flags & PROP_DEATH_NOTES) + if (flags & PROP_DEATH_NOTES +#ifdef STACK_REGS + && (!(flags & PROP_POST_REGSTACK) + || !IN_RANGE (REGNO (reg), FIRST_STACK_REG, + LAST_STACK_REG)) +#endif + ) { /* Note that dead stores have already been deleted when possible. If we get here, we have found a @@ -2939,7 +2946,13 @@ mark_set_1 (struct propagate_block_info *pbi, enum rtx_code code, rtx reg, rtx c } else { - if (flags & PROP_DEATH_NOTES) + if (flags & PROP_DEATH_NOTES +#ifdef STACK_REGS + && (!(flags & PROP_POST_REGSTACK) + || !IN_RANGE (REGNO (reg), FIRST_STACK_REG, + LAST_STACK_REG)) +#endif + ) { /* This is a case where we have a multi-word hard register and some, but not all, of the words of the register are @@ -2998,7 +3011,12 @@ mark_set_1 (struct propagate_block_info *pbi, enum rtx_code code, rtx reg, rtx c here and count it. */ else if (GET_CODE (reg) == SCRATCH) { - if (flags & PROP_DEATH_NOTES) + if (flags & PROP_DEATH_NOTES +#ifdef STACK_REGS + && (!(flags & PROP_POST_REGSTACK) + || !IN_RANGE (REGNO (reg), FIRST_STACK_REG, LAST_STACK_REG)) +#endif + ) REG_NOTES (insn) = alloc_EXPR_LIST (REG_UNUSED, reg, REG_NOTES (insn)); } @@ -3764,6 +3782,10 @@ mark_used_reg (struct propagate_block_info *pbi, rtx reg, if (! some_was_live) { if ((pbi->flags & PROP_DEATH_NOTES) +#ifdef STACK_REGS + && (!(pbi->flags & PROP_POST_REGSTACK) + || !IN_RANGE (REGNO (reg), FIRST_STACK_REG, LAST_STACK_REG)) +#endif && ! find_regno_note (insn, REG_DEAD, regno_first)) REG_NOTES (insn) = alloc_EXPR_LIST (REG_DEAD, reg, REG_NOTES (insn)); @@ -4385,7 +4407,9 @@ struct tree_opt_pass pass_recompute_reg_usage = /* Optionally removes all the REG_DEAD and REG_UNUSED notes from a set of blocks. If BLOCKS is NULL, assume the universal set. Returns a count - of the number of registers that died. */ + of the number of registers that died. + If KILL is 1, remove old REG_DEAD / REG_UNUSED notes. If it is 0, don't. + if it is -1, remove them unless they pertain to a stack reg. */ int count_or_remove_death_notes (sbitmap blocks, int kill) @@ -4457,7 +4481,14 @@ count_or_remove_death_notes_bb (basic_block bb, int kill) /* Fall through. 
*/ case REG_UNUSED: - if (kill) + if (kill > 0 + || (kill +#ifdef STACK_REGS + && (!REG_P (XEXP (link, 0)) + || !IN_RANGE (REGNO (XEXP (link, 0)), + FIRST_STACK_REG, LAST_STACK_REG)) +#endif + )) { rtx next = XEXP (link, 1); free_EXPR_LIST_node (link); diff --git a/gcc/recog.c b/gcc/recog.c index ece44f792bd..1df4b7fd1de 100644 --- a/gcc/recog.c +++ b/gcc/recog.c @@ -339,7 +339,7 @@ num_changes_pending (void) /* Tentatively apply the changes numbered NUM and up. Return 1 if all changes are valid, zero otherwise. */ -static int +int verify_changes (int num) { int i; diff --git a/gcc/recog.h b/gcc/recog.h index 0ed7c9e4741..b219c407cf3 100644 --- a/gcc/recog.h +++ b/gcc/recog.h @@ -76,6 +76,7 @@ extern int asm_operand_ok (rtx, const char *); extern int validate_change (rtx, rtx *, rtx, int); extern int validate_change_maybe_volatile (rtx, rtx *, rtx); extern int insn_invalid_p (rtx); +extern int verify_changes (int); extern void confirm_change_group (void); extern int apply_change_group (void); extern int num_validated_changes (void); diff --git a/gcc/struct-equiv.c b/gcc/struct-equiv.c index 9169958333f..60f146f9a56 100644 --- a/gcc/struct-equiv.c +++ b/gcc/struct-equiv.c @@ -19,7 +19,60 @@ along with GCC; see the file COPYING. If not, write to the Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -/* This file contains helper functions for Cross jumping (tail merging). */ +/* Try to match two basic blocks - or their ends - for structural equivalence. + We scan the blocks from their ends backwards, hoping to find a match, i.e. + insns are identical, except for certain cases involving registers. A + mismatch between register number RX (used in block X) and RY (used in the + same way in block Y) can be handled in one of the following cases: + 1. RX and RY are local to their respective blocks; they are set there and + die there. If so, they can effectively be ignored. + 2. RX and RY die in their blocks, but are live at the start. If any path + gets redirected through X instead of Y, the caller must emit + compensation code to move RY to RX. If there are overlapping inputs, + the function resolve_input_conflict ensures that this can be done. + Information about these registers is tracked in the X_LOCAL, Y_LOCAL, + LOCAL_COUNT and LOCAL_RVALUE fields. + 3. RX and RY live throughout their blocks, including the start and the end. + Either RX and RY must be identical, or we have to replace all uses in + block X with a new pseudo, which is stored in the INPUT_REG field. The + caller can then use block X instead of block Y by copying RY to the new + pseudo. + + The main entry point to this file is struct_equiv_block_eq. This function + uses a struct equiv_info to accept some of its inputs, to keep track of its + internal state, to pass down to its helper functions, and to communicate + some of the results back to the caller. + + Most scans will result in a failure to match a sufficient number of insns + to make any optimization worthwhile; therefore the process is geared more + to quick scanning than to the ability to exactly backtrack when we + find a mismatch. The information gathered is still meaningful to make a + preliminary decision if we want to do an optimization: we might only + slightly overestimate the number of matchable insns, and underestimate + the number of inputs and miss an input conflict.
Sufficient information + is gathered so that when we make another pass, we won't have to backtrack + at the same point. + Another issue is that information in memory atttributes and/or REG_NOTES + might have to be merged or discarded to make a valid match. We don't want + to discard such information when we are not certain that we want to merge + the two (partial) blocks. + For these reasons, struct_equiv_block_eq has to be called first with the + STRUCT_EQUIV_START bit set in the mode parameter. This will calculate the + number of matched insns and the number and types of inputs. If the + need_rerun field is set, the results are only tentative, and the caller + has to call again with STRUCT_EQUIV_RERUN till need_rerun is false in + order to get a reliable match. + To install the changes necessary for the match, the function has to be + called again with STRUCT_EQUIV_FINAL. + + While scanning an insn, we process first all the SET_DESTs, then the + SET_SRCes, then the REG_NOTES, in order to keep the register liveness + information consistent. + If we were to mix up the order for sources / destinations in an insn where + a source is also a destination, we'd end up being mistaken to think that + the register is not live in the preceding insn. */ #include "config.h" #include "system.h" @@ -34,8 +87,26 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA #include "tm_p.h" #include "target.h" #include "emit-rtl.h" +#include "reload.h" static void merge_memattrs (rtx, rtx); +static bool set_dest_equiv_p (rtx x, rtx y, struct equiv_info *info); +static bool set_dest_addr_equiv_p (rtx x, rtx y, struct equiv_info *info); +static void find_dying_inputs (struct equiv_info *info); +static bool resolve_input_conflict (struct equiv_info *info); + +/* After reload, some moves, as indicated by SECONDARY_RELOAD_CLASS and + SECONDARY_MEMORY_NEEDED, cannot be done directly. For our purposes, we + consider them impossible to generate after reload (even though some + might be synthesized when you throw enough code at them). + Since we don't know while procesing a cross-jump if a local register + that is currently live will eventually be live and thus be an input, + we keep track of potential inputs that would require an impossible move + by using a prohibitively high cost for them. + This number, multiplied with the larger of STRUCT_EQUIV_MAX_LOCAL and + FIRST_PSEUDO_REGISTER, must fit in the input_cost field of + struct equiv_info. */ +#define IMPOSSIBLE_MOVE_FACTOR 20000 @@ -129,18 +200,638 @@ merge_memattrs (rtx x, rtx y) return; } +/* In SET, assign the bit for the register number of REG the value VALUE. + If REG is a hard register, do so for all its consituent registers. + Return the number of registers that have become included (as a positive + number) or excluded (as a negative number). */ +static int +assign_reg_reg_set (regset set, rtx reg, int value) +{ + unsigned regno = REGNO (reg); + int nregs, i, old; + + if (regno >= FIRST_PSEUDO_REGISTER) + { + gcc_assert (!reload_completed); + nregs = 1; + } + else + nregs = hard_regno_nregs[regno][GET_MODE (reg)]; + for (old = 0, i = nregs; --i >= 0; regno++) + { + if ((value != 0) == REGNO_REG_SET_P (set, regno)) + continue; + if (value) + old++, SET_REGNO_REG_SET (set, regno); + else + old--, CLEAR_REGNO_REG_SET (set, regno); + } + return old; +} + +/* Record state about current inputs / local registers / liveness + in *P. 
*/ +static inline void +struct_equiv_make_checkpoint (struct struct_equiv_checkpoint *p, + struct equiv_info *info) +{ + *p = info->cur; +} + +/* Call struct_equiv_make_checkpoint (P, INFO) if the current partial block + is suitable to split off - i.e. there is no dangling cc0 user - and + if the current cost of the common instructions, minus the cost for + setting up the inputs, is higher than what has been recorded before + in CHECKPOINT[N]. Also, if we do so, confirm or cancel any pending + changes. */ +static void +struct_equiv_improve_checkpoint (struct struct_equiv_checkpoint *p, + struct equiv_info *info) +{ +#ifdef HAVE_cc0 + if (reg_mentioned_p (cc0_rtx, info->x_start) && !sets_cc0_p (info->x_start)) + return; +#endif + if (info->cur.input_count >= IMPOSSIBLE_MOVE_FACTOR) + return; + if (info->input_cost >= 0 + ? (COSTS_N_INSNS(info->cur.ninsns - p->ninsns) + > info->input_cost * (info->cur.input_count - p->input_count)) + : info->cur.ninsns > p->ninsns && !info->cur.input_count) + { + if (info->check_input_conflict && ! resolve_input_conflict (info)) + return; + /* We have a profitable set of changes. If this is the final pass, + commit them now. Otherwise, we don't know yet if we can make any + change, so put the old code back for now. */ + if (info->mode & STRUCT_EQUIV_FINAL) + confirm_change_group (); + else + cancel_changes (0); + struct_equiv_make_checkpoint (p, info); + } +} + +/* Restore state about current inputs / local registers / liveness + from P. */ +static void +struct_equiv_restore_checkpoint (struct struct_equiv_checkpoint *p, + struct equiv_info *info) +{ + info->cur.ninsns = p->ninsns; + info->cur.x_start = p->x_start; + info->cur.y_start = p->y_start; + info->cur.input_count = p->input_count; + info->cur.input_valid = p->input_valid; + while (info->cur.local_count > p->local_count) + { + info->cur.local_count--; + info->cur.version--; + if (REGNO_REG_SET_P (info->x_local_live, + REGNO (info->x_local[info->cur.local_count]))) + { + assign_reg_reg_set (info->x_local_live, + info->x_local[info->cur.local_count], 0); + assign_reg_reg_set (info->y_local_live, + info->y_local[info->cur.local_count], 0); + info->cur.version--; + } + } + if (info->cur.version != p->version) + info->need_rerun = true; +} + + +/* Update register liveness to reflect that X is now life (if rvalue is + nonzero) or dead (if rvalue is zero) in INFO->x_block, and likewise Y + in INFO->y_block. Return the number of registers the liveness of which + changed in each block (as a negative number if registers became dead). */ +static int +note_local_live (struct equiv_info *info, rtx x, rtx y, int rvalue) +{ + int x_change = assign_reg_reg_set (info->x_local_live, x, rvalue); + int y_change = assign_reg_reg_set (info->y_local_live, y, rvalue); + + gcc_assert (x_change == y_change); + if (y_change) + { + if (reload_completed) + { + unsigned x_regno ATTRIBUTE_UNUSED = REGNO (x); + unsigned y_regno = REGNO (y); + enum machine_mode x_mode = GET_MODE (x); + + if (secondary_reload_class (0, REGNO_REG_CLASS (y_regno), x_mode, x) + != NO_REGS +#ifdef SECONDARY_MEMORY_NEEDED + || SECONDARY_MEMORY_NEEDED (REGNO_REG_CLASS (y_regno), + REGNO_REG_CLASS (x_regno), x_mode) +#endif + ) + y_change *= IMPOSSIBLE_MOVE_FACTOR; + } + info->cur.input_count += y_change; + info->cur.version++; + } + return x_change; +} + +/* Check if *XP is equivalent to Y. Until an an unreconcilable difference is + found, use in-group changes with validate_change on *XP to make register + assignments agree. 
It is the (not necessarily direct) callers + responsibility to verify / confirm / cancel these changes, as appropriate. + RVALUE indicates if the processed piece of rtl is used as a destination, in + which case we can't have different registers being an input. Returns + nonzero if the two blocks have been identified as equivalent, zero otherwise. + RVALUE == 0: destination + RVALUE == 1: source + RVALUE == -1: source, ignore SET_DEST of SET / clobber. */ +bool +rtx_equiv_p (rtx *xp, rtx y, int rvalue, struct equiv_info *info) +{ + rtx x = *xp; + enum rtx_code code; + int length; + const char *format; + int i; + + if (!y || !x) + return x == y; + code = GET_CODE (y); + if (code != REG && x == y) + return true; + if (GET_CODE (x) != code + || GET_MODE (x) != GET_MODE (y)) + return false; + + /* ??? could extend to allow CONST_INT inputs. */ + switch (code) + { + case SUBREG: + gcc_assert (!reload_completed + || !info->live_update); + break; + case REG: + { + unsigned x_regno = REGNO (x); + unsigned y_regno = REGNO (y); + int x_common_live, y_common_live; + + if (reload_completed + && (x_regno >= FIRST_PSEUDO_REGISTER + || y_regno >= FIRST_PSEUDO_REGISTER)) + { + /* We should only see this in REG_NOTEs. */ + gcc_assert (!info->live_update); + /* Returning false will cause us to remove the notes. */ + return false; + } +#ifdef STACK_REGS + /* After reg-stack, can only accept literal matches of stack regs. */ + if (info->mode & CLEANUP_POST_REGSTACK + && (IN_RANGE (x_regno, FIRST_STACK_REG, LAST_STACK_REG) + || IN_RANGE (y_regno, FIRST_STACK_REG, LAST_STACK_REG))) + return x_regno == y_regno; +#endif + + /* If the register is a locally live one in one block, the + corresponding one must be locally live in the other, too, and + match of identical regnos doesn't apply. */ + if (REGNO_REG_SET_P (info->x_local_live, x_regno)) + { + if (!REGNO_REG_SET_P (info->y_local_live, y_regno)) + return false; + } + else if (REGNO_REG_SET_P (info->y_local_live, y_regno)) + return false; + else if (x_regno == y_regno) + { + if (!rvalue && info->cur.input_valid + && (reg_overlap_mentioned_p (x, info->x_input) + || reg_overlap_mentioned_p (x, info->y_input))) + return false; + + /* Update liveness information. */ + if (info->live_update + && assign_reg_reg_set (info->common_live, x, rvalue)) + info->cur.version++; + + return true; + } + + x_common_live = REGNO_REG_SET_P (info->common_live, x_regno); + y_common_live = REGNO_REG_SET_P (info->common_live, y_regno); + if (x_common_live != y_common_live) + return false; + else if (x_common_live) + { + if (! rvalue || info->input_cost < 0 || no_new_pseudos) + return false; + /* If info->live_update is not set, we are processing notes. + We then allow a match with x_input / y_input found in a + previous pass. */ + if (info->live_update && !info->cur.input_valid) + { + info->cur.input_valid = true; + info->x_input = x; + info->y_input = y; + info->cur.input_count += optimize_size ? 2 : 1; + if (info->input_reg + && GET_MODE (info->input_reg) != GET_MODE (info->x_input)) + info->input_reg = NULL_RTX; + if (!info->input_reg) + info->input_reg = gen_reg_rtx (GET_MODE (info->x_input)); + } + else if ((info->live_update + ? ! info->cur.input_valid : ! info->x_input) + || ! rtx_equal_p (x, info->x_input) + || ! rtx_equal_p (y, info->y_input)) + return false; + validate_change (info->cur.x_start, xp, info->input_reg, 1); + } + else + { + int x_nregs = (x_regno >= FIRST_PSEUDO_REGISTER + ? 
1 : hard_regno_nregs[x_regno][GET_MODE (x)]); + int y_nregs = (y_regno >= FIRST_PSEUDO_REGISTER + ? 1 : hard_regno_nregs[y_regno][GET_MODE (y)]); + int size = GET_MODE_SIZE (GET_MODE (x)); + enum machine_mode x_mode = GET_MODE (x); + unsigned x_regno_i, y_regno_i; + int x_nregs_i, y_nregs_i, size_i; + int local_count = info->cur.local_count; + + /* This might be a register local to each block. See if we have + it already registered. */ + for (i = local_count - 1; i >= 0; i--) + { + x_regno_i = REGNO (info->x_local[i]); + x_nregs_i = (x_regno_i >= FIRST_PSEUDO_REGISTER + ? 1 : hard_regno_nregs[x_regno_i][GET_MODE (x)]); + y_regno_i = REGNO (info->y_local[i]); + y_nregs_i = (y_regno_i >= FIRST_PSEUDO_REGISTER + ? 1 : hard_regno_nregs[y_regno_i][GET_MODE (y)]); + size_i = GET_MODE_SIZE (GET_MODE (info->x_local[i])); + + /* If we have a new pair of registers that is wider than an + old pair and enclosing it with matching offsets, + remove the old pair. If we find a matching, wider, old + pair, use the old one. If the width is the same, use the + old one if the modes match, but the new if they don't. + We don't want to get too fancy with subreg_regno_offset + here, so we just test two straightforwad cases each. */ + if (info->live_update + && (x_mode != GET_MODE (info->x_local[i]) + ? size >= size_i : size > size_i)) + { + /* If the new pair is fully enclosing a matching + existing pair, remove the old one. N.B. because + we are removing one entry here, the check below + if we have space for a new entry will succeed. */ + if ((x_regno <= x_regno_i + && x_regno + x_nregs >= x_regno_i + x_nregs_i + && x_nregs == y_nregs && x_nregs_i == y_nregs_i + && x_regno - x_regno_i == y_regno - y_regno_i) + || (x_regno == x_regno_i && y_regno == y_regno_i + && x_nregs >= x_nregs_i && y_nregs >= y_nregs_i)) + { + info->cur.local_count = --local_count; + info->x_local[i] = info->x_local[local_count]; + info->y_local[i] = info->y_local[local_count]; + continue; + } + } + else + { + + /* If the new pair is fully enclosed within a matching + existing pair, succeed. */ + if (x_regno >= x_regno_i + && x_regno + x_nregs <= x_regno_i + x_nregs_i + && x_nregs == y_nregs && x_nregs_i == y_nregs_i + && x_regno - x_regno_i == y_regno - y_regno_i) + break; + if (x_regno == x_regno_i && y_regno == y_regno_i + && x_nregs <= x_nregs_i && y_nregs <= y_nregs_i) + break; + } + + /* Any other overlap causes a match failure. */ + if (x_regno + x_nregs > x_regno_i + && x_regno_i + x_nregs_i > x_regno) + return false; + if (y_regno + y_nregs > y_regno_i + && y_regno_i + y_nregs_i > y_regno) + return false; + } + if (i < 0) + { + /* Not found. Create a new entry if possible. */ + if (!info->live_update + || info->cur.local_count >= STRUCT_EQUIV_MAX_LOCAL) + return false; + info->x_local[info->cur.local_count] = x; + info->y_local[info->cur.local_count] = y; + info->cur.local_count++; + info->cur.version++; + } + note_local_live (info, x, y, rvalue); + } + return true; + } + case SET: + gcc_assert (rvalue < 0); + /* Ignore the destinations role as a destination. Still, we have + to consider input registers embedded in the addresses of a MEM. + N.B., we process the rvalue aspect of STRICT_LOW_PART / + ZERO_EXTEND / SIGN_EXTEND along with their lvalue aspect. */ + if(!set_dest_addr_equiv_p (SET_DEST (x), SET_DEST (y), info)) + return false; + /* Process source. */ + return rtx_equiv_p (&SET_SRC (x), SET_SRC (y), 1, info); + case PRE_MODIFY: + /* Process destination. 
*/ + if (!rtx_equiv_p (&XEXP (x, 0), XEXP (y, 0), 0, info)) + return false; + /* Process source. */ + return rtx_equiv_p (&XEXP (x, 1), XEXP (y, 1), 1, info); + case POST_MODIFY: + { + rtx x_dest0, x_dest1; + + /* Process destination. */ + x_dest0 = XEXP (x, 0); + gcc_assert (REG_P (x_dest0)); + if (!rtx_equiv_p (&XEXP (x, 0), XEXP (y, 0), 0, info)) + return false; + x_dest1 = XEXP (x, 0); + /* validate_change might have changed the destination. Put it back + so that we can do a valid source match. */ + XEXP (x, 0) = x_dest0; + if (!rtx_equiv_p (&XEXP (x, 1), XEXP (y, 1), 0, info)) + return false; + gcc_assert (x_dest1 == XEXP (x, 0)); + /* Process source. */ + return rtx_equiv_p (&XEXP (x, 1), XEXP (y, 1), 1, info); + if (!rtx_equiv_p (&XEXP(x, 0), XEXP (y, 0), 0, info)) + return false; + /* Process both subexpressions as inputs. */ + break; + } + case CLOBBER: + gcc_assert (rvalue < 0); + return true; + /* Some special forms are also rvalues when they appear in lvalue + positions. However, we must ont try to match a register after we + have already altered it with validate_change, consider the rvalue + aspect while we process the lvalue. */ + case STRICT_LOW_PART: + case ZERO_EXTEND: + case SIGN_EXTEND: + { + rtx x_inner, y_inner; + enum rtx_code code; + int change; + + if (rvalue) + break; + x_inner = XEXP (x, 0); + y_inner = XEXP (y, 0); + if (GET_MODE (x_inner) != GET_MODE (y_inner)) + return false; + code = GET_CODE (x_inner); + if (code != GET_CODE (y_inner)) + return false; + /* The address of a MEM is an input that will be processed during + rvalue == -1 processing. */ + if (code == SUBREG) + { + if (SUBREG_BYTE (x_inner) != SUBREG_BYTE (y_inner)) + return false; + x = x_inner; + x_inner = SUBREG_REG (x_inner); + y_inner = SUBREG_REG (y_inner); + if (GET_MODE (x_inner) != GET_MODE (y_inner)) + return false; + code = GET_CODE (x_inner); + if (code != GET_CODE (y_inner)) + return false; + } + if (code == MEM) + return true; + gcc_assert (code == REG); + if (! rtx_equiv_p (&XEXP (x, 0), y_inner, rvalue, info)) + return false; + if (REGNO (x_inner) == REGNO (y_inner)) + { + change = assign_reg_reg_set (info->common_live, x_inner, 1); + info->cur.version++; + } + else + change = note_local_live (info, x_inner, y_inner, 1); + gcc_assert (change); + return true; + } + /* The AUTO_INC / POST_MODIFY / PRE_MODIFY sets are modelled to take + place during input processing, however, that is benign, since they + are paired with reads. */ + case MEM: + return !rvalue || rtx_equiv_p (&XEXP (x, 0), XEXP (y, 0), rvalue, info); + case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC: + return (rtx_equiv_p (&XEXP (x, 0), XEXP (y, 0), 0, info) + && rtx_equiv_p (&XEXP (x, 0), XEXP (y, 0), 1, info)); + case PARALLEL: + gcc_assert (rvalue < 0); + break; + case LABEL_REF: + /* Check special tablejump match case. */ + if (XEXP (y, 0) == info->y_label) + return (XEXP (x, 0) == info->x_label); + /* We can't assume nonlocal labels have their following insns yet. */ + if (LABEL_REF_NONLOCAL_P (x) || LABEL_REF_NONLOCAL_P (y)) + return XEXP (x, 0) == XEXP (y, 0); + + /* Two label-refs are equivalent if they point at labels + in the same position in the instruction stream. */ + return (next_real_insn (XEXP (x, 0)) + == next_real_insn (XEXP (y, 0))); + case SYMBOL_REF: + return XSTR (x, 0) == XSTR (y, 0); + /* Some rtl is guaranteed to be shared, or unique; If we didn't match + EQ equality above, they aren't the same. 
*/ + case CONST_INT: + case CODE_LABEL: + return false; + default: + break; + } + + /* For commutative operations, the RTX match if the operands match in any + order. */ + if (targetm.commutative_p (x, UNKNOWN)) + return ((rtx_equiv_p (&XEXP (x, 0), XEXP (y, 0), rvalue, info) + && rtx_equiv_p (&XEXP (x, 1), XEXP (y, 1), rvalue, info)) + || (rtx_equiv_p (&XEXP (x, 0), XEXP (y, 1), rvalue, info) + && rtx_equiv_p (&XEXP (x, 1), XEXP (y, 0), rvalue, info))); + + /* Process subexpressions - this is similar to rtx_equal_p. */ + length = GET_RTX_LENGTH (code); + format = GET_RTX_FORMAT (code); + + for (i = 0; i < length; ++i) + { + switch (format[i]) + { + case 'w': + if (XWINT (x, i) != XWINT (y, i)) + return false; + break; + case 'n': + case 'i': + if (XINT (x, i) != XINT (y, i)) + return false; + break; + case 'V': + case 'E': + if (XVECLEN (x, i) != XVECLEN (y, i)) + return false; + if (XVEC (x, i) != 0) + { + int j; + for (j = 0; j < XVECLEN (x, i); ++j) + { + if (! rtx_equiv_p (&XVECEXP (x, i, j), XVECEXP (y, i, j), + rvalue, info)) + return false; + } + } + break; + case 'e': + if (! rtx_equiv_p (&XEXP (x, i), XEXP (y, i), rvalue, info)) + return false; + break; + case 'S': + case 's': + if ((XSTR (x, i) || XSTR (y, i)) + && (! XSTR (x, i) || ! XSTR (y, i) + || strcmp (XSTR (x, i), XSTR (y, i)))) + return false; + break; + case 'u': + /* These are just backpointers, so they don't matter. */ + break; + case '0': + case 't': + break; + /* It is believed that rtx's at this level will never + contain anything but integers and other rtx's, + except for within LABEL_REFs and SYMBOL_REFs. */ + default: + gcc_unreachable (); + } + } + return true; +} + +/* Do only the rtx_equiv_p SET_DEST processing for SETs and CLOBBERs. + Since we are scanning backwards, this the first step in processing each + insn. Return true for success. */ +static bool +set_dest_equiv_p (rtx x, rtx y, struct equiv_info *info) +{ + if (!x || !y) + return x == y; + if (GET_CODE (x) != GET_CODE (y)) + return false; + else if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER) + return rtx_equiv_p (&XEXP (x, 0), XEXP (y, 0), 0, info); + else if (GET_CODE (x) == PARALLEL) + { + int j; + + if (XVECLEN (x, 0) != XVECLEN (y, 0)) + return false; + for (j = 0; j < XVECLEN (x, 0); ++j) + { + rtx xe = XVECEXP (x, 0, j); + rtx ye = XVECEXP (y, 0, j); + + if (GET_CODE (xe) != GET_CODE (ye)) + return false; + if ((GET_CODE (xe) == SET || GET_CODE (xe) == CLOBBER) + && ! rtx_equiv_p (&XEXP (xe, 0), XEXP (ye, 0), 0, info)) + return false; + } + } + return true; +} + +/* Process MEMs in SET_DEST destinations. We must not process this together + with REG SET_DESTs, but must do it separately, lest when we see + [(set (reg:SI foo) (bar)) + (set (mem:SI (reg:SI foo) (baz)))] + struct_equiv_block_eq could get confused to assume that (reg:SI foo) + is not live before this instruction. */ +static bool +set_dest_addr_equiv_p (rtx x, rtx y, struct equiv_info *info) +{ + enum rtx_code code = GET_CODE (x); + int length; + const char *format; + int i; + + if (code != GET_CODE (y)) + return false; + if (code == MEM) + return rtx_equiv_p (&XEXP (x, 0), XEXP (y, 0), 1, info); + + /* Process subexpressions. */ + length = GET_RTX_LENGTH (code); + format = GET_RTX_FORMAT (code); + + for (i = 0; i < length; ++i) + { + switch (format[i]) + { + case 'V': + case 'E': + if (XVECLEN (x, i) != XVECLEN (y, i)) + return false; + if (XVEC (x, i) != 0) + { + int j; + for (j = 0; j < XVECLEN (x, i); ++j) + { + if (! 
set_dest_addr_equiv_p (XVECEXP (x, i, j), + XVECEXP (y, i, j), info)) + return false; + } + } + break; + case 'e': + if (! set_dest_addr_equiv_p (XEXP (x, i), XEXP (y, i), info)) + return false; + break; + default: + break; + } + } + return true; +} + /* Check if the set of REG_DEAD notes attached to I1 and I2 allows us to - go ahead with merging I1 and I2, which otherwise look fine. */ + go ahead with merging I1 and I2, which otherwise look fine. + Inputs / local registers for the inputs of I1 and I2 have already been + set up. */ static bool death_notes_match_p (rtx i1 ATTRIBUTE_UNUSED, rtx i2 ATTRIBUTE_UNUSED, - int mode ATTRIBUTE_UNUSED) + struct equiv_info *info ATTRIBUTE_UNUSED) { #ifdef STACK_REGS /* If cross_jump_death_matters is not 0, the insn's mode - indicates whether or not the insn contains any stack-like - regs. */ + indicates whether or not the insn contains any stack-like regs. */ - if ((mode & CLEANUP_POST_REGSTACK) && stack_regs_mentioned (i1)) + if ((info->mode & CLEANUP_POST_REGSTACK) && stack_regs_mentioned (i1)) { /* If register stack conversion has already been done, then death notes must also be compared before it is certain that @@ -158,7 +849,18 @@ death_notes_match_p (rtx i1 ATTRIBUTE_UNUSED, rtx i2 ATTRIBUTE_UNUSED, for (note = REG_NOTES (i2); note; note = XEXP (note, 1)) if (REG_NOTE_KIND (note) == REG_DEAD && STACK_REG_P (XEXP (note, 0))) - SET_HARD_REG_BIT (i2_regset, REGNO (XEXP (note, 0))); + { + unsigned regno = REGNO (XEXP (note, 0)); + int i; + + for (i = info->cur.local_count - 1; i >= 0; i--) + if (regno == REGNO (info->y_local[i])) + { + regno = REGNO (info->x_local[i]); + break; + } + SET_HARD_REG_BIT (i2_regset, regno); + } GO_IF_HARD_REG_EQUAL (i1_regset, i2_regset, done); @@ -174,19 +876,17 @@ death_notes_match_p (rtx i1 ATTRIBUTE_UNUSED, rtx i2 ATTRIBUTE_UNUSED, /* Return true if I1 and I2 are equivalent and thus can be crossjumped. */ bool -insns_match_p (int mode, rtx i1, rtx i2) +insns_match_p (rtx i1, rtx i2, struct equiv_info *info) { - rtx p1, p2; + int rvalue_change_start; + struct struct_equiv_checkpoint before_rvalue_change; /* Verify that I1 and I2 are equivalent. */ if (GET_CODE (i1) != GET_CODE (i2)) return false; - p1 = PATTERN (i1); - p2 = PATTERN (i2); - - if (GET_CODE (p1) != GET_CODE (p2)) - return false; + info->cur.x_start = i1; + info->cur.y_start = i2; /* If this is a CALL_INSN, compare register usage information. If we don't check this on stack register machines, the two @@ -198,17 +898,36 @@ insns_match_p (int mode, rtx i1, rtx i2) ??? We take the simple route for now and assume that if they're equal, they were constructed identically. */ - if (CALL_P (i1) - && (!rtx_equal_p (CALL_INSN_FUNCTION_USAGE (i1), - CALL_INSN_FUNCTION_USAGE (i2)) - || SIBLING_CALL_P (i1) != SIBLING_CALL_P (i2))) - return false; - - if (!death_notes_match_p (i1, i2, mode)) - return false; - - if (reload_completed - ? rtx_renumbered_equal_p (p1, p2) : rtx_equal_p (p1, p2)) + if (CALL_P (i1)) + { + if (SIBLING_CALL_P (i1) != SIBLING_CALL_P (i2) + || ! set_dest_equiv_p (PATTERN (i1), PATTERN (i2), info) + || ! set_dest_equiv_p (CALL_INSN_FUNCTION_USAGE (i1), + CALL_INSN_FUNCTION_USAGE (i2), info) + || ! rtx_equiv_p (&CALL_INSN_FUNCTION_USAGE (i1), + CALL_INSN_FUNCTION_USAGE (i2), -1, info)) + { + cancel_changes (0); + return false; + } + } + else if (INSN_P (i1)) + { + if (! 
set_dest_equiv_p (PATTERN (i1), PATTERN (i2), info)) + { + cancel_changes (0); + return false; + } + } + rvalue_change_start = num_validated_changes (); + struct_equiv_make_checkpoint (&before_rvalue_change, info); + /* Check death_notes_match_p *after* the inputs have been processed, + so that local inputs will already have been set up. */ + if (! INSN_P (i1) + || (!bitmap_bit_p (info->equiv_used, info->cur.ninsns) + && rtx_equiv_p (&PATTERN (i1), PATTERN (i2), -1, info) + && death_notes_match_p (i1, i2, info) + && verify_changes (0))) return true; /* Do not do EQUIV substitution after reload. First, we're undoing the @@ -218,10 +937,14 @@ insns_match_p (int mode, rtx i1, rtx i2) targets to disallow the troublesome insns after splitting. */ if (!reload_completed) { - /* The following code helps take care of G++ cleanups. */ - rtx equiv1 = find_reg_equal_equiv_note (i1); - rtx equiv2 = find_reg_equal_equiv_note (i2); + rtx equiv1, equiv2; + cancel_changes (rvalue_change_start); + struct_equiv_restore_checkpoint (&before_rvalue_change, info); + + /* The following code helps take care of G++ cleanups. */ + equiv1 = find_reg_equal_equiv_note (i1); + equiv2 = find_reg_equal_equiv_note (i2); if (equiv1 && equiv2 /* If the equivalences are not to a constant, they may reference pseudos that no longer exist, so we can't @@ -232,131 +955,390 @@ insns_match_p (int mode, rtx i1, rtx i2) { rtx s1 = single_set (i1); rtx s2 = single_set (i2); - if (s1 != 0 && s2 != 0 - && rtx_renumbered_equal_p (SET_DEST (s1), SET_DEST (s2))) + + if (s1 != 0 && s2 != 0) { validate_change (i1, &SET_SRC (s1), XEXP (equiv1, 0), 1); validate_change (i2, &SET_SRC (s2), XEXP (equiv2, 0), 1); - if (! rtx_renumbered_equal_p (p1, p2)) - cancel_changes (0); - else if (apply_change_group ()) - return true; + /* Only inspecting the new SET_SRC is not good enough, + because there may also be bare USEs in a single_set + PARALLEL. */ + if (rtx_equiv_p (&PATTERN (i1), PATTERN (i2), -1, info) + && death_notes_match_p (i1, i2, info) + && verify_changes (0)) + { + /* Mark this insn so that we'll use the equivalence in + all subsequent passes. */ + bitmap_set_bit (info->equiv_used, info->cur.ninsns); + return true; + } } } } + cancel_changes (0); return false; } - -/* Look through the insns at the end of BB1 and BB2 and find the longest - sequence that are equivalent. Store the first insns for that sequence - in *F1 and *F2 and return the sequence length. - To simplify callers of this function, if the blocks match exactly, - store the head of the blocks in *F1 and *F2. */ +/* Set up mode and register information in INFO. Return true for success. */ +bool +struct_equiv_init (int mode, struct equiv_info *info) +{ + if ((info->x_block->flags | info->y_block->flags) & BB_DIRTY) + update_life_info_in_dirty_blocks (UPDATE_LIFE_GLOBAL_RM_NOTES, + (PROP_DEATH_NOTES + | ((mode & CLEANUP_POST_REGSTACK) + ? PROP_POST_REGSTACK : 0))); + if (!REG_SET_EQUAL_P (info->x_block->il.rtl->global_live_at_end, + info->y_block->il.rtl->global_live_at_end)) + { +#ifdef STACK_REGS + unsigned rn; + + if (!(mode & CLEANUP_POST_REGSTACK)) + return false; + /* After reg-stack. Remove bogus live info about stack regs. N.B. + these regs are not necessarily all dead - we swap random bogosity + against constant bogosity. However, clearing these bits at + least makes the regsets comparable. 
+      for (rn = FIRST_STACK_REG; rn < LAST_STACK_REG; rn++)
+	{
+	  CLEAR_REGNO_REG_SET (info->x_block->il.rtl->global_live_at_end, rn);
+	  CLEAR_REGNO_REG_SET (info->y_block->il.rtl->global_live_at_end, rn);
+	}
+      if (!REG_SET_EQUAL_P (info->x_block->il.rtl->global_live_at_end,
+			    info->y_block->il.rtl->global_live_at_end))
+#endif
+	return false;
+    }
+  info->mode = mode;
+  if (mode & STRUCT_EQUIV_START)
+    {
+      info->x_input = info->y_input = info->input_reg = NULL_RTX;
+      info->equiv_used = ALLOC_REG_SET (&reg_obstack);
+      info->check_input_conflict = false;
+    }
+  info->had_input_conflict = false;
+  info->cur.ninsns = info->cur.version = 0;
+  info->cur.local_count = info->cur.input_count = 0;
+  info->cur.x_start = info->cur.y_start = NULL_RTX;
+  info->x_label = info->y_label = NULL_RTX;
+  info->need_rerun = false;
+  info->live_update = true;
+  info->cur.input_valid = false;
+  info->common_live = ALLOC_REG_SET (&reg_obstack);
+  info->x_local_live = ALLOC_REG_SET (&reg_obstack);
+  info->y_local_live = ALLOC_REG_SET (&reg_obstack);
+  COPY_REG_SET (info->common_live, info->x_block->il.rtl->global_live_at_end);
+  struct_equiv_make_checkpoint (&info->best_match, info);
+  return true;
+}
+
+/* Insns XI and YI have been matched.  Merge memory attributes and reg
+   notes.  */
+static void
+struct_equiv_merge (rtx xi, rtx yi, struct equiv_info *info)
+{
+  rtx equiv1, equiv2;
+
+  merge_memattrs (xi, yi);
+
+  /* If the merged insns have different REG_EQUAL notes, then
+     remove them.  */
+  info->live_update = false;
+  equiv1 = find_reg_equal_equiv_note (xi);
+  equiv2 = find_reg_equal_equiv_note (yi);
+  if (equiv1 && !equiv2)
+    remove_note (xi, equiv1);
+  else if (!equiv1 && equiv2)
+    remove_note (yi, equiv2);
+  else if (equiv1 && equiv2
+	   && !rtx_equiv_p (&XEXP (equiv1, 0), XEXP (equiv2, 0),
+			    1, info))
+    {
+      remove_note (xi, equiv1);
+      remove_note (yi, equiv2);
+    }
+  info->live_update = true;
+}
+/* Return number of matched insns.
+   This function must be called up to three times for a successful cross-jump
+   match:
+   first to find out which instructions do match.  While trying to match
+   another instruction that doesn't match, we destroy information in info
+   about the actual inputs.  So if there have been any before the last
+   match attempt, we need to call this function again to recompute the
+   actual inputs up to the actual start of the matching sequence.
+   When we are then satisfied that the cross-jump is worthwhile, we
+   call this function a third time to make any changes needed to make the
+   sequences match: apply equivalences, remove non-matching
+   notes and merge memory attributes.  */
 int
-flow_find_cross_jump (int mode ATTRIBUTE_UNUSED, basic_block bb1,
-		      basic_block bb2, rtx *f1, rtx *f2)
+struct_equiv_block_eq (int mode, struct equiv_info *info)
 {
-  rtx i1, i2, last1, last2, afterlast1, afterlast2;
-  int ninsns = 0;
+  rtx x_stop, y_stop;
+  rtx xi, yi;
+  int i;
+
+  if (mode & STRUCT_EQUIV_START)
+    {
+      x_stop = BB_HEAD (info->x_block);
+      y_stop = BB_HEAD (info->y_block);
+      if (!x_stop || !y_stop)
+	return 0;
+    }
+  else
+    {
+      x_stop = info->cur.x_start;
+      y_stop = info->cur.y_start;
+    }
+  if (!struct_equiv_init (mode, info))
+    gcc_unreachable ();
 
   /* Skip simple jumps at the end of the blocks.  Complex jumps still
      need to be compared for equivalence, which we'll do below.  */
-  i1 = BB_END (bb1);
-  last1 = afterlast1 = last2 = afterlast2 = NULL_RTX;
-  if (onlyjump_p (i1)
-      || (returnjump_p (i1) && !side_effects_p (PATTERN (i1))))
+  xi = BB_END (info->x_block);
+  if (onlyjump_p (xi)
+      || (returnjump_p (xi) && !side_effects_p (PATTERN (xi))))
     {
-      last1 = i1;
-      i1 = PREV_INSN (i1);
+      info->cur.x_start = xi;
+      xi = PREV_INSN (xi);
     }
 
-  i2 = BB_END (bb2);
-  if (onlyjump_p (i2)
-      || (returnjump_p (i2) && !side_effects_p (PATTERN (i2))))
+  yi = BB_END (info->y_block);
+  if (onlyjump_p (yi)
+      || (returnjump_p (yi) && !side_effects_p (PATTERN (yi))))
     {
-      last2 = i2;
+      info->cur.y_start = yi;
       /* Count everything except for unconditional jump as insn.  */
-      if (!simplejump_p (i2) && !returnjump_p (i2) && last1)
-	ninsns++;
-      i2 = PREV_INSN (i2);
+      /* ??? Is it right to count unconditional jumps with a clobber?
+	 Should we count conditional returns?  */
+      if (!simplejump_p (yi) && !returnjump_p (yi) && info->cur.x_start)
+	info->cur.ninsns++;
+      yi = PREV_INSN (yi);
     }
 
-  while (true)
+  if (mode & STRUCT_EQUIV_MATCH_JUMPS)
     {
-      /* Ignore notes.  */
-      while (!INSN_P (i1) && i1 != BB_HEAD (bb1))
-	i1 = PREV_INSN (i1);
+      /* The caller is expected to have compared the jumps already, but we
+	 need to match them again to get any local registers and inputs.  */
+      gcc_assert (!info->cur.x_start == !info->cur.y_start);
+      if (info->cur.x_start)
+	{
+	  if (any_condjump_p (info->cur.x_start)
+	      ? !condjump_equiv_p (info, false)
+	      : !insns_match_p (info->cur.x_start, info->cur.y_start, info))
+	    gcc_unreachable ();
+	}
+      else if (any_condjump_p (xi) && any_condjump_p (yi))
+	{
+	  info->cur.x_start = xi;
+	  info->cur.y_start = yi;
+	  xi = PREV_INSN (xi);
+	  yi = PREV_INSN (yi);
+	  info->cur.ninsns++;
+	  if (!condjump_equiv_p (info, false))
+	    gcc_unreachable ();
+	}
+      if (info->cur.x_start && info->mode & STRUCT_EQUIV_FINAL)
+	struct_equiv_merge (info->cur.x_start, info->cur.y_start, info);
+    }
 
-      while (!INSN_P (i2) && i2 != BB_HEAD (bb2))
-	i2 = PREV_INSN (i2);
+  struct_equiv_improve_checkpoint (&info->best_match, info);
+  info->x_end = xi;
+  info->y_end = yi;
+  if (info->cur.x_start != x_stop)
+    for (;;)
+      {
+	/* Ignore notes.  */
+	while (!INSN_P (xi) && xi != x_stop)
+	  xi = PREV_INSN (xi);
 
-      if (i1 == BB_HEAD (bb1) || i2 == BB_HEAD (bb2))
-	break;
+	while (!INSN_P (yi) && yi != y_stop)
+	  yi = PREV_INSN (yi);
+
+	if (!insns_match_p (xi, yi, info))
+	  break;
+	if (INSN_P (xi))
+	  {
+	    if (info->mode & STRUCT_EQUIV_FINAL)
+	      struct_equiv_merge (xi, yi, info);
+	    info->cur.ninsns++;
+	    struct_equiv_improve_checkpoint (&info->best_match, info);
+	  }
+	if (xi == x_stop || yi == y_stop)
+	  {
+	    /* If we reached the start of at least one of the blocks, but
+	       best_match hasn't been advanced back to the first valid insn
+	       yet, represent the increased benefit of completing the block
+	       as an increased instruction count.  */
+	    if (info->best_match.x_start != info->cur.x_start
+		&& (xi == BB_HEAD (info->x_block)
+		    || yi == BB_HEAD (info->y_block)))
+	      {
+		info->cur.ninsns++;
+		struct_equiv_improve_checkpoint (&info->best_match, info);
+		info->cur.ninsns--;
+		if (info->best_match.ninsns > info->cur.ninsns)
+		  info->best_match.ninsns = info->cur.ninsns;
+	      }
+	    break;
+	  }
+	xi = PREV_INSN (xi);
+	yi = PREV_INSN (yi);
+      }
+
+  /* If we failed to match an insn, but had some changes registered from
+     trying to make the insns match, we need to cancel these changes now.  */
+  cancel_changes (0);
+  /* Restore to best_match to get the sequence with the best known-so-far
+     cost-benefit difference.  */
+  struct_equiv_restore_checkpoint (&info->best_match, info);
+
+  /* Include preceding notes and labels in the cross-jump / if-conversion.
+     One, this may bring us to the head of the blocks.
+     Two, it keeps line number notes as matched as may be.  */
+  if (info->cur.ninsns)
+    {
+      xi = info->cur.x_start;
+      yi = info->cur.y_start;
+      while (xi != x_stop && !INSN_P (PREV_INSN (xi)))
+	xi = PREV_INSN (xi);
 
-      if (!insns_match_p (mode, i1, i2))
-	break;
+      while (yi != y_stop && !INSN_P (PREV_INSN (yi)))
+	yi = PREV_INSN (yi);
 
-      merge_memattrs (i1, i2);
+      info->cur.x_start = xi;
+      info->cur.y_start = yi;
+    }
 
-      /* Don't begin a cross-jump with a NOTE insn.  */
-      if (INSN_P (i1))
+  if (!info->cur.input_valid)
+    info->x_input = info->y_input = info->input_reg = NULL_RTX;
+  if (!info->need_rerun)
+    {
+      find_dying_inputs (info);
+      if (info->mode & STRUCT_EQUIV_FINAL)
+	{
+	  if (info->check_input_conflict && ! resolve_input_conflict (info))
+	    gcc_unreachable ();
+	}
+      else
	{
-	  /* If the merged insns have different REG_EQUAL notes, then
-	     remove them.  */
-	  rtx equiv1 = find_reg_equal_equiv_note (i1);
-	  rtx equiv2 = find_reg_equal_equiv_note (i2);
-
-	  if (equiv1 && !equiv2)
-	    remove_note (i1, equiv1);
-	  else if (!equiv1 && equiv2)
-	    remove_note (i2, equiv2);
-	  else if (equiv1 && equiv2
-		   && !rtx_equal_p (XEXP (equiv1, 0), XEXP (equiv2, 0)))
+	  bool input_conflict = info->had_input_conflict;
+
+	  if (!input_conflict
+	      && info->dying_inputs > 1
+	      && bitmap_intersect_p (info->x_local_live, info->y_local_live))
	    {
-	      remove_note (i1, equiv1);
-	      remove_note (i2, equiv2);
+	      regset_head clobbered_regs;
+
+	      INIT_REG_SET (&clobbered_regs);
+	      for (i = 0; i < info->cur.local_count; i++)
+		{
+		  if (assign_reg_reg_set (&clobbered_regs, info->y_local[i], 0))
+		    {
+		      input_conflict = true;
+		      break;
+		    }
+		  assign_reg_reg_set (&clobbered_regs, info->x_local[i], 1);
+		}
+	      CLEAR_REG_SET (&clobbered_regs);
	    }
-
-	  afterlast1 = last1, afterlast2 = last2;
-	  last1 = i1, last2 = i2;
-	  ninsns++;
+	  if (input_conflict && !info->check_input_conflict)
+	    info->need_rerun = true;
+	  info->check_input_conflict = input_conflict;
	}
-
-      i1 = PREV_INSN (i1);
-      i2 = PREV_INSN (i2);
     }
-#ifdef HAVE_cc0
-  /* Don't allow the insn after a compare to be shared by
-     cross-jumping unless the compare is also shared.  */
-  if (ninsns && reg_mentioned_p (cc0_rtx, last1) && ! sets_cc0_p (last1))
-    last1 = afterlast1, last2 = afterlast2, ninsns--;
-#endif
+  if (info->mode & STRUCT_EQUIV_NEED_FULL_BLOCK
+      && (info->cur.x_start != x_stop || info->cur.y_start != y_stop))
+    return 0;
+  return info->cur.ninsns;
+}
 
-  /* Include preceding notes and labels in the cross-jump.  One,
-     this may bring us to the head of the blocks as requested above.
-     Two, it keeps line number notes as matched as may be.  */
-  if (ninsns)
-    {
-      while (last1 != BB_HEAD (bb1) && !INSN_P (PREV_INSN (last1)))
-	last1 = PREV_INSN (last1);
+/* For each local register, set info->local_rvalue to true iff the register
+   is a dying input.  Store the total number of these in info->dying_inputs.  */
+static void
+find_dying_inputs (struct equiv_info *info)
+{
+  int i;
 
-      if (last1 != BB_HEAD (bb1) && LABEL_P (PREV_INSN (last1)))
-	last1 = PREV_INSN (last1);
+  info->dying_inputs = 0;
+  for (i = info->cur.local_count-1; i >=0; i--)
+    {
+      rtx x = info->x_local[i];
+      unsigned regno = REGNO (x);
+      int nregs = (regno >= FIRST_PSEUDO_REGISTER
+		   ? 1 : hard_regno_nregs[regno][GET_MODE (x)]);
+
+      for (info->local_rvalue[i] = false; nregs >= 0; regno++, --nregs)
+	if (REGNO_REG_SET_P (info->x_local_live, regno))
+	  {
+	    info->dying_inputs++;
+	    info->local_rvalue[i] = true;
+	    break;
+	  }
+    }
+}
 
-      while (last2 != BB_HEAD (bb2) && !INSN_P (PREV_INSN (last2)))
-	last2 = PREV_INSN (last2);
+/* For each local register that is a dying input, y_local[i] will be
+   copied to x_local[i].  We'll do this in ascending order.  Try to
+   re-order the locals to avoid conflicts like r3 = r2; r4 = r3; .
+   Return true iff the re-ordering is successful, or not necessary.  */
+static bool
+resolve_input_conflict (struct equiv_info *info)
+{
+  int i, j, end;
+  int nswaps = 0;
+  rtx save_x_local[STRUCT_EQUIV_MAX_LOCAL];
+  rtx save_y_local[STRUCT_EQUIV_MAX_LOCAL];
 
-      if (last2 != BB_HEAD (bb2) && LABEL_P (PREV_INSN (last2)))
-	last2 = PREV_INSN (last2);
+  find_dying_inputs (info);
+  if (info->dying_inputs <= 1)
+    return true;
+  memcpy (save_x_local, info->x_local, sizeof save_x_local);
+  memcpy (save_y_local, info->y_local, sizeof save_y_local);
+  end = info->cur.local_count - 1;
+  for (i = 0; i <= end; i++)
+    {
+      /* Cycle detection with regsets is expensive, so we just check that
+	 we don't exceed the maximum number of swaps needed in the acyclic
+	 case.  */
+      int max_swaps = end - i;
+
+      /* Check if x_local[i] will be clobbered.  */
+      if (!info->local_rvalue[i])
+	continue;
+      /* Check if any later value needs to be copied earlier.  */
+      for (j = i + 1; j <= end; j++)
+	{
+	  rtx tmp;
 
-  *f1 = last1;
-  *f2 = last2;
+	  if (!info->local_rvalue[j])
+	    continue;
+	  if (!reg_overlap_mentioned_p (info->x_local[i], info->y_local[j]))
+	    continue;
+	  if (--max_swaps < 0)
+	    {
+	      memcpy (info->x_local, save_x_local, sizeof save_x_local);
+	      memcpy (info->y_local, save_y_local, sizeof save_y_local);
+	      return false;
+	    }
+	  nswaps++;
+	  tmp = info->x_local[i];
+	  info->x_local[i] = info->x_local[j];
+	  info->x_local[j] = tmp;
+	  tmp = info->y_local[i];
+	  info->y_local[i] = info->y_local[j];
+	  info->y_local[j] = tmp;
+	  j = i;
+	}
     }
-
-  return ninsns;
+  info->had_input_conflict = true;
+  if (dump_file && nswaps)
+    fprintf (dump_file, "Resolved input conflict, %d %s.\n",
+	     nswaps, nswaps == 1 ? "swap" : "swaps");
+  return true;
 }
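
The comment above struct_equiv_block_eq describes a three-phase calling protocol: a first call to find the matching tail, optional reruns to recompute the actual inputs, and a final call to apply the changes. A minimal caller might look like the sketch below; it is illustrative only and is not part of this patch. The helper name match_blocks_sketch and the profitability placeholder are assumptions; the real driver is try_crossjump_to_edge in cfgcleanup.c, which this hunk does not show.

/* Illustrative driver only -- not part of the patch.  MODE carries the
   caller's CLEANUP_* flags; the profitability test is a stand-in.  */
static int
match_blocks_sketch (basic_block bb1, basic_block bb2, int mode)
{
  struct equiv_info info;
  int nmatch;

  info.x_block = bb1;
  info.y_block = bb2;

  /* Pass 1: find out how long a tail of the two blocks matches.  */
  nmatch = struct_equiv_block_eq (STRUCT_EQUIV_START | mode, &info);

  /* Pass 2: failed match attempts may have clobbered the input
     information, so recompute it over the range that did match.  */
  while (nmatch && info.need_rerun)
    nmatch = struct_equiv_block_eq (STRUCT_EQUIV_RERUN | mode, &info);

  /* Pass 3: once the match is judged worthwhile, apply the equivalences,
     merge memory attributes and drop mismatched notes.  */
  if (nmatch /* && deemed profitable by the caller */)
    nmatch = struct_equiv_block_eq (STRUCT_EQUIV_FINAL | mode, &info);

  return nmatch;
}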
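The copy-ordering conflict that resolve_input_conflict re-orders away (the r3 = r2; r4 = r3; case from its comment) can be seen with plain integers standing in for the registers. This demo is only an illustration of the ordering issue, with made-up values, not GCC code.

/* Stand-alone illustration of the input-copy ordering problem.  */
#include <assert.h>

static void
input_copy_order_demo (void)
{
  int r2 = 2, r3 = 3, r4 = 4;

  /* Naive ascending order, y_local -> x_local:
     r3 = r2; r4 = r3;  -- r3 is clobbered before r4 reads it.  */
  r3 = r2;
  r4 = r3;
  assert (r3 == 2 && r4 == 2);	/* r4 got the wrong value.  */

  /* Order arranged by resolve_input_conflict: read r3 first.  */
  r2 = 2, r3 = 3, r4 = 4;
  r4 = r3;
  r3 = r2;
  assert (r3 == 2 && r4 == 3);	/* Both copies see their old inputs.  */
}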