diff options
author | Richard Sandiford <richard.sandiford@arm.com> | 2020-12-17 00:15:12 +0000 |
---|---|---|
committer | Richard Sandiford <richard.sandiford@arm.com> | 2020-12-17 00:15:12 +0000 |
commit | 0b76990a9d75d97b84014e37519086b81824c307 (patch) | |
tree | cbf8f3b50118a3bfb17db20d1eb6d2a1e2fbd3be | |
parent | 73b7582775254b764fd92ddb252a33dc15872c69 (diff) | |
download | gcc-0b76990a9d75d97b84014e37519086b81824c307.tar.gz |
fwprop: Rewrite to use RTL SSA
This patch rewrites fwprop.c to use the RTL SSA framework. It tries
as far as possible to mimic the old behaviour, even in cases where
that doesn't fit naturally with the new framework. I've added ???
comments to mark those places, but I think “fixing” them should
be done separately to make bisection easier.
In particular:
* The old implementation iterated over uses, and after a successful
substitution, the new insn's uses were added to the end of the list.
The pass still processed those uses, but because it processed them at
the end, it didn't fully optimise one instruction before propagating
it into the next.
The new version follows the same approach for comparison purposes,
but I'd like to drop that as a follow-on patch.
* The old implementation operated on single use sites (DF_REF_LOCs).
This doesn't work well for instructions with match_dups, where it's
necessary to update both an operand and its dups at the same time.
For example, attempting to substitute into a divmod instruction would
fail because only the div or the mod side would be updated.
The new version again follows this to some extent for comparison
purposes (although not exactly). Again I'd like to drop it as a
follow-on patch.
One difference is that if a register occurs in multiple MEM addresses
in a set, the new version will try to update them all at once. This is
what causes the SVE ACLE st4* output to improve.
Also, the old version didn't naturally guarantee termination (PR79405),
whereas the new one does.
gcc/
* fwprop.c: Rewrite to use the RTL SSA framework.
gcc/testsuite/
* gcc.dg/rtl/x86_64/test-return-const.c.before-fwprop.c: Don't
expect insn updates to be deferred.
* gcc.target/aarch64/sve/acle/asm/st4_s8.c: Expect the addition
to be folded into the address.
* gcc.target/aarch64/sve/acle/asm/st4_u8.c: Likewise.
-rw-r--r-- | gcc/fwprop.c | 1685 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/rtl/x86_64/test-return-const.c.before-fwprop.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_s8.c | 8 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_u8.c | 8 |
4 files changed, 548 insertions, 1155 deletions
diff --git a/gcc/fwprop.c b/gcc/fwprop.c index 756c1d6b405..2d2d4b6c064 100644 --- a/gcc/fwprop.c +++ b/gcc/fwprop.c @@ -18,32 +18,28 @@ You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. */ +#define INCLUDE_ALGORITHM +#define INCLUDE_FUNCTIONAL #include "config.h" #include "system.h" #include "coretypes.h" #include "backend.h" -#include "target.h" #include "rtl.h" -#include "predict.h" #include "df.h" -#include "memmodel.h" -#include "tm_p.h" -#include "insn-config.h" -#include "emit-rtl.h" -#include "recog.h" +#include "rtl-ssa.h" #include "sparseset.h" +#include "predict.h" #include "cfgrtl.h" #include "cfgcleanup.h" #include "cfgloop.h" #include "tree-pass.h" -#include "domwalk.h" #include "rtl-iter.h" - +#include "target.h" /* This pass does simple forward propagation and simplification when an operand of an insn can only come from a single def. This pass uses - df.c, so it is global. However, we only do limited analysis of + RTL SSA, so it is global. However, we only do limited analysis of available expressions. 1) The pass tries to propagate the source of the def into the use, @@ -60,9 +56,9 @@ along with GCC; see the file COPYING3. If not see (set (subreg:SI (reg:DI 120) 0) (const_int 0)) (set (subreg:SI (reg:DI 120) 4) (const_int -1)) (set (subreg:SI (reg:DI 122) 0) - (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0))) + (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0))) (set (subreg:SI (reg:DI 122) 4) - (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4))) + (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4))) can be simplified to the much simpler @@ -89,7 +85,7 @@ along with GCC; see the file COPYING3. 
If not see (set (reg:QI 120) (subreg:QI (reg:SI 118) 0)) (set (reg:QI 121) (subreg:QI (reg:SI 119) 0)) (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0) - (subreg:SI (reg:QI 121) 0))) + (subreg:SI (reg:QI 121) 0))) are very common on machines that can only do word-sized operations. For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0), @@ -101,218 +97,12 @@ along with GCC; see the file COPYING3. If not see (set (reg:QI 121) (subreg:QI (reg:SI 119) 0)) (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119))) - where the first two insns are now dead. - - We used to use reaching definitions to find which uses have a - single reaching definition (sounds obvious...), but this is too - complex a problem in nasty testcases like PR33928. Now we use the - multiple definitions problem in df-problems.c. The similarity - between that problem and SSA form creation is taken further, in - that fwprop does a dominator walk to create its chains; however, - instead of creating a PHI function where multiple definitions meet - I just punt and record only singleton use-def chains, which is - all that is needed by fwprop. */ + where the first two insns are now dead. */ +using namespace rtl_ssa; static int num_changes; -static vec<df_ref> use_def_ref; -static vec<df_ref> reg_defs; -static vec<df_ref> reg_defs_stack; - -/* The maximum number of propagations that are still allowed. If we do - more propagations than originally we had uses, we must have ended up - in a propagation loop, as in PR79405. Until the algorithm fwprop - uses can obviously not get into such loops we need a workaround like - this. */ -static int propagations_left; - -/* The MD bitmaps are trimmed to include only live registers to cut - memory usage on testcases like insn-recog.c. Track live registers - in the basic block and do not perform forward propagation if the - destination is a dead pseudo occurring in a note. 
*/ -static bitmap local_md; -static bitmap local_lr; - -/* Return the only def in USE's use-def chain, or NULL if there is - more than one def in the chain. */ - -static inline df_ref -get_def_for_use (df_ref use) -{ - return use_def_ref[DF_REF_ID (use)]; -} - - -/* Update the reg_defs vector with non-partial definitions in DEF_REC. - TOP_FLAG says which artificials uses should be used, when DEF_REC - is an artificial def vector. LOCAL_MD is modified as after a - df_md_simulate_* function; we do more or less the same processing - done there, so we do not use those functions. */ - -#define DF_MD_GEN_FLAGS \ - (DF_REF_PARTIAL | DF_REF_CONDITIONAL | DF_REF_MAY_CLOBBER) - -static void -process_defs (df_ref def, int top_flag) -{ - for (; def; def = DF_REF_NEXT_LOC (def)) - { - df_ref curr_def = reg_defs[DF_REF_REGNO (def)]; - unsigned int dregno; - - if ((DF_REF_FLAGS (def) & DF_REF_AT_TOP) != top_flag) - continue; - - dregno = DF_REF_REGNO (def); - if (curr_def) - reg_defs_stack.safe_push (curr_def); - else - { - /* Do not store anything if "transitioning" from NULL to NULL. But - otherwise, push a special entry on the stack to tell the - leave_block callback that the entry in reg_defs was NULL. */ - if (DF_REF_FLAGS (def) & DF_MD_GEN_FLAGS) - ; - else - reg_defs_stack.safe_push (def); - } - - if (DF_REF_FLAGS (def) & DF_MD_GEN_FLAGS) - { - bitmap_set_bit (local_md, dregno); - reg_defs[dregno] = NULL; - } - else - { - bitmap_clear_bit (local_md, dregno); - reg_defs[dregno] = def; - } - } -} - - -/* Fill the use_def_ref vector with values for the uses in USE_REC, - taking reaching definitions info from LOCAL_MD and REG_DEFS. - TOP_FLAG says which artificials uses should be used, when USE_REC - is an artificial use vector. 
*/ - -static void -process_uses (df_ref use, int top_flag) -{ - for (; use; use = DF_REF_NEXT_LOC (use)) - if ((DF_REF_FLAGS (use) & DF_REF_AT_TOP) == top_flag) - { - unsigned int uregno = DF_REF_REGNO (use); - if (reg_defs[uregno] - && !bitmap_bit_p (local_md, uregno) - && bitmap_bit_p (local_lr, uregno)) - use_def_ref[DF_REF_ID (use)] = reg_defs[uregno]; - } -} - -class single_def_use_dom_walker : public dom_walker -{ -public: - single_def_use_dom_walker (cdi_direction direction) - : dom_walker (direction) {} - virtual edge before_dom_children (basic_block); - virtual void after_dom_children (basic_block); -}; - -edge -single_def_use_dom_walker::before_dom_children (basic_block bb) -{ - int bb_index = bb->index; - class df_md_bb_info *md_bb_info = df_md_get_bb_info (bb_index); - class df_lr_bb_info *lr_bb_info = df_lr_get_bb_info (bb_index); - rtx_insn *insn; - - bitmap_copy (local_md, &md_bb_info->in); - bitmap_copy (local_lr, &lr_bb_info->in); - - /* Push a marker for the leave_block callback. */ - reg_defs_stack.safe_push (NULL); - - process_uses (df_get_artificial_uses (bb_index), DF_REF_AT_TOP); - process_defs (df_get_artificial_defs (bb_index), DF_REF_AT_TOP); - - /* We don't call df_simulate_initialize_forwards, as it may overestimate - the live registers if there are unused artificial defs. We prefer - liveness to be underestimated. */ - - FOR_BB_INSNS (bb, insn) - if (INSN_P (insn)) - { - unsigned int uid = INSN_UID (insn); - process_uses (DF_INSN_UID_USES (uid), 0); - process_uses (DF_INSN_UID_EQ_USES (uid), 0); - process_defs (DF_INSN_UID_DEFS (uid), 0); - df_simulate_one_insn_forwards (bb, insn, local_lr); - } - - process_uses (df_get_artificial_uses (bb_index), 0); - process_defs (df_get_artificial_defs (bb_index), 0); - - return NULL; -} - -/* Pop the definitions created in this basic block when leaving its - dominated parts. 
*/ - -void -single_def_use_dom_walker::after_dom_children (basic_block bb ATTRIBUTE_UNUSED) -{ - df_ref saved_def; - while ((saved_def = reg_defs_stack.pop ()) != NULL) - { - unsigned int dregno = DF_REF_REGNO (saved_def); - - /* See also process_defs. */ - if (saved_def == reg_defs[dregno]) - reg_defs[dregno] = NULL; - else - reg_defs[dregno] = saved_def; - } -} - - -/* Build a vector holding the reaching definitions of uses reached by a - single dominating definition. */ - -static void -build_single_def_use_links (void) -{ - /* We use the multiple definitions problem to compute our restricted - use-def chains. */ - df_set_flags (DF_EQ_NOTES); - df_md_add_problem (); - df_note_add_problem (); - df_analyze (); - df_maybe_reorganize_use_refs (DF_REF_ORDER_BY_INSN_WITH_NOTES); - - use_def_ref.create (DF_USES_TABLE_SIZE ()); - use_def_ref.safe_grow_cleared (DF_USES_TABLE_SIZE (), true); - - reg_defs.create (max_reg_num ()); - reg_defs.safe_grow_cleared (max_reg_num (), true); - - reg_defs_stack.create (n_basic_blocks_for_fn (cfun) * 10); - local_md = BITMAP_ALLOC (NULL); - local_lr = BITMAP_ALLOC (NULL); - - /* Walk the dominator tree looking for single reaching definitions - dominating the uses. This is similar to how SSA form is built. */ - single_def_use_dom_walker (CDI_DOMINATORS) - .walk (cfun->cfg->x_entry_block_ptr); - - BITMAP_FREE (local_lr); - BITMAP_FREE (local_md); - reg_defs.release (); - reg_defs_stack.release (); -} - - /* Do not try to replace constant addresses or addresses of local and argument slots. These MEM expressions are made only once and inserted in many instructions, as well as being used to control symbol table @@ -342,773 +132,477 @@ can_simplify_addr (rtx addr) && REGNO (reg) != ARG_POINTER_REGNUM)); } -/* Returns a canonical version of X for the address, from the point of view, - that all multiplications are represented as MULT instead of the multiply - by a power of 2 being represented as ASHIFT. 
- - Every ASHIFT we find has been made by simplify_gen_binary and was not - there before, so it is not shared. So we can do this in place. */ - -static void -canonicalize_address (rtx x) -{ - for (;;) - switch (GET_CODE (x)) - { - case ASHIFT: - if (CONST_INT_P (XEXP (x, 1)) - && INTVAL (XEXP (x, 1)) < GET_MODE_UNIT_BITSIZE (GET_MODE (x)) - && INTVAL (XEXP (x, 1)) >= 0) - { - HOST_WIDE_INT shift = INTVAL (XEXP (x, 1)); - PUT_CODE (x, MULT); - XEXP (x, 1) = gen_int_mode (HOST_WIDE_INT_1 << shift, - GET_MODE (x)); - } - - x = XEXP (x, 0); - break; - - case PLUS: - if (GET_CODE (XEXP (x, 0)) == PLUS - || GET_CODE (XEXP (x, 0)) == ASHIFT - || GET_CODE (XEXP (x, 0)) == CONST) - canonicalize_address (XEXP (x, 0)); - - x = XEXP (x, 1); - break; - - case CONST: - x = XEXP (x, 0); - break; - - default: - return; - } -} - -/* OLD is a memory address. Return whether it is good to use NEW instead, - for a memory access in the given MODE. */ +/* MEM is the result of an address simplification, and temporarily + undoing changes OLD_NUM_CHANGES onwards restores the original address. + Return whether it is good to use the new address instead of the + old one. INSN is the containing instruction. */ static bool -should_replace_address (rtx old_rtx, rtx new_rtx, machine_mode mode, - addr_space_t as, bool speed) +should_replace_address (int old_num_changes, rtx mem, rtx_insn *insn) { int gain; - if (rtx_equal_p (old_rtx, new_rtx) - || !memory_address_addr_space_p (mode, new_rtx, as)) - return false; - - /* Copy propagation is always ok. */ - if (REG_P (old_rtx) && REG_P (new_rtx)) - return true; - /* Prefer the new address if it is less expensive. 
*/ - gain = (address_cost (old_rtx, mode, as, speed) - - address_cost (new_rtx, mode, as, speed)); + bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn)); + temporarily_undo_changes (old_num_changes); + gain = address_cost (XEXP (mem, 0), GET_MODE (mem), + MEM_ADDR_SPACE (mem), speed); + redo_changes (old_num_changes); + gain -= address_cost (XEXP (mem, 0), GET_MODE (mem), + MEM_ADDR_SPACE (mem), speed); /* If the addresses have equivalent cost, prefer the new address if it has the highest `set_src_cost'. That has the potential of eliminating the most insns without additional costs, and it is the same that cse.c used to do. */ if (gain == 0) - gain = (set_src_cost (new_rtx, VOIDmode, speed) - - set_src_cost (old_rtx, VOIDmode, speed)); + { + gain = set_src_cost (XEXP (mem, 0), VOIDmode, speed); + temporarily_undo_changes (old_num_changes); + gain -= set_src_cost (XEXP (mem, 0), VOIDmode, speed); + redo_changes (old_num_changes); + } return (gain > 0); } -/* Flags for the last parameter of propagate_rtx_1. */ - -enum { - /* If PR_CAN_APPEAR is true, propagate_rtx_1 always returns true; - if it is false, propagate_rtx_1 returns false if, for at least - one occurrence OLD, it failed to collapse the result to a constant. - For example, (mult:M (reg:M A) (minus:M (reg:M B) (reg:M A))) may - collapse to zero if replacing (reg:M B) with (reg:M A). - - PR_CAN_APPEAR is disregarded inside MEMs: in that case, - propagate_rtx_1 just tries to make cheaper and valid memory - addresses. */ - PR_CAN_APPEAR = 1, - - /* If PR_HANDLE_MEM is not set, propagate_rtx_1 won't attempt any replacement - outside memory addresses. This is needed because propagate_rtx_1 does - not do any analysis on memory; thus it is very conservative and in general - it will fail if non-read-only MEMs are found in the source expression. - - PR_HANDLE_MEM is set when the source of the propagation was not - another MEM. Then, it is safe not to treat non-read-only MEMs as - ``opaque'' objects. 
*/ - PR_HANDLE_MEM = 2, - - /* Set when costs should be optimized for speed. */ - PR_OPTIMIZE_FOR_SPEED = 4 -}; +namespace +{ + class fwprop_propagation : public insn_propagation + { + public: + static const uint16_t CHANGED_MEM = FIRST_SPARE_RESULT; + static const uint16_t CONSTANT = FIRST_SPARE_RESULT << 1; + static const uint16_t PROFITABLE = FIRST_SPARE_RESULT << 2; + + fwprop_propagation (rtx_insn *, rtx, rtx); + + bool changed_mem_p () const { return result_flags & CHANGED_MEM; } + bool folded_to_constants_p () const; + bool profitable_p () const; + + bool check_mem (int, rtx) final override; + void note_simplification (int, uint16_t, rtx, rtx) final override; + uint16_t classify_result (rtx, rtx); + }; +} -/* Check that X has a single def. */ +/* Prepare to replace FROM with TO in INSN. */ -static bool -reg_single_def_p (rtx x) +fwprop_propagation::fwprop_propagation (rtx_insn *insn, rtx from, rtx to) + : insn_propagation (insn, from, to) { - if (!REG_P (x)) - return false; - - int regno = REGNO (x); - return (DF_REG_DEF_COUNT (regno) == 1 - && !bitmap_bit_p (DF_LR_OUT (ENTRY_BLOCK_PTR_FOR_FN (cfun)), regno)); + should_check_mems = true; + should_note_simplifications = true; } -/* Replace all occurrences of OLD in *PX with NEW and try to simplify the - resulting expression. Replace *PX with a new RTL expression if an - occurrence of OLD was found. +/* MEM is the result of an address simplification, and temporarily + undoing changes OLD_NUM_CHANGES onwards restores the original address. + Return true if the propagation should continue, false if it has failed. */ - This is only a wrapper around simplify-rtx.c: do not add any pattern - matching code here. (The sole exception is the handling of LO_SUM, but - that is because there is no simplify_gen_* function for LO_SUM). 
*/ - -static bool -propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags) +bool +fwprop_propagation::check_mem (int old_num_changes, rtx mem) { - rtx x = *px, tem = NULL_RTX, op0, op1, op2; - enum rtx_code code = GET_CODE (x); - machine_mode mode = GET_MODE (x); - machine_mode op_mode; - bool can_appear = (flags & PR_CAN_APPEAR) != 0; - bool valid_ops = true; - - if (!(flags & PR_HANDLE_MEM) && MEM_P (x) && !MEM_READONLY_P (x)) + if (!memory_address_addr_space_p (GET_MODE (mem), XEXP (mem, 0), + MEM_ADDR_SPACE (mem))) { - /* If unsafe, change MEMs to CLOBBERs or SCRATCHes (to preserve whether - they have side effects or not). */ - *px = (side_effects_p (x) - ? gen_rtx_CLOBBER (GET_MODE (x), const0_rtx) - : gen_rtx_SCRATCH (GET_MODE (x))); + failure_reason = "would create an invalid MEM"; return false; } - /* If X is OLD_RTX, return NEW_RTX. But not if replacing only within an - address, and we are *not* inside one. */ - if (x == old_rtx) + temporarily_undo_changes (old_num_changes); + bool can_simplify = can_simplify_addr (XEXP (mem, 0)); + redo_changes (old_num_changes); + if (!can_simplify) { - *px = new_rtx; - return can_appear; + failure_reason = "would replace a frame address"; + return false; } - /* If this is an expression, try recursive substitution. */ - switch (GET_RTX_CLASS (code)) + /* Copy propagations are always ok. Otherwise check the costs. 
*/ + if (!(REG_P (from) && REG_P (to)) + && !should_replace_address (old_num_changes, mem, insn)) { - case RTX_UNARY: - op0 = XEXP (x, 0); - op_mode = GET_MODE (op0); - valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags); - if (op0 == XEXP (x, 0)) - return true; - tem = simplify_gen_unary (code, mode, op0, op_mode); - break; - - case RTX_BIN_ARITH: - case RTX_COMM_ARITH: - op0 = XEXP (x, 0); - op1 = XEXP (x, 1); - valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags); - valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags); - if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1)) - return true; - tem = simplify_gen_binary (code, mode, op0, op1); - break; - - case RTX_COMPARE: - case RTX_COMM_COMPARE: - op0 = XEXP (x, 0); - op1 = XEXP (x, 1); - op_mode = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1); - valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags); - valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags); - if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1)) - return true; - tem = simplify_gen_relational (code, mode, op_mode, op0, op1); - break; - - case RTX_TERNARY: - case RTX_BITFIELD_OPS: - op0 = XEXP (x, 0); - op1 = XEXP (x, 1); - op2 = XEXP (x, 2); - op_mode = GET_MODE (op0); - valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags); - valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags); - valid_ops &= propagate_rtx_1 (&op2, old_rtx, new_rtx, flags); - if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1) && op2 == XEXP (x, 2)) - return true; - if (op_mode == VOIDmode) - op_mode = GET_MODE (op0); - tem = simplify_gen_ternary (code, mode, op_mode, op0, op1, op2); - break; - - case RTX_EXTRA: - /* The only case we try to handle is a SUBREG. 
*/ - if (code == SUBREG) - { - op0 = XEXP (x, 0); - valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags); - if (op0 == XEXP (x, 0)) - return true; - tem = simplify_gen_subreg (mode, op0, GET_MODE (SUBREG_REG (x)), - SUBREG_BYTE (x)); - } - - else - { - rtvec vec; - rtvec newvec; - const char *fmt = GET_RTX_FORMAT (code); - rtx op; - - for (int i = 0; fmt[i]; i++) - switch (fmt[i]) - { - case 'E': - vec = XVEC (x, i); - newvec = vec; - for (int j = 0; j < GET_NUM_ELEM (vec); j++) - { - op = RTVEC_ELT (vec, j); - valid_ops &= propagate_rtx_1 (&op, old_rtx, new_rtx, flags); - if (op != RTVEC_ELT (vec, j)) - { - if (newvec == vec) - { - newvec = shallow_copy_rtvec (vec); - if (!tem) - tem = shallow_copy_rtx (x); - XVEC (tem, i) = newvec; - } - RTVEC_ELT (newvec, j) = op; - } - } - break; - - case 'e': - if (XEXP (x, i)) - { - op = XEXP (x, i); - valid_ops &= propagate_rtx_1 (&op, old_rtx, new_rtx, flags); - if (op != XEXP (x, i)) - { - if (!tem) - tem = shallow_copy_rtx (x); - XEXP (tem, i) = op; - } - } - break; - } - } - - break; - - case RTX_OBJ: - if (code == MEM && x != new_rtx) - { - rtx new_op0; - op0 = XEXP (x, 0); - - /* There are some addresses that we cannot work on. */ - if (!can_simplify_addr (op0)) - return true; - - op0 = new_op0 = targetm.delegitimize_address (op0); - valid_ops &= propagate_rtx_1 (&new_op0, old_rtx, new_rtx, - flags | PR_CAN_APPEAR); - - /* Dismiss transformation that we do not want to carry on. */ - if (!valid_ops - || new_op0 == op0 - || !(GET_MODE (new_op0) == GET_MODE (op0) - || GET_MODE (new_op0) == VOIDmode)) - return true; - - canonicalize_address (new_op0); - - /* Copy propagations are always ok. Otherwise check the costs. 
*/ - if (!(REG_P (old_rtx) && REG_P (new_rtx)) - && !should_replace_address (op0, new_op0, GET_MODE (x), - MEM_ADDR_SPACE (x), - flags & PR_OPTIMIZE_FOR_SPEED)) - return true; - - tem = replace_equiv_address_nv (x, new_op0); - } - - else if (code == LO_SUM) - { - op0 = XEXP (x, 0); - op1 = XEXP (x, 1); - - /* The only simplification we do attempts to remove references to op0 - or make it constant -- in both cases, op0's invalidity will not - make the result invalid. */ - propagate_rtx_1 (&op0, old_rtx, new_rtx, flags | PR_CAN_APPEAR); - valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags); - if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1)) - return true; - - /* (lo_sum (high x) x) -> x */ - if (GET_CODE (op0) == HIGH && rtx_equal_p (XEXP (op0, 0), op1)) - tem = op1; - else - tem = gen_rtx_LO_SUM (mode, op0, op1); + failure_reason = "would increase the cost of a MEM"; + return false; + } - /* OP1 is likely not a legitimate address, otherwise there would have - been no LO_SUM. We want it to disappear if it is invalid, return - false in that case. */ - return memory_address_p (mode, tem); - } + result_flags |= CHANGED_MEM; + return true; +} - else if (code == REG) - { - if (rtx_equal_p (x, old_rtx)) - { - *px = new_rtx; - return can_appear; - } - } - break; +/* OLDX has been simplified to NEWX. Describe the change in terms of + result_flags. */ - default: - break; +uint16_t +fwprop_propagation::classify_result (rtx old_rtx, rtx new_rtx) +{ + if (CONSTANT_P (new_rtx)) + { + /* If OLD_RTX is a LO_SUM, then it presumably exists for a reason, + and NEW_RTX is likely not a legitimate address. We want it to + disappear if it is invalid. + + ??? Using the mode of the LO_SUM as the mode of the address + seems odd, but it was what the pre-SSA code did. */ + if (GET_CODE (old_rtx) == LO_SUM + && !memory_address_p (GET_MODE (old_rtx), new_rtx)) + return CONSTANT; + return CONSTANT | PROFITABLE; } - /* No change, no trouble. 
*/ - if (tem == NULL_RTX) - return true; - - *px = tem; - /* Allow replacements that simplify operations on a vector or complex value to a component. The most prominent case is (subreg ([vec_]concat ...)). */ - if (REG_P (tem) && !HARD_REGISTER_P (tem) - && (VECTOR_MODE_P (GET_MODE (new_rtx)) - || COMPLEX_MODE_P (GET_MODE (new_rtx))) - && GET_MODE (tem) == GET_MODE_INNER (GET_MODE (new_rtx))) - return true; + if (REG_P (new_rtx) + && !HARD_REGISTER_P (new_rtx) + && (VECTOR_MODE_P (GET_MODE (from)) + || COMPLEX_MODE_P (GET_MODE (from))) + && GET_MODE (new_rtx) == GET_MODE_INNER (GET_MODE (from))) + return PROFITABLE; - /* The replacement we made so far is valid, if all of the recursive - replacements were valid, or we could simplify everything to - a constant. */ - return valid_ops || can_appear || CONSTANT_P (tem); + return 0; } +/* Record that OLD_RTX has been simplified to NEW_RTX. OLD_NUM_CHANGES + is the number of unrelated changes that had been made before processing + OLD_RTX and its subrtxes. OLD_RESULT_FLAGS is the value that result_flags + had at that point. */ -/* Return true if X constains a non-constant mem. */ - -static bool -varying_mem_p (const_rtx x) +void +fwprop_propagation::note_simplification (int old_num_changes, + uint16_t old_result_flags, + rtx old_rtx, rtx new_rtx) { - subrtx_iterator::array_type array; - FOR_EACH_SUBRTX (iter, array, x, NONCONST) - if (MEM_P (*iter) && !MEM_READONLY_P (*iter)) - return true; - return false; + result_flags &= ~(CONSTANT | PROFITABLE); + uint16_t new_flags = classify_result (old_rtx, new_rtx); + if (old_num_changes) + new_flags &= old_result_flags; + result_flags |= new_flags; } +/* Return true if all substitutions eventually folded to constants. */ + +bool +fwprop_propagation::folded_to_constants_p () const +{ + /* If we're propagating a HIGH, require it to be folded with a + partnering LO_SUM. For example, a REG_EQUAL note with a register + replaced by an unfolded HIGH is not useful. 
*/ + if (CONSTANT_P (to) && GET_CODE (to) != HIGH) + return true; + return !(result_flags & UNSIMPLIFIED) && (result_flags & CONSTANT); +} -/* Replace all occurrences of OLD in X with NEW and try to simplify the - resulting expression (in mode MODE). Return a new expression if it is - a constant, otherwise X. - Simplifications where occurrences of NEW collapse to a constant are always - accepted. All simplifications are accepted if NEW is a pseudo too. - Otherwise, we accept simplifications that have a lower or equal cost. */ +/* Return true if it is worth keeping the result of the propagation, + false if it would increase the complexity of the pattern too much. */ -static rtx -propagate_rtx (rtx x, machine_mode mode, rtx old_rtx, rtx new_rtx, - bool speed) +bool +fwprop_propagation::profitable_p () const { - rtx tem; - bool collapsed; - int flags; + if (changed_mem_p ()) + return true; - if (REG_P (new_rtx) && REGNO (new_rtx) < FIRST_PSEUDO_REGISTER) - return NULL_RTX; + if (!(result_flags & UNSIMPLIFIED) + && (result_flags & PROFITABLE)) + return true; - flags = 0; - if (REG_P (new_rtx) - || CONSTANT_P (new_rtx) - || (GET_CODE (new_rtx) == SUBREG - && REG_P (SUBREG_REG (new_rtx)) - && !paradoxical_subreg_p (new_rtx))) - flags |= PR_CAN_APPEAR; - if (!varying_mem_p (new_rtx)) - flags |= PR_HANDLE_MEM; - - if (speed) - flags |= PR_OPTIMIZE_FOR_SPEED; - - tem = x; - collapsed = propagate_rtx_1 (&tem, old_rtx, copy_rtx (new_rtx), flags); - if (tem == x || !collapsed) - return NULL_RTX; - - /* gen_lowpart_common will not be able to process VOIDmode entities other - than CONST_INTs. 
*/ - if (GET_MODE (tem) == VOIDmode && !CONST_INT_P (tem)) - return NULL_RTX; - - if (GET_MODE (tem) == VOIDmode) - tem = rtl_hooks.gen_lowpart_no_emit (mode, tem); - else - gcc_assert (GET_MODE (tem) == mode); + if (REG_P (to)) + return true; - return tem; -} + if (GET_CODE (to) == SUBREG + && REG_P (SUBREG_REG (to)) + && !paradoxical_subreg_p (to)) + return true; + if (CONSTANT_P (to)) + return true; - + return false; +} -/* Return true if the register from reference REF is killed - between FROM to (but not including) TO. */ +/* Check that X has a single def. */ static bool -local_ref_killed_between_p (df_ref ref, rtx_insn *from, rtx_insn *to) +reg_single_def_p (rtx x) { - rtx_insn *insn; + return REG_P (x) && crtl->ssa->single_dominating_def (REGNO (x)); +} - for (insn = from; insn != to; insn = NEXT_INSN (insn)) - { - df_ref def; - if (!INSN_P (insn)) - continue; +/* Return true if X contains a paradoxical subreg. */ - FOR_EACH_INSN_DEF (def, insn) - if (DF_REF_REGNO (ref) == DF_REF_REGNO (def)) - return true; +static bool +contains_paradoxical_subreg_p (rtx x) +{ + subrtx_var_iterator::array_type array; + FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST) + { + x = *iter; + if (SUBREG_P (x) && paradoxical_subreg_p (x)) + return true; } return false; } +/* Try to substitute (set DEST SRC) from DEF_INSN into note NOTE of USE_INSN. + Return the number of substitutions on success, otherwise return -1 and + leave USE_INSN unchanged. -/* Check if USE is killed between DEF_INSN and TARGET_INSN. This would - require full computation of available expressions; we check only a few - restricted conditions: - - if the reg in USE has only one definition, go ahead; - - in the same basic block, we check for no definitions killing the use; - - if TARGET_INSN's basic block has DEF_INSN's basic block as its sole - predecessor, we check if the use is killed after DEF_INSN or before - TARGET_INSN insn, in their respective basic blocks. 
*/ + If REQUIRE_CONSTANT is true, require all substituted occurences of SRC + to fold to a constant, so that the note does not use any more registers + than it did previously. If REQUIRE_CONSTANT is false, also allow the + substitution if it's something we'd normally allow for the main + instruction pattern. */ -static bool -use_killed_between (df_ref use, rtx_insn *def_insn, rtx_insn *target_insn) +static int +try_fwprop_subst_note (insn_info *use_insn, insn_info *def_insn, + rtx note, rtx dest, rtx src, bool require_constant) { - basic_block def_bb = BLOCK_FOR_INSN (def_insn); - basic_block target_bb = BLOCK_FOR_INSN (target_insn); - int regno; - df_ref def; - - /* We used to have a def reaching a use that is _before_ the def, - with the def not dominating the use even though the use and def - are in the same basic block, when a register may be used - uninitialized in a loop. This should not happen anymore since - we do not use reaching definitions, but still we test for such - cases and assume that DEF is not available. */ - if (def_bb == target_bb - ? DF_INSN_LUID (def_insn) >= DF_INSN_LUID (target_insn) - : !dominated_by_p (CDI_DOMINATORS, target_bb, def_bb)) - return true; + rtx_insn *use_rtl = use_insn->rtl (); - /* Check if the reg in USE has only one definition. We already - know that this definition reaches use, or we wouldn't be here. - However, this is invalid for hard registers because if they are - live at the beginning of the function it does not mean that we - have an uninitialized access. And we have to check for the case - where a register may be used uninitialized in a loop as above. */ - regno = DF_REF_REGNO (use); - def = DF_REG_DEF_CHAIN (regno); - if (def - && DF_REF_NEXT_REG (def) == NULL - && regno >= FIRST_PSEUDO_REGISTER - && (BLOCK_FOR_INSN (DF_REF_INSN (def)) == def_bb - ? 
DF_INSN_LUID (DF_REF_INSN (def)) < DF_INSN_LUID (def_insn) - : dominated_by_p (CDI_DOMINATORS, - def_bb, BLOCK_FOR_INSN (DF_REF_INSN (def))))) - return false; - - /* Check locally if we are in the same basic block. */ - if (def_bb == target_bb) - return local_ref_killed_between_p (use, def_insn, target_insn); - - /* Finally, if DEF_BB is the sole predecessor of TARGET_BB. */ - if (single_pred_p (target_bb) - && single_pred (target_bb) == def_bb) + insn_change_watermark watermark; + fwprop_propagation prop (use_rtl, dest, src); + if (!prop.apply_to_rvalue (&XEXP (note, 0))) { - df_ref x; - - /* See if USE is killed between DEF_INSN and the last insn in the - basic block containing DEF_INSN. */ - x = df_bb_regno_last_def_find (def_bb, regno); - if (x && DF_INSN_LUID (DF_REF_INSN (x)) >= DF_INSN_LUID (def_insn)) - return true; - - /* See if USE is killed between TARGET_INSN and the first insn in the - basic block containing TARGET_INSN. */ - x = df_bb_regno_first_def_find (target_bb, regno); - if (x && DF_INSN_LUID (DF_REF_INSN (x)) < DF_INSN_LUID (target_insn)) - return true; - - return false; + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "cannot propagate from insn %d into" + " notes of insn %d: %s\n", def_insn->uid (), + use_insn->uid (), prop.failure_reason); + return -1; } - /* Otherwise assume the worst case. */ - return true; -} - + if (prop.num_replacements == 0) + return 0; -/* Check if all uses in DEF_INSN can be used in TARGET_INSN. This - would require full computation of available expressions; - we check only restricted conditions, see use_killed_between. */ -static bool -all_uses_available_at (rtx_insn *def_insn, rtx_insn *target_insn) -{ - df_ref use; - struct df_insn_info *insn_info = DF_INSN_INFO_GET (def_insn); - rtx def_set = single_set (def_insn); - rtx_insn *next; - - gcc_assert (def_set); - - /* If target_insn comes right after def_insn, which is very common - for addresses, we can use a quicker test. 
Ignore debug insns - other than target insns for this. */ - next = NEXT_INSN (def_insn); - while (next && next != target_insn && DEBUG_INSN_P (next)) - next = NEXT_INSN (next); - if (next == target_insn && REG_P (SET_DEST (def_set))) + if (require_constant) { - rtx def_reg = SET_DEST (def_set); - - /* If the insn uses the reg that it defines, the substitution is - invalid. */ - FOR_EACH_INSN_INFO_USE (use, insn_info) - if (rtx_equal_p (DF_REF_REG (use), def_reg)) - return false; - FOR_EACH_INSN_INFO_EQ_USE (use, insn_info) - if (rtx_equal_p (DF_REF_REG (use), def_reg)) - return false; + if (!prop.folded_to_constants_p ()) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "cannot propagate from insn %d into" + " notes of insn %d: %s\n", def_insn->uid (), + use_insn->uid (), "wouldn't fold to constants"); + return -1; + } } else { - rtx def_reg = REG_P (SET_DEST (def_set)) ? SET_DEST (def_set) : NULL_RTX; - - /* Look at all the uses of DEF_INSN, and see if they are not - killed between DEF_INSN and TARGET_INSN. 
*/ - FOR_EACH_INSN_INFO_USE (use, insn_info) - { - if (def_reg && rtx_equal_p (DF_REF_REG (use), def_reg)) - return false; - if (use_killed_between (use, def_insn, target_insn)) - return false; - } - FOR_EACH_INSN_INFO_EQ_USE (use, insn_info) + if (!prop.folded_to_constants_p () && !prop.profitable_p ()) { - if (def_reg && rtx_equal_p (DF_REF_REG (use), def_reg)) - return false; - if (use_killed_between (use, def_insn, target_insn)) - return false; + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "cannot propagate from insn %d into" + " notes of insn %d: %s\n", def_insn->uid (), + use_insn->uid (), "would increase complexity of node"); + return -1; } } - return true; + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "\nin notes of insn %d, replacing:\n ", + INSN_UID (use_rtl)); + temporarily_undo_changes (0); + print_inline_rtx (dump_file, note, 2); + redo_changes (0); + fprintf (dump_file, "\n with:\n "); + print_inline_rtx (dump_file, note, 2); + fprintf (dump_file, "\n"); + } + watermark.keep (); + return prop.num_replacements; } - -static df_ref *active_defs; -static sparseset active_defs_check; - -/* Fill the ACTIVE_DEFS array with the use->def link for the registers - mentioned in USE_REC. Register the valid entries in ACTIVE_DEFS_CHECK - too, for checking purposes. */ +/* Try to substitute (set DEST SRC) from DEF_INSN into location LOC of + USE_INSN's pattern. Return true on success, otherwise leave USE_INSN + unchanged. 
*/ -static void -register_active_defs (df_ref use) +static bool +try_fwprop_subst_pattern (obstack_watermark &attempt, insn_change &use_change, + insn_info *def_insn, rtx *loc, rtx dest, rtx src) { - for (; use; use = DF_REF_NEXT_LOC (use)) - { - df_ref def = get_def_for_use (use); - int regno = DF_REF_REGNO (use); + insn_info *use_insn = use_change.insn (); + rtx_insn *use_rtl = use_insn->rtl (); - if (flag_checking) - sparseset_set_bit (active_defs_check, regno); - active_defs[regno] = def; + insn_change_watermark watermark; + fwprop_propagation prop (use_rtl, dest, src); + if (!prop.apply_to_pattern (loc)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "cannot propagate from insn %d into" + " insn %d: %s\n", def_insn->uid (), use_insn->uid (), + prop.failure_reason); + return false; } -} + if (prop.num_replacements == 0) + return false; -/* Build the use->def links that we use to update the dataflow info - for new uses. Note that building the links is very cheap and if - it were done earlier, they could be used to rule out invalid - propagations (in addition to what is done in all_uses_available_at). - I'm not doing this yet, though. 
*/ - -static void -update_df_init (rtx_insn *def_insn, rtx_insn *insn) -{ - if (flag_checking) - sparseset_clear (active_defs_check); - register_active_defs (DF_INSN_USES (def_insn)); - register_active_defs (DF_INSN_USES (insn)); - register_active_defs (DF_INSN_EQ_USES (insn)); -} + if (!prop.profitable_p ()) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "cannot propagate from insn %d into" + " insn %d: %s\n", def_insn->uid (), use_insn->uid (), + "would increase complexity of pattern"); + return false; + } + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "\npropagating insn %d into insn %d, replacing:\n", + def_insn->uid (), use_insn->uid ()); + temporarily_undo_changes (0); + print_rtl_single (dump_file, PATTERN (use_rtl)); + redo_changes (0); + } -/* Update the USE_DEF_REF array for the given use, using the active definitions - in the ACTIVE_DEFS array to match pseudos to their def. */ + /* ??? In theory, it should be better to use insn costs rather than + set_src_costs here. That would involve replacing this code with + change_is_worthwhile. 
*/ + bool ok = recog (attempt, use_change); + if (ok && !prop.changed_mem_p () && !use_insn->is_asm ()) + if (rtx use_set = single_set (use_rtl)) + { + bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_rtl)); + temporarily_undo_changes (0); + auto old_cost = set_src_cost (SET_SRC (use_set), + GET_MODE (SET_DEST (use_set)), speed); + redo_changes (0); + auto new_cost = set_src_cost (SET_SRC (use_set), + GET_MODE (SET_DEST (use_set)), speed); + if (new_cost > old_cost) + { + if (dump_file) + fprintf (dump_file, "change not profitable" + " (cost %d -> cost %d)\n", old_cost, new_cost); + ok = false; + } + } -static inline void -update_uses (df_ref use) -{ - for (; use; use = DF_REF_NEXT_LOC (use)) + if (!ok) { - int regno = DF_REF_REGNO (use); + /* The pattern didn't match, but if all uses of SRC folded to + constants, we can add a REG_EQUAL note for the result, if there + isn't one already. */ + if (!prop.folded_to_constants_p ()) + return false; - /* Set up the use-def chain. */ - if (DF_REF_ID (use) >= (int) use_def_ref.length ()) - use_def_ref.safe_grow_cleared (DF_REF_ID (use) + 1, true); + /* Test this first to avoid creating an unnecessary copy of SRC. */ + if (find_reg_note (use_rtl, REG_EQUAL, NULL_RTX)) + return false; - if (flag_checking) - gcc_assert (sparseset_bit_p (active_defs_check, regno)); - use_def_ref[DF_REF_ID (use)] = active_defs[regno]; - } -} + rtx set = set_for_reg_notes (use_rtl); + if (!set || !REG_P (SET_DEST (set))) + return false; + rtx value = copy_rtx (SET_SRC (set)); + cancel_changes (0); -/* Update the USE_DEF_REF array for the uses in INSN. Only update note - uses if NOTES_ONLY is true. */ + /* If there are any paradoxical SUBREGs, drop the REG_EQUAL note, + because the bits in there can be anything and so might not + match the REG_EQUAL note content. See PR70574. 
*/ + if (contains_paradoxical_subreg_p (SET_SRC (set))) + return false; -static void -update_df (rtx_insn *insn, rtx note) -{ - struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, " Setting REG_EQUAL note\n"); - if (note) - { - df_uses_create (&XEXP (note, 0), insn, DF_REF_IN_NOTE); - df_notes_rescan (insn); + return set_unique_reg_note (use_rtl, REG_EQUAL, value); } - else + + rtx *note_ptr = &REG_NOTES (use_rtl); + while (rtx note = *note_ptr) { - df_uses_create (&PATTERN (insn), insn, 0); - df_insn_rescan (insn); - update_uses (DF_INSN_INFO_USES (insn_info)); + if ((REG_NOTE_KIND (note) == REG_EQUAL + || REG_NOTE_KIND (note) == REG_EQUIV) + && try_fwprop_subst_note (use_insn, def_insn, note, + dest, src, false) < 0) + { + *note_ptr = XEXP (note, 1); + free_EXPR_LIST_node (note); + } + else + note_ptr = &XEXP (note, 1); } - update_uses (DF_INSN_INFO_EQ_USES (insn_info)); + confirm_change_group (); + crtl->ssa->change_insn (use_change); + num_changes++; + return true; } - -/* Try substituting NEW into LOC, which originated from forward propagation - of USE's value from DEF_INSN. SET_REG_EQUAL says whether we are - substituting the whole SET_SRC, so we can set a REG_EQUAL note if the - new insn is not recognized. Return whether the substitution was - performed. */ +/* Try to substitute (set DEST SRC) from DEF_INSN into USE_INSN's notes, + given that it was not possible to do this for USE_INSN's main pattern. + Return true on success, otherwise leave USE_INSN unchanged. 
*/ static bool -try_fwprop_subst (df_ref use, rtx *loc, rtx new_rtx, rtx_insn *def_insn, - bool set_reg_equal) +try_fwprop_subst_notes (insn_info *use_insn, insn_info *def_insn, + rtx dest, rtx src) { - rtx_insn *insn = DF_REF_INSN (use); - rtx set = single_set (insn); - rtx note = NULL_RTX; - bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn)); - int old_cost = 0; - bool ok; + rtx_insn *use_rtl = use_insn->rtl (); + for (rtx note = REG_NOTES (use_rtl); note; note = XEXP (note, 1)) + if ((REG_NOTE_KIND (note) == REG_EQUAL + || REG_NOTE_KIND (note) == REG_EQUIV) + && try_fwprop_subst_note (use_insn, def_insn, note, + dest, src, true) > 0) + { + confirm_change_group (); + return true; + } - update_df_init (def_insn, insn); + return false; +} - /* forward_propagate_subreg may be operating on an instruction with - multiple sets. If so, assume the cost of the new instruction is - not greater than the old one. */ - if (set) - old_cost = set_src_cost (SET_SRC (set), GET_MODE (SET_DEST (set)), speed); - if (dump_file) - { - fprintf (dump_file, "\nIn insn %d, replacing\n ", INSN_UID (insn)); - print_inline_rtx (dump_file, *loc, 2); - fprintf (dump_file, "\n with "); - print_inline_rtx (dump_file, new_rtx, 2); - fprintf (dump_file, "\n"); - } +/* Check whether we could validly substitute (set DEST SRC) from DEF_INSN + into USE. If so, first try performing the substitution in location LOC + of USE->insn ()'s pattern. If that fails, try instead to substitute + into the notes. - validate_unshare_change (insn, loc, new_rtx, true); - if (!verify_changes (0)) - { - if (dump_file) - fprintf (dump_file, "Changes to insn %d not recognized\n", - INSN_UID (insn)); - ok = false; - } + Return true on success, otherwise leave USE_INSN unchanged. 
*/ - else if (DF_REF_TYPE (use) == DF_REF_REG_USE - && set - && (set_src_cost (SET_SRC (set), GET_MODE (SET_DEST (set)), speed) - > old_cost)) - { - if (dump_file) - fprintf (dump_file, "Changes to insn %d not profitable\n", - INSN_UID (insn)); - ok = false; - } +static bool +try_fwprop_subst (use_info *use, insn_info *def_insn, + rtx *loc, rtx dest, rtx src) +{ + insn_info *use_insn = use->insn (); - else - { - if (dump_file) - fprintf (dump_file, "Changed insn %d\n", INSN_UID (insn)); - ok = true; - } + auto attempt = crtl->ssa->new_change_attempt (); + use_array src_uses = remove_note_accesses (attempt, def_insn->uses ()); - if (ok) + /* ??? Not really a meaningful test: it means we can propagate arithmetic + involving hard registers but not bare references to them. A better + test would be to iterate over src_uses looking for hard registers + that are not fixed. */ + if (REG_P (src) && HARD_REGISTER_P (src)) + return false; + + /* ??? It would be better to make this EBB-based instead. That would + involve checking for equal EBBs rather than equal BBs and trying + to make the uses available at use_insn->ebb ()->first_bb (). */ + if (def_insn->bb () != use_insn->bb ()) { - confirm_change_group (); - num_changes++; + src_uses = crtl->ssa->make_uses_available (attempt, src_uses, + use_insn->bb ()); + if (!src_uses.is_valid ()) + return false; } - else - { - cancel_changes (0); - - /* Can also record a simplified value in a REG_EQUAL note, - making a new one if one does not already exist. */ - if (set_reg_equal) - { - /* If there are any paradoxical SUBREGs, don't add REG_EQUAL note, - because the bits in there can be anything and so might not - match the REG_EQUAL note content. See PR70574. 
*/ - subrtx_var_iterator::array_type array; - FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST) - { - rtx x = *iter; - if (SUBREG_P (x) && paradoxical_subreg_p (x)) - { - set_reg_equal = false; - break; - } - } - if (set_reg_equal) - { - if (dump_file) - fprintf (dump_file, " Setting REG_EQUAL note\n"); + insn_change use_change (use_insn); + use_change.new_uses = merge_access_arrays (attempt, use_change.new_uses, + src_uses); + if (!use_change.new_uses.is_valid ()) + return false; - note = set_unique_reg_note (insn, REG_EQUAL, copy_rtx (new_rtx)); - } - } - } + /* ??? We could allow movement within the EBB by adding: - if ((ok || note) && !CONSTANT_P (new_rtx)) - update_df (insn, note); + use_change.move_range = use_insn->ebb ()->insn_range (); */ + if (!restrict_movement (use_change)) + return false; - return ok; + return (try_fwprop_subst_pattern (attempt, use_change, def_insn, + loc, dest, src) + || try_fwprop_subst_notes (use_insn, def_insn, dest, src)); } /* For the given single_set INSN, containing SRC known to be a @@ -1117,33 +611,31 @@ try_fwprop_subst (df_ref use, rtx *loc, rtx new_rtx, rtx_insn *def_insn, load from memory. 
*/ static bool -free_load_extend (rtx src, rtx_insn *insn) +free_load_extend (rtx src, insn_info *insn) { - rtx reg; - df_ref def, use; - - reg = XEXP (src, 0); + rtx reg = XEXP (src, 0); if (load_extend_op (GET_MODE (reg)) != GET_CODE (src)) return false; - FOR_EACH_INSN_USE (use, insn) - if (!DF_REF_IS_ARTIFICIAL (use) - && DF_REF_TYPE (use) == DF_REF_REG_USE - && DF_REF_REG (use) == reg) - break; - if (!use) - return false; + def_info *def = nullptr; + for (use_info *use : insn->uses ()) + if (use->regno () == REGNO (reg)) + { + def = use->def (); + break; + } - def = get_def_for_use (use); if (!def) return false; - if (DF_REF_IS_ARTIFICIAL (def)) + insn_info *def_insn = def->insn (); + if (def_insn->is_artificial ()) return false; - if (NONJUMP_INSN_P (DF_REF_INSN (def))) + rtx_insn *def_rtl = def_insn->rtl (); + if (NONJUMP_INSN_P (def_rtl)) { - rtx patt = PATTERN (DF_REF_INSN (def)); + rtx patt = PATTERN (def_rtl); if (GET_CODE (patt) == SET && GET_CODE (SET_SRC (patt)) == MEM @@ -1153,22 +645,24 @@ free_load_extend (rtx src, rtx_insn *insn) return false; } -/* If USE is a subreg, see if it can be replaced by a pseudo. */ +/* Subroutine of forward_propagate_subreg that handles a use of DEST + in REF. The other parameters are the same. */ static bool -forward_propagate_subreg (df_ref use, rtx_insn *def_insn, rtx def_set) +forward_propagate_subreg (use_info *use, insn_info *def_insn, + rtx dest, rtx src, df_ref ref) { - rtx use_reg = DF_REF_REG (use); - rtx_insn *use_insn; - rtx src; scalar_int_mode int_use_mode, src_mode; /* Only consider subregs... */ + rtx use_reg = DF_REF_REG (ref); machine_mode use_mode = GET_MODE (use_reg); if (GET_CODE (use_reg) != SUBREG - || !REG_P (SET_DEST (def_set))) + || GET_MODE (SUBREG_REG (use_reg)) != GET_MODE (dest)) return false; + /* ??? Replacing throughout the pattern would help for match_dups. 
*/ + rtx *loc = DF_REF_LOC (ref); if (paradoxical_subreg_p (use_reg)) { /* If this is a paradoxical SUBREG, we have no idea what value the @@ -1176,16 +670,13 @@ forward_propagate_subreg (df_ref use, rtx_insn *def_insn, rtx def_set) a SUBREG whose operand is the same as our mode, and all the modes are within a word, we can just use the inner operand because these SUBREGs just say how to treat the register. */ - use_insn = DF_REF_INSN (use); - src = SET_SRC (def_set); if (GET_CODE (src) == SUBREG && REG_P (SUBREG_REG (src)) && REGNO (SUBREG_REG (src)) >= FIRST_PSEUDO_REGISTER && GET_MODE (SUBREG_REG (src)) == use_mode - && subreg_lowpart_p (src) - && all_uses_available_at (def_insn, use_insn)) - return try_fwprop_subst (use, DF_REF_LOC (use), SUBREG_REG (src), - def_insn, false); + && subreg_lowpart_p (src)) + return try_fwprop_subst (use, def_insn, loc, + use_reg, SUBREG_REG (src)); } /* If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG @@ -1206,8 +697,6 @@ forward_propagate_subreg (df_ref use, rtx_insn *def_insn, rtx def_set) else if (is_a <scalar_int_mode> (use_mode, &int_use_mode) && subreg_lowpart_p (use_reg)) { - use_insn = DF_REF_INSN (use); - src = SET_SRC (def_set); if ((GET_CODE (src) == ZERO_EXTEND || GET_CODE (src) == SIGN_EXTEND) && is_a <scalar_int_mode> (GET_MODE (src), &src_mode) @@ -1216,139 +705,73 @@ forward_propagate_subreg (df_ref use, rtx_insn *def_insn, rtx def_set) && GET_MODE (XEXP (src, 0)) == use_mode && !free_load_extend (src, def_insn) && (targetm.mode_rep_extended (int_use_mode, src_mode) - != (int) GET_CODE (src)) - && all_uses_available_at (def_insn, use_insn)) - return try_fwprop_subst (use, DF_REF_LOC (use), XEXP (src, 0), - def_insn, false); + != (int) GET_CODE (src))) + return try_fwprop_subst (use, def_insn, loc, use_reg, XEXP (src, 0)); } return false; } -/* Try to replace USE with SRC (defined in DEF_INSN) in __asm. 
*/ +/* Try to substitute (set DEST SRC) from DEF_INSN into USE and simplify + the result, handling cases where DEST is used in a subreg and where + applying that subreg to SRC results in a useful simplification. */ static bool -forward_propagate_asm (df_ref use, rtx_insn *def_insn, rtx def_set, rtx reg) +forward_propagate_subreg (use_info *use, insn_info *def_insn, + rtx dest, rtx src) { - rtx_insn *use_insn = DF_REF_INSN (use); - rtx src, use_pat, asm_operands, new_rtx, *loc; - int speed_p, i; - df_ref uses; - - gcc_assert ((DF_REF_FLAGS (use) & DF_REF_IN_NOTE) == 0); - - src = SET_SRC (def_set); - use_pat = PATTERN (use_insn); + if (!use->includes_subregs () || !REG_P (dest)) + return false; - /* In __asm don't replace if src might need more registers than - reg, as that could increase register pressure on the __asm. */ - uses = DF_INSN_USES (def_insn); - if (uses && DF_REF_NEXT_LOC (uses)) + if (GET_CODE (src) != SUBREG + && GET_CODE (src) != ZERO_EXTEND + && GET_CODE (src) != SIGN_EXTEND) return false; - update_df_init (def_insn, use_insn); - speed_p = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn)); - asm_operands = NULL_RTX; - switch (GET_CODE (use_pat)) - { - case ASM_OPERANDS: - asm_operands = use_pat; - break; - case SET: - if (MEM_P (SET_DEST (use_pat))) - { - loc = &SET_DEST (use_pat); - new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p); - if (new_rtx) - validate_unshare_change (use_insn, loc, new_rtx, true); - } - asm_operands = SET_SRC (use_pat); - break; - case PARALLEL: - for (i = 0; i < XVECLEN (use_pat, 0); i++) - if (GET_CODE (XVECEXP (use_pat, 0, i)) == SET) - { - if (MEM_P (SET_DEST (XVECEXP (use_pat, 0, i)))) - { - loc = &SET_DEST (XVECEXP (use_pat, 0, i)); - new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, - src, speed_p); - if (new_rtx) - validate_unshare_change (use_insn, loc, new_rtx, true); - } - asm_operands = SET_SRC (XVECEXP (use_pat, 0, i)); - } - else if (GET_CODE (XVECEXP (use_pat, 0, i)) == ASM_OPERANDS) - 
asm_operands = XVECEXP (use_pat, 0, i); - break; - default: - gcc_unreachable (); - } + rtx_insn *use_rtl = use->insn ()->rtl (); + df_ref ref; - gcc_assert (asm_operands && GET_CODE (asm_operands) == ASM_OPERANDS); - for (i = 0; i < ASM_OPERANDS_INPUT_LENGTH (asm_operands); i++) - { - loc = &ASM_OPERANDS_INPUT (asm_operands, i); - new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p); - if (new_rtx) - validate_unshare_change (use_insn, loc, new_rtx, true); - } + FOR_EACH_INSN_USE (ref, use_rtl) + if (DF_REF_REGNO (ref) == use->regno () + && forward_propagate_subreg (use, def_insn, dest, src, ref)) + return true; - if (num_changes_pending () == 0 || !apply_change_group ()) - return false; + FOR_EACH_INSN_EQ_USE (ref, use_rtl) + if (DF_REF_REGNO (ref) == use->regno () + && forward_propagate_subreg (use, def_insn, dest, src, ref)) + return true; - update_df (use_insn, NULL); - num_changes++; - return true; + return false; } -/* Try to replace USE with SRC (defined in DEF_INSN) and simplify the - result. */ +/* Try to substitute (set DEST SRC) from DEF_INSN into USE and + simplify the result. */ static bool -forward_propagate_and_simplify (df_ref use, rtx_insn *def_insn, rtx def_set) +forward_propagate_and_simplify (use_info *use, insn_info *def_insn, + rtx dest, rtx src) { - rtx_insn *use_insn = DF_REF_INSN (use); - rtx use_set = single_set (use_insn); - rtx src, reg, new_rtx, *loc; - bool set_reg_equal; - machine_mode mode; - int asm_use = -1; - - if (INSN_CODE (use_insn) < 0) - asm_use = asm_noperands (PATTERN (use_insn)); - - if (!use_set && asm_use < 0 && !DEBUG_INSN_P (use_insn)) + insn_info *use_insn = use->insn (); + rtx_insn *use_rtl = use_insn->rtl (); + + /* ??? This check seems unnecessary. We should be able to propagate + into any kind of instruction, regardless of whether it's a single set. + It seems odd to be more permissive with asms than normal instructions. 
*/ + bool need_single_set = (!use_insn->is_asm () && !use_insn->is_debug_insn ()); + rtx use_set = single_set (use_rtl); + if (need_single_set && !use_set) return false; - /* Do not propagate into PC, CC0, etc. */ - if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode) - return false; + /* Do not propagate into PC, CC0, etc. - /* If def and use are subreg, check if they match. */ - reg = DF_REF_REG (use); - if (GET_CODE (reg) == SUBREG && GET_CODE (SET_DEST (def_set)) == SUBREG) - { - if (maybe_ne (SUBREG_BYTE (SET_DEST (def_set)), SUBREG_BYTE (reg))) - return false; - } - /* Check if the def had a subreg, but the use has the whole reg. */ - else if (REG_P (reg) && GET_CODE (SET_DEST (def_set)) == SUBREG) - return false; - /* Check if the use has a subreg, but the def had the whole reg. Unlike the - previous case, the optimization is possible and often useful indeed. */ - else if (GET_CODE (reg) == SUBREG && REG_P (SET_DEST (def_set))) - reg = SUBREG_REG (reg); - - /* Make sure that we can treat REG as having the same mode as the - source of DEF_SET. */ - if (GET_MODE (SET_DEST (def_set)) != GET_MODE (reg)) + ??? This too seems unnecessary. The current code should work correctly + without it, including cases where jumps become unconditional. */ + if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode) return false; - /* Check if the substitution is valid (last, because it's the most - expensive check!). */ - src = SET_SRC (def_set); - if (!CONSTANT_P (src) && !all_uses_available_at (def_insn, use_insn)) + /* In __asm don't replace if src might need more registers than + reg, as that could increase register pressure on the __asm. 
*/ + if (use_insn->is_asm () && def_insn->uses ().size () > 1) return false; /* Check if the def is loading something from the constant pool; in this @@ -1357,149 +780,90 @@ forward_propagate_and_simplify (df_ref use, rtx_insn *def_insn, rtx def_set) if (MEM_P (src) && MEM_READONLY_P (src)) { rtx x = avoid_constant_pool_reference (src); - if (x != src && use_set) + rtx note_set; + if (x != src + && (note_set = set_for_reg_notes (use_rtl)) + && REG_P (SET_DEST (note_set)) + && !contains_paradoxical_subreg_p (SET_SRC (note_set))) { - rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX); - rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (use_set); + rtx note = find_reg_note (use_rtl, REG_EQUAL, NULL_RTX); + rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (note_set); rtx new_rtx = simplify_replace_rtx (old_rtx, src, x); if (old_rtx != new_rtx) - set_unique_reg_note (use_insn, REG_EQUAL, copy_rtx (new_rtx)); + set_unique_reg_note (use_rtl, REG_EQUAL, copy_rtx (new_rtx)); } return false; } - if (asm_use >= 0) - return forward_propagate_asm (use, def_insn, def_set, reg); - - /* Else try simplifying. */ - - if (DF_REF_TYPE (use) == DF_REF_REG_MEM_STORE) - { - loc = &SET_DEST (use_set); - set_reg_equal = false; - } - else if (!use_set) - { - loc = &INSN_VAR_LOCATION_LOC (use_insn); - set_reg_equal = false; - } - else - { - rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX); - if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE) - loc = &XEXP (note, 0); - else - loc = &SET_SRC (use_set); - - /* Do not replace an existing REG_EQUAL note if the insn is not - recognized. Either we're already replacing in the note, or we'll - separately try plugging the definition in the note and simplifying. - And only install a REQ_EQUAL note when the destination is a REG - that isn't mentioned in USE_SET, as the note would be invalid - otherwise. 
We also don't want to install a note if we are merely - propagating a pseudo since verifying that this pseudo isn't dead - is a pain; moreover such a note won't help anything. - If the use is a paradoxical subreg, make sure we don't add a - REG_EQUAL note for it, because it is not equivalent, it is one - possible value for it, but we can't rely on it holding that value. - See PR70574. */ - set_reg_equal = (note == NULL_RTX - && REG_P (SET_DEST (use_set)) - && !REG_P (src) - && !(GET_CODE (src) == SUBREG - && REG_P (SUBREG_REG (src))) - && !reg_mentioned_p (SET_DEST (use_set), - SET_SRC (use_set)) - && !paradoxical_subreg_p (DF_REF_REG (use))); - } - - if (GET_MODE (*loc) == VOIDmode) - mode = GET_MODE (SET_DEST (use_set)); - else - mode = GET_MODE (*loc); - - new_rtx = propagate_rtx (*loc, mode, reg, src, - optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn))); - - if (!new_rtx) - return false; - - return try_fwprop_subst (use, loc, new_rtx, def_insn, set_reg_equal); + /* ??? Unconditionally propagating into PATTERN would work better + for instructions that have match_dups. */ + rtx *loc = need_single_set ? &use_set : &PATTERN (use_rtl); + return try_fwprop_subst (use, def_insn, loc, dest, src); } - /* Given a use USE of an insn, if it has a single reaching definition, try to forward propagate it into that insn. - Return true if cfg cleanup will be needed. + Return true if something changed. + REG_PROP_ONLY is true if we should only propagate register copies. */ static bool -forward_propagate_into (df_ref use, bool reg_prop_only = false) +forward_propagate_into (use_info *use, bool reg_prop_only = false) { - df_ref def; - rtx_insn *def_insn, *use_insn; - rtx def_set; - rtx parent; - - if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE) - return false; - if (DF_REF_IS_ARTIFICIAL (use)) + if (use->includes_read_writes ()) return false; - /* Only consider uses that have a single definition. */ - def = get_def_for_use (use); + /* Disregard uninitialized uses. 
*/ + def_info *def = use->def (); if (!def) return false; - if (DF_REF_FLAGS (def) & DF_REF_READ_WRITE) - return false; - if (DF_REF_IS_ARTIFICIAL (def)) - return false; - /* Check if the use is still present in the insn! */ - use_insn = DF_REF_INSN (use); - if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE) - parent = find_reg_note (use_insn, REG_EQUAL, NULL_RTX); - else - parent = PATTERN (use_insn); + /* Only consider single-register definitions. This could be relaxed, + but it should rarely be needed before RA. */ + def = look_through_degenerate_phi (def); + if (def->includes_multiregs ()) + return false; - if (!reg_mentioned_p (DF_REF_REG (use), parent)) + /* Only consider uses whose definition comes from a real instruction. */ + insn_info *def_insn = def->insn (); + if (def_insn->is_artificial ()) return false; - def_insn = DF_REF_INSN (def); - if (multiple_sets (def_insn)) + rtx_insn *def_rtl = def_insn->rtl (); + if (!NONJUMP_INSN_P (def_rtl)) + return false; + /* ??? This seems an unnecessary restriction. We can easily tell + which set the definition comes from. */ + if (multiple_sets (def_rtl)) return false; - def_set = single_set (def_insn); + rtx def_set = simple_regno_set (PATTERN (def_rtl), def->regno ()); if (!def_set) return false; - if (reg_prop_only - && (!reg_single_def_p (SET_SRC (def_set)) - || !reg_single_def_p (SET_DEST (def_set)))) - return false; + rtx dest = SET_DEST (def_set); + rtx src = SET_SRC (def_set); /* Allow propagations into a loop only for reg-to-reg copies, since replacing one register by another shouldn't increase the cost. 
*/ + struct loop *def_loop = def_insn->bb ()->cfg_bb ()->loop_father; + struct loop *use_loop = use->bb ()->cfg_bb ()->loop_father; + if ((reg_prop_only || def_loop != use_loop) + && (!reg_single_def_p (dest) || !reg_single_def_p (src))) + return false; - if (DF_REF_BB (def)->loop_father != DF_REF_BB (use)->loop_father - && (!reg_single_def_p (SET_SRC (def_set)) - || !reg_single_def_p (SET_DEST (def_set)))) + /* Don't substitute into a non-local goto, this confuses CFG. */ + insn_info *use_insn = use->insn (); + rtx_insn *use_rtl = use_insn->rtl (); + if (JUMP_P (use_rtl) + && find_reg_note (use_rtl, REG_NON_LOCAL_GOTO, NULL_RTX)) return false; - /* Only try one kind of propagation. If two are possible, we'll - do it on the following iterations. */ - if (forward_propagate_and_simplify (use, def_insn, def_set) - || forward_propagate_subreg (use, def_insn, def_set)) - { - propagations_left--; + if (forward_propagate_and_simplify (use, def_insn, dest, src) + || forward_propagate_subreg (use, def_insn, dest, src)) + return true; - if (cfun->can_throw_non_call_exceptions - && find_reg_note (use_insn, REG_EH_REGION, NULL_RTX) - && purge_dead_edges (DF_REF_BB (use))) - return true; - } return false; } - static void fwprop_init (void) @@ -1513,14 +877,8 @@ fwprop_init (void) build_single_def_use_links. 
*/ loop_optimizer_init (AVOID_CFG_MODIFICATIONS); - build_single_def_use_links (); - df_set_flags (DF_DEFER_INSN_RESCAN); - - active_defs = XNEWVEC (df_ref, max_reg_num ()); - if (flag_checking) - active_defs_check = sparseset_alloc (max_reg_num ()); - - propagations_left = DF_USES_TABLE_SIZE (); + df_analyze (); + crtl->ssa = new rtl_ssa::function_info (cfun); } static void @@ -1528,13 +886,13 @@ fwprop_done (void) { loop_optimizer_finalize (); - use_def_ref.release (); - free (active_defs); - if (flag_checking) - sparseset_free (active_defs_check); - + crtl->ssa->perform_pending_updates (); free_dominance_info (CDI_DOMINATORS); cleanup_cfg (0); + + delete crtl->ssa; + crtl->ssa = nullptr; + delete_trivially_dead_insns (get_insns (), max_reg_num ()); if (dump_file) @@ -1543,6 +901,41 @@ fwprop_done (void) num_changes); } +/* Try to optimize INSN, returning true if something changes. + FWPROP_ADDR_P is true if we are running fwprop_addr rather than + the full fwprop. */ + +static bool +fwprop_insn (insn_info *insn, bool fwprop_addr_p) +{ + for (use_info *use : insn->uses ()) + { + if (use->is_mem ()) + continue; + /* ??? The choices here follow those in the pre-SSA code. */ + if (!use->includes_address_uses ()) + { + if (forward_propagate_into (use, fwprop_addr_p)) + return true; + } + else + { + struct loop *loop = insn->bb ()->cfg_bb ()->loop_father; + /* The outermost loop is not really a loop. */ + if (loop == NULL || loop_outer (loop) == NULL) + { + if (forward_propagate_into (use, fwprop_addr_p)) + return true; + } + else if (fwprop_addr_p) + { + if (forward_propagate_into (use, false)) + return true; + } + } + } + return false; +} /* Main entry point. */ @@ -1555,33 +948,33 @@ gate_fwprop (void) static unsigned int fwprop (bool fwprop_addr_p) { - unsigned i; - fwprop_init (); - /* Go through all the uses. df_uses_create will create new ones at the - end, and we'll go through them as well. 
+ /* Go through all the instructions (including debug instructions) looking + for uses that we could propagate into. Do not forward propagate addresses into loops until after unrolling. CSE did so because it was able to fix its own mess, but we are not. */ - for (i = 0; i < DF_USES_TABLE_SIZE (); i++) - { - if (!propagations_left) - break; - - df_ref use = DF_USES_GET (i); - if (use) - { - if (DF_REF_TYPE (use) == DF_REF_REG_USE - || DF_REF_BB (use)->loop_father == NULL - /* The outer most loop is not really a loop. */ - || loop_outer (DF_REF_BB (use)->loop_father) == NULL) - forward_propagate_into (use, fwprop_addr_p); + insn_info *next; - else if (fwprop_addr_p) - forward_propagate_into (use, false); - } + /* ??? This code uses a worklist in order to preserve the behavior + of the pre-SSA implementation. It would be better to instead + iterate on each instruction until no more propagations are + possible, then move on to the next. */ + auto_vec<insn_info *> worklist; + for (insn_info *insn = crtl->ssa->first_insn (); insn; insn = next) + { + next = insn->next_any_insn (); + if (insn->can_be_optimized () || insn->is_debug_insn ()) + if (fwprop_insn (insn, fwprop_addr_p)) + worklist.safe_push (insn); + } + for (unsigned int i = 0; i < worklist.length (); ++i) + { + insn_info *insn = worklist[i]; + if (fwprop_insn (insn, fwprop_addr_p)) + worklist.safe_push (insn); } fwprop_done (); diff --git a/gcc/testsuite/gcc.dg/rtl/x86_64/test-return-const.c.before-fwprop.c b/gcc/testsuite/gcc.dg/rtl/x86_64/test-return-const.c.before-fwprop.c index 075f7443f81..1dadf554338 100644 --- a/gcc/testsuite/gcc.dg/rtl/x86_64/test-return-const.c.before-fwprop.c +++ b/gcc/testsuite/gcc.dg/rtl/x86_64/test-return-const.c.before-fwprop.c @@ -31,7 +31,7 @@ int __RTL (startwith ("fwprop1")) test_returning_constant (void) } /* Verify that insn 5 is eliminated. 
*/ -/* { dg-final { scan-rtl-dump "deferring deletion of insn with uid = 5" "fwprop1" } } */ +/* { dg-final { scan-rtl-dump "deleting insn with uid = 5" "fwprop1" } } */ /* { dg-final { scan-rtl-dump "Deleted 1 trivially dead insns" "fwprop1" } } */ int main (void) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_s8.c index 1eb0bf131d8..c916d2048fc 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_s8.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_s8.c @@ -74,7 +74,7 @@ TEST_STORE (st4_s8_28, svint8x4_t, int8_t, /* ** st4_s8_32: ** [^{]* -** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+\] +** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+, x[0-9]+\] ** ret */ TEST_STORE (st4_s8_32, svint8x4_t, int8_t, @@ -135,7 +135,7 @@ TEST_STORE (st4_s8_m32, svint8x4_t, int8_t, /* ** st4_s8_m36: ** [^{]* -** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+\] +** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+, x[0-9]+\] ** ret */ TEST_STORE (st4_s8_m36, svint8x4_t, int8_t, @@ -205,7 +205,7 @@ TEST_STORE (st4_vnum_s8_28, svint8x4_t, int8_t, /* ** st4_vnum_s8_32: ** [^{]* -** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+\] +** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+, x[0-9]+\] ** ret */ TEST_STORE (st4_vnum_s8_32, svint8x4_t, int8_t, @@ -266,7 +266,7 @@ TEST_STORE (st4_vnum_s8_m32, svint8x4_t, int8_t, /* ** st4_vnum_s8_m36: ** [^{]* -** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+\] +** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+, x[0-9]+\] ** ret */ TEST_STORE (st4_vnum_s8_m36, svint8x4_t, int8_t, diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_u8.c index e7c2e7d766c..32b8c8c4166 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_u8.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_u8.c @@ -74,7 +74,7 @@ TEST_STORE (st4_u8_28, svuint8x4_t, uint8_t, /* ** st4_u8_32: ** [^{]* -** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+\] +** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+, x[0-9]+\] ** ret 
*/ TEST_STORE (st4_u8_32, svuint8x4_t, uint8_t, @@ -135,7 +135,7 @@ TEST_STORE (st4_u8_m32, svuint8x4_t, uint8_t, /* ** st4_u8_m36: ** [^{]* -** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+\] +** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+, x[0-9]+\] ** ret */ TEST_STORE (st4_u8_m36, svuint8x4_t, uint8_t, @@ -205,7 +205,7 @@ TEST_STORE (st4_vnum_u8_28, svuint8x4_t, uint8_t, /* ** st4_vnum_u8_32: ** [^{]* -** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+\] +** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+, x[0-9]+\] ** ret */ TEST_STORE (st4_vnum_u8_32, svuint8x4_t, uint8_t, @@ -266,7 +266,7 @@ TEST_STORE (st4_vnum_u8_m32, svuint8x4_t, uint8_t, /* ** st4_vnum_u8_m36: ** [^{]* -** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+\] +** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+, x[0-9]+\] ** ret */ TEST_STORE (st4_vnum_u8_m36, svuint8x4_t, uint8_t, |