diff options
Diffstat (limited to 'gcc/config/rx')
-rw-r--r-- | gcc/config/rx/rx-protos.h | 106 | ||||
-rw-r--r-- | gcc/config/rx/rx.c | 164 | ||||
-rw-r--r-- | gcc/config/rx/rx.md | 112 |
3 files changed, 356 insertions, 26 deletions
diff --git a/gcc/config/rx/rx-protos.h b/gcc/config/rx/rx-protos.h index b3c5bfc7a7f..0bb885d2da1 100644 --- a/gcc/config/rx/rx-protos.h +++ b/gcc/config/rx/rx-protos.h @@ -63,6 +63,112 @@ extern void rx_notice_update_cc (rtx, rtx); extern void rx_split_cbranch (machine_mode, enum rtx_code, rtx, rtx, rtx); extern machine_mode rx_select_cc_mode (enum rtx_code, rtx, rtx); + +extern bool rx_reg_dead_or_unused_after_insn (const rtx_insn* i, int regno); +extern void rx_copy_reg_dead_or_unused_notes (rtx reg, const rtx_insn* src, + rtx_insn* dst); + +extern bool rx_fuse_in_memory_bitop (rtx* operands, rtx_insn* curr_insn, + rtx (*gen_insn)(rtx, rtx)); + +/* Result value of rx_find_set_of_reg. */ +struct set_of_reg +{ + /* The insn where rx_find_set_of_reg stopped looking. + Can be NULL_RTX if the end of the insn list was reached. */ + rtx_insn* insn; + + /* The set rtx of the specified reg if found, NULL_RTX otherwise. */ + const_rtx set_rtx; + + /* The set source rtx of the specified reg if found, NULL_RTX otherwise. + Usually, this is the most interesting return value. */ + rtx set_src; +}; + +/* FIXME: Copy-pasta from SH. Move to rtl.h. + Given a reg rtx and a start insn, try to find the insn that sets + the specified reg by using the specified insn stepping function, + such as 'prev_nonnote_nondebug_insn_bb'. When the insn is found, + try to extract the rtx of the reg set. 
*/ +template <typename F> inline set_of_reg +rx_find_set_of_reg (rtx reg, rtx_insn* insn, F stepfunc, + bool ignore_reg_reg_copies = false) +{ + set_of_reg result; + result.insn = insn; + result.set_rtx = NULL_RTX; + result.set_src = NULL_RTX; + + if (!REG_P (reg) || insn == NULL_RTX) + return result; + + for (rtx_insn* i = stepfunc (insn); i != NULL_RTX; i = stepfunc (i)) + { + if (BARRIER_P (i)) + break; + if (!INSN_P (i) || DEBUG_INSN_P (i)) + continue; + if (reg_set_p (reg, i)) + { + if (CALL_P (i)) + break; + + result.insn = i; + result.set_rtx = set_of (reg, i); + + if (result.set_rtx == NULL_RTX || GET_CODE (result.set_rtx) != SET) + break; + + result.set_src = XEXP (result.set_rtx, 1); + + if (ignore_reg_reg_copies && REG_P (result.set_src)) + { + reg = result.set_src; + continue; + } + if (ignore_reg_reg_copies && SUBREG_P (result.set_src) + && REG_P (SUBREG_REG (result.set_src))) + { + reg = SUBREG_REG (result.set_src); + continue; + } + + break; + } + } + + /* If the searched reg is found inside a (mem (post_inc:SI (reg))), set_of + will return NULL and set_rtx will be NULL. + In this case report a 'not found'. result.insn will always be non-null + at this point, so no need to check it. */ + if (result.set_src != NULL && result.set_rtx == NULL) + result.set_src = NULL; + + return result; +} + +/* FIXME: Move to rtl.h. 
*/ +template <typename F> inline rtx_insn* +rx_find_use_of_reg (rtx reg, rtx_insn* insn, F stepfunc) +{ + if (!REG_P (reg) || insn == NULL_RTX) + return NULL; + + for (rtx_insn* i = stepfunc (insn); i != NULL_RTX; i = stepfunc (i)) + { + if (BARRIER_P (i)) + break; + if (!INSN_P (i) || DEBUG_INSN_P (i)) + continue; + if (reg_overlap_mentioned_p (reg, PATTERN (i)) + || (CALL_P (i) && find_reg_fusage (i, USE, reg))) + return i; + } + + return NULL; +} + #endif #endif /* GCC_RX_PROTOS_H */ diff --git a/gcc/config/rx/rx.c b/gcc/config/rx/rx.c index be8229818ae..fe467f7bd3a 100644 --- a/gcc/config/rx/rx.c +++ b/gcc/config/rx/rx.c @@ -1644,6 +1644,20 @@ mark_frame_related (rtx insn) } } +/* Create CFI notes for register pops. */ +static void +add_pop_cfi_notes (rtx_insn *insn, unsigned int high, unsigned int low) +{ + rtx t = plus_constant (Pmode, stack_pointer_rtx, + (high - low + 1) * UNITS_PER_WORD); + t = gen_rtx_SET (stack_pointer_rtx, t); + add_reg_note (insn, REG_CFA_ADJUST_CFA, t); + RTX_FRAME_RELATED_P (insn) = 1; + for (unsigned int i = low; i <= high; i++) + add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (word_mode, i)); +} + + static bool ok_for_max_constant (HOST_WIDE_INT val) { @@ -2029,11 +2043,14 @@ rx_can_use_simple_return (void) static void pop_regs (unsigned int high, unsigned int low) { + rtx_insn *insn; if (high == low) - emit_insn (gen_stack_pop (gen_rtx_REG (SImode, low))); + insn = emit_insn (gen_stack_pop (gen_rtx_REG (SImode, low))); else - emit_insn (gen_stack_popm (GEN_INT (((high - low) + 1) * UNITS_PER_WORD), - gen_rx_popm_vector (low, high))); + insn = emit_insn (gen_stack_popm (GEN_INT (((high - low) + 1) + * UNITS_PER_WORD), + gen_rx_popm_vector (low, high))); + add_pop_cfi_notes (insn, high, low); } void @@ -2976,6 +2993,62 @@ rx_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED, } static bool +rx_rtx_costs (rtx x, machine_mode mode, int outer_code ATTRIBUTE_UNUSED, + int opno ATTRIBUTE_UNUSED, int* total, bool speed) +{ + if 
(x == const0_rtx) + { + *total = 0; + return true; + } + + switch (GET_CODE (x)) + { + case MULT: + if (mode == DImode) + { + *total = COSTS_N_INSNS (2); + return true; + } + /* fall through */ + + case PLUS: + case MINUS: + case AND: + case COMPARE: + case IOR: + case XOR: + *total = COSTS_N_INSNS (1); + return true; + + case DIV: + if (speed) + /* This is the worst case for a division. Pessimize divisions when + not optimizing for size and allow reciprocal optimizations which + produce bigger code. */ + *total = COSTS_N_INSNS (20); + else + *total = COSTS_N_INSNS (3); + return true; + + case UDIV: + if (speed) + /* This is the worst case for a division. Pessimize divisions when + not optimizing for size and allow reciprocal optimizations which + produce bigger code. */ + *total = COSTS_N_INSNS (18); + else + *total = COSTS_N_INSNS (3); + return true; + + default: + break; + } + + return false; +} + +static bool rx_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) { /* We can always eliminate to the frame pointer. @@ -3439,6 +3512,88 @@ rx_atomic_sequence::~rx_atomic_sequence (void) emit_insn (gen_mvtc (GEN_INT (CTRLREG_PSW), m_prev_psw_reg)); } +/* Given an insn and a reg number, tell whether the reg dies or is unused + after the insn. */ +bool +rx_reg_dead_or_unused_after_insn (const rtx_insn* i, int regno) +{ + return find_regno_note (i, REG_DEAD, regno) != NULL + || find_regno_note (i, REG_UNUSED, regno) != NULL; +} + +/* Copy dead and unused notes from SRC to DST for the specified REGNO. */ +void +rx_copy_reg_dead_or_unused_notes (rtx reg, const rtx_insn* src, rtx_insn* dst) +{ + int regno = REGNO (SUBREG_P (reg) ? SUBREG_REG (reg) : reg); + + if (rtx note = find_regno_note (src, REG_DEAD, regno)) + add_shallow_copy_of_reg_note (dst, note); + + if (rtx note = find_regno_note (src, REG_UNUSED, regno)) + add_shallow_copy_of_reg_note (dst, note); +} + +/* Try to fuse the current bit-operation insn with the surrounding memory load + and store. 
*/ +bool +rx_fuse_in_memory_bitop (rtx* operands, rtx_insn* curr_insn, + rtx (*gen_insn)(rtx, rtx)) +{ + rtx op2_reg = SUBREG_P (operands[2]) ? SUBREG_REG (operands[2]) : operands[2]; + + set_of_reg op2_def = rx_find_set_of_reg (op2_reg, curr_insn, + prev_nonnote_nondebug_insn_bb); + if (op2_def.set_src == NULL_RTX + || !MEM_P (op2_def.set_src) + || GET_MODE (op2_def.set_src) != QImode + || !rx_is_restricted_memory_address (XEXP (op2_def.set_src, 0), + GET_MODE (op2_def.set_src)) + || reg_used_between_p (operands[2], op2_def.insn, curr_insn) + || !rx_reg_dead_or_unused_after_insn (curr_insn, REGNO (op2_reg)) + ) + return false; + + /* The register operand originates from a memory load and the memory load + could be fused with the bitop insn. + Look for the following memory store with the same memory operand. */ + rtx mem = op2_def.set_src; + + /* If the memory is an auto-mod address, it can't be fused. */ + if (GET_CODE (XEXP (mem, 0)) == POST_INC + || GET_CODE (XEXP (mem, 0)) == PRE_INC + || GET_CODE (XEXP (mem, 0)) == POST_DEC + || GET_CODE (XEXP (mem, 0)) == PRE_DEC) + return false; + + rtx_insn* op0_use = rx_find_use_of_reg (operands[0], curr_insn, + next_nonnote_nondebug_insn_bb); + if (op0_use == NULL + || !(GET_CODE (PATTERN (op0_use)) == SET + && RX_REG_P (XEXP (PATTERN (op0_use), 1)) + && reg_overlap_mentioned_p (operands[0], XEXP (PATTERN (op0_use), 1)) + && rtx_equal_p (mem, XEXP (PATTERN (op0_use), 0))) + || !rx_reg_dead_or_unused_after_insn (op0_use, REGNO (operands[0])) + || reg_set_between_p (operands[2], curr_insn, op0_use)) + return false; + + /* If the load-modify-store operation is fused it could potentially modify + load/store ordering if there are other memory accesses between the load + and the store for this insn. If there are volatile mems between the load + and store it's better not to change the ordering. If there is a call + between the load and store, it's also not safe to fuse it. 
*/ + for (rtx_insn* i = next_nonnote_nondebug_insn_bb (op2_def.insn); + i != NULL && i != op0_use; + i = next_nonnote_nondebug_insn_bb (i)) + if (volatile_insn_p (PATTERN (i)) || CALL_P (i)) + return false; + + emit_insn (gen_insn (mem, gen_lowpart (QImode, operands[1]))); + set_insn_deleted (op2_def.insn); + set_insn_deleted (op0_use); + return true; +} + /* Implement TARGET_HARD_REGNO_NREGS. */ static unsigned int @@ -3627,6 +3782,9 @@ rx_modes_tieable_p (machine_mode mode1, machine_mode mode2) #undef TARGET_MODES_TIEABLE_P #define TARGET_MODES_TIEABLE_P rx_modes_tieable_p +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS rx_rtx_costs + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-rx.h" diff --git a/gcc/config/rx/rx.md b/gcc/config/rx/rx.md index 3fb2ac854b0..116d4d4a939 100644 --- a/gcc/config/rx/rx.md +++ b/gcc/config/rx/rx.md @@ -1094,7 +1094,7 @@ DONE; }) -(define_insn "andsi3" +(define_insn_and_split "andsi3" [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r,r,r") (and:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0,r,r,0") (match_operand:SI 2 "rx_source_operand" "r,Uint04,Sint08,Sint16,Sint24,i,0,r,Q"))) @@ -1110,6 +1110,21 @@ and\t%1, %0 and\t%2, %1, %0 and\t%Q2, %0" + "&& RX_REG_P (operands[1]) && CONST_INT_P (operands[2]) + && pow2p_hwi (~UINTVAL (operands[2]))" + [(const_int 0)] +{ + /* For negated single bit constants use the bclr insn for smaller code. 
*/ + + if (!rx_reg_dead_or_unused_after_insn (curr_insn, CC_REG)) + FAIL; + + rx_copy_reg_dead_or_unused_notes (operands[1], curr_insn, + emit_insn (gen_bitclr (operands[0], + GEN_INT (exact_log2 (~UINTVAL (operands[2]))), + operands[1]))); + DONE; +} [(set_attr "timings" "11,11,11,11,11,11,11,11,33") (set_attr "length" "2,2,3,4,5,6,2,5,5")] ) @@ -1383,7 +1398,7 @@ [(set_attr "length" "2,3")] ) -(define_insn "iorsi3" +(define_insn_and_split "iorsi3" [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r,r,r") (ior:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0,r,r,0") (match_operand:SI 2 "rx_source_operand" "r,Uint04,Sint08,Sint16,Sint24,i,0,r,Q"))) @@ -1399,6 +1414,21 @@ or\t%1, %0 or\t%2, %1, %0 or\t%Q2, %0" + "&& RX_REG_P (operands[1]) && CONST_INT_P (operands[2]) + && pow2p_hwi (UINTVAL (operands[2]))" + [(const_int 0)] +{ + /* For single bit constants use the bset insn for smaller code. */ + + if (!rx_reg_dead_or_unused_after_insn (curr_insn, CC_REG)) + FAIL; + + rx_copy_reg_dead_or_unused_notes (operands[1], curr_insn, + emit_insn (gen_bitset (operands[0], + GEN_INT (exact_log2 (UINTVAL (operands[2]))), + operands[1]))); + DONE; +} [(set_attr "timings" "11,11,11,11,11,11,11,11,33") (set_attr "length" "2,2,3,4,5,6,2,3,5")] ) @@ -1704,7 +1734,7 @@ DONE; }) -(define_insn "xorsi3" +(define_insn_and_split "xorsi3" [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r") (xor:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0") (match_operand:SI 2 "rx_source_operand" @@ -1712,6 +1742,21 @@ (clobber (reg:CC CC_REG))] "" "xor\t%Q2, %0" + "&& RX_REG_P (operands[1]) && CONST_INT_P (operands[2]) + && pow2p_hwi (UINTVAL (operands[2]))" + [(const_int 0)] +{ + /* For single bit constants use the bnot insn for smaller code. 
*/ + + if (!rx_reg_dead_or_unused_after_insn (curr_insn, CC_REG)) + FAIL; + + rx_copy_reg_dead_or_unused_notes (operands[1], curr_insn, + emit_insn (gen_bitinvert (operands[0], + GEN_INT (exact_log2 (UINTVAL (operands[2]))), + operands[1]))); + DONE; +} [(set_attr "timings" "11,11,11,11,11,33") (set_attr "length" "3,4,5,6,7,6")] ) @@ -1960,50 +2005,63 @@ ;; Bit manipulation instructions. -;; ??? The *_in_memory patterns will not be matched without further help. -;; At one time we had the insv expander generate them, but I suspect that -;; in general we get better performance by exposing the register load to -;; the optimizers. -;; -;; An alternate solution would be to re-organize these patterns such -;; that allow both register and memory operands. This would allow the -;; register allocator to spill and not load the register operand. This -;; would be possible only for operations for which we have a constant -;; bit offset, so that we can adjust the address by ofs/8 and replace -;; the offset in the insn by ofs%8. - -(define_insn "*bitset" +;; The *_in_memory patterns will not be matched automatically, not even with +;; combiner bridge patterns. Especially when the memory operands have a +;; displacement, the resulting patterns look too complex. +;; Instead we manually look around the matched insn to see if there is a +;; preceding memory load and a following memory store of the modified register +;; which can be fused into the single *_in_memory insn. +;; Do that before register allocation, as it can eliminate one temporary +;; register that needs to be allocated. 
+ +(define_insn_and_split "bitset" [(set (match_operand:SI 0 "register_operand" "=r") (ior:SI (ashift:SI (const_int 1) (match_operand:SI 1 "rx_shift_operand" "ri")) (match_operand:SI 2 "register_operand" "0")))] "" "bset\t%1, %0" + "&& can_create_pseudo_p ()" + [(const_int 0)] +{ + if (rx_fuse_in_memory_bitop (operands, curr_insn, &gen_bitset_in_memory)) + DONE; + else + FAIL; +} [(set_attr "length" "3")] ) -(define_insn "*bitset_in_memory" +(define_insn "bitset_in_memory" [(set (match_operand:QI 0 "rx_restricted_mem_operand" "+Q") (ior:QI (ashift:QI (const_int 1) (match_operand:QI 1 "nonmemory_operand" "ri")) (match_dup 0)))] "" "bset\t%1, %0.B" - [(set_attr "length" "3") + [(set_attr "length" "5") (set_attr "timings" "33")] ) -(define_insn "*bitinvert" +(define_insn_and_split "bitinvert" [(set (match_operand:SI 0 "register_operand" "=r") (xor:SI (ashift:SI (const_int 1) (match_operand:SI 1 "rx_shift_operand" "ri")) (match_operand:SI 2 "register_operand" "0")))] "" "bnot\t%1, %0" + "&& can_create_pseudo_p ()" + [(const_int 0)] +{ + if (rx_fuse_in_memory_bitop (operands, curr_insn, &gen_bitinvert_in_memory)) + DONE; + else + FAIL; +} [(set_attr "length" "3")] ) -(define_insn "*bitinvert_in_memory" +(define_insn "bitinvert_in_memory" [(set (match_operand:QI 0 "rx_restricted_mem_operand" "+Q") (xor:QI (ashift:QI (const_int 1) (match_operand:QI 1 "nonmemory_operand" "ri")) @@ -2014,7 +2072,7 @@ (set_attr "timings" "33")] ) -(define_insn "*bitclr" +(define_insn_and_split "bitclr" [(set (match_operand:SI 0 "register_operand" "=r") (and:SI (not:SI (ashift:SI @@ -2023,10 +2081,18 @@ (match_operand:SI 2 "register_operand" "0")))] "" "bclr\t%1, %0" + "&& can_create_pseudo_p ()" + [(const_int 0)] +{ + if (rx_fuse_in_memory_bitop (operands, curr_insn, &gen_bitclr_in_memory)) + DONE; + else + FAIL; +} [(set_attr "length" "3")] ) -(define_insn "*bitclr_in_memory" +(define_insn "bitclr_in_memory" [(set (match_operand:QI 0 "rx_restricted_mem_operand" "+Q") (and:QI (not:QI 
(ashift:QI @@ -2035,7 +2101,7 @@ (match_dup 0)))] "" "bclr\t%1, %0.B" - [(set_attr "length" "3") + [(set_attr "length" "5") (set_attr "timings" "33")] ) |