summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorvmakarov <vmakarov@138bc75d-0d04-0410-961f-82ee72b054a4>2014-11-13 03:02:49 +0000
committervmakarov <vmakarov@138bc75d-0d04-0410-961f-82ee72b054a4>2014-11-13 03:02:49 +0000
commit497ba60f76c13da878a9932dc38eda8e7333c0b6 (patch)
tree6e275a0a0d4c96a79407def2c6bc8fc0ee919f34
parent2bef00f6c5beb4563d4e8f5834a5a1e0955f07d9 (diff)
downloadgcc-497ba60f76c13da878a9932dc38eda8e7333c0b6.tar.gz
2014-11-12 Vladimir Makarov <vmakarov@redhat.com>
* common.opt (flra-remat): New. * opts.c (default_options_table): Add entry for flra_remat. * timevar_def (TV_LRA_REMAT): New. * doc/invoke.texi (-flra-remat): Add description of the new option. * doc/passes.texi (-flra-remat): Remove lra-equivs.c and lra-saves.c. Add lra-remat.c. * Makefile.in (OBJS): Add lra-remat.o. * lra-remat.c: New file. * lra.c: Add info about the rematerialization pass in the top comment. (collect_non_operand_hard_regs, add_regs_to_insn_regno_info): Process unallocatable regs too. (lra_constraint_new_insn_uid_start): Remove. (lra): Add code for calling rematerialization sub-pass. * lra-int.h (lra_constraint_new_insn_uid_start): Remove. (lra_constrain_insn, lra_remat): New prototypes. (lra_eliminate_regs_1): Add parameter. * lra-lives.c (make_hard_regno_born, make_hard_regno_dead): Process unallocatable hard regs too. (process_bb_lives): Ditto. * lra-spills.c (remove_pseudos): Add argument to lra_eliminate_regs_1 call. * lra-eliminations.c (lra_eliminate_regs_1): Add parameter. Use it for sp offset calculation. (lra_eliminate_regs): Add argument for lra_eliminate_regs_1 call. (eliminate_regs_in_insn): Add parameter. Use it for sp offset calculation. (process_insn_for_elimination): Add argument for eliminate_regs_in_insn call. * lra-constraints.c (get_equiv_with_elimination): Add argument for lra_eliminate_regs_1 call. (process_addr_reg): Add parameter. Use it. (process_address_1): Ditto. Add argument for process_addr_reg call. (process_address): Ditto. (curr_insn_transform): Add parameter. Use it. Add argument for process_address calls. (lra_constrain_insn): New function. (lra_constraints): Add argument for curr_insn_transform call. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@217458 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r--gcc/ChangeLog43
-rw-r--r--gcc/Makefile.in1
-rw-r--r--gcc/common.opt4
-rw-r--r--gcc/doc/invoke.texi11
-rw-r--r--gcc/doc/passes.texi8
-rw-r--r--gcc/lra-constraints.c188
-rw-r--r--gcc/lra-eliminations.c97
-rw-r--r--gcc/lra-int.h10
-rw-r--r--gcc/lra-lives.c7
-rw-r--r--gcc/lra-remat.c1212
-rw-r--r--gcc/lra-spills.c2
-rw-r--r--gcc/lra.c107
-rw-r--r--gcc/opts.c1
-rw-r--r--gcc/timevar.def1
14 files changed, 1531 insertions, 161 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 5b96094511f..b089cb8a6e0 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,46 @@
+2014-11-12 Vladimir Makarov <vmakarov@redhat.com>
+
+ * common.opt (flra-remat): New.
+ * opts.c (default_options_table): Add entry for flra_remat.
+ * timevar_def (TV_LRA_REMAT): New.
+ * doc/invoke.texi (-flra-remat): Add description of the new
+ option.
+ * doc/passes.texi (-flra-remat): Remove lra-equivs.c and
+ lra-saves.c. Add lra-remat.c.
+ * Makefile.in (OBJS): Add lra-remat.o.
+ * lra-remat.c: New file.
+ * lra.c: Add info about the rematerialization pass in the top
+ comment.
+ (collect_non_operand_hard_regs, add_regs_to_insn_regno_info):
+ Process unallocatable regs too.
+ (lra_constraint_new_insn_uid_start): Remove.
+ (lra): Add code for calling rematerialization sub-pass.
+ * lra-int.h (lra_constraint_new_insn_uid_start): Remove.
+ (lra_constrain_insn, lra_remat): New prototypes.
+ (lra_eliminate_regs_1): Add parameter.
+ * lra-lives.c (make_hard_regno_born, make_hard_regno_dead):
+ Process unallocatable hard regs too.
+ (process_bb_lives): Ditto.
+ * lra-spills.c (remove_pseudos): Add argument to
+ lra_eliminate_regs_1 call.
+ * lra-eliminations.c (lra_eliminate_regs_1): Add parameter. Use it
+ for sp offset calculation.
+ (lra_eliminate_regs): Add argument for lra_eliminate_regs_1 call.
+ (eliminate_regs_in_insn): Add parameter. Use it for sp offset
+ calculation.
+ (process_insn_for_elimination): Add argument for
+ eliminate_regs_in_insn call.
+ * lra-constraints.c (get_equiv_with_elimination): Add argument
+ for lra_eliminate_regs_1 call.
+ (process_addr_reg): Add parameter. Use it.
+ (process_address_1): Ditto. Add argument for process_addr_reg
+ call.
+ (process_address): Ditto.
+ (curr_insn_transform): Add parameter. Use it. Add argument for
+ process_address calls.
+ (lra_constrain_insn): New function.
+ (lra_constraints): Add argument for curr_insn_transform call.
+
2014-11-13 Manuel López-Ibáñez <manu@gcc.gnu.org>
* opts-global.c (postpone_unknown_option_warning): Fix spelling.
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 3d671c27006..88c3f8975ff 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1304,6 +1304,7 @@ OBJS = \
lra-constraints.o \
lra-eliminations.o \
lra-lives.o \
+ lra-remat.o \
lra-spills.o \
lto-cgraph.o \
lto-streamer.o \
diff --git a/gcc/common.opt b/gcc/common.opt
index 784a4b0a053..1a58cfb0385 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1551,6 +1551,10 @@ floop-optimize
Common Ignore
Does nothing. Preserved for backward compatibility.
+flra-remat
+Common Report Var(flag_lra_remat) Optimization
+Do CFG-sensitive rematerialization in LRA
+
flto
Common
Enable link-time optimization.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 294e17dcf89..12dbb2767f4 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -392,7 +392,7 @@ Objective-C and Objective-C++ Dialects}.
-fisolate-erroneous-paths-dereference -fisolate-erroneous-paths-attribute @gol
-fivopts -fkeep-inline-functions -fkeep-static-consts -flive-range-shrinkage @gol
-floop-block -floop-interchange -floop-strip-mine -floop-nest-optimize @gol
--floop-parallelize-all -flto -flto-compression-level @gol
+-floop-parallelize-all -flra-remat -flto -flto-compression-level @gol
-flto-partition=@var{alg} -flto-report -flto-report-wpa -fmerge-all-constants @gol
-fmerge-constants -fmodulo-sched -fmodulo-sched-allow-regmoves @gol
-fmove-loop-invariants -fno-branch-count-reg @gol
@@ -7183,6 +7183,7 @@ also turns on the following optimization flags:
-fipa-sra @gol
-fipa-icf @gol
-fisolate-erroneous-paths-dereference @gol
+-flra-remat @gol
-foptimize-sibling-calls @gol
-foptimize-strlen @gol
-fpartial-inlining @gol
@@ -7811,6 +7812,14 @@ Control the verbosity of the dump file for the integrated register allocator.
The default value is 5. If the value @var{n} is greater or equal to 10,
the dump output is sent to stderr using the same format as @var{n} minus 10.
+@item -flra-remat
+@opindex fcaller-saves
+Enable CFG-sensitive rematerialization in LRA. Instead of loading
+values of spilled pseudos, LRA tries to rematerialize (recalculate)
+values if it is profitable.
+
+Enabled at levels @option{-O2}, @option{-O3}, @option{-Os}.
+
@item -fdelayed-branch
@opindex fdelayed-branch
If supported for the target machine, attempt to reorder instructions
diff --git a/gcc/doc/passes.texi b/gcc/doc/passes.texi
index 0ab8f21caec..090eb0a6cbc 100644
--- a/gcc/doc/passes.texi
+++ b/gcc/doc/passes.texi
@@ -911,10 +911,10 @@ Source files are @file{reload.c} and @file{reload1.c}, plus the header
This pass is a modern replacement of the reload pass. Source files
are @file{lra.c}, @file{lra-assign.c}, @file{lra-coalesce.c},
@file{lra-constraints.c}, @file{lra-eliminations.c},
-@file{lra-equivs.c}, @file{lra-lives.c}, @file{lra-saves.c},
-@file{lra-spills.c}, the header @file{lra-int.h} used for
-communication between them, and the header @file{lra.h} used for
-communication between LRA and the rest of compiler.
+@file{lra-lives.c}, @file{lra-remat.c}, @file{lra-spills.c}, the
+header @file{lra-int.h} used for communication between them, and the
+header @file{lra.h} used for communication between LRA and the rest of
+compiler.
Unlike the reload pass, intermediate LRA decisions are reflected in
RTL as much as possible. This reduces the number of target-dependent
diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c
index 73533547942..a67bf8a2f7c 100644
--- a/gcc/lra-constraints.c
+++ b/gcc/lra-constraints.c
@@ -506,7 +506,8 @@ get_equiv_with_elimination (rtx x, rtx_insn *insn)
if (x == res || CONSTANT_P (res))
return res;
- return lra_eliminate_regs_1 (insn, res, GET_MODE (res), false, false, true);
+ return lra_eliminate_regs_1 (insn, res, GET_MODE (res),
+ 0, false, false, true);
}
/* Set up curr_operand_mode. */
@@ -1243,12 +1244,16 @@ static bool no_input_reloads_p, no_output_reloads_p;
insn. */
static int curr_swapped;
-/* Arrange for address element *LOC to be a register of class CL.
- Add any input reloads to list BEFORE. AFTER is nonnull if *LOC is an
- automodified value; handle that case by adding the required output
- reloads to list AFTER. Return true if the RTL was changed. */
+/* if CHECK_ONLY_P is false, arrange for address element *LOC to be a
+ register of class CL. Add any input reloads to list BEFORE. AFTER
+ is nonnull if *LOC is an automodified value; handle that case by
+ adding the required output reloads to list AFTER. Return true if
+ the RTL was changed.
+
+ if CHECK_ONLY_P is true, check that the *LOC is a correct address
+ register. Return false if the address register is correct. */
static bool
-process_addr_reg (rtx *loc, rtx_insn **before, rtx_insn **after,
+process_addr_reg (rtx *loc, bool check_only_p, rtx_insn **before, rtx_insn **after,
enum reg_class cl)
{
int regno;
@@ -1265,6 +1270,8 @@ process_addr_reg (rtx *loc, rtx_insn **before, rtx_insn **after,
mode = GET_MODE (reg);
if (! REG_P (reg))
{
+ if (check_only_p)
+ return true;
/* Always reload memory in an address even if the target supports
such addresses. */
new_reg = lra_create_new_reg_with_unique_value (mode, reg, cl, "address");
@@ -1274,7 +1281,8 @@ process_addr_reg (rtx *loc, rtx_insn **before, rtx_insn **after,
{
regno = REGNO (reg);
rclass = get_reg_class (regno);
- if ((*loc = get_equiv_with_elimination (reg, curr_insn)) != reg)
+ if (! check_only_p
+ && (*loc = get_equiv_with_elimination (reg, curr_insn)) != reg)
{
if (lra_dump_file != NULL)
{
@@ -1288,6 +1296,8 @@ process_addr_reg (rtx *loc, rtx_insn **before, rtx_insn **after,
}
if (*loc != reg || ! in_class_p (reg, cl, &new_class))
{
+ if (check_only_p)
+ return true;
reg = *loc;
if (get_reload_reg (after == NULL ? OP_IN : OP_INOUT,
mode, reg, cl, subreg_p, "address", &new_reg))
@@ -1295,6 +1305,8 @@ process_addr_reg (rtx *loc, rtx_insn **before, rtx_insn **after,
}
else if (new_class != NO_REGS && rclass != new_class)
{
+ if (check_only_p)
+ return true;
lra_change_class (regno, new_class, " Change to", true);
return false;
}
@@ -2740,8 +2752,9 @@ equiv_address_substitution (struct address_info *ad)
return change_p;
}
-/* Major function to make reloads for an address in operand NOP.
- The supported cases are:
+/* Major function to make reloads for an address in operand NOP or
+ check its correctness (If CHECK_ONLY_P is true). The supported
+ cases are:
1) an address that existed before LRA started, at which point it
must have been valid. These addresses are subject to elimination
@@ -2761,18 +2774,19 @@ equiv_address_substitution (struct address_info *ad)
address. Return true for any RTL change.
The function is a helper function which does not produce all
- transformations which can be necessary. It does just basic steps.
- To do all necessary transformations use function
- process_address. */
+ transformations (when CHECK_ONLY_P is false) which can be
+ necessary. It does just basic steps. To do all necessary
+ transformations use function process_address. */
static bool
-process_address_1 (int nop, rtx_insn **before, rtx_insn **after)
+process_address_1 (int nop, bool check_only_p,
+ rtx_insn **before, rtx_insn **after)
{
struct address_info ad;
rtx new_reg;
rtx op = *curr_id->operand_loc[nop];
const char *constraint = curr_static_id->operand[nop].constraint;
enum constraint_num cn = lookup_constraint (constraint);
- bool change_p;
+ bool change_p = false;
if (insn_extra_address_constraint (cn))
decompose_lea_address (&ad, curr_id->operand_loc[nop]);
@@ -2783,10 +2797,11 @@ process_address_1 (int nop, rtx_insn **before, rtx_insn **after)
decompose_mem_address (&ad, SUBREG_REG (op));
else
return false;
- change_p = equiv_address_substitution (&ad);
+ if (! check_only_p)
+ change_p = equiv_address_substitution (&ad);
if (ad.base_term != NULL
&& (process_addr_reg
- (ad.base_term, before,
+ (ad.base_term, check_only_p, before,
(ad.autoinc_p
&& !(REG_P (*ad.base_term)
&& find_regno_note (curr_insn, REG_DEAD,
@@ -2800,7 +2815,8 @@ process_address_1 (int nop, rtx_insn **before, rtx_insn **after)
*ad.base_term2 = *ad.base_term;
}
if (ad.index_term != NULL
- && process_addr_reg (ad.index_term, before, NULL, INDEX_REG_CLASS))
+ && process_addr_reg (ad.index_term, check_only_p,
+ before, NULL, INDEX_REG_CLASS))
change_p = true;
/* Target hooks sometimes don't treat extra-constraint addresses as
@@ -2809,6 +2825,9 @@ process_address_1 (int nop, rtx_insn **before, rtx_insn **after)
&& satisfies_address_constraint_p (&ad, cn))
return change_p;
+ if (check_only_p)
+ return change_p;
+
/* There are three cases where the shape of *AD.INNER may now be invalid:
1) the original address was valid, but either elimination or
@@ -2977,15 +2996,24 @@ process_address_1 (int nop, rtx_insn **before, rtx_insn **after)
return true;
}
-/* Do address reloads until it is necessary. Use process_address_1 as
- a helper function. Return true for any RTL changes. */
+/* If CHECK_ONLY_P is false, do address reloads until it is necessary.
+ Use process_address_1 as a helper function. Return true for any
+ RTL changes.
+
+ If CHECK_ONLY_P is true, just check address correctness. Return
+ false if the address correct. */
static bool
-process_address (int nop, rtx_insn **before, rtx_insn **after)
+process_address (int nop, bool check_only_p,
+ rtx_insn **before, rtx_insn **after)
{
bool res = false;
- while (process_address_1 (nop, before, after))
- res = true;
+ while (process_address_1 (nop, check_only_p, before, after))
+ {
+ if (check_only_p)
+ return true;
+ res = true;
+ }
return res;
}
@@ -3157,9 +3185,15 @@ swap_operands (int nop)
model can be changed in future. Make commutative operand exchange
if it is chosen.
- Return true if some RTL changes happened during function call. */
+ if CHECK_ONLY_P is false, do RTL changes to satisfy the
+ constraints. Return true if any change happened during function
+ call.
+
+ If CHECK_ONLY_P is true then don't do any transformation. Just
+ check that the insn satisfies all constraints. If the insn does
+ not satisfy any constraint, return true. */
static bool
-curr_insn_transform (void)
+curr_insn_transform (bool check_only_p)
{
int i, j, k;
int n_operands;
@@ -3226,50 +3260,53 @@ curr_insn_transform (void)
curr_swapped = false;
goal_alt_swapped = false;
- /* Make equivalence substitution and memory subreg elimination
- before address processing because an address legitimacy can
- depend on memory mode. */
- for (i = 0; i < n_operands; i++)
- {
- rtx op = *curr_id->operand_loc[i];
- rtx subst, old = op;
- bool op_change_p = false;
-
- if (GET_CODE (old) == SUBREG)
- old = SUBREG_REG (old);
- subst = get_equiv_with_elimination (old, curr_insn);
- if (subst != old)
- {
- subst = copy_rtx (subst);
- lra_assert (REG_P (old));
- if (GET_CODE (op) == SUBREG)
- SUBREG_REG (op) = subst;
- else
- *curr_id->operand_loc[i] = subst;
- if (lra_dump_file != NULL)
- {
- fprintf (lra_dump_file,
- "Changing pseudo %d in operand %i of insn %u on equiv ",
- REGNO (old), i, INSN_UID (curr_insn));
- dump_value_slim (lra_dump_file, subst, 1);
+ if (! check_only_p)
+ /* Make equivalence substitution and memory subreg elimination
+ before address processing because an address legitimacy can
+ depend on memory mode. */
+ for (i = 0; i < n_operands; i++)
+ {
+ rtx op = *curr_id->operand_loc[i];
+ rtx subst, old = op;
+ bool op_change_p = false;
+
+ if (GET_CODE (old) == SUBREG)
+ old = SUBREG_REG (old);
+ subst = get_equiv_with_elimination (old, curr_insn);
+ if (subst != old)
+ {
+ subst = copy_rtx (subst);
+ lra_assert (REG_P (old));
+ if (GET_CODE (op) == SUBREG)
+ SUBREG_REG (op) = subst;
+ else
+ *curr_id->operand_loc[i] = subst;
+ if (lra_dump_file != NULL)
+ {
+ fprintf (lra_dump_file,
+ "Changing pseudo %d in operand %i of insn %u on equiv ",
+ REGNO (old), i, INSN_UID (curr_insn));
+ dump_value_slim (lra_dump_file, subst, 1);
fprintf (lra_dump_file, "\n");
- }
- op_change_p = change_p = true;
- }
- if (simplify_operand_subreg (i, GET_MODE (old)) || op_change_p)
- {
- change_p = true;
- lra_update_dup (curr_id, i);
- }
- }
+ }
+ op_change_p = change_p = true;
+ }
+ if (simplify_operand_subreg (i, GET_MODE (old)) || op_change_p)
+ {
+ change_p = true;
+ lra_update_dup (curr_id, i);
+ }
+ }
/* Reload address registers and displacements. We do it before
finding an alternative because of memory constraints. */
before = after = NULL;
for (i = 0; i < n_operands; i++)
if (! curr_static_id->operand[i].is_operator
- && process_address (i, &before, &after))
+ && process_address (i, check_only_p, &before, &after))
{
+ if (check_only_p)
+ return true;
change_p = true;
lra_update_dup (curr_id, i);
}
@@ -3279,13 +3316,13 @@ curr_insn_transform (void)
we chose previously may no longer be valid. */
lra_set_used_insn_alternative (curr_insn, -1);
- if (curr_insn_set != NULL_RTX
+ if (! check_only_p && curr_insn_set != NULL_RTX
&& check_and_process_move (&change_p, &sec_mem_p))
return change_p;
try_swapped:
- reused_alternative_num = curr_id->used_insn_alternative;
+ reused_alternative_num = check_only_p ? -1 : curr_id->used_insn_alternative;
if (lra_dump_file != NULL && reused_alternative_num >= 0)
fprintf (lra_dump_file, "Reusing alternative %d for insn #%u\n",
reused_alternative_num, INSN_UID (curr_insn));
@@ -3293,6 +3330,9 @@ curr_insn_transform (void)
if (process_alt_operands (reused_alternative_num))
alt_p = true;
+ if (check_only_p)
+ return ! alt_p || best_losers != 0;
+
/* If insn is commutative (it's safe to exchange a certain pair of
operands) then we need to try each alternative twice, the second
time matching those two operands as if we had exchanged them. To
@@ -3522,7 +3562,7 @@ curr_insn_transform (void)
*curr_id->operand_loc[i] = tem;
lra_update_dup (curr_id, i);
- process_address (i, &before, &after);
+ process_address (i, false, &before, &after);
/* If the alternative accepts constant pool refs directly
there will be no reload needed at all. */
@@ -3746,6 +3786,26 @@ curr_insn_transform (void)
return change_p;
}
+/* Return true if INSN satisfies all constraints. In other words, no
+ reload insns are needed. */
+bool
+lra_constrain_insn (rtx_insn *insn)
+{
+ int saved_new_regno_start = new_regno_start;
+ int saved_new_insn_uid_start = new_insn_uid_start;
+ bool change_p;
+
+ curr_insn = insn;
+ curr_id = lra_get_insn_recog_data (curr_insn);
+ curr_static_id = curr_id->insn_static_data;
+ new_insn_uid_start = get_max_uid ();
+ new_regno_start = max_reg_num ();
+ change_p = curr_insn_transform (true);
+ new_regno_start = saved_new_regno_start;
+ new_insn_uid_start = saved_new_insn_uid_start;
+ return ! change_p;
+}
+
/* Return true if X is in LIST. */
static bool
in_list_p (rtx x, rtx list)
@@ -4238,7 +4298,7 @@ lra_constraints (bool first_p)
curr_static_id = curr_id->insn_static_data;
init_curr_insn_input_reloads ();
init_curr_operand_mode ();
- if (curr_insn_transform ())
+ if (curr_insn_transform (false))
changed_p = true;
/* Check non-transformed insns too for equiv change as USE
or CLOBBER don't need reloads but can contain pseudos
diff --git a/gcc/lra-eliminations.c b/gcc/lra-eliminations.c
index e742ebaf0ad..8ab0222ae50 100644
--- a/gcc/lra-eliminations.c
+++ b/gcc/lra-eliminations.c
@@ -298,7 +298,8 @@ get_elimination (rtx reg)
a change in the offset between the eliminable register and its
substitution if UPDATE_P, or the full offset if FULL_P, or
otherwise zero. If FULL_P, we also use the SP offsets for
- elimination to SP.
+ elimination to SP. If UPDATE_P, use UPDATE_SP_OFFSET for updating
+ offsets of register elimnable to SP.
MEM_MODE is the mode of an enclosing MEM. We need this to know how
much to adjust a register for, e.g., PRE_DEC. Also, if we are
@@ -311,7 +312,8 @@ get_elimination (rtx reg)
sp offset. */
rtx
lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode,
- bool subst_p, bool update_p, bool full_p)
+ bool subst_p, bool update_p,
+ HOST_WIDE_INT update_sp_offset, bool full_p)
{
enum rtx_code code = GET_CODE (x);
struct lra_elim_table *ep;
@@ -346,7 +348,10 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode,
rtx to = subst_p ? ep->to_rtx : ep->from_rtx;
if (update_p)
- return plus_constant (Pmode, to, ep->offset - ep->previous_offset);
+ return plus_constant (Pmode, to,
+ ep->offset - ep->previous_offset
+ + (ep->to_rtx == stack_pointer_rtx
+ ? update_sp_offset : 0));
else if (full_p)
return plus_constant (Pmode, to,
ep->offset
@@ -373,7 +378,10 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode,
return gen_rtx_PLUS (Pmode, to, XEXP (x, 1));
offset = (update_p
- ? ep->offset - ep->previous_offset : ep->offset);
+ ? ep->offset - ep->previous_offset
+ + (ep->to_rtx == stack_pointer_rtx
+ ? update_sp_offset : 0)
+ : ep->offset);
if (full_p && insn != NULL_RTX && ep->to_rtx == stack_pointer_rtx)
offset -= lra_get_insn_recog_data (insn)->sp_offset;
if (CONST_INT_P (XEXP (x, 1))
@@ -402,9 +410,11 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode,
{
rtx new0 = lra_eliminate_regs_1 (insn, XEXP (x, 0), mem_mode,
- subst_p, update_p, full_p);
+ subst_p, update_p,
+ update_sp_offset, full_p);
rtx new1 = lra_eliminate_regs_1 (insn, XEXP (x, 1), mem_mode,
- subst_p, update_p, full_p);
+ subst_p, update_p,
+ update_sp_offset, full_p);
if (new0 != XEXP (x, 0) || new1 != XEXP (x, 1))
return form_sum (new0, new1);
@@ -423,11 +433,12 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode,
rtx to = subst_p ? ep->to_rtx : ep->from_rtx;
if (update_p)
- return
- plus_constant (Pmode,
- gen_rtx_MULT (Pmode, to, XEXP (x, 1)),
- (ep->offset - ep->previous_offset)
- * INTVAL (XEXP (x, 1)));
+ return plus_constant (Pmode,
+ gen_rtx_MULT (Pmode, to, XEXP (x, 1)),
+ (ep->offset - ep->previous_offset
+ + (ep->to_rtx == stack_pointer_rtx
+ ? update_sp_offset : 0))
+ * INTVAL (XEXP (x, 1)));
else if (full_p)
{
HOST_WIDE_INT offset = ep->offset;
@@ -459,10 +470,12 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode,
case LE: case LT: case LEU: case LTU:
{
rtx new0 = lra_eliminate_regs_1 (insn, XEXP (x, 0), mem_mode,
- subst_p, update_p, full_p);
+ subst_p, update_p,
+ update_sp_offset, full_p);
rtx new1 = XEXP (x, 1)
? lra_eliminate_regs_1 (insn, XEXP (x, 1), mem_mode,
- subst_p, update_p, full_p) : 0;
+ subst_p, update_p,
+ update_sp_offset, full_p) : 0;
if (new0 != XEXP (x, 0) || new1 != XEXP (x, 1))
return gen_rtx_fmt_ee (code, GET_MODE (x), new0, new1);
@@ -475,7 +488,8 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode,
if (XEXP (x, 0))
{
new_rtx = lra_eliminate_regs_1 (insn, XEXP (x, 0), mem_mode,
- subst_p, update_p, full_p);
+ subst_p, update_p,
+ update_sp_offset, full_p);
if (new_rtx != XEXP (x, 0))
{
/* If this is a REG_DEAD note, it is not valid anymore.
@@ -484,7 +498,8 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode,
if (REG_NOTE_KIND (x) == REG_DEAD)
return (XEXP (x, 1)
? lra_eliminate_regs_1 (insn, XEXP (x, 1), mem_mode,
- subst_p, update_p, full_p)
+ subst_p, update_p,
+ update_sp_offset, full_p)
: NULL_RTX);
x = alloc_reg_note (REG_NOTE_KIND (x), new_rtx, XEXP (x, 1));
@@ -501,7 +516,8 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode,
if (XEXP (x, 1))
{
new_rtx = lra_eliminate_regs_1 (insn, XEXP (x, 1), mem_mode,
- subst_p, update_p, full_p);
+ subst_p, update_p,
+ update_sp_offset, full_p);
if (new_rtx != XEXP (x, 1))
return
gen_rtx_fmt_ee (GET_CODE (x), GET_MODE (x),
@@ -528,8 +544,8 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode,
&& XEXP (XEXP (x, 1), 0) == XEXP (x, 0))
{
rtx new_rtx = lra_eliminate_regs_1 (insn, XEXP (XEXP (x, 1), 1),
- mem_mode,
- subst_p, update_p, full_p);
+ mem_mode, subst_p, update_p,
+ update_sp_offset, full_p);
if (new_rtx != XEXP (XEXP (x, 1), 1))
return gen_rtx_fmt_ee (code, GET_MODE (x), XEXP (x, 0),
@@ -553,14 +569,16 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode,
case PARITY:
case BSWAP:
new_rtx = lra_eliminate_regs_1 (insn, XEXP (x, 0), mem_mode,
- subst_p, update_p, full_p);
+ subst_p, update_p,
+ update_sp_offset, full_p);
if (new_rtx != XEXP (x, 0))
return gen_rtx_fmt_e (code, GET_MODE (x), new_rtx);
return x;
case SUBREG:
new_rtx = lra_eliminate_regs_1 (insn, SUBREG_REG (x), mem_mode,
- subst_p, update_p, full_p);
+ subst_p, update_p,
+ update_sp_offset, full_p);
if (new_rtx != SUBREG_REG (x))
{
@@ -598,12 +616,12 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode,
replace_equiv_address_nv
(x,
lra_eliminate_regs_1 (insn, XEXP (x, 0), GET_MODE (x),
- subst_p, update_p, full_p));
+ subst_p, update_p, update_sp_offset, full_p));
case USE:
/* Handle insn_list USE that a call to a pure function may generate. */
new_rtx = lra_eliminate_regs_1 (insn, XEXP (x, 0), VOIDmode,
- subst_p, update_p, full_p);
+ subst_p, update_p, update_sp_offset, full_p);
if (new_rtx != XEXP (x, 0))
return gen_rtx_USE (GET_MODE (x), new_rtx);
return x;
@@ -624,7 +642,8 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode,
if (*fmt == 'e')
{
new_rtx = lra_eliminate_regs_1 (insn, XEXP (x, i), mem_mode,
- subst_p, update_p, full_p);
+ subst_p, update_p,
+ update_sp_offset, full_p);
if (new_rtx != XEXP (x, i) && ! copied)
{
x = shallow_copy_rtx (x);
@@ -638,7 +657,8 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode,
for (j = 0; j < XVECLEN (x, i); j++)
{
new_rtx = lra_eliminate_regs_1 (insn, XVECEXP (x, i, j), mem_mode,
- subst_p, update_p, full_p);
+ subst_p, update_p,
+ update_sp_offset, full_p);
if (new_rtx != XVECEXP (x, i, j) && ! copied_vec)
{
rtvec new_v = gen_rtvec_v (XVECLEN (x, i),
@@ -665,7 +685,7 @@ rtx
lra_eliminate_regs (rtx x, machine_mode mem_mode,
rtx insn ATTRIBUTE_UNUSED)
{
- return lra_eliminate_regs_1 (NULL, x, mem_mode, true, false, true);
+ return lra_eliminate_regs_1 (NULL, x, mem_mode, true, false, 0, true);
}
/* Stack pointer offset before the current insn relative to one at the
@@ -850,13 +870,15 @@ remove_reg_equal_offset_note (rtx insn, rtx what)
If REPLACE_P is false, just update the offsets while keeping the
base register the same. If FIRST_P, use the sp offset for
- elimination to sp. Attach the note about used elimination for
- insns setting frame pointer to update elimination easy (without
- parsing already generated elimination insns to find offset
- previously used) in future. */
+ elimination to sp. Otherwise, use UPDATE_SP_OFFSET for this.
+ Attach the note about used elimination for insns setting frame
+ pointer to update elimination easy (without parsing already
+ generated elimination insns to find offset previously used) in
+ future. */
-static void
-eliminate_regs_in_insn (rtx_insn *insn, bool replace_p, bool first_p)
+void
+eliminate_regs_in_insn (rtx_insn *insn, bool replace_p, bool first_p,
+ HOST_WIDE_INT update_sp_offset)
{
int icode = recog_memoized (insn);
rtx old_set = single_set (insn);
@@ -986,8 +1008,13 @@ eliminate_regs_in_insn (rtx_insn *insn, bool replace_p, bool first_p)
if (! replace_p)
{
offset += (ep->offset - ep->previous_offset);
- if (first_p && ep->to_rtx == stack_pointer_rtx)
- offset -= lra_get_insn_recog_data (insn)->sp_offset;
+ if (ep->to_rtx == stack_pointer_rtx)
+ {
+ if (first_p)
+ offset -= lra_get_insn_recog_data (insn)->sp_offset;
+ else
+ offset += update_sp_offset;
+ }
offset = trunc_int_for_mode (offset, GET_MODE (plus_cst_src));
}
@@ -1061,7 +1088,7 @@ eliminate_regs_in_insn (rtx_insn *insn, bool replace_p, bool first_p)
substed_operand[i]
= lra_eliminate_regs_1 (insn, *id->operand_loc[i], VOIDmode,
replace_p, ! replace_p && ! first_p,
- first_p);
+ update_sp_offset, first_p);
if (substed_operand[i] != orig_operand[i])
validate_p = true;
}
@@ -1349,7 +1376,7 @@ lra_eliminate_reg_if_possible (rtx *loc)
static void
process_insn_for_elimination (rtx_insn *insn, bool final_p, bool first_p)
{
- eliminate_regs_in_insn (insn, final_p, first_p);
+ eliminate_regs_in_insn (insn, final_p, first_p, 0);
if (! final_p)
{
/* Check that insn changed its code. This is a case when a move
diff --git a/gcc/lra-int.h b/gcc/lra-int.h
index 5355ebfa4ec..92dc40cb7da 100644
--- a/gcc/lra-int.h
+++ b/gcc/lra-int.h
@@ -328,7 +328,6 @@ extern bitmap_head lra_inheritance_pseudos;
extern bitmap_head lra_split_regs;
extern bitmap_head lra_subreg_reload_pseudos;
extern bitmap_head lra_optional_reload_pseudos;
-extern int lra_constraint_new_insn_uid_start;
/* lra-constraints.c: */
@@ -339,6 +338,7 @@ extern int lra_constraint_iter;
extern bool lra_risky_transformations_p;
extern int lra_inheritance_iter;
extern int lra_undo_inheritance_iter;
+extern bool lra_constrain_insn (rtx_insn *);
extern bool lra_constraints (bool);
extern void lra_constraints_init (void);
extern void lra_constraints_finish (void);
@@ -389,13 +389,17 @@ extern bool lra_need_for_spills_p (void);
extern void lra_spill (void);
extern void lra_final_code_change (void);
+/* lra-remat.c: */
+
+extern bool lra_remat (void);
/* lra-elimination.c: */
extern void lra_debug_elim_table (void);
extern int lra_get_elimination_hard_regno (int);
-extern rtx lra_eliminate_regs_1 (rtx_insn *, rtx, machine_mode, bool,
- bool, bool);
+extern rtx lra_eliminate_regs_1 (rtx_insn *, rtx, machine_mode,
+ bool, bool, HOST_WIDE_INT, bool);
+extern void eliminate_regs_in_insn (rtx_insn *insn, bool, bool, HOST_WIDE_INT);
extern void lra_eliminate (bool, bool);
extern void lra_eliminate_reg_if_possible (rtx *);
diff --git a/gcc/lra-lives.c b/gcc/lra-lives.c
index 03def823a10..5ea5ecdcc52 100644
--- a/gcc/lra-lives.c
+++ b/gcc/lra-lives.c
@@ -252,8 +252,7 @@ make_hard_regno_born (int regno)
unsigned int i;
lra_assert (regno < FIRST_PSEUDO_REGISTER);
- if (TEST_HARD_REG_BIT (lra_no_alloc_regs, regno)
- || TEST_HARD_REG_BIT (hard_regs_live, regno))
+ if (TEST_HARD_REG_BIT (hard_regs_live, regno))
return;
SET_HARD_REG_BIT (hard_regs_live, regno);
sparseset_set_bit (start_living, regno);
@@ -267,8 +266,7 @@ static void
make_hard_regno_dead (int regno)
{
lra_assert (regno < FIRST_PSEUDO_REGISTER);
- if (TEST_HARD_REG_BIT (lra_no_alloc_regs, regno)
- || ! TEST_HARD_REG_BIT (hard_regs_live, regno))
+ if (! TEST_HARD_REG_BIT (hard_regs_live, regno))
return;
sparseset_set_bit (start_dying, regno);
CLEAR_HARD_REG_BIT (hard_regs_live, regno);
@@ -662,7 +660,6 @@ process_bb_lives (basic_block bb, int &curr_point)
sparseset_clear (pseudos_live_through_setjumps);
REG_SET_TO_HARD_REG_SET (hard_regs_live, reg_live_out);
AND_COMPL_HARD_REG_SET (hard_regs_live, eliminable_regset);
- AND_COMPL_HARD_REG_SET (hard_regs_live, lra_no_alloc_regs);
EXECUTE_IF_SET_IN_BITMAP (reg_live_out, FIRST_PSEUDO_REGISTER, j, bi)
mark_pseudo_live (j, curr_point);
diff --git a/gcc/lra-remat.c b/gcc/lra-remat.c
new file mode 100644
index 00000000000..51a94caf162
--- /dev/null
+++ b/gcc/lra-remat.c
@@ -0,0 +1,1212 @@
+/* Rematerialize pseudos values.
+ Copyright (C) 2014 Free Software Foundation, Inc.
+ Contributed by Vladimir Makarov <vmakarov@redhat.com>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* This code objective is to rematerialize spilled pseudo values. To
+ do this we calculate available insn candidates. The candidate is
+ available at some point if there is dominated set of insns with the
+ same pattern, the insn inputs are not dying or modified on any path
+ from the set, the outputs are not modified.
+
+ The insns containing memory or spilled pseudos (except for the
+ rematerialized pseudo) are not considered as such insns are not
+ profitable in comparison with regular loads of spilled pseudo
+ values. That simplifies the implementation as we don't need to
+ deal with memory aliasing.
+
+ To speed up available candidate calculation, we calculate partially
+ available candidates first and use them for initialization of the
+ availability. That is because (partial) availability sets are
+ sparse.
+
+ The rematerialization sub-pass could be improved further in the
+ following ways:
+
+ o We could make longer live ranges of inputs in the
+ rematerialization candidates if their hard registers are not used
+ for other purposes. This could be complicated if we need to
+ update BB live info information as LRA does not use
+ DF-infrastructure for compile-time reasons. This problem could
+ be overcome if constrain making live ranges longer only in BB/EBB
+ scope.
+ o We could use cost-based decision to choose rematerialization insn
+ (currently all insns without memory is can be used).
+ o We could use other free hard regs for unused output pseudos in
+ rematerialization candidates although such cases probably will
+ be very rare. */
+
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "hard-reg-set.h"
+#include "rtl.h"
+#include "rtl-error.h"
+#include "tm_p.h"
+#include "target.h"
+#include "insn-config.h"
+#include "recog.h"
+#include "output.h"
+#include "regs.h"
+#include "hashtab.h"
+#include "hash-set.h"
+#include "vec.h"
+#include "machmode.h"
+#include "input.h"
+#include "function.h"
+#include "expr.h"
+#include "predict.h"
+#include "dominance.h"
+#include "cfg.h"
+#include "basic-block.h"
+#include "except.h"
+#include "df.h"
+#include "ira.h"
+#include "sparseset.h"
+#include "params.h"
+#include "df.h"
+#include "lra-int.h"
+
+/* Number of candidates for rematerialization. */
+static unsigned int cands_num;
+
+/* The following is used for representation of call_used_reg_set in
+ form array whose elements are hard register numbers with nonzero bit
+ in CALL_USED_REG_SET. */
+static int call_used_regs_arr_len;
+static int call_used_regs_arr[FIRST_PSEUDO_REGISTER];
+
+/* Bitmap used for different calculations. */
+static bitmap_head temp_bitmap;
+
+typedef struct cand *cand_t;
+typedef const struct cand *const_cand_t;
+
+/* Insn candidates for rematerialization. The candidate insn should
+ have the following properies:
+ o no any memory (as access to memory is non-profitable)
+ o no INOUT regs (it means no non-paradoxical subreg of output reg)
+ o one output spilled pseudo (or reload pseudo of a spilled pseudo)
+ o all other pseudos are with assigned hard regs. */
+struct cand
+{
+ /* Index of the candidates in all_cands. */
+ int index;
+ /* The candidate insn. */
+ rtx_insn *insn;
+ /* Insn pseudo regno for rematerialization. */
+ int regno;
+ /* Non-negative if a reload pseudo is in the insn instead of the
+ pseudo for rematerialization. */
+ int reload_regno;
+ /* Number of the operand containing the regno or its reload
+ regno. */
+ int nop;
+ /* Next candidate for the same regno. */
+ cand_t next_regno_cand;
+};
+
+/* Vector containing all candidates. */
+static vec<cand_t> all_cands;
+/* Map: insn -> candidate representing it. It is null if the insn can
+ not be used for rematerialization. */
+static cand_t *insn_to_cand;
+
+/* Map regno -> candidates can be used for the regno
+ rematerialization. */
+static cand_t *regno_cands;
+
+/* Data about basic blocks used for the rematerialization
+ sub-pass. */
+struct remat_bb_data
+{
+ /* Basic block about which the below data are. */
+ basic_block bb;
+ /* Registers changed in the basic block: */
+ bitmap_head changed_regs;
+ /* Registers becoming dead in the BB. */
+ bitmap_head dead_regs;
+ /* Cands present in the BB whose in/out regs are not changed after
+ the cands occurence and are not dead (except the reload
+ regno). */
+ bitmap_head gen_cands;
+ bitmap_head livein_cands; /* cands whose inputs live at the BB start. */
+ bitmap_head pavin_cands; /* cands partially available at BB entry. */
+ bitmap_head pavout_cands; /* cands partially available at BB exit. */
+ bitmap_head avin_cands; /* cands available at the entry of the BB. */
+ bitmap_head avout_cands; /* cands available at the exit of the BB. */
+};
+
+/* Array for all BB data. Indexed by the corresponding BB index. */
+typedef struct remat_bb_data *remat_bb_data_t;
+
+/* Basic blocks for data flow problems -- all bocks except the special
+ ones. */
+static bitmap_head all_blocks;
+
+/* All basic block data are referred through the following array. */
+static remat_bb_data_t remat_bb_data;
+
+/* Two small functions for access to the bb data. */
+static inline remat_bb_data_t
+get_remat_bb_data (basic_block bb)
+{
+ return &remat_bb_data[(bb)->index];
+}
+
+static inline remat_bb_data_t
+get_remat_bb_data_by_index (int index)
+{
+ return &remat_bb_data[index];
+}
+
+
+
+/* Recursive hash function for RTL X. */
+static hashval_t
+rtx_hash (rtx x)
+{
+ int i, j;
+ enum rtx_code code;
+ const char *fmt;
+ hashval_t val = 0;
+
+ if (x == 0)
+ return val;
+
+ code = GET_CODE (x);
+ val += (int) code + 4095;
+
+ /* Some RTL can be compared nonrecursively. */
+ switch (code)
+ {
+ case REG:
+ return val + REGNO (x);
+
+ case LABEL_REF:
+ return iterative_hash_object (XEXP (x, 0), val);
+
+ case SYMBOL_REF:
+ return iterative_hash_object (XSTR (x, 0), val);
+
+ case SCRATCH:
+ case CONST_DOUBLE:
+ case CONST_INT:
+ case CONST_VECTOR:
+ return val;
+
+ default:
+ break;
+ }
+
+ /* Hash the elements. */
+ fmt = GET_RTX_FORMAT (code);
+ for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+ {
+ switch (fmt[i])
+ {
+ case 'w':
+ val += XWINT (x, i);
+ break;
+
+ case 'n':
+ case 'i':
+ val += XINT (x, i);
+ break;
+
+ case 'V':
+ case 'E':
+ val += XVECLEN (x, i);
+
+ for (j = 0; j < XVECLEN (x, i); j++)
+ val += rtx_hash (XVECEXP (x, i, j));
+ break;
+
+ case 'e':
+ val += rtx_hash (XEXP (x, i));
+ break;
+
+ case 'S':
+ case 's':
+ val += htab_hash_string (XSTR (x, i));
+ break;
+
+ case 'u':
+ case '0':
+ case 't':
+ break;
+
+ /* It is believed that rtx's at this level will never
+ contain anything but integers and other rtx's, except for
+ within LABEL_REFs and SYMBOL_REFs. */
+ default:
+ abort ();
+ }
+ }
+ return val;
+}
+
+
+
+/* Hash table for the candidates. Different insns (e.g. structurally
+ the same insns or even insns with different unused output regs) can
+ be represented by the same candidate in the table. */
+static htab_t cand_table;
+
+/* Hash function for candidate CAND. */
+static hashval_t
+cand_hash (const void *cand)
+{
+ const_cand_t c = (const_cand_t) cand;
+ lra_insn_recog_data_t id = lra_get_insn_recog_data (c->insn);
+ struct lra_static_insn_data *static_id = id->insn_static_data;
+ int nops = static_id->n_operands;
+ hashval_t hash = 0;
+
+ for (int i = 0; i < nops; i++)
+ if (i == c->nop)
+ hash = iterative_hash_object (c->regno, hash);
+ else if (static_id->operand[i].type == OP_IN)
+ hash = iterative_hash_object (*id->operand_loc[i], hash);
+ return hash;
+}
+
+/* Equal function for candidates CAND1 and CAND2. They are equal if
+ the corresponding candidate insns have the same code, the same
+ regno for rematerialization, the same input operands. */
+static int
+cand_eq_p (const void *cand1, const void *cand2)
+{
+ const_cand_t c1 = (const_cand_t) cand1;
+ const_cand_t c2 = (const_cand_t) cand2;
+ lra_insn_recog_data_t id1 = lra_get_insn_recog_data (c1->insn);
+ lra_insn_recog_data_t id2 = lra_get_insn_recog_data (c2->insn);
+ struct lra_static_insn_data *static_id1 = id1->insn_static_data;
+ int nops = static_id1->n_operands;
+
+ if (c1->regno != c2->regno
+ || INSN_CODE (c1->insn) < 0
+ || INSN_CODE (c1->insn) != INSN_CODE (c2->insn))
+ return false;
+ gcc_assert (c1->nop == c2->nop);
+ for (int i = 0; i < nops; i++)
+ if (i != c1->nop && static_id1->operand[i].type == OP_IN
+ && *id1->operand_loc[i] != *id2->operand_loc[i])
+ return false;
+ return true;
+}
+
+/* Insert candidate CAND into the table if it is not there yet.
+ Return candidate which is in the table. */
+static cand_t
+insert_cand (cand_t cand)
+{
+ void **entry_ptr;
+
+ entry_ptr = htab_find_slot (cand_table, cand, INSERT);
+ if (*entry_ptr == NULL)
+ *entry_ptr = (void *) cand;
+ return (cand_t) *entry_ptr;
+}
+
+/* Free candidate CAND memory. */
+static void
+free_cand (void *cand)
+{
+ free (cand);
+}
+
+/* Initiate the candidate table. */
+static void
+initiate_cand_table (void)
+{
+ cand_table = htab_create (8000, cand_hash, cand_eq_p,
+ (htab_del) free_cand);
+}
+
+/* Finish the candidate table. */
+static void
+finish_cand_table (void)
+{
+ htab_delete (cand_table);
+}
+
+
+
+/* Return true if X contains memory or UNSPEC. We can not just check
+ insn operands as memory or unspec might be not an operand itself
+ but contain an operand. Insn with memory access is not profitable
+ for rematerialization. Rematerialization of UNSPEC might result in
+ wrong code generation as the UNPEC effect is unknown
+ (e.g. generating a label). */
+static bool
+bad_for_rematerialization_p (rtx x)
+{
+ int i, j;
+ const char *fmt;
+ enum rtx_code code;
+
+ if (MEM_P (x) || GET_CODE (x) == UNSPEC)
+ return true;
+ code = GET_CODE (x);
+ fmt = GET_RTX_FORMAT (code);
+ for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+ {
+ if (fmt[i] == 'e')
+ {
+ if (bad_for_rematerialization_p (XEXP (x, i)))
+ return true;
+ }
+ else if (fmt[i] == 'E')
+ {
+ for (j = XVECLEN (x, i) - 1; j >= 0; j--)
+ if (bad_for_rematerialization_p (XVECEXP (x, i, j)))
+ return true;
+ }
+ }
+ return false;
+}
+
+/* If INSN can not be used for rematerialization, return negative
+ value. If INSN can be considered as a candidate for
+ rematerialization, return value which is the operand number of the
+ pseudo for which the insn can be used for rematerialization. Here
+ we consider the insns without any memory, spilled pseudo (except
+ for the rematerialization pseudo), or dying or unused regs. */
+static int
+operand_to_remat (rtx_insn *insn)
+{
+ lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
+ struct lra_static_insn_data *static_id = id->insn_static_data;
+ struct lra_insn_reg *reg, *found_reg = NULL;
+
+ /* First find a pseudo which can be rematerialized. */
+ for (reg = id->regs; reg != NULL; reg = reg->next)
+ if (reg->type == OP_OUT && ! reg->subreg_p
+ && find_regno_note (insn, REG_UNUSED, reg->regno) == NULL)
+ {
+ /* We permits only one spilled reg. */
+ if (found_reg != NULL)
+ return -1;
+ found_reg = reg;
+ }
+ if (found_reg == NULL)
+ return -1;
+ if (found_reg->regno < FIRST_PSEUDO_REGISTER)
+ return -1;
+ if (bad_for_rematerialization_p (PATTERN (insn)))
+ return -1;
+ /* Check the other regs are not spilled. */
+ for (reg = id->regs; reg != NULL; reg = reg->next)
+ if (found_reg == reg)
+ continue;
+ else if (reg->type == OP_INOUT)
+ return -1;
+ else if (reg->regno >= FIRST_PSEUDO_REGISTER
+ && reg_renumber[reg->regno] < 0)
+ /* Another spilled reg. */
+ return -1;
+ else if (reg->type == OP_IN)
+ {
+ if (find_regno_note (insn, REG_DEAD, reg->regno) != NULL)
+ /* We don't want to make live ranges longer. */
+ return -1;
+ /* Check that there is no output reg as the input one. */
+ for (struct lra_insn_reg *reg2 = id->regs;
+ reg2 != NULL;
+ reg2 = reg2->next)
+ if (reg2->type == OP_OUT && reg->regno == reg2->regno)
+ return -1;
+ }
+ /* Find the rematerialization operand. */
+ int nop = static_id->n_operands;
+ for (int i = 0; i < nop; i++)
+ if (REG_P (*id->operand_loc[i])
+ && (int) REGNO (*id->operand_loc[i]) == found_reg->regno)
+ return i;
+ return -1;
+}
+
+/* Create candidate for INSN with rematerialization operand NOP and
+ REGNO. Insert the candidate into the table and set up the
+ corresponding INSN_TO_CAND element. */
+static void
+create_cand (rtx_insn *insn, int nop, int regno)
+{
+ lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
+ rtx reg = *id->operand_loc[nop];
+ gcc_assert (REG_P (reg));
+ int op_regno = REGNO (reg);
+ gcc_assert (op_regno >= FIRST_PSEUDO_REGISTER);
+ cand_t cand = XNEW (struct cand);
+ cand->insn = insn;
+ cand->nop = nop;
+ cand->regno = regno;
+ cand->reload_regno = op_regno == regno ? -1 : op_regno;
+ gcc_assert (cand->regno >= 0);
+ cand_t cand_in_table = insert_cand (cand);
+ insn_to_cand[INSN_UID (insn)] = cand_in_table;
+ if (cand != cand_in_table)
+ free (cand);
+ else
+ {
+ /* A new cand. */
+ cand->index = all_cands.length ();
+ all_cands.safe_push (cand);
+ cand->next_regno_cand = regno_cands[cand->regno];
+ regno_cands[cand->regno] = cand;
+ }
+}
+
+/* Create rematerialization candidates (inserting them into the
+ table). */
+static void
+create_cands (void)
+{
+ rtx_insn *insn;
+ struct potential_cand
+ {
+ rtx_insn *insn;
+ int nop;
+ };
+ struct potential_cand *regno_potential_cand;
+
+ /* Create candidates. */
+ regno_potential_cand = XCNEWVEC (struct potential_cand, max_reg_num ());
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ if (INSN_P (insn))
+ {
+ rtx set;
+ int src_regno, dst_regno;
+ rtx_insn *insn2;
+ lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
+ int nop = operand_to_remat (insn);
+ int regno = -1;
+
+ if ((set = single_set (insn)) != NULL
+ && REG_P (SET_SRC (set)) && REG_P (SET_DEST (set))
+ && (src_regno = REGNO (SET_SRC (set))) >= FIRST_PSEUDO_REGISTER
+ && (dst_regno = REGNO (SET_DEST (set))) >= FIRST_PSEUDO_REGISTER
+ && reg_renumber[dst_regno] < 0
+ && (insn2 = regno_potential_cand[src_regno].insn) != NULL
+ && BLOCK_FOR_INSN (insn2) == BLOCK_FOR_INSN (insn))
+ create_cand (insn2, regno_potential_cand[src_regno].nop, dst_regno);
+ if (nop < 0)
+ goto fail;
+ gcc_assert (REG_P (*id->operand_loc[nop]));
+ regno = REGNO (*id->operand_loc[nop]);
+ gcc_assert (regno >= FIRST_PSEUDO_REGISTER);
+ if (reg_renumber[regno] < 0)
+ create_cand (insn, nop, regno);
+ else
+ {
+ regno_potential_cand[regno].insn = insn;
+ regno_potential_cand[regno].nop = nop;
+ goto fail;
+ }
+ regno = -1;
+ fail:
+ for (struct lra_insn_reg *reg = id->regs; reg != NULL; reg = reg->next)
+ if (reg->type != OP_IN && reg->regno != regno
+ && reg->regno >= FIRST_PSEUDO_REGISTER)
+ regno_potential_cand[reg->regno].insn = NULL;
+ }
+ cands_num = all_cands.length ();
+ free (regno_potential_cand);
+}
+
+
+
+/* Create and initialize BB data. */
+static void
+create_remat_bb_data (void)
+{
+ basic_block bb;
+ remat_bb_data_t bb_info;
+
+ remat_bb_data = XNEWVEC (struct remat_bb_data,
+ last_basic_block_for_fn (cfun));
+ FOR_ALL_BB_FN (bb, cfun)
+ {
+#ifdef ENABLE_CHECKING
+ if (bb->index < 0 || bb->index >= last_basic_block_for_fn (cfun))
+ abort ();
+#endif
+ bb_info = get_remat_bb_data (bb);
+ bb_info->bb = bb;
+ bitmap_initialize (&bb_info->changed_regs, &reg_obstack);
+ bitmap_initialize (&bb_info->dead_regs, &reg_obstack);
+ bitmap_initialize (&bb_info->gen_cands, &reg_obstack);
+ bitmap_initialize (&bb_info->livein_cands, &reg_obstack);
+ bitmap_initialize (&bb_info->pavin_cands, &reg_obstack);
+ bitmap_initialize (&bb_info->pavout_cands, &reg_obstack);
+ bitmap_initialize (&bb_info->avin_cands, &reg_obstack);
+ bitmap_initialize (&bb_info->avout_cands, &reg_obstack);
+ }
+}
+
+/* Dump all candidates to DUMP_FILE. */
+static void
+dump_cands (FILE *dump_file)
+{
+ int i;
+ cand_t cand;
+
+ fprintf (dump_file, "\nCands:\n");
+ for (i = 0; i < (int) cands_num; i++)
+ {
+ cand = all_cands[i];
+ fprintf (dump_file, "%d (nop=%d, remat_regno=%d, reload_regno=%d):\n",
+ i, cand->nop, cand->regno, cand->reload_regno);
+ print_inline_rtx (dump_file, cand->insn, 6);
+ fprintf (dump_file, "\n");
+ }
+}
+
+/* Dump all candidates and BB data. */
+static void
+dump_candidates_and_remat_bb_data (void)
+{
+ basic_block bb;
+
+ if (lra_dump_file == NULL)
+ return;
+ dump_cands (lra_dump_file);
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ fprintf (lra_dump_file, "\nBB %d:\n", bb->index);
+ /* Livein */
+ fprintf (lra_dump_file, " register live in:");
+ dump_regset (df_get_live_in (bb), lra_dump_file);
+ putc ('\n', lra_dump_file);
+ /* Liveout */
+ fprintf (lra_dump_file, " register live out:");
+ dump_regset (df_get_live_out (bb), lra_dump_file);
+ putc ('\n', lra_dump_file);
+ /* Changed/dead regs: */
+ fprintf (lra_dump_file, " changed regs:");
+ dump_regset (&get_remat_bb_data (bb)->changed_regs, lra_dump_file);
+ putc ('\n', lra_dump_file);
+ fprintf (lra_dump_file, " dead regs:");
+ dump_regset (&get_remat_bb_data (bb)->dead_regs, lra_dump_file);
+ putc ('\n', lra_dump_file);
+ lra_dump_bitmap_with_title ("cands generated in BB",
+ &get_remat_bb_data (bb)->gen_cands, bb->index);
+ lra_dump_bitmap_with_title ("livein cands in BB",
+ &get_remat_bb_data (bb)->livein_cands, bb->index);
+ lra_dump_bitmap_with_title ("pavin cands in BB",
+ &get_remat_bb_data (bb)->pavin_cands, bb->index);
+ lra_dump_bitmap_with_title ("pavout cands in BB",
+ &get_remat_bb_data (bb)->pavout_cands, bb->index);
+ lra_dump_bitmap_with_title ("avin cands in BB",
+ &get_remat_bb_data (bb)->avin_cands, bb->index);
+ lra_dump_bitmap_with_title ("avout cands in BB",
+ &get_remat_bb_data (bb)->avout_cands, bb->index);
+ }
+}
+
+/* Free all BB data. */
+static void
+finish_remat_bb_data (void)
+{
+ basic_block bb;
+
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ bitmap_clear (&get_remat_bb_data (bb)->avout_cands);
+ bitmap_clear (&get_remat_bb_data (bb)->avin_cands);
+ bitmap_clear (&get_remat_bb_data (bb)->pavout_cands);
+ bitmap_clear (&get_remat_bb_data (bb)->pavin_cands);
+ bitmap_clear (&get_remat_bb_data (bb)->livein_cands);
+ bitmap_clear (&get_remat_bb_data (bb)->gen_cands);
+ bitmap_clear (&get_remat_bb_data (bb)->dead_regs);
+ bitmap_clear (&get_remat_bb_data (bb)->changed_regs);
+ }
+ free (remat_bb_data);
+}
+
+
+
+/* Update changed_regs and dead_regs of BB from INSN. */
+static void
+set_bb_regs (basic_block bb, rtx_insn *insn)
+{
+ lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
+ struct lra_insn_reg *reg;
+
+ for (reg = id->regs; reg != NULL; reg = reg->next)
+ if (reg->type != OP_IN)
+ bitmap_set_bit (&get_remat_bb_data (bb)->changed_regs, reg->regno);
+ else
+ {
+ if (find_regno_note (insn, REG_DEAD, (unsigned) reg->regno) != NULL)
+ bitmap_set_bit (&get_remat_bb_data (bb)->dead_regs, reg->regno);
+ }
+ if (CALL_P (insn))
+ for (int i = 0; i < call_used_regs_arr_len; i++)
+ bitmap_set_bit (&get_remat_bb_data (bb)->dead_regs,
+ call_used_regs_arr[i]);
+}
+
+/* Calculate changed_regs and dead_regs for each BB. */
+static void
+calculate_local_reg_remat_bb_data (void)
+{
+ basic_block bb;
+ rtx_insn *insn;
+
+ FOR_EACH_BB_FN (bb, cfun)
+ FOR_BB_INSNS (bb, insn)
+ if (INSN_P (insn))
+ set_bb_regs (bb, insn);
+}
+
+
+
+/* Return true if REGNO is an input operand of INSN. */
+static bool
+input_regno_present_p (rtx_insn *insn, int regno)
+{
+ lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
+ struct lra_insn_reg *reg;
+
+ for (reg = id->regs; reg != NULL; reg = reg->next)
+ if (reg->type == OP_IN && reg->regno == regno)
+ return true;
+ return false;
+}
+
+/* Return true if a call used register is an input operand of INSN. */
+static bool
+call_used_input_regno_present_p (rtx_insn *insn)
+{
+ lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
+ struct lra_insn_reg *reg;
+
+ for (reg = id->regs; reg != NULL; reg = reg->next)
+ if (reg->type == OP_IN && reg->regno <= FIRST_PSEUDO_REGISTER
+ && TEST_HARD_REG_BIT (call_used_reg_set, reg->regno))
+ return true;
+ return false;
+}
+
+/* Calculate livein_cands for each BB. */
+static void
+calculate_livein_cands (void)
+{
+ basic_block bb;
+
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ bitmap livein_regs = df_get_live_in (bb);
+ bitmap livein_cands = &get_remat_bb_data (bb)->livein_cands;
+ for (unsigned int i = 0; i < cands_num; i++)
+ {
+ cand_t cand = all_cands[i];
+ lra_insn_recog_data_t id = lra_get_insn_recog_data (cand->insn);
+ struct lra_insn_reg *reg;
+
+ for (reg = id->regs; reg != NULL; reg = reg->next)
+ if (reg->type == OP_IN && ! bitmap_bit_p (livein_regs, reg->regno))
+ break;
+ if (reg == NULL)
+ bitmap_set_bit (livein_cands, i);
+ }
+ }
+}
+
+/* Calculate gen_cands for each BB. */
+static void
+calculate_gen_cands (void)
+{
+ basic_block bb;
+ bitmap gen_cands;
+ bitmap_head gen_insns;
+ rtx_insn *insn;
+
+ bitmap_initialize (&gen_insns, &reg_obstack);
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ gen_cands = &get_remat_bb_data (bb)->gen_cands;
+ bitmap_clear (&gen_insns);
+ FOR_BB_INSNS (bb, insn)
+ if (INSN_P (insn))
+ {
+ lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
+ struct lra_insn_reg *reg;
+ unsigned int uid;
+ bitmap_iterator bi;
+ cand_t cand;
+ rtx set;
+ int src_regno = -1, dst_regno = -1;
+
+ if ((set = single_set (insn)) != NULL
+ && REG_P (SET_SRC (set)) && REG_P (SET_DEST (set)))
+ {
+ src_regno = REGNO (SET_SRC (set));
+ dst_regno = REGNO (SET_DEST (set));
+ }
+
+ /* Update gen_cands: */
+ bitmap_clear (&temp_bitmap);
+ for (reg = id->regs; reg != NULL; reg = reg->next)
+ if (reg->type != OP_IN
+ || find_regno_note (insn, REG_DEAD, reg->regno) != NULL)
+ EXECUTE_IF_SET_IN_BITMAP (&gen_insns, 0, uid, bi)
+ {
+ rtx_insn *insn2 = lra_insn_recog_data[uid]->insn;
+
+ cand = insn_to_cand[INSN_UID (insn2)];
+ gcc_assert (cand != NULL);
+ /* Ignore the reload insn. */
+ if (src_regno == cand->reload_regno
+ && dst_regno == cand->regno)
+ continue;
+ if (cand->regno == reg->regno
+ || input_regno_present_p (insn2, reg->regno))
+ {
+ bitmap_clear_bit (gen_cands, cand->index);
+ bitmap_set_bit (&temp_bitmap, uid);
+ }
+ }
+
+ if (CALL_P (insn))
+ EXECUTE_IF_SET_IN_BITMAP (&gen_insns, 0, uid, bi)
+ {
+ rtx_insn *insn2 = lra_insn_recog_data[uid]->insn;
+
+ cand = insn_to_cand[INSN_UID (insn2)];
+ gcc_assert (cand != NULL);
+ if (call_used_input_regno_present_p (insn2))
+ {
+ bitmap_clear_bit (gen_cands, cand->index);
+ bitmap_set_bit (&temp_bitmap, uid);
+ }
+ }
+ bitmap_and_compl_into (&gen_insns, &temp_bitmap);
+
+ cand = insn_to_cand[INSN_UID (insn)];
+ if (cand != NULL)
+ {
+ bitmap_set_bit (gen_cands, cand->index);
+ bitmap_set_bit (&gen_insns, INSN_UID (insn));
+ }
+ }
+ }
+ bitmap_clear (&gen_insns);
+}
+
+
+
+/* The common transfer function used by the DF equation solver to
+ propagate (partial) availability info BB_IN to BB_OUT through block
+ with BB_INDEX according to the following equation:
+
+ bb.out = ((bb.in & bb.livein) - bb.killed) OR bb.gen
+*/
+static bool
+cand_trans_fun (int bb_index, bitmap bb_in, bitmap bb_out)
+{
+ remat_bb_data_t bb_info;
+ bitmap bb_livein, bb_changed_regs, bb_dead_regs;
+ unsigned int cid;
+ bitmap_iterator bi;
+
+ bb_info = get_remat_bb_data_by_index (bb_index);
+ bb_livein = &bb_info->livein_cands;
+ bb_changed_regs = &bb_info->changed_regs;
+ bb_dead_regs = &bb_info->dead_regs;
+ /* Calculate killed avin cands -- cands whose regs are changed or
+ becoming dead in the BB. We calculate it here as we hope that
+ repeated calculations are compensated by smaller size of BB_IN in
+ comparison with all candidates number. */
+ bitmap_clear (&temp_bitmap);
+ EXECUTE_IF_SET_IN_BITMAP (bb_in, 0, cid, bi)
+ {
+ cand_t cand = all_cands[cid];
+ lra_insn_recog_data_t id = lra_get_insn_recog_data (cand->insn);
+ struct lra_insn_reg *reg;
+
+ if (! bitmap_bit_p (bb_livein, cid))
+ {
+ bitmap_set_bit (&temp_bitmap, cid);
+ continue;
+ }
+ for (reg = id->regs; reg != NULL; reg = reg->next)
+ /* Ignore all outputs which are not the regno for
+ rematerialization. */
+ if (reg->type == OP_OUT && reg->regno != cand->regno)
+ continue;
+ else if (bitmap_bit_p (bb_changed_regs, reg->regno)
+ || bitmap_bit_p (bb_dead_regs, reg->regno))
+ {
+ bitmap_set_bit (&temp_bitmap, cid);
+ break;
+ }
+ }
+ return bitmap_ior_and_compl (bb_out,
+ &bb_info->gen_cands, bb_in, &temp_bitmap);
+}
+
+
+
+/* The transfer function used by the DF equation solver to propagate
+ partial candidate availability info through block with BB_INDEX
+ according to the following equation:
+
+ bb.pavout = ((bb.pavin & bb.livein) - bb.killed) OR bb.gen
+*/
+static bool
+cand_pav_trans_fun (int bb_index)
+{
+ remat_bb_data_t bb_info;
+
+ bb_info = get_remat_bb_data_by_index (bb_index);
+ return cand_trans_fun (bb_index, &bb_info->pavin_cands,
+ &bb_info->pavout_cands);
+}
+
+/* The confluence function used by the DF equation solver to set up
+ cand_pav info for a block BB without predecessor. */
+static void
+cand_pav_con_fun_0 (basic_block bb)
+{
+ bitmap_clear (&get_remat_bb_data (bb)->pavin_cands);
+}
+
+/* The confluence function used by the DF equation solver to propagate
+ partial candidate availability info from predecessor to successor
+ on edge E (pred->bb) according to the following equation:
+
+ bb.pavin_cands = 0 for entry block | OR (pavout_cands of predecessors)
+ */
+static bool
+cand_pav_con_fun_n (edge e)
+{
+ basic_block pred = e->src;
+ basic_block bb = e->dest;
+ remat_bb_data_t bb_info;
+ bitmap bb_pavin, pred_pavout;
+
+ bb_info = get_remat_bb_data (bb);
+ bb_pavin = &bb_info->pavin_cands;
+ pred_pavout = &get_remat_bb_data (pred)->pavout_cands;
+ return bitmap_ior_into (bb_pavin, pred_pavout);
+}
+
+
+
+/* The transfer function used by the DF equation solver to propagate
+ candidate availability info through block with BB_INDEX according
+ to the following equation:
+
+ bb.avout = ((bb.avin & bb.livein) - bb.killed) OR bb.gen
+*/
+static bool
+cand_av_trans_fun (int bb_index)
+{
+ remat_bb_data_t bb_info;
+
+ bb_info = get_remat_bb_data_by_index (bb_index);
+ return cand_trans_fun (bb_index, &bb_info->avin_cands,
+ &bb_info->avout_cands);
+}
+
+/* The confluence function used by the DF equation solver to set up
+ cand_av info for a block BB without predecessor. */
+static void
+cand_av_con_fun_0 (basic_block bb)
+{
+ bitmap_clear (&get_remat_bb_data (bb)->avin_cands);
+}
+
+/* The confluence function used by the DF equation solver to propagate
+ cand_av info from predecessor to successor on edge E (pred->bb)
+ according to the following equation:
+
+ bb.avin_cands = 0 for entry block | AND (avout_cands of predecessors)
+ */
+static bool
+cand_av_con_fun_n (edge e)
+{
+ basic_block pred = e->src;
+ basic_block bb = e->dest;
+ remat_bb_data_t bb_info;
+ bitmap bb_avin, pred_avout;
+
+ bb_info = get_remat_bb_data (bb);
+ bb_avin = &bb_info->avin_cands;
+ pred_avout = &get_remat_bb_data (pred)->avout_cands;
+ return bitmap_and_into (bb_avin, pred_avout);
+}
+
+/* Calculate available candidates for each BB. */
+static void
+calculate_global_remat_bb_data (void)
+{
+ basic_block bb;
+
+ df_simple_dataflow
+ (DF_FORWARD, NULL, cand_pav_con_fun_0, cand_pav_con_fun_n,
+ cand_pav_trans_fun, &all_blocks,
+ df_get_postorder (DF_FORWARD), df_get_n_blocks (DF_FORWARD));
+ /* Initialize avin by pavin. */
+ FOR_EACH_BB_FN (bb, cfun)
+ bitmap_copy (&get_remat_bb_data (bb)->avin_cands,
+ &get_remat_bb_data (bb)->pavin_cands);
+ df_simple_dataflow
+ (DF_FORWARD, NULL, cand_av_con_fun_0, cand_av_con_fun_n,
+ cand_av_trans_fun, &all_blocks,
+ df_get_postorder (DF_FORWARD), df_get_n_blocks (DF_FORWARD));
+}
+
+
+
+/* Setup sp offset attribute to SP_OFFSET for all INSNS. */
+static void
+change_sp_offset (rtx_insn *insns, HOST_WIDE_INT sp_offset)
+{
+ for (rtx_insn *insn = insns; insn != NULL; insn = NEXT_INSN (insn))
+ eliminate_regs_in_insn (insn, false, false, sp_offset);
+}
+
+/* Return start hard register of REG (can be a hard or a pseudo reg)
+ or -1 (if it is a spilled pseudo). Return number of hard registers
+ occupied by REG through parameter NREGS if the start hard reg is
+ not negative. */
+static int
+get_hard_regs (struct lra_insn_reg *reg, int &nregs)
+{
+ int regno = reg->regno;
+ int hard_regno = regno < FIRST_PSEUDO_REGISTER ? regno : reg_renumber[regno];
+
+ if (hard_regno >= 0)
+ nregs = hard_regno_nregs[hard_regno][reg->biggest_mode];
+ return hard_regno;
+}
+
+/* Insert rematerialization insns using the data-flow data calculated
+ earlier. */
+static bool
+do_remat (void)
+{
+ rtx_insn *insn;
+ basic_block bb;
+ bitmap_head avail_cands;
+ bool changed_p = false;
+ /* Living hard regs and hard registers of living pseudos. */
+ HARD_REG_SET live_hard_regs;
+
+ bitmap_initialize (&avail_cands, &reg_obstack);
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ REG_SET_TO_HARD_REG_SET (live_hard_regs, df_get_live_out (bb));
+ bitmap_and (&avail_cands, &get_remat_bb_data (bb)->avin_cands,
+ &get_remat_bb_data (bb)->livein_cands);
+ FOR_BB_INSNS (bb, insn)
+ {
+ if (!NONDEBUG_INSN_P (insn))
+ continue;
+
+ lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
+ struct lra_insn_reg *reg;
+ cand_t cand;
+ unsigned int cid;
+ bitmap_iterator bi;
+ rtx set;
+ int src_regno = -1, dst_regno = -1;
+
+ if ((set = single_set (insn)) != NULL
+ && REG_P (SET_SRC (set)) && REG_P (SET_DEST (set)))
+ {
+ src_regno = REGNO (SET_SRC (set));
+ dst_regno = REGNO (SET_DEST (set));
+ }
+
+ cand = NULL;
+ /* Check possibility of rematerialization (hard reg or
+ unpsilled pseudo <- spilled pseudo): */
+ if (dst_regno >= 0 && src_regno >= FIRST_PSEUDO_REGISTER
+ && reg_renumber[src_regno] < 0
+ && (dst_regno < FIRST_PSEUDO_REGISTER
+ || reg_renumber[dst_regno] >= 0))
+ {
+ for (cand = regno_cands[src_regno];
+ cand != NULL;
+ cand = cand->next_regno_cand)
+ if (bitmap_bit_p (&avail_cands, cand->index))
+ break;
+ }
+ int i, hard_regno, nregs;
+ rtx_insn *remat_insn = NULL;
+ HOST_WIDE_INT cand_sp_offset = 0;
+ if (cand != NULL)
+ {
+ lra_insn_recog_data_t cand_id = lra_get_insn_recog_data (cand->insn);
+ rtx saved_op = *cand_id->operand_loc[cand->nop];
+
+ /* Check clobbers do not kill something living. */
+ gcc_assert (REG_P (saved_op));
+ int ignore_regno = REGNO (saved_op);
+
+ for (reg = cand_id->regs; reg != NULL; reg = reg->next)
+ if (reg->type != OP_IN && reg->regno != ignore_regno)
+ {
+ hard_regno = get_hard_regs (reg, nregs);
+ gcc_assert (hard_regno >= 0);
+ for (i = 0; i < nregs; i++)
+ if (TEST_HARD_REG_BIT (live_hard_regs, hard_regno + i))
+ break;
+ if (i < nregs)
+ break;
+ }
+
+ if (reg == NULL)
+ {
+ *cand_id->operand_loc[cand->nop] = SET_DEST (set);
+ lra_update_insn_regno_info (cand->insn);
+ bool ok_p = lra_constrain_insn (cand->insn);
+ if (ok_p)
+ {
+ rtx remat_pat = copy_insn (PATTERN (cand->insn));
+
+ start_sequence ();
+ emit_insn (remat_pat);
+ remat_insn = get_insns ();
+ end_sequence ();
+ if (recog_memoized (remat_insn) < 0)
+ remat_insn = NULL;
+ cand_sp_offset = cand_id->sp_offset;
+ }
+ *cand_id->operand_loc[cand->nop] = saved_op;
+ lra_update_insn_regno_info (cand->insn);
+ }
+ }
+
+ /* Update avail_cands (see analogous code for
+ calculate_gen_cands). */
+ for (reg = id->regs; reg != NULL; reg = reg->next)
+ if (reg->type != OP_IN
+ || find_regno_note (insn, REG_DEAD, reg->regno) != NULL)
+ EXECUTE_IF_SET_IN_BITMAP (&avail_cands, 0, cid, bi)
+ {
+ cand = all_cands[cid];
+
+ /* Ignore the reload insn. */
+ if (src_regno == cand->reload_regno
+ && dst_regno == cand->regno)
+ continue;
+ if (cand->regno == reg->regno
+ || input_regno_present_p (cand->insn, reg->regno))
+ bitmap_clear_bit (&avail_cands, cand->index);
+ }
+
+ if (CALL_P (insn))
+ EXECUTE_IF_SET_IN_BITMAP (&avail_cands, 0, cid, bi)
+ {
+ cand = all_cands[cid];
+
+ if (call_used_input_regno_present_p (cand->insn))
+ bitmap_clear_bit (&avail_cands, cand->index);
+ }
+
+ if ((cand = insn_to_cand[INSN_UID (insn)]) != NULL)
+ bitmap_set_bit (&avail_cands, cand->index);
+
+ if (remat_insn != NULL)
+ {
+ HOST_WIDE_INT sp_offset_change = cand_sp_offset - id->sp_offset;
+ if (sp_offset_change != 0)
+ change_sp_offset (remat_insn, sp_offset_change);
+ lra_process_new_insns (insn, remat_insn, NULL,
+ "Inserting rematerialization insn");
+ lra_set_insn_deleted (insn);
+ changed_p = true;
+ continue;
+ }
+
+ /* Update live hard regs: */
+ for (reg = id->regs; reg != NULL; reg = reg->next)
+ if (reg->type == OP_IN
+ && find_regno_note (insn, REG_DEAD, reg->regno) != NULL)
+ {
+ if ((hard_regno = get_hard_regs (reg, nregs)) < 0)
+ continue;
+ for (i = 0; i < nregs; i++)
+ CLEAR_HARD_REG_BIT (live_hard_regs, hard_regno + i);
+ }
+ else if (reg->type != OP_IN
+ && find_regno_note (insn, REG_UNUSED, reg->regno) == NULL)
+ {
+ if ((hard_regno = get_hard_regs (reg, nregs)) < 0)
+ continue;
+ for (i = 0; i < nregs; i++)
+ SET_HARD_REG_BIT (live_hard_regs, hard_regno + i);
+ }
+ }
+ }
+ bitmap_clear (&avail_cands);
+ return changed_p;
+}
+
+
+
+/* Entry point of the rematerialization sub-pass. Return true if we
+ did any rematerialization. */
+bool
+lra_remat (void)
+{
+ basic_block bb;
+ bool result;
+ int max_regno = max_reg_num ();
+
+ if (! flag_lra_remat)
+ return false;
+ timevar_push (TV_LRA_REMAT);
+ insn_to_cand = XCNEWVEC (cand_t, get_max_uid ());
+ regno_cands = XCNEWVEC (cand_t, max_regno);
+ all_cands.create (8000);
+ call_used_regs_arr_len = 0;
+ for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ if (call_used_regs[i])
+ call_used_regs_arr[call_used_regs_arr_len++] = i;
+ initiate_cand_table ();
+ create_cands ();
+ create_remat_bb_data ();
+ bitmap_initialize (&temp_bitmap, &reg_obstack);
+ calculate_local_reg_remat_bb_data ();
+ calculate_livein_cands ();
+ calculate_gen_cands ();
+ bitmap_initialize (&all_blocks, &reg_obstack);
+ FOR_ALL_BB_FN (bb, cfun)
+ bitmap_set_bit (&all_blocks, bb->index);
+ calculate_global_remat_bb_data ();
+ dump_candidates_and_remat_bb_data ();
+ result = do_remat ();
+ all_cands.release ();
+ bitmap_clear (&temp_bitmap);
+ finish_remat_bb_data ();
+ finish_cand_table ();
+ bitmap_clear (&all_blocks);
+ free (regno_cands);
+ free (insn_to_cand);
+ timevar_pop (TV_LRA_REMAT);
+ return result;
+}
diff --git a/gcc/lra-spills.c b/gcc/lra-spills.c
index a4ffc8578e6..f21a3abcd6f 100644
--- a/gcc/lra-spills.c
+++ b/gcc/lra-spills.c
@@ -445,7 +445,7 @@ remove_pseudos (rtx *loc, rtx_insn *insn)
{
rtx x = lra_eliminate_regs_1 (insn, pseudo_slots[i].mem,
GET_MODE (pseudo_slots[i].mem),
- false, false, true);
+ 0, false, false, true);
*loc = x != pseudo_slots[i].mem ? x : copy_rtx (x);
}
return;
diff --git a/gcc/lra.c b/gcc/lra.c
index 3ae47e86ba6..a140f2ab89a 100644
--- a/gcc/lra.c
+++ b/gcc/lra.c
@@ -37,6 +37,7 @@ along with GCC; see the file COPYING3. If not see
generated;
o Some pseudos might be spilled to assign hard registers to
new reload pseudos;
+ o Recalculating spilled pseudo values (rematerialization);
o Changing spilled pseudos to stack memory or their equivalences;
o Allocation stack memory changes the address displacement and
new iteration is needed.
@@ -57,19 +58,26 @@ along with GCC; see the file COPYING3. If not see
----------- | ---------------- |
| | |
| V New |
- ---------------- No ------------ pseudos -------------------
- | Spilled pseudo | change |Constraints:| or insns | Inheritance/split |
- | to memory |<-------| RTL |--------->| transformations |
- | substitution | | transfor- | | in EBB scope |
- ---------------- | mations | -------------------
- | ------------
- V
- -------------------------
- | Hard regs substitution, |
- | devirtalization, and |------> Finish
- | restoring scratches got |
- | memory |
- -------------------------
+ | ------------ pseudos -------------------
+ | |Constraints:| or insns | Inheritance/split |
+ | | RTL |--------->| transformations |
+ | | transfor- | | in EBB scope |
+ | substi- | mations | -------------------
+ | tutions ------------
+ | | No change
+ ---------------- V
+ | Spilled pseudo | -------------------
+ | to memory |<----| Rematerialization |
+ | substitution | -------------------
+ ----------------
+ | No susbtitions
+ V
+ -------------------------
+ | Hard regs substitution, |
+ | devirtalization, and |------> Finish
+ | restoring scratches got |
+ | memory |
+ -------------------------
To speed up the process:
o We process only insns affected by changes on previous
@@ -849,38 +857,38 @@ collect_non_operand_hard_regs (rtx *x, lra_insn_recog_data_t data,
{
if ((regno = REGNO (op)) >= FIRST_PSEUDO_REGISTER)
return list;
+ /* Process all regs even unallocatable ones as we need info
+ about all regs for rematerialization pass. */
for (last = regno + hard_regno_nregs[regno][mode];
regno < last;
regno++)
- if (! TEST_HARD_REG_BIT (lra_no_alloc_regs, regno)
- || TEST_HARD_REG_BIT (eliminable_regset, regno))
- {
- for (curr = list; curr != NULL; curr = curr->next)
- if (curr->regno == regno && curr->subreg_p == subreg_p
- && curr->biggest_mode == mode)
- {
- if (curr->type != type)
- curr->type = OP_INOUT;
- if (curr->early_clobber != early_clobber)
- curr->early_clobber = true;
- break;
- }
- if (curr == NULL)
+ {
+ for (curr = list; curr != NULL; curr = curr->next)
+ if (curr->regno == regno && curr->subreg_p == subreg_p
+ && curr->biggest_mode == mode)
{
- /* This is a new hard regno or the info can not be
- integrated into the found structure. */
+ if (curr->type != type)
+ curr->type = OP_INOUT;
+ if (curr->early_clobber != early_clobber)
+ curr->early_clobber = true;
+ break;
+ }
+ if (curr == NULL)
+ {
+ /* This is a new hard regno or the info can not be
+ integrated into the found structure. */
#ifdef STACK_REGS
- early_clobber
- = (early_clobber
- /* This clobber is to inform popping floating
- point stack only. */
- && ! (FIRST_STACK_REG <= regno
- && regno <= LAST_STACK_REG));
+ early_clobber
+ = (early_clobber
+ /* This clobber is to inform popping floating
+ point stack only. */
+ && ! (FIRST_STACK_REG <= regno
+ && regno <= LAST_STACK_REG));
#endif
- list = new_insn_reg (data->insn, regno, type, mode, subreg_p,
- early_clobber, list);
- }
- }
+ list = new_insn_reg (data->insn, regno, type, mode, subreg_p,
+ early_clobber, list);
+ }
+ }
return list;
}
switch (code)
@@ -1456,10 +1464,8 @@ add_regs_to_insn_regno_info (lra_insn_recog_data_t data, rtx x, int uid,
if (REG_P (x))
{
regno = REGNO (x);
- if (regno < FIRST_PSEUDO_REGISTER
- && TEST_HARD_REG_BIT (lra_no_alloc_regs, regno)
- && ! TEST_HARD_REG_BIT (eliminable_regset, regno))
- return;
+ /* Process all regs even unallocatable ones as we need info about
+ all regs for rematerialization pass. */
expand_reg_info ();
if (bitmap_set_bit (&lra_reg_info[regno].insn_bitmap, uid))
{
@@ -2152,9 +2158,6 @@ bitmap_head lra_optional_reload_pseudos;
pass. */
bitmap_head lra_subreg_reload_pseudos;
-/* First UID of insns generated before a new spill pass. */
-int lra_constraint_new_insn_uid_start;
-
/* File used for output of LRA debug information. */
FILE *lra_dump_file;
@@ -2252,7 +2255,6 @@ lra (FILE *f)
lra_curr_reload_num = 0;
push_insns (get_last_insn (), NULL);
/* It is needed for the 1st coalescing. */
- lra_constraint_new_insn_uid_start = get_max_uid ();
bitmap_initialize (&lra_inheritance_pseudos, &reg_obstack);
bitmap_initialize (&lra_split_regs, &reg_obstack);
bitmap_initialize (&lra_optional_reload_pseudos, &reg_obstack);
@@ -2345,12 +2347,21 @@ lra (FILE *f)
lra_create_live_ranges (lra_reg_spill_p);
live_p = true;
}
+ /* Now we know what pseudos should be spilled. Try to
+ rematerialize them first. */
+ if (lra_remat ())
+ {
+ /* We need full live info -- see the comment above. */
+ lra_create_live_ranges (lra_reg_spill_p);
+ live_p = true;
+ if (! lra_need_for_spills_p ())
+ break;
+ }
lra_spill ();
/* Assignment of stack slots changes elimination offsets for
some eliminations. So update the offsets here. */
lra_eliminate (false, false);
lra_constraint_new_regno_start = max_reg_num ();
- lra_constraint_new_insn_uid_start = get_max_uid ();
lra_assignment_iter_after_spill = 0;
}
restore_scratches ();
diff --git a/gcc/opts.c b/gcc/opts.c
index 195cadd3e5f..4df7389fc78 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -500,6 +500,7 @@ static const struct default_options default_options_table[] =
{ OPT_LEVELS_2_PLUS, OPT_fipa_icf, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_fisolate_erroneous_paths_dereference, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_fuse_caller_save, NULL, 1 },
+ { OPT_LEVELS_2_PLUS, OPT_flra_remat, NULL, 1 },
/* -O3 optimizations. */
{ OPT_LEVELS_3_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 },
diff --git a/gcc/timevar.def b/gcc/timevar.def
index f3e7b889069..95c84ad9857 100644
--- a/gcc/timevar.def
+++ b/gcc/timevar.def
@@ -237,6 +237,7 @@ DEFTIMEVAR (TV_LRA_INHERITANCE , "LRA reload inheritance")
DEFTIMEVAR (TV_LRA_CREATE_LIVE_RANGES, "LRA create live ranges")
DEFTIMEVAR (TV_LRA_ASSIGN , "LRA hard reg assignment")
DEFTIMEVAR (TV_LRA_COALESCE , "LRA coalesce pseudo regs")
+DEFTIMEVAR (TV_LRA_REMAT , "LRA rematerialization")
DEFTIMEVAR (TV_RELOAD , "reload")
DEFTIMEVAR (TV_RELOAD_CSE_REGS , "reload CSE regs")
DEFTIMEVAR (TV_GCSE_AFTER_RELOAD , "load CSE after reload")