diff options
author | wschmidt <wschmidt@138bc75d-0d04-0410-961f-82ee72b054a4> | 2012-08-08 01:35:22 +0000 |
---|---|---|
committer | wschmidt <wschmidt@138bc75d-0d04-0410-961f-82ee72b054a4> | 2012-08-08 01:35:22 +0000 |
commit | 9ce0359e143baf49a0955a60a0e6f873be49ddb2 (patch) | |
tree | d45c5c5a3cf1a03d2c0b98092a02c83996e0a606 /gcc/gimple-ssa-strength-reduction.c | |
parent | 187c8331a8f460e3aae84f108f44ea6bebf20a99 (diff) | |
download | gcc-9ce0359e143baf49a0955a60a0e6f873be49ddb2.tar.gz |
gcc:
2012-08-07 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
* gimple-ssa-strength-reduction.c (struct incr_info_d): New struct.
(incr_vec): New static var.
(incr_vec_len): Likewise.
(address_arithmetic_p): Likewise.
(stmt_cost): Remove dead assignment.
(dump_incr_vec): New function.
(cand_abs_increment): Likewise.
(lazy_create_slsr_reg): Likewise.
(incr_vec_index): Likewise.
(count_candidates): Likewise.
(record_increment): Likewise.
(record_increments): Likewise.
(unreplaced_cand_in_tree): Likewise.
(optimize_cands_for_speed_p): Likewise.
(lowest_cost_path): Likewise.
(total_savings): Likewise.
(analyze_increments): Likewise.
(ncd_for_two_cands): Likewise.
(nearest_common_dominator_for_cands): Likewise.
(profitable_increment_p): Likewise.
(insert_initializers): Likewise.
(introduce_cast_before_cand): Likewise.
(replace_rhs_if_not_dup): Likewise.
(replace_one_candidate): Likewise.
(replace_profitable_candidates): Likewise.
(analyze_candidates_and_replace): Handle candidates with SSA-name
strides.
gcc/testsuite:
2012-08-07 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
* gcc.dg/tree-ssa/slsr-5.c: New.
* gcc.dg/tree-ssa/slsr-6.c: New.
* gcc.dg/tree-ssa/slsr-7.c: New.
* gcc.dg/tree-ssa/slsr-8.c: New.
* gcc.dg/tree-ssa/slsr-9.c: New.
* gcc.dg/tree-ssa/slsr-10.c: New.
* gcc.dg/tree-ssa/slsr-11.c: New.
* gcc.dg/tree-ssa/slsr-12.c: New.
* gcc.dg/tree-ssa/slsr-13.c: New.
* gcc.dg/tree-ssa/slsr-14.c: New.
* gcc.dg/tree-ssa/slsr-15.c: New.
* gcc.dg/tree-ssa/slsr-16.c: New.
* gcc.dg/tree-ssa/slsr-17.c: New.
* gcc.dg/tree-ssa/slsr-18.c: New.
* gcc.dg/tree-ssa/slsr-19.c: New.
* gcc.dg/tree-ssa/slsr-20.c: New.
* gcc.dg/tree-ssa/slsr-21.c: New.
* gcc.dg/tree-ssa/slsr-22.c: New.
* gcc.dg/tree-ssa/slsr-23.c: New.
* gcc.dg/tree-ssa/slsr-24.c: New.
* gcc.dg/tree-ssa/slsr-25.c: New.
* gcc.dg/tree-ssa/slsr-26.c: New.
* gcc.dg/tree-ssa/slsr-30.c: New.
* gcc.dg/tree-ssa/slsr-31.c: New.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@190220 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/gimple-ssa-strength-reduction.c')
-rw-r--r-- | gcc/gimple-ssa-strength-reduction.c | 902 |
1 files changed, 894 insertions, 8 deletions
diff --git a/gcc/gimple-ssa-strength-reduction.c b/gcc/gimple-ssa-strength-reduction.c index 6a46408f0ca..ccc361e900a 100644 --- a/gcc/gimple-ssa-strength-reduction.c +++ b/gcc/gimple-ssa-strength-reduction.c @@ -30,8 +30,7 @@ along with GCC; see the file COPYING3. If not see 1) Explicit multiplies, known constant multipliers, no conditional increments. (complete) 2) Explicit multiplies, unknown constant multipliers, - no conditional increments. (data gathering complete, - replacements pending) + no conditional increments. (complete) 3) Implicit multiplies in addressing expressions. (complete) 4) Explicit multiplies, conditional increments. (pending) @@ -235,6 +234,41 @@ struct cand_chain_d typedef struct cand_chain_d cand_chain, *cand_chain_t; typedef const struct cand_chain_d *const_cand_chain_t; +/* Information about a unique "increment" associated with candidates + having an SSA name for a stride. An increment is the difference + between the index of the candidate and the index of its basis, + i.e., (i - i') as discussed in the module commentary. + + When we are not going to generate address arithmetic we treat + increments that differ only in sign as the same, allowing sharing + of the cost of initializers. The absolute value of the increment + is stored in the incr_info. */ + +struct incr_info_d +{ + /* The increment that relates a candidate to its basis. */ + double_int incr; + + /* How many times the increment occurs in the candidate tree. */ + unsigned count; + + /* Cost of replacing candidates using this increment. Negative and + zero costs indicate replacement should be performed. */ + int cost; + + /* If this increment is profitable but is not -1, 0, or 1, it requires + an initializer T_0 = stride * incr to be found or introduced in the + nearest common dominator of all candidates. This field holds T_0 + for subsequent use. */ + tree initializer; + + /* If the initializer was found to already exist, this is the block + where it was found. */ + basic_block init_bb; +}; + +typedef struct incr_info_d incr_info, *incr_info_t; + /* Candidates are maintained in a vector. If candidate X dominates candidate Y, then X appears before Y in the vector; but the converse does not necessarily hold. */ @@ -259,6 +293,16 @@ static htab_t base_cand_map; /* Obstack for candidate chains. */ static struct obstack chain_obstack; + +/* An array INCR_VEC of incr_infos is used during analysis of related + candidates having an SSA name for a stride. INCR_VEC_LEN describes + its current length. */ +static incr_info_t incr_vec; +static unsigned incr_vec_len; + +/* For a chain of candidates with unknown stride, indicates whether or not + we must generate pointer arithmetic when replacing statements. */ +static bool address_arithmetic_p; /* Produce a pointer to the IDX'th candidate in the candidate vector. */ @@ -421,7 +465,6 @@ stmt_cost (gimple gs, bool speed) case PLUS_EXPR: case POINTER_PLUS_EXPR: case MINUS_EXPR: - rhs2 = gimple_assign_rhs2 (gs); return add_cost (speed, lhs_mode); case NEGATE_EXPR: @@ -1407,6 +1450,30 @@ dump_cand_chains (void) htab_traverse_noresize (base_cand_map, base_cand_dump_callback, NULL); fputs ("\n", dump_file); } + +/* Dump the increment vector for debug. */ + +static void +dump_incr_vec (void) +{ + if (dump_file && (dump_flags & TDF_DETAILS)) + { + unsigned i; + + fprintf (dump_file, "\nIncrement vector:\n\n"); + + for (i = 0; i < incr_vec_len; i++) + { + fprintf (dump_file, "%3d increment: ", i); + dump_double_int (dump_file, incr_vec[i].incr, false); + fprintf (dump_file, "\n count: %d", incr_vec[i].count); + fprintf (dump_file, "\n cost: %d", incr_vec[i].cost); + fputs ("\n initializer: ", dump_file); + print_generic_expr (dump_file, incr_vec[i].initializer, 0); + fputs ("\n\n", dump_file); + } + } +} /* Recursive helper for unconditional_cands_with_known_stride_p. Returns TRUE iff C, its siblings, and its dependents are all @@ -1508,6 +1575,32 @@ cand_increment (slsr_cand_t c) return double_int_sub (c->index, basis->index); } +/* Calculate the increment required for candidate C relative to + its basis. If we aren't going to generate pointer arithmetic + for this candidate, return the absolute value of that increment + instead. */ + +static inline double_int +cand_abs_increment (slsr_cand_t c) +{ + double_int increment = cand_increment (c); + + if (!address_arithmetic_p && double_int_negative_p (increment)) + increment = double_int_neg (increment); + + return increment; +} + +/* If *VAR is NULL or is not of a compatible type with TYPE, create a + new temporary reg of type TYPE and store it in *VAR. */ + +static inline void +lazy_create_slsr_reg (tree *var, tree type) +{ + if (!*var || !types_compatible_p (TREE_TYPE (*var), type)) + *var = create_tmp_reg (type, "slsr"); +} + /* Return TRUE iff candidate C has already been replaced under another interpretation. */ @@ -1630,6 +1723,764 @@ replace_dependents (slsr_cand_t c) replace_dependents (lookup_cand (c->dependent)); } +/* Return the index in the increment vector of the given INCREMENT. */ + +static inline unsigned +incr_vec_index (double_int increment) +{ + unsigned i; + + for (i = 0; + i < incr_vec_len && !double_int_equal_p (increment, incr_vec[i].incr); + i++) + ; + + gcc_assert (i < incr_vec_len); + return i; +} + +/* Count the number of candidates in the tree rooted at C that have + not already been replaced under other interpretations. */ + +static unsigned +count_candidates (slsr_cand_t c) +{ + unsigned count = cand_already_replaced (c) ? 0 : 1; + + if (c->sibling) + count += count_candidates (lookup_cand (c->sibling)); + + if (c->dependent) + count += count_candidates (lookup_cand (c->dependent)); + + return count; +} + +/* Increase the count of INCREMENT by one in the increment vector. + INCREMENT is associated with candidate C. If an initializer + T_0 = stride * I is provided by a candidate that dominates all + candidates with the same increment, also record T_0 for subsequent use. */ + +static void +record_increment (slsr_cand_t c, double_int increment) +{ + bool found = false; + unsigned i; + + /* Treat increments that differ only in sign as identical so as to + share initializers, unless we are generating pointer arithmetic. */ + if (!address_arithmetic_p && double_int_negative_p (increment)) + increment = double_int_neg (increment); + + for (i = 0; i < incr_vec_len; i++) + { + if (double_int_equal_p (incr_vec[i].incr, increment)) + { + incr_vec[i].count++; + found = true; + + /* If we previously recorded an initializer that doesn't + dominate this candidate, it's not going to be useful to + us after all. */ + if (incr_vec[i].initializer + && !dominated_by_p (CDI_DOMINATORS, + gimple_bb (c->cand_stmt), + incr_vec[i].init_bb)) + { + incr_vec[i].initializer = NULL_TREE; + incr_vec[i].init_bb = NULL; + } + + break; + } + } + + if (!found) + { + /* The first time we see an increment, create the entry for it. + If this is the root candidate which doesn't have a basis, set + the count to zero. We're only processing it so it can possibly + provide an initializer for other candidates. */ + incr_vec[incr_vec_len].incr = increment; + incr_vec[incr_vec_len].count = c->basis ? 1 : 0; + incr_vec[incr_vec_len].cost = COST_INFINITE; + + /* Optimistically record the first occurrence of this increment + as providing an initializer (if it does); we will revise this + opinion later if it doesn't dominate all other occurrences. + Exception: increments of -1, 0, 1 never need initializers. */ + if (c->kind == CAND_ADD + && double_int_equal_p (c->index, increment) + && (double_int_scmp (increment, double_int_one) > 0 + || double_int_scmp (increment, double_int_minus_one) < 0)) + { + tree t0; + tree rhs1 = gimple_assign_rhs1 (c->cand_stmt); + tree rhs2 = gimple_assign_rhs2 (c->cand_stmt); + if (operand_equal_p (rhs1, c->base_expr, 0)) + t0 = rhs2; + else + t0 = rhs1; + if (SSA_NAME_DEF_STMT (t0) && gimple_bb (SSA_NAME_DEF_STMT (t0))) + { + incr_vec[incr_vec_len].initializer = t0; + incr_vec[incr_vec_len++].init_bb + = gimple_bb (SSA_NAME_DEF_STMT (t0)); + } + else + { + incr_vec[incr_vec_len].initializer = NULL_TREE; + incr_vec[incr_vec_len++].init_bb = NULL; + } + } + else + { + incr_vec[incr_vec_len].initializer = NULL_TREE; + incr_vec[incr_vec_len++].init_bb = NULL; + } + } +} + +/* Determine how many times each unique increment occurs in the set + of candidates rooted at C's parent, recording the data in the + increment vector. For each unique increment I, if an initializer + T_0 = stride * I is provided by a candidate that dominates all + candidates with the same increment, also record T_0 for subsequent + use. */ + +static void +record_increments (slsr_cand_t c) +{ + if (!cand_already_replaced (c)) + record_increment (c, cand_increment (c)); + + if (c->sibling) + record_increments (lookup_cand (c->sibling)); + + if (c->dependent) + record_increments (lookup_cand (c->dependent)); +} + +/* Return the first candidate in the tree rooted at C that has not + already been replaced, favoring siblings over dependents. */ + +static slsr_cand_t +unreplaced_cand_in_tree (slsr_cand_t c) +{ + if (!cand_already_replaced (c)) + return c; + + if (c->sibling) + { + slsr_cand_t sib = unreplaced_cand_in_tree (lookup_cand (c->sibling)); + if (sib) + return sib; + } + + if (c->dependent) + { + slsr_cand_t dep = unreplaced_cand_in_tree (lookup_cand (c->dependent)); + if (dep) + return dep; + } + + return NULL; +} + +/* Return TRUE if the candidates in the tree rooted at C should be + optimized for speed, else FALSE. We estimate this based on the block + containing the most dominant candidate in the tree that has not yet + been replaced. */ + +static bool +optimize_cands_for_speed_p (slsr_cand_t c) +{ + slsr_cand_t c2 = unreplaced_cand_in_tree (c); + gcc_assert (c2); + return optimize_bb_for_speed_p (gimple_bb (c2->cand_stmt)); +} + +/* Add COST_IN to the lowest cost of any dependent path starting at + candidate C or any of its siblings, counting only candidates along + such paths with increment INCR. Assume that replacing a candidate + reduces cost by REPL_SAVINGS. Also account for savings from any + statements that would go dead. */ + +static int +lowest_cost_path (int cost_in, int repl_savings, slsr_cand_t c, double_int incr) +{ + int local_cost, sib_cost; + double_int cand_incr = cand_abs_increment (c); + + if (cand_already_replaced (c)) + local_cost = cost_in; + else if (double_int_equal_p (incr, cand_incr)) + local_cost = cost_in - repl_savings - c->dead_savings; + else + local_cost = cost_in - c->dead_savings; + + if (c->dependent) + local_cost = lowest_cost_path (local_cost, repl_savings, + lookup_cand (c->dependent), incr); + + if (c->sibling) + { + sib_cost = lowest_cost_path (cost_in, repl_savings, + lookup_cand (c->sibling), incr); + local_cost = MIN (local_cost, sib_cost); + } + + return local_cost; +} + +/* Compute the total savings that would accrue from all replacements + in the candidate tree rooted at C, counting only candidates with + increment INCR. Assume that replacing a candidate reduces cost + by REPL_SAVINGS. Also account for savings from statements that + would go dead. */ + +static int +total_savings (int repl_savings, slsr_cand_t c, double_int incr) +{ + int savings = 0; + double_int cand_incr = cand_abs_increment (c); + + if (double_int_equal_p (incr, cand_incr) + && !cand_already_replaced (c)) + savings += repl_savings + c->dead_savings; + + if (c->dependent) + savings += total_savings (repl_savings, lookup_cand (c->dependent), incr); + + if (c->sibling) + savings += total_savings (repl_savings, lookup_cand (c->sibling), incr); + + return savings; +} + +/* Use target-specific costs to determine and record which increments + in the current candidate tree are profitable to replace, assuming + MODE and SPEED. FIRST_DEP is the first dependent of the root of + the candidate tree. + + One slight limitation here is that we don't account for the possible + introduction of casts in some cases. See replace_one_candidate for + the cases where these are introduced. This should probably be cleaned + up sometime. */ + +static void +analyze_increments (slsr_cand_t first_dep, enum machine_mode mode, bool speed) +{ + unsigned i; + + for (i = 0; i < incr_vec_len; i++) + { + HOST_WIDE_INT incr = double_int_to_shwi (incr_vec[i].incr); + + /* If somehow this increment is bigger than a HWI, we won't + be optimizing candidates that use it. And if the increment + has a count of zero, nothing will be done with it. */ + if (!double_int_fits_in_shwi_p (incr_vec[i].incr) + || !incr_vec[i].count) + incr_vec[i].cost = COST_INFINITE; + + /* Increments of 0, 1, and -1 are always profitable to replace, + because they always replace a multiply or add with an add or + copy, and may cause one or more existing instructions to go + dead. Exception: -1 can't be assumed to be profitable for + pointer addition. */ + else if (incr == 0 + || incr == 1 + || (incr == -1 + && (gimple_assign_rhs_code (first_dep->cand_stmt) + != POINTER_PLUS_EXPR))) + incr_vec[i].cost = COST_NEUTRAL; + + /* For any other increment, if this is a multiply candidate, we + must introduce a temporary T and initialize it with + T_0 = stride * increment. When optimizing for speed, walk the + candidate tree to calculate the best cost reduction along any + path; if it offsets the fixed cost of inserting the initializer, + replacing the increment is profitable. When optimizing for + size, instead calculate the total cost reduction from replacing + all candidates with this increment. */ + else if (first_dep->kind == CAND_MULT) + { + int cost = mult_by_coeff_cost (incr, mode, speed); + int repl_savings = mul_cost (speed, mode) - add_cost (speed, mode); + if (speed) + cost = lowest_cost_path (cost, repl_savings, first_dep, + incr_vec[i].incr); + else + cost -= total_savings (repl_savings, first_dep, incr_vec[i].incr); + + incr_vec[i].cost = cost; + } + + /* If this is an add candidate, the initializer may already + exist, so only calculate the cost of the initializer if it + doesn't. We are replacing one add with another here, so the + known replacement savings is zero. We will account for removal + of dead instructions in lowest_cost_path or total_savings. */ + else + { + int cost = 0; + if (!incr_vec[i].initializer) + cost = mult_by_coeff_cost (incr, mode, speed); + + if (speed) + cost = lowest_cost_path (cost, 0, first_dep, incr_vec[i].incr); + else + cost -= total_savings (0, first_dep, incr_vec[i].incr); + + incr_vec[i].cost = cost; + } + } +} + +/* Return the nearest common dominator of BB1 and BB2. If the blocks + are identical, return the earlier of C1 and C2 in *WHERE. Otherwise, + if the NCD matches BB1, return C1 in *WHERE; if the NCD matches BB2, + return C2 in *WHERE; and if the NCD matches neither, return NULL in + *WHERE. Note: It is possible for one of C1 and C2 to be NULL. */ + +static basic_block +ncd_for_two_cands (basic_block bb1, basic_block bb2, + slsr_cand_t c1, slsr_cand_t c2, slsr_cand_t *where) +{ + basic_block ncd; + + if (!bb1) + { + *where = c2; + return bb2; + } + + if (!bb2) + { + *where = c1; + return bb1; + } + + ncd = nearest_common_dominator (CDI_DOMINATORS, bb1, bb2); + + /* If both candidates are in the same block, the earlier + candidate wins. */ + if (bb1 == ncd && bb2 == ncd) + { + if (!c1 || (c2 && c2->cand_num < c1->cand_num)) + *where = c2; + else + *where = c1; + } + + /* Otherwise, if one of them produced a candidate in the + dominator, that one wins. */ + else if (bb1 == ncd) + *where = c1; + + else if (bb2 == ncd) + *where = c2; + + /* If neither matches the dominator, neither wins. */ + else + *where = NULL; + + return ncd; +} + +/* Consider all candidates in the tree rooted at C for which INCR + represents the required increment of C relative to its basis. + Find and return the basic block that most nearly dominates all + such candidates. If the returned block contains one or more of + the candidates, return the earliest candidate in the block in + *WHERE. */ + +static basic_block +nearest_common_dominator_for_cands (slsr_cand_t c, double_int incr, + slsr_cand_t *where) +{ + basic_block sib_ncd = NULL, dep_ncd = NULL, this_ncd = NULL, ncd; + slsr_cand_t sib_where = NULL, dep_where = NULL, this_where = NULL, new_where; + double_int cand_incr; + + /* First find the NCD of all siblings and dependents. */ + if (c->sibling) + sib_ncd = nearest_common_dominator_for_cands (lookup_cand (c->sibling), + incr, &sib_where); + if (c->dependent) + dep_ncd = nearest_common_dominator_for_cands (lookup_cand (c->dependent), + incr, &dep_where); + if (!sib_ncd && !dep_ncd) + { + new_where = NULL; + ncd = NULL; + } + else if (sib_ncd && !dep_ncd) + { + new_where = sib_where; + ncd = sib_ncd; + } + else if (dep_ncd && !sib_ncd) + { + new_where = dep_where; + ncd = dep_ncd; + } + else + ncd = ncd_for_two_cands (sib_ncd, dep_ncd, sib_where, + dep_where, &new_where); + + /* If the candidate's increment doesn't match the one we're interested + in, then the result depends only on siblings and dependents. */ + cand_incr = cand_abs_increment (c); + + if (!double_int_equal_p (cand_incr, incr) || cand_already_replaced (c)) + { + *where = new_where; + return ncd; + } + + /* Otherwise, compare this candidate with the result from all siblings + and dependents. */ + this_where = c; + this_ncd = gimple_bb (c->cand_stmt); + ncd = ncd_for_two_cands (ncd, this_ncd, new_where, this_where, where); + + return ncd; +} + +/* Return TRUE if the increment indexed by INDEX is profitable to replace. */ + +static inline bool +profitable_increment_p (unsigned index) +{ + return (incr_vec[index].cost <= COST_NEUTRAL); +} + +/* For each profitable increment in the increment vector not equal to + 0 or 1 (or -1, for non-pointer arithmetic), find the nearest common + dominator of all statements in the candidate chain rooted at C + that require that increment, and insert an initializer + T_0 = stride * increment at that location. Record T_0 with the + increment record. */ + +static void +insert_initializers (slsr_cand_t c) +{ + unsigned i; + tree new_var = NULL_TREE; + + for (i = 0; i < incr_vec_len; i++) + { + basic_block bb; + slsr_cand_t where = NULL; + gimple init_stmt; + tree stride_type, new_name, incr_tree; + double_int incr = incr_vec[i].incr; + + if (!profitable_increment_p (i) + || double_int_one_p (incr) + || (double_int_minus_one_p (incr) + && gimple_assign_rhs_code (c->cand_stmt) != POINTER_PLUS_EXPR) + || double_int_zero_p (incr)) + continue; + + /* We may have already identified an existing initializer that + will suffice. */ + if (incr_vec[i].initializer) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fputs ("Using existing initializer: ", dump_file); + print_gimple_stmt (dump_file, + SSA_NAME_DEF_STMT (incr_vec[i].initializer), + 0, 0); + } + continue; + } + + /* Find the block that most closely dominates all candidates + with this increment. If there is at least one candidate in + that block, the earliest one will be returned in WHERE. */ + bb = nearest_common_dominator_for_cands (c, incr, &where); + + /* Create a new SSA name to hold the initializer's value. */ + stride_type = TREE_TYPE (c->stride); + lazy_create_slsr_reg (&new_var, stride_type); + new_name = make_ssa_name (new_var, NULL); + incr_vec[i].initializer = new_name; + + /* Create the initializer and insert it in the latest possible + dominating position. */ + incr_tree = double_int_to_tree (stride_type, incr); + init_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_name, + c->stride, incr_tree); + if (where) + { + gimple_stmt_iterator gsi = gsi_for_stmt (where->cand_stmt); + gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT); + gimple_set_location (init_stmt, gimple_location (where->cand_stmt)); + } + else + { + gimple_stmt_iterator gsi = gsi_last_bb (bb); + gimple basis_stmt = lookup_cand (c->basis)->cand_stmt; + + if (!gsi_end_p (gsi) && is_ctrl_stmt (gsi_stmt (gsi))) + gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT); + else + gsi_insert_after (&gsi, init_stmt, GSI_SAME_STMT); + + gimple_set_location (init_stmt, gimple_location (basis_stmt)); + } + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fputs ("Inserting initializer: ", dump_file); + print_gimple_stmt (dump_file, init_stmt, 0, 0); + } + } +} + +/* Create a NOP_EXPR that copies FROM_EXPR into a new SSA name of + type TO_TYPE, and insert it in front of the statement represented + by candidate C. Use *NEW_VAR to create the new SSA name. Return + the new SSA name. */ + +static tree +introduce_cast_before_cand (slsr_cand_t c, tree to_type, + tree from_expr, tree *new_var) +{ + tree cast_lhs; + gimple cast_stmt; + gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt); + + lazy_create_slsr_reg (new_var, to_type); + cast_lhs = make_ssa_name (*new_var, NULL); + cast_stmt = gimple_build_assign_with_ops (NOP_EXPR, cast_lhs, + from_expr, NULL_TREE); + gimple_set_location (cast_stmt, gimple_location (c->cand_stmt)); + gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fputs (" Inserting: ", dump_file); + print_gimple_stmt (dump_file, cast_stmt, 0, 0); + } + + return cast_lhs; +} + +/* Replace the RHS of the statement represented by candidate C with + NEW_CODE, NEW_RHS1, and NEW_RHS2, provided that to do so doesn't + leave C unchanged or just interchange its operands. The original + operation and operands are in OLD_CODE, OLD_RHS1, and OLD_RHS2. + If the replacement was made and we are doing a details dump, + return the revised statement, else NULL. */ + +static gimple +replace_rhs_if_not_dup (enum tree_code new_code, tree new_rhs1, tree new_rhs2, + enum tree_code old_code, tree old_rhs1, tree old_rhs2, + slsr_cand_t c) +{ + if (new_code != old_code + || ((!operand_equal_p (new_rhs1, old_rhs1, 0) + || !operand_equal_p (new_rhs2, old_rhs2, 0)) + && (!operand_equal_p (new_rhs1, old_rhs2, 0) + || !operand_equal_p (new_rhs2, old_rhs1, 0)))) + { + gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt); + gimple_assign_set_rhs_with_ops (&gsi, new_code, new_rhs1, new_rhs2); + update_stmt (gsi_stmt (gsi)); + + if (dump_file && (dump_flags & TDF_DETAILS)) + return gsi_stmt (gsi); + } + + else if (dump_file && (dump_flags & TDF_DETAILS)) + fputs (" (duplicate, not actually replacing)\n", dump_file); + + return NULL; +} + +/* Strength-reduce the statement represented by candidate C by replacing + it with an equivalent addition or subtraction. I is the index into + the increment vector identifying C's increment. NEW_VAR is used to + create a new SSA name if a cast needs to be introduced. BASIS_NAME + is the rhs1 to use in creating the add/subtract. */ + +static void +replace_one_candidate (slsr_cand_t c, unsigned i, tree *new_var, + tree basis_name) +{ + gimple stmt_to_print = NULL; + tree orig_rhs1, orig_rhs2; + tree rhs2; + enum tree_code orig_code, repl_code; + double_int cand_incr; + + orig_code = gimple_assign_rhs_code (c->cand_stmt); + orig_rhs1 = gimple_assign_rhs1 (c->cand_stmt); + orig_rhs2 = gimple_assign_rhs2 (c->cand_stmt); + cand_incr = cand_increment (c); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fputs ("Replacing: ", dump_file); + print_gimple_stmt (dump_file, c->cand_stmt, 0, 0); + stmt_to_print = c->cand_stmt; + } + + if (address_arithmetic_p) + repl_code = POINTER_PLUS_EXPR; + else + repl_code = PLUS_EXPR; + + /* If the increment has an initializer T_0, replace the candidate + statement with an add of the basis name and the initializer. */ + if (incr_vec[i].initializer) + { + tree init_type = TREE_TYPE (incr_vec[i].initializer); + tree orig_type = TREE_TYPE (orig_rhs2); + + if (types_compatible_p (orig_type, init_type)) + rhs2 = incr_vec[i].initializer; + else + rhs2 = introduce_cast_before_cand (c, orig_type, + incr_vec[i].initializer, + new_var); + + if (!double_int_equal_p (incr_vec[i].incr, cand_incr)) + { + gcc_assert (repl_code == PLUS_EXPR); + repl_code = MINUS_EXPR; + } + + stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2, + orig_code, orig_rhs1, orig_rhs2, + c); + } + + /* Otherwise, the increment is one of -1, 0, and 1. Replace + with a subtract of the stride from the basis name, a copy + from the basis name, or an add of the stride to the basis + name, respectively. It may be necessary to introduce a + cast (or reuse an existing cast). */ + else if (double_int_one_p (cand_incr)) + { + tree stride_type = TREE_TYPE (c->stride); + tree orig_type = TREE_TYPE (orig_rhs2); + + if (types_compatible_p (orig_type, stride_type)) + rhs2 = c->stride; + else + rhs2 = introduce_cast_before_cand (c, orig_type, c->stride, new_var); + + stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2, + orig_code, orig_rhs1, orig_rhs2, + c); + } + + else if (double_int_minus_one_p (cand_incr)) + { + tree stride_type = TREE_TYPE (c->stride); + tree orig_type = TREE_TYPE (orig_rhs2); + gcc_assert (repl_code != POINTER_PLUS_EXPR); + + if (types_compatible_p (orig_type, stride_type)) + rhs2 = c->stride; + else + rhs2 = introduce_cast_before_cand (c, orig_type, c->stride, new_var); + + if (orig_code != MINUS_EXPR + || !operand_equal_p (basis_name, orig_rhs1, 0) + || !operand_equal_p (rhs2, orig_rhs2, 0)) + { + gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt); + gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, basis_name, rhs2); + update_stmt (gsi_stmt (gsi)); + + if (dump_file && (dump_flags & TDF_DETAILS)) + stmt_to_print = gsi_stmt (gsi); + } + else if (dump_file && (dump_flags & TDF_DETAILS)) + fputs (" (duplicate, not actually replacing)\n", dump_file); + } + + else if (double_int_zero_p (cand_incr)) + { + tree lhs = gimple_assign_lhs (c->cand_stmt); + tree lhs_type = TREE_TYPE (lhs); + tree basis_type = TREE_TYPE (basis_name); + + if (types_compatible_p (lhs_type, basis_type)) + { + gimple copy_stmt = gimple_build_assign (lhs, basis_name); + gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt); + gimple_set_location (copy_stmt, gimple_location (c->cand_stmt)); + gsi_replace (&gsi, copy_stmt, false); + + if (dump_file && (dump_flags & TDF_DETAILS)) + stmt_to_print = copy_stmt; + } + else + { + gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt); + gimple cast_stmt = gimple_build_assign_with_ops (NOP_EXPR, lhs, + basis_name, + NULL_TREE); + gimple_set_location (cast_stmt, gimple_location (c->cand_stmt)); + gsi_replace (&gsi, cast_stmt, false); + + if (dump_file && (dump_flags & TDF_DETAILS)) + stmt_to_print = cast_stmt; + } + } + else + gcc_unreachable (); + + if (dump_file && (dump_flags & TDF_DETAILS) && stmt_to_print) + { + fputs ("With: ", dump_file); + print_gimple_stmt (dump_file, stmt_to_print, 0, 0); + fputs ("\n", dump_file); + } +} + +/* For each candidate in the tree rooted at C, replace it with + an increment if such has been shown to be profitable. */ + +static void +replace_profitable_candidates (slsr_cand_t c) +{ + if (!cand_already_replaced (c)) + { + double_int increment = cand_abs_increment (c); + tree new_var = NULL; + enum tree_code orig_code = gimple_assign_rhs_code (c->cand_stmt); + unsigned i; + + i = incr_vec_index (increment); + + /* Only process profitable increments. Nothing useful can be done + to a cast or copy. */ + if (profitable_increment_p (i) + && orig_code != MODIFY_EXPR + && orig_code != NOP_EXPR) + { + slsr_cand_t basis = lookup_cand (c->basis); + tree basis_name = gimple_assign_lhs (basis->cand_stmt); + replace_one_candidate (c, i, &new_var, basis_name); + } + } + + if (c->sibling) + replace_profitable_candidates (lookup_cand (c->sibling)); + + if (c->dependent) + replace_profitable_candidates (lookup_cand (c->dependent)); +} + /* Analyze costs of related candidates in the candidate vector, and make beneficial replacements. */ @@ -1670,11 +2521,46 @@ analyze_candidates_and_replace (void) else if (unconditional_cands_with_known_stride_p (c)) replace_dependents (first_dep); - /* TODO: When the stride is an SSA name, it may still be - profitable to replace some or all of the dependent - candidates, depending on whether the introduced increments - can be reused, or are less expensive to calculate than - the replaced statements. */ + /* When the stride is an SSA name, it may still be profitable + to replace some or all of the dependent candidates, depending + on whether the introduced increments can be reused, or are + less expensive to calculate than the replaced statements. */ + else + { + unsigned length; + enum machine_mode mode; + bool speed; + + /* Determine whether we'll be generating pointer arithmetic + when replacing candidates. */ + address_arithmetic_p = (c->kind == CAND_ADD + && POINTER_TYPE_P (TREE_TYPE (c->base_expr))); + + /* If all candidates have already been replaced under other + interpretations, nothing remains to be done. */ + length = count_candidates (c); + if (!length) + continue; + + /* Construct an array of increments for this candidate chain. */ + incr_vec = XNEWVEC (incr_info, length); + incr_vec_len = 0; + record_increments (c); + + /* Determine which increments are profitable to replace. */ + mode = TYPE_MODE (TREE_TYPE (gimple_assign_lhs (c->cand_stmt))); + speed = optimize_cands_for_speed_p (c); + analyze_increments (first_dep, mode, speed); + + /* Insert initializers of the form T_0 = stride * increment + for use in profitable replacements. */ + insert_initializers (first_dep); + dump_incr_vec (); + + /* Perform the replacements. */ + replace_profitable_candidates (first_dep); + free (incr_vec); + } /* TODO: When conditional increments occur so that a candidate is dependent upon a phi-basis, the cost of |