From 070bf9803878372e814ae4fe9b9bfe3d7fddf1ef Mon Sep 17 00:00:00 2001 From: wschmidt Date: Wed, 1 Aug 2012 13:02:38 +0000 Subject: gcc: PR tree-optimization/46556 * gimple-ssa-strength-reduction.c (enum cand_kind): Add CAND_REF. (base_cand_map): Change to hash table. (base_cand_hash): New function. (base_cand_free): Likewise. (base_cand_eq): Likewise. (lookup_cand): Change base_cand_map to hash table. (find_basis_for_candidate): Likewise. (base_cand_from_table): Exclude CAND_REF. (restructure_reference): New function. (slsr_process_ref): Likewise. (find_candidates_in_block): Call slsr_process_ref. (dump_candidate): Handle CAND_REF. (base_cand_dump_callback): New function. (dump_cand_chains): Change base_cand_map to hash table. (replace_ref): New function. (replace_refs): Likewise. (analyze_candidates_and_replace): Call replace_refs. (execute_strength_reduction): Change base_cand_map to hash table. gcc/testsuite: PR tree-optimization/46556 * testsuite/gcc.dg/tree-ssa/slsr-27.c: New. * testsuite/gcc.dg/tree-ssa/slsr-28.c: New. * testsuite/gcc.dg/tree-ssa/slsr-29.c: New. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@190037 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/gimple-ssa-strength-reduction.c | 346 +++++++++++++++++++++++++++++++----- 1 file changed, 299 insertions(+), 47 deletions(-) (limited to 'gcc/gimple-ssa-strength-reduction.c') diff --git a/gcc/gimple-ssa-strength-reduction.c b/gcc/gimple-ssa-strength-reduction.c index 66ddfd7bde4..5d79521722b 100644 --- a/gcc/gimple-ssa-strength-reduction.c +++ b/gcc/gimple-ssa-strength-reduction.c @@ -32,7 +32,7 @@ along with GCC; see the file COPYING3. If not see 2) Explicit multiplies, unknown constant multipliers, no conditional increments. (data gathering complete, replacements pending) - 3) Implicit multiplies in addressing expressions. (pending) + 3) Implicit multiplies in addressing expressions. (complete) 4) Explicit multiplies, conditional increments. (pending) It would also be possible to apply strength reduction to divisions @@ -106,7 +106,47 @@ along with GCC; see the file COPYING3. If not see as a strength reduction opportunity, even though this S1 would also be replaceable by the S1' above. This can be added if it - comes up in practice. */ + comes up in practice. + + Strength reduction in addressing + -------------------------------- + There is another kind of candidate known as CAND_REF. A CAND_REF + describes a statement containing a memory reference having + complex addressing that might benefit from strength reduction. + Specifically, we are interested in references for which + get_inner_reference returns a base address, offset, and bitpos as + follows: + + base: MEM_REF (T1, C1) + offset: MULT_EXPR (PLUS_EXPR (T2, C2), C3) + bitpos: C4 * BITS_PER_UNIT + + Here T1 and T2 are arbitrary trees, and C1, C2, C3, C4 are + arbitrary integer constants. Note that C2 may be zero, in which + case the offset will be MULT_EXPR (T2, C3). + + When this pattern is recognized, the original memory reference + can be replaced with: + + MEM_REF (POINTER_PLUS_EXPR (T1, MULT_EXPR (T2, C3)), + C1 + (C2 * C3) + C4) + + which distributes the multiply to allow constant folding. When + two or more addressing expressions can be represented by MEM_REFs + of this form, differing only in the constants C1, C2, and C4, + making this substitution produces more efficient addressing during + the RTL phases. When there are not at least two expressions with + the same values of T1, T2, and C3, there is nothing to be gained + by the replacement. + + Strength reduction of CAND_REFs uses the same infrastructure as + that used by CAND_MULTs and CAND_ADDs. We record T1 in the base (B) + field, MULT_EXPR (T2, C3) in the stride (S) field, and + C1 + (C2 * C3) + C4 in the index (i) field. A basis for a CAND_REF + is thus another CAND_REF with the same B and S values. When at + least two CAND_REFs are chained together using the basis relation, + each of them is replaced as above, resulting in improved code + generation for addressing. */ /* Index into the candidate vector, offset by 1. VECs are zero-based, @@ -117,7 +157,8 @@ typedef unsigned cand_idx; enum cand_kind { CAND_MULT, - CAND_ADD + CAND_ADD, + CAND_REF }; struct slsr_cand_d @@ -136,7 +177,9 @@ struct slsr_cand_d /* The type of the candidate. This is normally the type of base_name, but casts may have occurred when combining feeding instructions. - A candidate can only be a basis for candidates of the same final type. */ + A candidate can only be a basis for candidates of the same final type. + (For CAND_REFs, this is the type to be used for operand 1 of the + replacement MEM_REF.) */ tree cand_type; /* The kind of candidate (CAND_MULT, etc.). */ @@ -210,8 +253,8 @@ static struct pointer_map_t *stmt_cand_map; /* Obstack for candidates. */ static struct obstack cand_obstack; -/* Array mapping from base SSA names to chains of candidates. */ -static cand_chain_t *base_cand_map; +/* Hash table embodying a mapping from base names to chains of candidates. */ +static htab_t base_cand_map; /* Obstack for candidate chains. */ static struct obstack chain_obstack; @@ -224,6 +267,33 @@ lookup_cand (cand_idx idx) return VEC_index (slsr_cand_t, cand_vec, idx - 1); } +/* Callback to produce a hash value for a candidate chain header. */ + +static hashval_t +base_cand_hash (const void *p) +{ + tree base_expr = ((const_cand_chain_t) p)->base_name; + return iterative_hash_expr (base_expr, 0); +} + +/* Callback when an element is removed from the hash table. + We never remove entries until the entire table is released. */ + +static void +base_cand_free (void *p ATTRIBUTE_UNUSED) +{ +} + +/* Callback to return true if two candidate chain headers are equal. */ + +static int +base_cand_eq (const void *p1, const void *p2) +{ + const_cand_chain_t const chain1 = (const_cand_chain_t) p1; + const_cand_chain_t const chain2 = (const_cand_chain_t) p2; + return operand_equal_p (chain1->base_name, chain2->base_name, 0); +} + /* Use the base name from candidate C to look for possible candidates that can serve as a basis for C. Each potential basis must also appear in a block that dominates the candidate statement and have @@ -234,11 +304,12 @@ lookup_cand (cand_idx idx) static int find_basis_for_candidate (slsr_cand_t c) { + cand_chain mapping_key; cand_chain_t chain; slsr_cand_t basis = NULL; - gcc_assert (TREE_CODE (c->base_name) == SSA_NAME); - chain = base_cand_map[SSA_NAME_VERSION (c->base_name)]; + mapping_key.base_name = c->base_name; + chain = (cand_chain_t) htab_find (base_cand_map, &mapping_key); for (; chain; chain = chain->next) { @@ -272,23 +343,23 @@ find_basis_for_candidate (slsr_cand_t c) static void record_potential_basis (slsr_cand_t c) { - cand_chain_t node, head; - int index; + cand_chain_t node; + void **slot; node = (cand_chain_t) obstack_alloc (&chain_obstack, sizeof (cand_chain)); node->base_name = c->base_name; node->cand = c; node->next = NULL; - index = SSA_NAME_VERSION (c->base_name); - head = base_cand_map[index]; + slot = htab_find_slot (base_cand_map, node, INSERT); - if (head) + if (*slot) { + cand_chain_t head = (cand_chain_t) (*slot); node->next = head->next; head->next = node; } else - base_cand_map[index] = node; + *slot = node; } /* Allocate storage for a new candidate and initialize its fields. @@ -382,10 +453,11 @@ base_cand_from_table (tree base_in) return (slsr_cand_t) NULL; result = (slsr_cand_t *) pointer_map_contains (stmt_cand_map, def); - if (!result) - return (slsr_cand_t) NULL; + + if (result && (*result)->kind != CAND_REF) + return *result; - return *result; + return (slsr_cand_t) NULL; } /* Add an entry to the statement-to-candidate mapping. */ @@ -398,6 +470,127 @@ add_cand_for_stmt (gimple gs, slsr_cand_t c) *slot = c; } +/* Look for the following pattern: + + *PBASE: MEM_REF (T1, C1) + + *POFFSET: MULT_EXPR (T2, C3) [C2 is zero] + or + MULT_EXPR (PLUS_EXPR (T2, C2), C3) + or + MULT_EXPR (MINUS_EXPR (T2, -C2), C3) + + *PINDEX: C4 * BITS_PER_UNIT + + If not present, leave the input values unchanged and return FALSE. + Otherwise, modify the input values as follows and return TRUE: + + *PBASE: T1 + *POFFSET: MULT_EXPR (T2, C3) + *PINDEX: C1 + (C2 * C3) + C4 */ + +static bool +restructure_reference (tree *pbase, tree *poffset, double_int *pindex, + tree *ptype) +{ + tree base = *pbase, offset = *poffset; + double_int index = *pindex; + double_int bpu = uhwi_to_double_int (BITS_PER_UNIT); + tree mult_op0, mult_op1, t1, t2, type; + double_int c1, c2, c3, c4; + + if (!base + || !offset + || TREE_CODE (base) != MEM_REF + || TREE_CODE (offset) != MULT_EXPR + || TREE_CODE (TREE_OPERAND (offset, 1)) != INTEGER_CST + || !double_int_zero_p (double_int_umod (index, bpu, FLOOR_MOD_EXPR))) + return false; + + t1 = TREE_OPERAND (base, 0); + c1 = mem_ref_offset (base); + type = TREE_TYPE (TREE_OPERAND (base, 1)); + + mult_op0 = TREE_OPERAND (offset, 0); + mult_op1 = TREE_OPERAND (offset, 1); + + c3 = tree_to_double_int (mult_op1); + + if (TREE_CODE (mult_op0) == PLUS_EXPR) + + if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST) + { + t2 = TREE_OPERAND (mult_op0, 0); + c2 = tree_to_double_int (TREE_OPERAND (mult_op0, 1)); + } + else + return false; + + else if (TREE_CODE (mult_op0) == MINUS_EXPR) + + if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST) + { + t2 = TREE_OPERAND (mult_op0, 0); + c2 = double_int_neg (tree_to_double_int (TREE_OPERAND (mult_op0, 1))); + } + else + return false; + + else + { + t2 = mult_op0; + c2 = double_int_zero; + } + + c4 = double_int_udiv (index, bpu, FLOOR_DIV_EXPR); + + *pbase = t1; + *poffset = fold_build2 (MULT_EXPR, sizetype, t2, + double_int_to_tree (sizetype, c3)); + *pindex = double_int_add (double_int_add (c1, double_int_mul (c2, c3)), c4); + *ptype = type; + + return true; +} + +/* Given GS which contains a data reference, create a CAND_REF entry in + the candidate table and attempt to find a basis. */ + +static void +slsr_process_ref (gimple gs) +{ + tree ref_expr, base, offset, type; + HOST_WIDE_INT bitsize, bitpos; + enum machine_mode mode; + int unsignedp, volatilep; + double_int index; + slsr_cand_t c; + + if (gimple_vdef (gs)) + ref_expr = gimple_assign_lhs (gs); + else + ref_expr = gimple_assign_rhs1 (gs); + + if (!handled_component_p (ref_expr) + || TREE_CODE (ref_expr) == BIT_FIELD_REF + || (TREE_CODE (ref_expr) == COMPONENT_REF + && DECL_BIT_FIELD (TREE_OPERAND (ref_expr, 1)))) + return; + + base = get_inner_reference (ref_expr, &bitsize, &bitpos, &offset, &mode, + &unsignedp, &volatilep, false); + index = uhwi_to_double_int (bitpos); + + if (!restructure_reference (&base, &offset, &index, &type)) + return; + + c = alloc_cand_and_find_basis (CAND_REF, gs, base, index, offset, + type, 0); + + /* Add the candidate to the statement-candidate mapping. */ + add_cand_for_stmt (gs, c); +} + /* Create a candidate entry for a statement GS, where GS multiplies two SSA names BASE_IN and STRIDE_IN. Propagate any known information about the two SSA names into the new candidate. Return the new @@ -1048,8 +1241,12 @@ find_candidates_in_block (struct dom_walk_data *walk_data ATTRIBUTE_UNUSED, { gimple gs = gsi_stmt (gsi); - if (is_gimple_assign (gs) - && SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (gs))))) + if (gimple_vuse (gs) && gimple_assign_single_p (gs)) + slsr_process_ref (gs); + + else if (is_gimple_assign (gs) + && SCALAR_INT_MODE_P + (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (gs))))) { tree rhs1 = NULL_TREE, rhs2 = NULL_TREE; @@ -1143,6 +1340,15 @@ dump_candidate (slsr_cand_t c) print_generic_expr (dump_file, c->stride, 0); fputs (") : ", dump_file); break; + case CAND_REF: + fputs (" REF : ", dump_file); + print_generic_expr (dump_file, c->base_name, 0); + fputs (" + (", dump_file); + print_generic_expr (dump_file, c->stride, 0); + fputs (") + ", dump_file); + dump_double_int (dump_file, c->index, false); + fputs (" : ", dump_file); + break; default: gcc_unreachable (); } @@ -1173,36 +1379,33 @@ dump_cand_vec (void) dump_candidate (c); } -/* Dump the candidate chains. */ +/* Callback used to dump the candidate chains hash table. */ -static void -dump_cand_chains (void) +static int +base_cand_dump_callback (void **slot, void *ignored ATTRIBUTE_UNUSED) { - unsigned i; + const_cand_chain_t chain = *((const_cand_chain_t *) slot); + cand_chain_t p; - fprintf (dump_file, "\nStrength reduction candidate chains:\n\n"); + print_generic_expr (dump_file, chain->base_name, 0); + fprintf (dump_file, " -> %d", chain->cand->cand_num); - for (i = 0; i < num_ssa_names; i++) - { - const_cand_chain_t chain = base_cand_map[i]; - - if (chain) - { - cand_chain_t p; + for (p = chain->next; p; p = p->next) + fprintf (dump_file, " -> %d", p->cand->cand_num); - print_generic_expr (dump_file, chain->base_name, 0); - fprintf (dump_file, " -> %d", chain->cand->cand_num); - - for (p = chain->next; p; p = p->next) - fprintf (dump_file, " -> %d", p->cand->cand_num); + fputs ("\n", dump_file); + return 1; +} - fputs ("\n", dump_file); - } - } +/* Dump the candidate chains. */ +static void +dump_cand_chains (void) +{ + fprintf (dump_file, "\nStrength reduction candidate chains:\n\n"); + htab_traverse_noresize (base_cand_map, base_cand_dump_callback, NULL); fputs ("\n", dump_file); } - /* Recursive helper for unconditional_cands_with_known_stride_p. Returns TRUE iff C, its siblings, and its dependents are all @@ -1238,6 +1441,53 @@ unconditional_cands_with_known_stride_p (slsr_cand_t root) return unconditional_cands (lookup_cand (root->dependent)); } +/* Replace *EXPR in candidate C with an equivalent strength-reduced + data reference. */ + +static void +replace_ref (tree *expr, slsr_cand_t c) +{ + tree add_expr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (c->base_name), + c->base_name, c->stride); + tree mem_ref = fold_build2 (MEM_REF, TREE_TYPE (*expr), add_expr, + double_int_to_tree (c->cand_type, c->index)); + + /* Gimplify the base addressing expression for the new MEM_REF tree. */ + gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt); + TREE_OPERAND (mem_ref, 0) + = force_gimple_operand_gsi (&gsi, TREE_OPERAND (mem_ref, 0), + /*simple_p=*/true, NULL, + /*before=*/true, GSI_SAME_STMT); + copy_ref_info (mem_ref, *expr); + *expr = mem_ref; + update_stmt (c->cand_stmt); +} + +/* Replace CAND_REF candidate C, each sibling of candidate C, and each + dependent of candidate C with an equivalent strength-reduced data + reference. */ + +static void +replace_refs (slsr_cand_t c) +{ + if (gimple_vdef (c->cand_stmt)) + { + tree *lhs = gimple_assign_lhs_ptr (c->cand_stmt); + replace_ref (lhs, c); + } + else + { + tree *rhs = gimple_assign_rhs1_ptr (c->cand_stmt); + replace_ref (rhs, c); + } + + if (c->sibling) + replace_refs (lookup_cand (c->sibling)); + + if (c->dependent) + replace_refs (lookup_cand (c->dependent)); +} + /* Calculate the increment required for candidate C relative to its basis. */ @@ -1405,13 +1655,18 @@ analyze_candidates_and_replace (void) first_dep = lookup_cand (c->dependent); + /* If this is a chain of CAND_REFs, unconditionally replace + each of them with a strength-reduced data reference. */ + if (c->kind == CAND_REF) + replace_refs (c); + /* If the common stride of all related candidates is a known constant, and none of these has a phi-dependence, then all replacements are considered profitable. Each replaces a multiply by a single add, with the possibility that a feeding add also goes dead as a result. */ - if (unconditional_cands_with_known_stride_p (c)) + else if (unconditional_cands_with_known_stride_p (c)) replace_dependents (first_dep); /* TODO: When the stride is an SSA name, it may still be @@ -1420,9 +1675,6 @@ analyze_candidates_and_replace (void) can be reused, or are less expensive to calculate than the replaced statements. */ - /* TODO: Strength-reduce data references with implicit - multiplication in their addressing expressions. */ - /* TODO: When conditional increments occur so that a candidate is dependent upon a phi-basis, the cost of introducing a temporary must be accounted for. */ @@ -1447,8 +1699,8 @@ execute_strength_reduction (void) gcc_obstack_init (&chain_obstack); /* Allocate the mapping from base names to candidate chains. */ - base_cand_map = XNEWVEC (cand_chain_t, num_ssa_names); - memset (base_cand_map, 0, num_ssa_names * sizeof (cand_chain_t)); + base_cand_map = htab_create (500, base_cand_hash, + base_cand_eq, base_cand_free); /* Initialize the loop optimizer. We need to detect flow across back edges, and this gives us dominator information as well. */ @@ -1479,7 +1731,7 @@ execute_strength_reduction (void) /* Free resources. */ fini_walk_dominator_tree (&walk_data); loop_optimizer_finalize (); - free (base_cand_map); + htab_delete (base_cand_map); obstack_free (&chain_obstack, NULL); pointer_map_destroy (stmt_cand_map); VEC_free (slsr_cand_t, heap, cand_vec); -- cgit v1.2.1