diff options
Diffstat (limited to 'gcc/tree-predcom.c')
-rw-r--r-- | gcc/tree-predcom.c | 683 |
1 files changed, 609 insertions, 74 deletions
diff --git a/gcc/tree-predcom.c b/gcc/tree-predcom.c index 23e7870dd2d..a4011bf4698 100644 --- a/gcc/tree-predcom.c +++ b/gcc/tree-predcom.c @@ -156,29 +156,45 @@ along with GCC; see the file COPYING3. If not see b[10] = x; } - TODO -- stores killing other stores can be taken into account, e.g., - for (i = 0; i < n; i++) - { - a[i] = 1; - a[i+2] = 2; - } + Apart from predictive commoning on Load-Load and Store-Load chains, we + also support Store-Store chains -- stores killed by other store can be + eliminated. Given below example: + + for (i = 0; i < n; i++) + { + a[i] = 1; + a[i+2] = 2; + } - can be replaced with + It can be replaced with: - t0 = a[0]; - t1 = a[1]; - for (i = 0; i < n; i++) - { - a[i] = 1; - t2 = 2; - t0 = t1; - t1 = t2; - } - a[n] = t0; - a[n+1] = t1; + t0 = a[0]; + t1 = a[1]; + for (i = 0; i < n; i++) + { + a[i] = 1; + t2 = 2; + t0 = t1; + t1 = t2; + } + a[n] = t0; + a[n+1] = t1; + + If the loop runs more than 1 iterations, it can be further simplified into: + + for (i = 0; i < n; i++) + { + a[i] = 1; + } + a[n] = 2; + a[n+1] = 2; - The interesting part is that this would generalize store motion; still, since - sm is performed elsewhere, it does not seem that important. + The interesting part is this can be viewed either as general store motion + or general dead store elimination in either intra/inter-iterations way. + + TODO: For now, we don't support store-store chains in multi-exit loops. We + force to not unroll in case of store-store chain even if other chains might + ask for unroll. Predictive commoning can be generalized for arbitrary computations (not just memory loads), and also nontrivial transfer functions (e.g., replacing @@ -265,6 +281,9 @@ enum chain_type /* Root of the chain is store, the rest are loads. */ CT_STORE_LOAD, + /* There are only stores in the chain. */ + CT_STORE_STORE, + /* A combination of two chains. */ CT_COMBINATION }; @@ -294,6 +313,15 @@ typedef struct chain /* Initializers for the variables. */ vec<tree> inits; + /* Finalizers for the eliminated stores. */ + vec<tree> finis; + + /* gimple stmts intializing the initial variables of the chain. */ + gimple_seq init_seq; + + /* gimple stmts finalizing the eliminated stores of the chain. */ + gimple_seq fini_seq; + /* True if there is a use of a variable with the maximal distance that comes after the root in the loop. */ unsigned has_max_use_after : 1; @@ -303,6 +331,10 @@ typedef struct chain /* True if this chain was combined together with some other chain. */ unsigned combined : 1; + + /* True if this is store elimination chain and eliminated stores store + loop invariant value into memory. */ + unsigned inv_store_elimination : 1; } *chain_p; @@ -331,6 +363,10 @@ struct component /* What we know about the step of the references in the component. */ enum ref_step_type comp_step; + /* True if all references in component are stores and we try to do + intra/inter loop iteration dead store elimination. */ + bool eliminate_store_p; + /* Next component in the list. */ struct component *next; }; @@ -401,6 +437,10 @@ dump_chain (FILE *file, chain_p chain) chain_type = "Store-loads"; break; + case CT_STORE_STORE: + chain_type = "Store-stores"; + break; + case CT_COMBINATION: chain_type = "Combination"; break; @@ -511,6 +551,12 @@ release_chain (chain_p chain) chain->refs.release (); chain->vars.release (); chain->inits.release (); + if (chain->init_seq) + gimple_seq_discard (chain->init_seq); + + chain->finis.release (); + if (chain->fini_seq) + gimple_seq_discard (chain->fini_seq); free (chain); } @@ -714,6 +760,8 @@ split_data_refs_to_components (struct loop *loop, struct data_dependence_relation *ddr; struct component *comp_list = NULL, *comp; dref dataref; + /* Don't do store elimination if loop has multiple exit edges. */ + bool eliminate_store_p = single_exit (loop) != NULL; basic_block last_always_executed = last_always_executed_block (loop); FOR_EACH_VEC_ELT (datarefs, i, dr) @@ -756,6 +804,14 @@ split_data_refs_to_components (struct loop *loop, dra = DDR_A (ddr); drb = DDR_B (ddr); + + /* Don't do store elimination if there is any unknown dependence for + any store data reference. */ + if ((DR_IS_WRITE (dra) || DR_IS_WRITE (drb)) + && (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know + || DDR_NUM_DIST_VECTS (ddr) == 0)) + eliminate_store_p = false; + ia = component_of (comp_father, (unsigned) (size_t) dra->aux); ib = component_of (comp_father, (unsigned) (size_t) drb->aux); if (ia == ib) @@ -795,10 +851,28 @@ split_data_refs_to_components (struct loop *loop, continue; } } + else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb) + && ia != bad && ib != bad + && !determine_offset (dra, drb, &dummy_off)) + { + merge_comps (comp_father, comp_size, bad, ia); + merge_comps (comp_father, comp_size, bad, ib); + continue; + } merge_comps (comp_father, comp_size, ia, ib); } + if (eliminate_store_p) + { + tree niters = number_of_latch_executions (loop); + + /* Don't do store elimination if niters info is unknown because stores + in the last iteration can't be eliminated and we need to recover it + after loop. */ + eliminate_store_p = (niters != NULL_TREE && niters != chrec_dont_know); + } + comps = XCNEWVEC (struct component *, n); bad = component_of (comp_father, n); FOR_EACH_VEC_ELT (datarefs, i, dr) @@ -813,6 +887,7 @@ split_data_refs_to_components (struct loop *loop, { comp = XCNEW (struct component); comp->refs.create (comp_size[ca]); + comp->eliminate_store_p = eliminate_store_p; comps[ca] = comp; } @@ -827,6 +902,8 @@ split_data_refs_to_components (struct loop *loop, gimple_bb (dataref->stmt)); dataref->pos = comp->refs.length (); comp->refs.quick_push (dataref); + if (DR_IS_READ (dr)) + comp->eliminate_store_p = false; } for (i = 0; i < n; i++) @@ -951,6 +1028,21 @@ get_chain_root (chain_p chain) return chain->refs[0]; } +/* Given CHAIN, returns the last ref at DISTANCE, or NULL if it doesn't + exist. */ + +static inline dref +get_chain_last_ref_at (chain_p chain, unsigned distance) +{ + unsigned i; + + for (i = chain->refs.length (); i > 0; i--) + if (distance == chain->refs[i - 1]->distance) + break; + + return (i > 0) ? chain->refs[i - 1] : NULL; +} + /* Adds REF to the chain CHAIN. */ static void @@ -1003,23 +1095,27 @@ make_invariant_chain (struct component *comp) chain->all_always_accessed &= ref->always_accessed; } + chain->inits = vNULL; + chain->finis = vNULL; + return chain; } -/* Make a new chain rooted at REF. */ +/* Make a new chain of type TYPE rooted at REF. */ static chain_p -make_rooted_chain (dref ref) +make_rooted_chain (dref ref, enum chain_type type) { chain_p chain = XCNEW (struct chain); - chain->type = DR_IS_READ (ref->ref) ? CT_LOAD : CT_STORE_LOAD; - + chain->type = type; chain->refs.safe_push (ref); chain->all_always_accessed = ref->always_accessed; - ref->distance = 0; + chain->inits = vNULL; + chain->finis = vNULL; + return chain; } @@ -1149,7 +1245,7 @@ find_looparound_phi (struct loop *loop, dref ref, dref root) memset (&init_dr, 0, sizeof (struct data_reference)); DR_REF (&init_dr) = init_ref; DR_STMT (&init_dr) = phi; - if (!dr_analyze_innermost (&init_dr, loop)) + if (!dr_analyze_innermost (&DR_INNERMOST (&init_dr), init_ref, loop)) return NULL; if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref)) @@ -1194,6 +1290,9 @@ add_looparound_copies (struct loop *loop, chain_p chain) dref ref, root = get_chain_root (chain); gphi *phi; + if (chain->type == CT_STORE_STORE) + return; + FOR_EACH_VEC_ELT (chain->refs, i, ref) { phi = find_looparound_phi (loop, ref, root); @@ -1218,6 +1317,7 @@ determine_roots_comp (struct loop *loop, dref a; chain_p chain = NULL; widest_int last_ofs = 0; + enum chain_type type; /* Invariants are handled specially. */ if (comp->comp_step == RS_INVARIANT) @@ -1227,11 +1327,15 @@ determine_roots_comp (struct loop *loop, return; } - comp->refs.qsort (order_drefs); + /* Trivial component. */ + if (comp->refs.length () <= 1) + return; + comp->refs.qsort (order_drefs); FOR_EACH_VEC_ELT (comp->refs, i, a) { - if (!chain || DR_IS_WRITE (a->ref) + if (!chain + || (!comp->eliminate_store_p && DR_IS_WRITE (a->ref)) || wi::leu_p (MAX_DISTANCE, a->offset - last_ofs)) { if (nontrivial_chain_p (chain)) @@ -1241,7 +1345,13 @@ determine_roots_comp (struct loop *loop, } else release_chain (chain); - chain = make_rooted_chain (a); + + if (DR_IS_READ (a->ref)) + type = CT_LOAD; + else + type = comp->eliminate_store_p ? CT_STORE_STORE : CT_STORE_LOAD; + + chain = make_rooted_chain (a, type); last_ofs = a->offset; continue; } @@ -1362,11 +1472,12 @@ replace_ref_with (gimple *stmt, tree new_tree, bool set, bool in_lhs) gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT); } -/* Returns a memory reference to DR in the ITER-th iteration of - the loop it was analyzed in. Append init stmts to STMTS. */ +/* Returns a memory reference to DR in the (NITERS + ITER)-th iteration + of the loop it was analyzed in. Append init stmts to STMTS. */ static tree -ref_at_iteration (data_reference_p dr, int iter, gimple_seq *stmts) +ref_at_iteration (data_reference_p dr, int iter, + gimple_seq *stmts, tree niters = NULL_TREE) { tree off = DR_OFFSET (dr); tree coff = DR_INIT (dr); @@ -1375,14 +1486,27 @@ ref_at_iteration (data_reference_p dr, int iter, gimple_seq *stmts) tree ref_type = NULL_TREE; tree ref_op1 = NULL_TREE; tree ref_op2 = NULL_TREE; - if (iter == 0) - ; - else if (TREE_CODE (DR_STEP (dr)) == INTEGER_CST) - coff = size_binop (PLUS_EXPR, coff, - size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter))); - else - off = size_binop (PLUS_EXPR, off, - size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter))); + tree new_offset; + + if (iter != 0) + { + new_offset = size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter)); + if (TREE_CODE (new_offset) == INTEGER_CST) + coff = size_binop (PLUS_EXPR, coff, new_offset); + else + off = size_binop (PLUS_EXPR, off, new_offset); + } + + if (niters != NULL_TREE) + { + niters = fold_convert (ssizetype, niters); + new_offset = size_binop (MULT_EXPR, DR_STEP (dr), niters); + if (TREE_CODE (niters) == INTEGER_CST) + coff = size_binop (PLUS_EXPR, coff, new_offset); + else + off = size_binop (PLUS_EXPR, off, new_offset); + } + /* While data-ref analysis punts on bit offsets it still handles bitfield accesses at byte boundaries. Cope with that. Note that if the bitfield object also starts at a byte-boundary we can simply @@ -1514,21 +1638,204 @@ initialize_root_vars (struct loop *loop, chain_p chain, bitmap tmp_vars) } } -/* Create the variables and initialization statement for root of chain - CHAIN. Uids of the newly created temporary variables are marked - in TMP_VARS. */ +/* For inter-iteration store elimination CHAIN in LOOP, returns true if + all stores to be eliminated store loop invariant values into memory. + In this case, we can use these invariant values directly after LOOP. */ + +static bool +is_inv_store_elimination_chain (struct loop *loop, chain_p chain) +{ + if (chain->length == 0 || chain->type != CT_STORE_STORE) + return false; + + gcc_assert (!chain->has_max_use_after); + + /* If loop iterates for unknown times or fewer times than chain->lenght, + we still need to setup root variable and propagate it with PHI node. */ + tree niters = number_of_latch_executions (loop); + if (TREE_CODE (niters) != INTEGER_CST || wi::leu_p (niters, chain->length)) + return false; + + /* Check stores in chain for elimination if they only store loop invariant + values. */ + for (unsigned i = 0; i < chain->length; i++) + { + dref a = get_chain_last_ref_at (chain, i); + if (a == NULL) + continue; + + gimple *def_stmt, *stmt = a->stmt; + if (!gimple_assign_single_p (stmt)) + return false; + + tree val = gimple_assign_rhs1 (stmt); + if (TREE_CLOBBER_P (val)) + return false; + + if (CONSTANT_CLASS_P (val)) + continue; + + if (TREE_CODE (val) != SSA_NAME) + return false; + + def_stmt = SSA_NAME_DEF_STMT (val); + if (gimple_nop_p (def_stmt)) + continue; + + if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))) + return false; + } + return true; +} + +/* Creates root variables for store elimination CHAIN in which stores for + elimination only store loop invariant values. In this case, we neither + need to load root variables before loop nor propagate it with PHI nodes. */ static void -initialize_root (struct loop *loop, chain_p chain, bitmap tmp_vars) +initialize_root_vars_store_elim_1 (chain_p chain) { - dref root = get_chain_root (chain); - bool in_lhs = (chain->type == CT_STORE_LOAD - || chain->type == CT_COMBINATION); + tree var; + unsigned i, n = chain->length; + + chain->vars.create (n); + chain->vars.safe_grow_cleared (n); + + /* Initialize root value for eliminated stores at each distance. */ + for (i = 0; i < n; i++) + { + dref a = get_chain_last_ref_at (chain, i); + if (a == NULL) + continue; + + var = gimple_assign_rhs1 (a->stmt); + chain->vars[a->distance] = var; + } + + /* We don't propagate values with PHI nodes, so manually propagate value + to bubble positions. */ + var = chain->vars[0]; + for (i = 1; i < n; i++) + { + if (chain->vars[i] != NULL_TREE) + { + var = chain->vars[i]; + continue; + } + chain->vars[i] = var; + } + + /* Revert the vector. */ + for (i = 0; i < n / 2; i++) + std::swap (chain->vars[i], chain->vars[n - i - 1]); +} + +/* Creates root variables for store elimination CHAIN in which stores for + elimination store loop variant values. In this case, we may need to + load root variables before LOOP and propagate it with PHI nodes. Uids + of the newly created root variables are marked in TMP_VARS. */ + +static void +initialize_root_vars_store_elim_2 (struct loop *loop, + chain_p chain, bitmap tmp_vars) +{ + unsigned i, n = chain->length; + tree ref, init, var, next, val, phi_result; + gimple *stmt; + gimple_seq stmts; + + chain->vars.create (n); + + ref = DR_REF (get_chain_root (chain)->ref); + for (i = 0; i < n; i++) + { + var = predcom_tmp_var (ref, i, tmp_vars); + chain->vars.quick_push (var); + } + + FOR_EACH_VEC_ELT (chain->vars, i, var) + chain->vars[i] = make_ssa_name (var); + + /* Root values are either rhs operand of stores to be eliminated, or + loaded from memory before loop. */ + auto_vec<tree> vtemps; + vtemps.safe_grow_cleared (n); + for (i = 0; i < n; i++) + { + init = get_init_expr (chain, i); + if (init == NULL_TREE) + { + /* Root value is rhs operand of the store to be eliminated if + it isn't loaded from memory before loop. */ + dref a = get_chain_last_ref_at (chain, i); + val = gimple_assign_rhs1 (a->stmt); + if (TREE_CLOBBER_P (val)) + val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (var)); + + vtemps[n - i - 1] = val; + } + else + { + edge latch = loop_latch_edge (loop); + edge entry = loop_preheader_edge (loop); + + /* Root value is loaded from memory before loop, we also need + to add PHI nodes to propagate the value across iterations. */ + init = force_gimple_operand (init, &stmts, true, NULL_TREE); + if (stmts) + gsi_insert_seq_on_edge_immediate (entry, stmts); + + next = chain->vars[n - i]; + phi_result = copy_ssa_name (next); + gphi *phi = create_phi_node (phi_result, loop->header); + add_phi_arg (phi, init, entry, UNKNOWN_LOCATION); + add_phi_arg (phi, next, latch, UNKNOWN_LOCATION); + vtemps[n - i - 1] = phi_result; + } + } + + /* Find the insertion position. */ + dref last = get_chain_root (chain); + for (i = 0; i < chain->refs.length (); i++) + { + if (chain->refs[i]->pos > last->pos) + last = chain->refs[i]; + } + + gimple_stmt_iterator gsi = gsi_for_stmt (last->stmt); - initialize_root_vars (loop, chain, tmp_vars); - replace_ref_with (root->stmt, - chain->vars[chain->length], - true, in_lhs); + /* Insert statements copying root value to root variable. */ + for (i = 0; i < n; i++) + { + var = chain->vars[i]; + val = vtemps[i]; + stmt = gimple_build_assign (var, val); + gsi_insert_after (&gsi, stmt, GSI_NEW_STMT); + } +} + +/* Generates stores for CHAIN's eliminated stores in LOOP's last + (CHAIN->length - 1) iterations. */ + +static void +finalize_eliminated_stores (struct loop *loop, chain_p chain) +{ + unsigned i, n = chain->length; + + for (i = 0; i < n; i++) + { + tree var = chain->vars[i]; + tree fini = chain->finis[n - i - 1]; + gimple *stmt = gimple_build_assign (fini, var); + + gimple_seq_add_stmt_without_update (&chain->fini_seq, stmt); + } + + if (chain->fini_seq) + { + gsi_insert_seq_on_edge_immediate (single_exit (loop), chain->fini_seq); + chain->fini_seq = NULL; + } } /* Initializes a variable for load motion for ROOT and prepares phi nodes and @@ -1699,10 +2006,17 @@ remove_stmt (gimple *stmt) bsi = gsi_for_stmt (stmt); name = gimple_assign_lhs (stmt); - gcc_assert (TREE_CODE (name) == SSA_NAME); - - next = single_nonlooparound_use (name); - reset_debug_uses (stmt); + if (TREE_CODE (name) == SSA_NAME) + { + next = single_nonlooparound_use (name); + reset_debug_uses (stmt); + } + else + { + /* This is a store to be eliminated. */ + gcc_assert (gimple_vdef (stmt) != NULL); + next = NULL; + } unlink_stmt_vdef (stmt); gsi_remove (&bsi, true); @@ -1722,11 +2036,12 @@ remove_stmt (gimple *stmt) static void execute_pred_commoning_chain (struct loop *loop, chain_p chain, - bitmap tmp_vars) + bitmap tmp_vars) { - unsigned i; + unsigned i, n; dref a; tree var; + bool in_lhs; if (chain->combined) { @@ -1734,12 +2049,47 @@ execute_pred_commoning_chain (struct loop *loop, chain_p chain, compute the values of the expression (except for the root one). We delay this until after all chains are processed. */ } + else if (chain->type == CT_STORE_STORE) + { + if (chain->length > 0) + { + if (chain->inv_store_elimination) + { + /* If dead stores in this chain only store loop invariant + values, we can simply record the invariant value and use + it directly after loop. */ + initialize_root_vars_store_elim_1 (chain); + } + else + { + /* If dead stores in this chain store loop variant values, + we need to set up the variables by loading from memory + before loop and propagating it with PHI nodes. */ + initialize_root_vars_store_elim_2 (loop, chain, tmp_vars); + } + + /* For inter-iteration store elimination chain, stores at each + distance in loop's last (chain->length - 1) iterations can't + be eliminated, because there is no following killing store. + We need to generate these stores after loop. */ + finalize_eliminated_stores (loop, chain); + } + + /* Eliminate the stores killed by following store. */ + n = chain->refs.length (); + for (i = 0; i < n - 1; i++) + remove_stmt (chain->refs[i]->stmt); + } else { - /* For non-combined chains, set up the variables that hold its value, - and replace the uses of the original references by these - variables. */ - initialize_root (loop, chain, tmp_vars); + /* For non-combined chains, set up the variables that hold its value. */ + initialize_root_vars (loop, chain, tmp_vars); + a = get_chain_root (chain); + in_lhs = (chain->type == CT_STORE_LOAD + || chain->type == CT_COMBINATION); + replace_ref_with (a->stmt, chain->vars[chain->length], true, in_lhs); + + /* Replace the uses of the original references by these variables. */ for (i = 1; chain->refs.iterate (i, &a); i++) { var = chain->vars[chain->length - a->distance]; @@ -1763,6 +2113,9 @@ determine_unroll_factor (vec<chain_p> chains) { if (chain->type == CT_INVARIANT) continue; + /* For now we can't handle unrolling when eliminating stores. */ + else if (chain->type == CT_STORE_STORE) + return 1; if (chain->combined) { @@ -2409,6 +2762,74 @@ try_combine_chains (vec<chain_p> *chains) } } +/* Prepare initializers for store elimination CHAIN in LOOP. Returns false + if this is impossible because one of these initializers may trap, true + otherwise. */ + +static bool +prepare_initializers_chain_store_elim (struct loop *loop, chain_p chain) +{ + unsigned i, n = chain->length; + + /* For now we can't eliminate stores if some of them are conditional + executed. */ + if (!chain->all_always_accessed) + return false; + + /* Nothing to intialize for intra-iteration store elimination. */ + if (n == 0 && chain->type == CT_STORE_STORE) + return true; + + /* For store elimination chain, there is nothing to initialize if stores + to be eliminated only store loop invariant values into memory. */ + if (chain->type == CT_STORE_STORE + && is_inv_store_elimination_chain (loop, chain)) + { + chain->inv_store_elimination = true; + return true; + } + + chain->inits.create (n); + chain->inits.safe_grow_cleared (n); + + /* For store eliminatin chain like below: + + for (i = 0; i < len; i++) + { + a[i] = 1; + // a[i + 1] = ... + a[i + 2] = 3; + } + + store to a[i + 1] is missed in loop body, it acts like bubbles. The + content of a[i + 1] remain the same if the loop iterates fewer times + than chain->length. We need to set up root variables for such stores + by loading from memory before loop. Note we only need to load bubble + elements because loop body is guaranteed to be executed at least once + after loop's preheader edge. */ + auto_vec<bool> bubbles; + bubbles.safe_grow_cleared (n + 1); + for (i = 0; i < chain->refs.length (); i++) + bubbles[chain->refs[i]->distance] = true; + + struct data_reference *dr = get_chain_root (chain)->ref; + for (i = 0; i < n; i++) + { + if (bubbles[i]) + continue; + + gimple_seq stmts = NULL; + + tree init = ref_at_iteration (dr, (int) 0 - i, &stmts); + if (stmts) + gimple_seq_add_seq_without_update (&chain->init_seq, stmts); + + chain->inits[i] = init; + } + + return true; +} + /* Prepare initializers for CHAIN in LOOP. Returns false if this is impossible because one of these initializers may trap, true otherwise. */ @@ -2421,6 +2842,9 @@ prepare_initializers_chain (struct loop *loop, chain_p chain) dref laref; edge entry = loop_preheader_edge (loop); + if (chain->type == CT_STORE_STORE) + return prepare_initializers_chain_store_elim (loop, chain); + /* Find the initializers for the variables, and check that they cannot trap. */ chain->inits.create (n); @@ -2454,7 +2878,7 @@ prepare_initializers_chain (struct loop *loop, chain_p chain) } if (stmts) - gsi_insert_seq_on_edge_immediate (entry, stmts); + gimple_seq_add_seq_without_update (&chain->init_seq, stmts); chain->inits[i] = init; } @@ -2484,10 +2908,115 @@ prepare_initializers (struct loop *loop, vec<chain_p> chains) } } -/* Performs predictive commoning for LOOP. Returns true if LOOP was - unrolled. */ +/* Generates finalizer memory references for CHAIN in LOOP. Returns true + if finalizer code for CHAIN can be generated, otherwise false. */ + +static bool +prepare_finalizers_chain (struct loop *loop, chain_p chain) +{ + unsigned i, n = chain->length; + struct data_reference *dr = get_chain_root (chain)->ref; + tree fini, niters = number_of_latch_executions (loop); + + /* For now we can't eliminate stores if some of them are conditional + executed. */ + if (!chain->all_always_accessed) + return false; + + chain->finis.create (n); + for (i = 0; i < n; i++) + chain->finis.quick_push (NULL_TREE); + + /* We never use looparound phi node for store elimination chains. */ + + /* Find the finalizers for the variables, and check that they cannot + trap. */ + for (i = 0; i < n; i++) + { + gimple_seq stmts = NULL; + gcc_assert (chain->finis[i] == NULL_TREE); + + if (TREE_CODE (niters) != INTEGER_CST && TREE_CODE (niters) != SSA_NAME) + { + niters = copy_node (niters); + niters = force_gimple_operand (niters, &stmts, true, NULL); + if (stmts) + { + gimple_seq_add_seq_without_update (&chain->fini_seq, stmts); + stmts = NULL; + } + } + fini = ref_at_iteration (dr, (int) 0 - i, &stmts, niters); + if (stmts) + gimple_seq_add_seq_without_update (&chain->fini_seq, stmts); + + chain->finis[i] = fini; + } + + return true; +} + +/* Generates finalizer memory reference for CHAINS in LOOP. Returns true + if finalizer code generation for CHAINS breaks loop closed ssa form. */ static bool +prepare_finalizers (struct loop *loop, vec<chain_p> chains) +{ + chain_p chain; + unsigned i; + bool loop_closed_ssa = false; + + for (i = 0; i < chains.length ();) + { + chain = chains[i]; + + /* Finalizer is only necessary for inter-iteration store elimination + chains. */ + if (chain->length == 0 || chain->type != CT_STORE_STORE) + { + i++; + continue; + } + + if (prepare_finalizers_chain (loop, chain)) + { + i++; + /* We don't corrupt loop closed ssa form for store elimination + chain if eliminated stores only store loop invariant values + into memory. */ + if (!chain->inv_store_elimination) + loop_closed_ssa |= (!chain->inv_store_elimination); + } + else + { + release_chain (chain); + chains.unordered_remove (i); + } + } + return loop_closed_ssa; +} + +/* Insert all initializing gimple stmts into loop's entry edge. */ + +static void +insert_init_seqs (struct loop *loop, vec<chain_p> chains) +{ + unsigned i; + edge entry = loop_preheader_edge (loop); + + for (i = 0; i < chains.length (); ++i) + if (chains[i]->init_seq) + { + gsi_insert_seq_on_edge_immediate (entry, chains[i]->init_seq); + chains[i]->init_seq = NULL; + } +} + +/* Performs predictive commoning for LOOP. Sets bit 1<<0 of return value + if LOOP was unrolled; Sets bit 1<<1 of return value if loop closed ssa + form was corrupted. */ + +static unsigned tree_predictive_commoning_loop (struct loop *loop) { vec<data_reference_p> datarefs; @@ -2496,7 +3025,7 @@ tree_predictive_commoning_loop (struct loop *loop) vec<chain_p> chains = vNULL; unsigned unroll_factor; struct tree_niter_desc desc; - bool unroll = false; + bool unroll = false, loop_closed_ssa = false; edge exit; if (dump_file && (dump_flags & TDF_DETAILS)) @@ -2508,7 +3037,7 @@ tree_predictive_commoning_loop (struct loop *loop) if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Loop iterates only 1 time, nothing to do.\n"); - return false; + return 0; } /* Find the data references and split them into components according to their @@ -2523,7 +3052,7 @@ tree_predictive_commoning_loop (struct loop *loop) fprintf (dump_file, "Cannot analyze data dependencies\n"); free_data_refs (datarefs); free_dependence_relations (dependences); - return false; + return 0; } if (dump_file && (dump_flags & TDF_DETAILS)) @@ -2536,7 +3065,7 @@ tree_predictive_commoning_loop (struct loop *loop) { free_data_refs (datarefs); free_affine_expand_cache (&name_expansions); - return false; + return 0; } if (dump_file && (dump_flags & TDF_DETAILS)) @@ -2561,10 +3090,13 @@ tree_predictive_commoning_loop (struct loop *loop) goto end; } prepare_initializers (loop, chains); + loop_closed_ssa = prepare_finalizers (loop, chains); /* Try to combine the chains that are always worked with together. */ try_combine_chains (&chains); + insert_init_seqs (loop, chains); + if (dump_file && (dump_flags & TDF_DETAILS)) { fprintf (dump_file, "Before commoning:\n\n"); @@ -2620,7 +3152,7 @@ end: ; free_affine_expand_cache (&name_expansions); - return unroll; + return (unroll ? 1 : 0) | (loop_closed_ssa ? 2 : 0); } /* Runs predictive commoning. */ @@ -2628,23 +3160,26 @@ end: ; unsigned tree_predictive_commoning (void) { - bool unrolled = false; struct loop *loop; - unsigned ret = 0; + unsigned ret = 0, changed = 0; initialize_original_copy_tables (); FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST) if (optimize_loop_for_speed_p (loop)) { - unrolled |= tree_predictive_commoning_loop (loop); + changed |= tree_predictive_commoning_loop (loop); } + free_original_copy_tables (); - if (unrolled) + if (changed > 0) { scev_reset (); + + if (changed > 1) + rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); + ret = TODO_cleanup_cfg; } - free_original_copy_tables (); return ret; } |