diff options
author | dorit <dorit@138bc75d-0d04-0410-961f-82ee72b054a4> | 2007-08-19 09:39:50 +0000 |
---|---|---|
committer | dorit <dorit@138bc75d-0d04-0410-961f-82ee72b054a4> | 2007-08-19 09:39:50 +0000 |
commit | 221e9a92bd54d3f572f14697a066205ee80ec187 (patch) | |
tree | 1440005827d5c910ba6597f144fa3292c95f2032 /gcc/tree-vectorizer.c | |
parent | 8787bd6a0c69004eb4cfac92bc1c50a16a703c35 (diff) | |
download | gcc-221e9a92bd54d3f572f14697a066205ee80ec187.tar.gz |
* tree-vectorizer.h (vect_is_simple_reduction): Takes a loop_vec_info
as argument instead of struct loop.
(nested_in_vect_loop_p): New function.
(vect_relevant): Add enum values vect_used_in_outer_by_reduction and
vect_used_in_outer.
(is_loop_header_bb_p): New. Used to differentiate loop-header phis
from other phis in the loop.
(destroy_loop_vec_info): Add additional argument to declaration.
* tree-vectorizer.c (supportable_widening_operation): Also check if
nested_in_vect_loop_p (don't allow changing the order in this case).
(vect_is_simple_reduction): Takes a loop_vec_info as argument instead
of struct loop. Call nested_in_vect_loop_p and don't require
flag_unsafe_math_optimizations if it returns true.
(new_stmt_vec_info): When setting def_type for phis differentiate
loop-header phis from other phis.
(bb_in_loop_p): New function.
(new_loop_vec_info): Inner-loop phis already have a stmt_vinfo, so just
update their loop_vinfo. Order of BB traversal now matters - call
dfs_enumerate_from with bb_in_loop_p.
(destroy_loop_vec_info): Takes additional argument to control whether
stmt_vinfo of the loop stmts should be destroyed as well.
(vect_is_simple_reduction): Allow the "non-reduction" use of a
reduction stmt to be defined by a non-loop-header phi.
(vectorize_loops): Call destroy_loop_vec_info with additional argument.
* tree-vect-transform.c (vectorizable_reduction): Call
nested_in_vect_loop_p. Check for multitypes in the inner-loop.
(vectorizable_call): Likewise.
(vectorizable_conversion): Likewise.
(vectorizable_operation): Likewise.
(vectorizable_type_promotion): Likewise.
(vectorizable_type_demotion): Likewise.
(vectorizable_store): Likewise.
(vectorizable_live_operation): Likewise.
(vectorizable_reduction): Likewise. Also pass loop_info to
vect_is_simple_reduction instead of loop.
(vect_init_vector): Call nested_in_vect_loop_p.
(get_initial_def_for_reduction): Likewise.
(vect_create_epilog_for_reduction): Likewise.
(vect_init_vector): Check which loop to work with, in case there's an
inner-loop.
(get_initial_def_for_induction): Extend to handle outer-loop
vectorization. Fix indentation.
(vect_get_vec_def_for_operand): Support phis in the case vect_loop_def.
In the case vect_induction_def get the vector def from the induction
phi node, instead of calling get_initial_def_for_induction.
(get_initial_def_for_reduction): Extend to handle outer-loop
vectorization.
(vect_create_epilog_for_reduction): Extend to handle outer-loop
vectorization.
(vect_transform_loop): Change assert to just skip this case. Add a
dump printout.
(vect_finish_stmt_generation): Add a couple asserts.
(vect_estimate_min_profitable_iters): Multiply
cost of inner-loop stmts (in outer-loop vectorization) by estimated
inner-loop bound.
(vect_model_reduction_cost): Don't add reduction epilogue cost in case
this is an inner-loop reduction in outer-loop vectorization.
* tree-vect-analyze.c (vect_analyze_scalar_cycles_1): New function.
Same code as what used to be vect_analyze_scalar_cycles, only with
additional argument loop, and loop_info passed to
vect_is_simple_reduction instead of loop.
(vect_analyze_scalar_cycles): Code factored out into
vect_analyze_scalar_cycles_1. Call it for each relevant loop-nest.
Updated documentation.
(analyze_operations): Check for inner-loop loop-closed exit-phis during
outer-loop vectorization that are live or not used in the outer-loop,
because this requires special handling.
(vect_enhance_data_refs_alignment): Don't consider versioning for
nested-loops.
(vect_analyze_data_refs): Check that there are no datarefs in the
inner-loop.
(vect_mark_stmts_to_be_vectorized): Also consider vect_used_in_outer
and vect_used_in_outer_by_reduction cases.
(process_use): Also consider the case of outer-loop stmt defining an
inner-loop stmt and vice versa.
(vect_analyze_loop_1): New function.
(vect_analyze_loop_form): Extend, to allow a restricted form of nested
loops. Call vect_analyze_loop_1.
(vect_analyze_loop): Skip (inner-)loops within outer-loops that have
been vectorized. Call destroy_loop_vec_info with additional argument.
* tree-vect-patterns.c (vect_recog_widen_sum_pattern): Don't allow
in the inner-loop when doing outer-loop vectorization. Add
documentation and printout.
(vect_recog_dot_prod_pattern): Likewise. Also add check for
GIMPLE_MODIFY_STMT (in case we encounter a phi in the loop).
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@127623 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/tree-vectorizer.c')
-rw-r--r-- | gcc/tree-vectorizer.c | 149 |
1 files changed, 119 insertions, 30 deletions
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c index 6dc0c727c0c..20c867c708b 100644 --- a/gcc/tree-vectorizer.c +++ b/gcc/tree-vectorizer.c @@ -1345,7 +1345,7 @@ new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo) STMT_VINFO_IN_PATTERN_P (res) = false; STMT_VINFO_RELATED_STMT (res) = NULL; STMT_VINFO_DATA_REF (res) = NULL; - if (TREE_CODE (stmt) == PHI_NODE) + if (TREE_CODE (stmt) == PHI_NODE && is_loop_header_bb_p (bb_for_stmt (stmt))) STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type; else STMT_VINFO_DEF_TYPE (res) = vect_loop_def; @@ -1364,6 +1364,20 @@ new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo) } +/* Function bb_in_loop_p + + Used as predicate for dfs order traversal of the loop bbs. */ + +static bool +bb_in_loop_p (const_basic_block bb, const void *data) +{ + struct loop *loop = (struct loop *)data; + if (flow_bb_inside_loop_p (loop, bb)) + return true; + return false; +} + + /* Function new_loop_vec_info. Create and initialize a new loop_vec_info struct for LOOP, as well as @@ -1375,37 +1389,76 @@ new_loop_vec_info (struct loop *loop) loop_vec_info res; basic_block *bbs; block_stmt_iterator si; - unsigned int i; + unsigned int i, nbbs; res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info)); + LOOP_VINFO_LOOP (res) = loop; bbs = get_loop_body (loop); - /* Create stmt_info for all stmts in the loop. */ + /* Create/Update stmt_info for all stmts in the loop. */ for (i = 0; i < loop->num_nodes; i++) { basic_block bb = bbs[i]; tree phi; - for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi)) - { - stmt_ann_t ann = get_stmt_ann (phi); - set_stmt_info (ann, new_stmt_vec_info (phi, res)); - } - - for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) + /* BBs in a nested inner-loop will have been already processed (because + we will have called vect_analyze_loop_form for any nested inner-loop). 
+ Therefore, for stmts in an inner-loop we just want to update the + STMT_VINFO_LOOP_VINFO field of their stmt_info to point to the new + loop_info of the outer-loop we are currently considering to vectorize + (instead of the loop_info of the inner-loop). + For stmts in other BBs we need to create a stmt_info from scratch. */ + if (bb->loop_father != loop) { - tree stmt = bsi_stmt (si); - stmt_ann_t ann; + /* Inner-loop bb. */ + gcc_assert (loop->inner && bb->loop_father == loop->inner); + for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi)) + { + stmt_vec_info stmt_info = vinfo_for_stmt (phi); + loop_vec_info inner_loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo)); + STMT_VINFO_LOOP_VINFO (stmt_info) = res; + } + for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) + { + tree stmt = bsi_stmt (si); + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + loop_vec_info inner_loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo)); + STMT_VINFO_LOOP_VINFO (stmt_info) = res; + } + } + else + { + /* bb in current nest. */ + for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi)) + { + stmt_ann_t ann = get_stmt_ann (phi); + set_stmt_info (ann, new_stmt_vec_info (phi, res)); + } - ann = stmt_ann (stmt); - set_stmt_info (ann, new_stmt_vec_info (stmt, res)); + for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) + { + tree stmt = bsi_stmt (si); + stmt_ann_t ann = stmt_ann (stmt); + set_stmt_info (ann, new_stmt_vec_info (stmt, res)); + } } } - LOOP_VINFO_LOOP (res) = loop; + /* CHECKME: We want to visit all BBs before their successors (except for + latch blocks, for which this assertion wouldn't hold). In the simple + case of the loop forms we allow, a dfs order of the BBs would the same + as reversed postorder traversal, so we are safe. 
*/ + + free (bbs); + bbs = XCNEWVEC (basic_block, loop->num_nodes); + nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p, + bbs, loop->num_nodes, loop); + gcc_assert (nbbs == loop->num_nodes); + LOOP_VINFO_BBS (res) = bbs; - LOOP_VINFO_EXIT_COND (res) = NULL; LOOP_VINFO_NITERS (res) = NULL; LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0; LOOP_VINFO_VECTORIZABLE_P (res) = 0; @@ -1430,7 +1483,7 @@ new_loop_vec_info (struct loop *loop) stmts in the loop. */ void -destroy_loop_vec_info (loop_vec_info loop_vinfo) +destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts) { struct loop *loop; basic_block *bbs; @@ -1446,6 +1499,18 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo) bbs = LOOP_VINFO_BBS (loop_vinfo); nbbs = loop->num_nodes; + if (!clean_stmts) + { + free (LOOP_VINFO_BBS (loop_vinfo)); + free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo)); + free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo)); + VEC_free (tree, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)); + + free (loop_vinfo); + loop->aux = NULL; + return; + } + for (j = 0; j < nbbs; j++) { basic_block bb = bbs[j]; @@ -1597,7 +1662,6 @@ vect_supportable_dr_alignment (struct data_reference *dr) return dr_aligned; /* Possibly unaligned access. 
*/ - if (DR_IS_READ (dr)) { if (optab_handler (vec_realign_load_optab, mode)->insn_code != CODE_FOR_nothing @@ -1718,8 +1782,6 @@ vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt, { case PHI_NODE: *def = PHI_RESULT (*def_stmt); - gcc_assert (*dt == vect_induction_def || *dt == vect_reduction_def - || *dt == vect_invariant_def); break; case GIMPLE_MODIFY_STMT: @@ -1760,6 +1822,8 @@ supportable_widening_operation (enum tree_code code, tree stmt, tree vectype, enum tree_code *code1, enum tree_code *code2) { stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info); + struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info); bool ordered_p; enum machine_mode vec_mode; enum insn_code icode1, icode2; @@ -1782,9 +1846,15 @@ supportable_widening_operation (enum tree_code code, tree stmt, tree vectype, Some targets can take advantage of this and generate more efficient code. For example, targets like Altivec, that support widen_mult using a sequence of {mult_even,mult_odd} generate the following vectors: - vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8]. */ + vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8]. + + When vectorizaing outer-loops, we execute the inner-loop sequentially + (each vectorized inner-loop iteration contributes to VF outer-loop + iterations in parallel). We therefore don't allow to change the order + of the computation in the inner-loop during outer-loop vectorization. */ - if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction) + if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction + && !nested_in_vect_loop_p (vect_loop, stmt)) ordered_p = false; else ordered_p = true; @@ -2008,8 +2078,10 @@ reduction_code_for_scalar_code (enum tree_code code, Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized. 
*/ tree -vect_is_simple_reduction (struct loop *loop, tree phi) +vect_is_simple_reduction (loop_vec_info loop_info, tree phi) { + struct loop *loop = (bb_for_stmt (phi))->loop_father; + struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info); edge latch_e = loop_latch_edge (loop); tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e); tree def_stmt, def1, def2; @@ -2022,6 +2094,8 @@ vect_is_simple_reduction (struct loop *loop, tree phi) imm_use_iterator imm_iter; use_operand_p use_p; + gcc_assert (loop == vect_loop || flow_loop_nested_p (vect_loop, loop)); + name = PHI_RESULT (phi); nloop_uses = 0; FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name) @@ -2133,8 +2207,16 @@ vect_is_simple_reduction (struct loop *loop, tree phi) return NULL_TREE; } + /* Generally, when vectorizing a reduction we change the order of the + computation. This may change the behavior of the program in some + cases, so we need to check that this is ok. One exception is when + vectorizing an outer-loop: the inner-loop is executed sequentially, + and therefore vectorizing reductions in the inner-loop durint + outer-loop vectorization is safe. */ + /* CHECKME: check for !flag_finite_math_only too? */ - if (SCALAR_FLOAT_TYPE_P (type) && !flag_unsafe_math_optimizations) + if (SCALAR_FLOAT_TYPE_P (type) && !flag_unsafe_math_optimizations + && !nested_in_vect_loop_p (vect_loop, def_stmt)) { /* Changing the order of operations changes the semantics. */ if (vect_print_dump_info (REPORT_DETAILS)) @@ -2144,7 +2226,8 @@ vect_is_simple_reduction (struct loop *loop, tree phi) } return NULL_TREE; } - else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)) + else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type) + && !nested_in_vect_loop_p (vect_loop, def_stmt)) { /* Changing the order of operations changes the semantics. 
*/ if (vect_print_dump_info (REPORT_DETAILS)) @@ -2183,13 +2266,16 @@ vect_is_simple_reduction (struct loop *loop, tree phi) /* Check that one def is the reduction def, defined by PHI, - the other def is either defined in the loop by a GIMPLE_MODIFY_STMT, - or it's an induction (defined by some phi node). */ + the other def is either defined in the loop ("vect_loop_def"), + or it's an induction (defined by a loop-header phi-node). */ if (def2 == phi && flow_bb_inside_loop_p (loop, bb_for_stmt (def1)) && (TREE_CODE (def1) == GIMPLE_MODIFY_STMT - || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) == vect_induction_def)) + || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) == vect_induction_def + || (TREE_CODE (def1) == PHI_NODE + && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) == vect_loop_def + && !is_loop_header_bb_p (bb_for_stmt (def1))))) { if (vect_print_dump_info (REPORT_DETAILS)) { @@ -2201,7 +2287,10 @@ vect_is_simple_reduction (struct loop *loop, tree phi) else if (def1 == phi && flow_bb_inside_loop_p (loop, bb_for_stmt (def2)) && (TREE_CODE (def2) == GIMPLE_MODIFY_STMT - || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) == vect_induction_def)) + || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) == vect_induction_def + || (TREE_CODE (def2) == PHI_NODE + && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) == vect_loop_def + && !is_loop_header_bb_p (bb_for_stmt (def2))))) { /* Swap operands (just for simplicity - so that the rest of the code can assume that the reduction variable is always the last (second) @@ -2340,7 +2429,7 @@ vectorize_loops (void) if (!loop) continue; loop_vinfo = loop->aux; - destroy_loop_vec_info (loop_vinfo); + destroy_loop_vec_info (loop_vinfo, true); loop->aux = NULL; } |