author     dorit <dorit@138bc75d-0d04-0410-961f-82ee72b054a4>   2007-08-19 09:39:50 +0000
committer  dorit <dorit@138bc75d-0d04-0410-961f-82ee72b054a4>   2007-08-19 09:39:50 +0000
commit     221e9a92bd54d3f572f14697a066205ee80ec187 (patch)
tree       1440005827d5c910ba6597f144fa3292c95f2032 /gcc/tree-vectorizer.c
parent     8787bd6a0c69004eb4cfac92bc1c50a16a703c35 (diff)
* tree-vectorizer.h (vect_is_simple_reduction): Takes a loop_vec_info
as argument instead of struct loop.
(nested_in_vect_loop_p): New function.
(vect_relevant): Add enum values vect_used_in_outer_by_reduction and
vect_used_in_outer.
(is_loop_header_bb_p): New. Used to differentiate loop-header phis from
other phis in the loop.
(destroy_loop_vec_info): Add additional argument to declaration.

* tree-vectorizer.c (supportable_widening_operation): Also check if
nested_in_vect_loop_p (don't allow changing the order in this case).
(vect_is_simple_reduction): Takes a loop_vec_info as argument instead of
struct loop. Call nested_in_vect_loop_p and don't require
flag_unsafe_math_optimizations if it returns true.
(new_stmt_vec_info): When setting def_type for phis, differentiate
loop-header phis from other phis.
(bb_in_loop_p): New function.
(new_loop_vec_info): Inner-loop phis already have a stmt_vinfo, so just
update their loop_vinfo. Order of BB traversal now matters - call
dfs_enumerate_from with bb_in_loop_p.
(destroy_loop_vec_info): Takes additional argument to control whether the
stmt_vinfo of the loop stmts should be destroyed as well.
(vect_is_simple_reduction): Allow the "non-reduction" use of a reduction
stmt to be defined by a non-loop-header phi.
(vectorize_loops): Call destroy_loop_vec_info with additional argument.

* tree-vect-transform.c (vectorizable_reduction): Call
nested_in_vect_loop_p. Check for multitypes in the inner-loop.
(vectorizable_call): Likewise.
(vectorizable_conversion): Likewise.
(vectorizable_operation): Likewise.
(vectorizable_type_promotion): Likewise.
(vectorizable_type_demotion): Likewise.
(vectorizable_store): Likewise.
(vectorizable_live_operation): Likewise.
(vectorizable_reduction): Likewise. Also pass loop_info to
vect_is_simple_reduction instead of loop.
(vect_init_vector): Call nested_in_vect_loop_p.
(get_initial_def_for_reduction): Likewise.
(vect_create_epilog_for_reduction): Likewise.
(vect_init_vector): Check which loop to work with, in case there's an
inner-loop.
(get_initial_def_for_induction): Extend to handle outer-loop
vectorization. Fix indentation.
(vect_get_vec_def_for_operand): Support phis in the case vect_loop_def.
In the case vect_induction_def get the vector def from the induction phi
node, instead of calling get_initial_def_for_induction.
(get_initial_def_for_reduction): Extend to handle outer-loop
vectorization.
(vect_create_epilog_for_reduction): Extend to handle outer-loop
vectorization.
(vect_transform_loop): Change assert to just skip this case. Add a dump
printout.
(vect_finish_stmt_generation): Add a couple asserts.
(vect_estimate_min_profitable_iters): Multiply cost of inner-loop stmts
(in outer-loop vectorization) by estimated inner-loop bound.
(vect_model_reduction_cost): Don't add reduction epilogue cost in case
this is an inner-loop reduction in outer-loop vectorization.

* tree-vect-analyze.c (vect_analyze_scalar_cycles_1): New function. Same
code as what used to be vect_analyze_scalar_cycles, only with additional
argument loop, and loop_info passed to vect_is_simple_reduction instead
of loop.
(vect_analyze_scalar_cycles): Code factored out into
vect_analyze_scalar_cycles_1. Call it for each relevant loop-nest.
Updated documentation.
(analyze_operations): Check for inner-loop loop-closed exit-phis during
outer-loop vectorization that are live or not used in the outer-loop,
because this requires special handling.
(vect_enhance_data_refs_alignment): Don't consider versioning for
nested-loops.
(vect_analyze_data_refs): Check that there are no datarefs in the
inner-loop.
(vect_mark_stmts_to_be_vectorized): Also consider vect_used_in_outer and
vect_used_in_outer_by_reduction cases.
(process_use): Also consider the case of an outer-loop stmt defining an
inner-loop stmt and vice versa.
(vect_analyze_loop_1): New function.
(vect_analyze_loop_form): Extend to allow a restricted form of nested
loops. Call vect_analyze_loop_1.
(vect_analyze_loop): Skip (inner-)loops within outer-loops that have been
vectorized. Call destroy_loop_vec_info with additional argument.

* tree-vect-patterns.c (vect_recog_widen_sum_pattern): Don't allow in the
inner-loop when doing outer-loop vectorization. Add documentation and
printout.
(vect_recog_dot_prod_pattern): Likewise. Also add check for
GIMPLE_MODIFY_STMT (in case we encounter a phi in the loop).

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@127623 138bc75d-0d04-0410-961f-82ee72b054a4
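The predicate nested_in_vect_loop_p, which this patch adds to tree-vectorizer.h, is used throughout the hunks below but its definition is not part of this file's diff. Judging from how it is called (with the loop being vectorized and a stmt), it is presumably a small inline check along these lines (sketch only; the exact body is assumed, not taken from the patch):

static inline bool
nested_in_vect_loop_p (struct loop *loop, tree stmt)
{
  /* Sketch: STMT lives in a loop nested inside LOOP iff LOOP has an
     inner loop and that inner loop is the immediate loop_father of
     STMT's basic block.  The real definition lives in tree-vectorizer.h.  */
  return (loop->inner
          && (loop->inner == (bb_for_stmt (stmt))->loop_father));
}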
Diffstat (limited to 'gcc/tree-vectorizer.c')
-rw-r--r--   gcc/tree-vectorizer.c   149
1 file changed, 119 insertions(+), 30 deletions(-)
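For orientation, the loop shape this patch targets is outer-loop vectorization of a nest whose inner loop stays sequential, for example an inner-loop reduction accumulated once per outer iteration (illustration only, not code from the patch):

/* Illustration only: the outer i-loop is the vectorization candidate;
   the inner j-loop is executed sequentially, with each vectorized outer
   iteration running VF inner-loop reductions in parallel.  */
void
inner_reduction (int n, int m, int a[n][m], int b[n])
{
  for (int i = 0; i < n; i++)       /* outer loop: vectorized */
    {
      int s = 0;
      for (int j = 0; j < m; j++)   /* inner loop: kept sequential */
        s += a[i][j];
      b[i] = s;                     /* reduction result used in the outer loop */
    }
}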
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 6dc0c727c0c..20c867c708b 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -1345,7 +1345,7 @@ new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
STMT_VINFO_IN_PATTERN_P (res) = false;
STMT_VINFO_RELATED_STMT (res) = NULL;
STMT_VINFO_DATA_REF (res) = NULL;
- if (TREE_CODE (stmt) == PHI_NODE)
+ if (TREE_CODE (stmt) == PHI_NODE && is_loop_header_bb_p (bb_for_stmt (stmt)))
STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
else
STMT_VINFO_DEF_TYPE (res) = vect_loop_def;
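The other new tree-vectorizer.h helper used in the hunk above, is_loop_header_bb_p, is likewise not shown in this file's diff. From its uses here (singling out loop-header phis in new_stmt_vec_info and vect_is_simple_reduction) it is presumably just a header-block test (sketch, body assumed):

static inline bool
is_loop_header_bb_p (basic_block bb)
{
  /* Sketch: a block is a loop header iff it is the header block of its
     immediately containing loop.  */
  return (bb == bb->loop_father->header);
}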
@@ -1364,6 +1364,20 @@ new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
}
+/* Function bb_in_loop_p
+
+ Used as predicate for dfs order traversal of the loop bbs. */
+
+static bool
+bb_in_loop_p (const_basic_block bb, const void *data)
+{
+ struct loop *loop = (struct loop *)data;
+ if (flow_bb_inside_loop_p (loop, bb))
+ return true;
+ return false;
+}
+
+
/* Function new_loop_vec_info.
Create and initialize a new loop_vec_info struct for LOOP, as well as
@@ -1375,37 +1389,76 @@ new_loop_vec_info (struct loop *loop)
loop_vec_info res;
basic_block *bbs;
block_stmt_iterator si;
- unsigned int i;
+ unsigned int i, nbbs;
res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
+ LOOP_VINFO_LOOP (res) = loop;
bbs = get_loop_body (loop);
- /* Create stmt_info for all stmts in the loop. */
+ /* Create/Update stmt_info for all stmts in the loop. */
for (i = 0; i < loop->num_nodes; i++)
{
basic_block bb = bbs[i];
tree phi;
- for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
- {
- stmt_ann_t ann = get_stmt_ann (phi);
- set_stmt_info (ann, new_stmt_vec_info (phi, res));
- }
-
- for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
+ /* BBs in a nested inner-loop will have been already processed (because
+ we will have called vect_analyze_loop_form for any nested inner-loop).
+ Therefore, for stmts in an inner-loop we just want to update the
+ STMT_VINFO_LOOP_VINFO field of their stmt_info to point to the new
+ loop_info of the outer-loop we are currently considering to vectorize
+ (instead of the loop_info of the inner-loop).
+ For stmts in other BBs we need to create a stmt_info from scratch. */
+ if (bb->loop_father != loop)
{
- tree stmt = bsi_stmt (si);
- stmt_ann_t ann;
+ /* Inner-loop bb. */
+ gcc_assert (loop->inner && bb->loop_father == loop->inner);
+ for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
+ {
+ stmt_vec_info stmt_info = vinfo_for_stmt (phi);
+ loop_vec_info inner_loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
+ STMT_VINFO_LOOP_VINFO (stmt_info) = res;
+ }
+ for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
+ {
+ tree stmt = bsi_stmt (si);
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ loop_vec_info inner_loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
+ STMT_VINFO_LOOP_VINFO (stmt_info) = res;
+ }
+ }
+ else
+ {
+ /* bb in current nest. */
+ for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
+ {
+ stmt_ann_t ann = get_stmt_ann (phi);
+ set_stmt_info (ann, new_stmt_vec_info (phi, res));
+ }
- ann = stmt_ann (stmt);
- set_stmt_info (ann, new_stmt_vec_info (stmt, res));
+ for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
+ {
+ tree stmt = bsi_stmt (si);
+ stmt_ann_t ann = stmt_ann (stmt);
+ set_stmt_info (ann, new_stmt_vec_info (stmt, res));
+ }
}
}
- LOOP_VINFO_LOOP (res) = loop;
+ /* CHECKME: We want to visit all BBs before their successors (except for
+ latch blocks, for which this assertion wouldn't hold). In the simple
+ case of the loop forms we allow, a dfs order of the BBs would be the same
+ as reversed postorder traversal, so we are safe. */
+
+ free (bbs);
+ bbs = XCNEWVEC (basic_block, loop->num_nodes);
+ nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
+ bbs, loop->num_nodes, loop);
+ gcc_assert (nbbs == loop->num_nodes);
+
LOOP_VINFO_BBS (res) = bbs;
- LOOP_VINFO_EXIT_COND (res) = NULL;
LOOP_VINFO_NITERS (res) = NULL;
LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
LOOP_VINFO_VECTORIZABLE_P (res) = 0;
@@ -1430,7 +1483,7 @@ new_loop_vec_info (struct loop *loop)
stmts in the loop. */
void
-destroy_loop_vec_info (loop_vec_info loop_vinfo)
+destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
{
struct loop *loop;
basic_block *bbs;
@@ -1446,6 +1499,18 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo)
bbs = LOOP_VINFO_BBS (loop_vinfo);
nbbs = loop->num_nodes;
+ if (!clean_stmts)
+ {
+ free (LOOP_VINFO_BBS (loop_vinfo));
+ free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
+ free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
+ VEC_free (tree, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
+
+ free (loop_vinfo);
+ loop->aux = NULL;
+ return;
+ }
+
for (j = 0; j < nbbs; j++)
{
basic_block bb = bbs[j];
@@ -1597,7 +1662,6 @@ vect_supportable_dr_alignment (struct data_reference *dr)
return dr_aligned;
/* Possibly unaligned access. */
-
if (DR_IS_READ (dr))
{
if (optab_handler (vec_realign_load_optab, mode)->insn_code != CODE_FOR_nothing
@@ -1718,8 +1782,6 @@ vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt,
{
case PHI_NODE:
*def = PHI_RESULT (*def_stmt);
- gcc_assert (*dt == vect_induction_def || *dt == vect_reduction_def
- || *dt == vect_invariant_def);
break;
case GIMPLE_MODIFY_STMT:
@@ -1760,6 +1822,8 @@ supportable_widening_operation (enum tree_code code, tree stmt, tree vectype,
enum tree_code *code1, enum tree_code *code2)
{
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
+ struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
bool ordered_p;
enum machine_mode vec_mode;
enum insn_code icode1, icode2;
@@ -1782,9 +1846,15 @@ supportable_widening_operation (enum tree_code code, tree stmt, tree vectype,
Some targets can take advantage of this and generate more efficient code.
For example, targets like Altivec, that support widen_mult using a sequence
of {mult_even,mult_odd} generate the following vectors:
- vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8]. */
+ vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
+
+ When vectorizing outer-loops, we execute the inner-loop sequentially
+ (each vectorized inner-loop iteration contributes to VF outer-loop
+ iterations in parallel). We therefore don't allow changing the order
+ of the computation in the inner-loop during outer-loop vectorization. */
- if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction)
+ if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
+ && !nested_in_vect_loop_p (vect_loop, stmt))
ordered_p = false;
else
ordered_p = true;
@@ -2008,8 +2078,10 @@ reduction_code_for_scalar_code (enum tree_code code,
Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized. */
tree
-vect_is_simple_reduction (struct loop *loop, tree phi)
+vect_is_simple_reduction (loop_vec_info loop_info, tree phi)
{
+ struct loop *loop = (bb_for_stmt (phi))->loop_father;
+ struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
edge latch_e = loop_latch_edge (loop);
tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
tree def_stmt, def1, def2;
@@ -2022,6 +2094,8 @@ vect_is_simple_reduction (struct loop *loop, tree phi)
imm_use_iterator imm_iter;
use_operand_p use_p;
+ gcc_assert (loop == vect_loop || flow_loop_nested_p (vect_loop, loop));
+
name = PHI_RESULT (phi);
nloop_uses = 0;
FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
@@ -2133,8 +2207,16 @@ vect_is_simple_reduction (struct loop *loop, tree phi)
return NULL_TREE;
}
+ /* Generally, when vectorizing a reduction we change the order of the
+ computation. This may change the behavior of the program in some
+ cases, so we need to check that this is ok. One exception is when
+ vectorizing an outer-loop: the inner-loop is executed sequentially,
+ and therefore vectorizing reductions in the inner-loop during
+ outer-loop vectorization is safe. */
+
/* CHECKME: check for !flag_finite_math_only too? */
- if (SCALAR_FLOAT_TYPE_P (type) && !flag_unsafe_math_optimizations)
+ if (SCALAR_FLOAT_TYPE_P (type) && !flag_unsafe_math_optimizations
+ && !nested_in_vect_loop_p (vect_loop, def_stmt))
{
/* Changing the order of operations changes the semantics. */
if (vect_print_dump_info (REPORT_DETAILS))
@@ -2144,7 +2226,8 @@ vect_is_simple_reduction (struct loop *loop, tree phi)
}
return NULL_TREE;
}
- else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
+ else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)
+ && !nested_in_vect_loop_p (vect_loop, def_stmt))
{
/* Changing the order of operations changes the semantics. */
if (vect_print_dump_info (REPORT_DETAILS))
@@ -2183,13 +2266,16 @@ vect_is_simple_reduction (struct loop *loop, tree phi)
/* Check that one def is the reduction def, defined by PHI,
- the other def is either defined in the loop by a GIMPLE_MODIFY_STMT,
- or it's an induction (defined by some phi node). */
+ the other def is either defined in the loop ("vect_loop_def"),
+ or it's an induction (defined by a loop-header phi-node). */
if (def2 == phi
&& flow_bb_inside_loop_p (loop, bb_for_stmt (def1))
&& (TREE_CODE (def1) == GIMPLE_MODIFY_STMT
- || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) == vect_induction_def))
+ || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) == vect_induction_def
+ || (TREE_CODE (def1) == PHI_NODE
+ && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) == vect_loop_def
+ && !is_loop_header_bb_p (bb_for_stmt (def1)))))
{
if (vect_print_dump_info (REPORT_DETAILS))
{
@@ -2201,7 +2287,10 @@ vect_is_simple_reduction (struct loop *loop, tree phi)
else if (def1 == phi
&& flow_bb_inside_loop_p (loop, bb_for_stmt (def2))
&& (TREE_CODE (def2) == GIMPLE_MODIFY_STMT
- || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) == vect_induction_def))
+ || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) == vect_induction_def
+ || (TREE_CODE (def2) == PHI_NODE
+ && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) == vect_loop_def
+ && !is_loop_header_bb_p (bb_for_stmt (def2)))))
{
/* Swap operands (just for simplicity - so that the rest of the code
can assume that the reduction variable is always the last (second)
@@ -2340,7 +2429,7 @@ vectorize_loops (void)
if (!loop)
continue;
loop_vinfo = loop->aux;
- destroy_loop_vec_info (loop_vinfo);
+ destroy_loop_vec_info (loop_vinfo, true);
loop->aux = NULL;
}
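The hunk above shows vectorize_loops passing true for the new clean_stmts parameter once a loop has been transformed. Per the ChangeLog, vect_analyze_loop in tree-vect-analyze.c also gains the extra argument; presumably it passes false when a loop_vec_info is discarded but the stmt_vec_infos must survive for an enclosing loop's analysis. A hedged sketch of the two intended call patterns (the false call site is assumed, not shown in this diff):

/* Analysis failed, but the stmts may still be analyzed as part of an
   enclosing outer-loop: free only the loop_vec_info (clean_stmts=false,
   handled by the early-return path added above).  */
destroy_loop_vec_info (loop_vinfo, false);

/* Vectorization finished (as in vectorize_loops): nothing else will look
   at the per-stmt info, so destroy it as well.  */
destroy_loop_vec_info (loop_vinfo, true);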