author     Ira Rosen <irar@il.ibm.com>  2010-04-19 09:10:45 +0000
committer  Ira Rosen <irar@gcc.gnu.org>  2010-04-19 09:10:45 +0000
commit     b5aeb3bb3e9d1a0ce78fe2d7de9f510a7413605d (patch)
tree       3e32ed80e44c4287a38f498c1bd5e752b9a57b20 /gcc/tree-vect-slp.c
parent     5a2fa9e8bf068aaacb57627c058b0d5891763857 (diff)
download   gcc-b5aeb3bb3e9d1a0ce78fe2d7de9f510a7413605d.tar.gz
re PR tree-optimization/37027 (SLP loop vectorization missing support for reductions)
PR tree-optimization/37027
* tree-vectorizer.h (struct _loop_vec_info): Add new field reductions
  and macro to access it.
  (vectorizable_reduction): Add argument.
  (vect_get_slp_defs): Likewise.
* tree-vect-loop.c (vect_analyze_scalar_cycles_1): Collect reduction
  statements for possible use in SLP.
  (new_loop_vec_info): Initialize LOOP_VINFO_REDUCTIONS.
  (destroy_loop_vec_info): Free LOOP_VINFO_REDUCTIONS.
  (vect_create_epilog_for_reduction): Handle SLP.  Modify documentation,
  add new argument.
  (vectorizable_reduction): Likewise.
* tree-vect-stmts.c (vect_get_vec_defs): Update call to
  vect_get_slp_defs.
  (vectorizable_type_demotion, vectorizable_type_promotion,
  vectorizable_store): Likewise.
  (vect_analyze_stmt): Update call to vectorizable_reduction.
  (vect_transform_stmt): Likewise.
* tree-vect-slp.c (vect_get_and_check_slp_defs): Handle reduction.
  (vect_build_slp_tree): Fix indentation.  Check that there are no loads
  from different interleaving chains in the same node.
  (vect_slp_rearrange_stmts): New function.
  (vect_supported_load_permutation_p): Allow load permutations for
  reductions.  Call vect_slp_rearrange_stmts() to rearrange statements
  inside SLP nodes if necessary.
  (vect_analyze_slp_instance): Handle reductions.
  (vect_analyze_slp): Try to build SLP instances originating from groups
  of reductions.
  (vect_detect_hybrid_slp_stmts): Skip reduction statements.
  (vect_get_constant_vectors): Create initial vectors for reductions
  according to reduction code.  Add new argument.
  (vect_get_slp_defs): Add new argument, pass it to
  vect_get_constant_vectors.
  (vect_schedule_slp_instance): Remove SLP tree root statements.

From-SVN: r158506
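The change is easiest to see on a small example.  Below is a minimal
sketch (illustrative only, not the testcase attached to PR 37027) of the
loop shape this patch teaches the SLP vectorizer to handle: a group of
isomorphic reduction statements reading one interleaving chain of loads,
whose scalar accumulators can be packed into a single vector
accumulator:

/* Four isomorphic '+=' reductions over one interleaving chain of
   loads; with this patch, SLP can pack them into vector statements.  */
int
sum_interleaved (const int *a, int n)
{
  int sum0 = 0, sum1 = 0, sum2 = 0, sum3 = 0;
  int i;

  for (i = 0; i < n; i++)
    {
      sum0 += a[4 * i];
      sum1 += a[4 * i + 1];
      sum2 += a[4 * i + 2];
      sum3 += a[4 * i + 3];
    }

  return sum0 + sum1 + sum2 + sum3;
}

Before this patch, SLP detection started only from groups of strided
stores; vect_analyze_slp now also tries to build an SLP instance rooted
at a group of reduction statements like the one above.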
Diffstat (limited to 'gcc/tree-vect-slp.c')
-rw-r--r--  gcc/tree-vect-slp.c  399
1 file changed, 325 insertions(+), 74 deletions(-)
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index afc4f311078..99a865fee20 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -273,6 +273,7 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
break;
case vect_internal_def:
+ case vect_reduction_def:
if (i == 0)
VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
else
@@ -332,7 +333,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
HOST_WIDE_INT dummy;
bool permutation = false;
unsigned int load_place;
- gimple first_load;
+ gimple first_load, prev_first_load = NULL;
/* For every stmt in NODE find its def stmt/s. */
for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
@@ -485,42 +486,62 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
&pattern0, &pattern1))
return false;
}
- else
- {
- /* Load. */
- /* FORNOW: Check that there is no gap between the loads. */
- if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
- && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
- || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
- && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
- {
- if (vect_print_dump_info (REPORT_SLP))
- {
- fprintf (vect_dump, "Build SLP failed: strided "
- "loads have gaps ");
- print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
- }
+ else
+ {
+ /* Load. */
+ /* FORNOW: Check that there is no gap between the loads. */
+ if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
+ && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
+ || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
+ && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
+ {
+ if (vect_print_dump_info (REPORT_SLP))
+ {
+ fprintf (vect_dump, "Build SLP failed: strided "
+ "loads have gaps ");
+ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
+ }
- return false;
- }
-
- /* Check that the size of interleaved loads group is not
- greater than the SLP group size. */
- if (DR_GROUP_SIZE (vinfo_for_stmt (stmt))
- > ncopies * group_size)
- {
- if (vect_print_dump_info (REPORT_SLP))
- {
- fprintf (vect_dump, "Build SLP failed: the number of "
- "interleaved loads is greater than"
- " the SLP group size ");
- print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
- }
+ return false;
+ }
- return false;
- }
+ /* Check that the size of interleaved loads group is not
+ greater than the SLP group size. */
+ if (DR_GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
+ {
+ if (vect_print_dump_info (REPORT_SLP))
+ {
+ fprintf (vect_dump, "Build SLP failed: the number of "
+ "interleaved loads is greater than"
+ " the SLP group size ");
+ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
+ }
- first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
+ return false;
+ }
+
+ first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
+ if (prev_first_load)
+ {
+ /* Check that there are no loads from different interleaving
+ chains in the same node. The only exception is complex
+ numbers. */
+ if (prev_first_load != first_load
+ && rhs_code != REALPART_EXPR
+ && rhs_code != IMAGPART_EXPR)
+ {
+ if (vect_print_dump_info (REPORT_SLP))
+ {
+ fprintf (vect_dump, "Build SLP failed: different "
+ "interleaving chains in one node ");
+ print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
+ }
+
+ return false;
+ }
+ }
+ else
+ prev_first_load = first_load;
if (first_load == stmt)
{
@@ -787,6 +808,39 @@ vect_supported_slp_permutation_p (slp_instance instance)
}
+/* Rearrange the statements of NODE according to PERMUTATION. */
+
+static void
+vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
+ VEC (int, heap) *permutation)
+{
+ gimple stmt;
+ VEC (gimple, heap) *tmp_stmts;
+ unsigned int index, i;
+
+ if (!node)
+ return;
+
+ vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation);
+ vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation);
+
+ gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
+ tmp_stmts = VEC_alloc (gimple, heap, group_size);
+
+ for (i = 0; i < group_size; i++)
+ VEC_safe_push (gimple, heap, tmp_stmts, NULL);
+
+ for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
+ {
+ index = VEC_index (int, permutation, i);
+ VEC_replace (gimple, tmp_stmts, index, stmt);
+ }
+
+ VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
+ SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
+}
+
+
/* Check if the required load permutation is supported.
LOAD_PERMUTATION contains a list of indices of the loads.
In SLP this permutation is relative to the order of strided stores that are
@@ -796,9 +850,11 @@ static bool
vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
VEC (int, heap) *load_permutation)
{
- int i = 0, j, prev = -1, next, k;
- bool supported;
+ int i = 0, j, prev = -1, next, k, number_of_groups;
+ bool supported, bad_permutation = false;
sbitmap load_index;
+ slp_tree node;
+ gimple stmt;
/* FORNOW: permutations are only supported in SLP. */
if (!slp_instn)
@@ -811,9 +867,72 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
fprintf (vect_dump, "%d ", next);
}
+ /* In case of reduction every load permutation is allowed, since the order
+ of the reduction statements is not important (as opposed to the case of
+ strided stores). The only condition we need to check is that all the
+ load nodes are of the same size and have the same permutation (and then
+ rearrange all the nodes of the SLP instance according to this
+ permutation). */
+
+ /* Check that all the load nodes are of the same size. */
+ for (i = 0;
+ VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node);
+ i++)
+ if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
+ != (unsigned) group_size)
+ return false;
+
+ node = SLP_INSTANCE_TREE (slp_instn);
+ stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
+ /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
+ instance; not all the loads belong to the same node or interleaving
+ group. Hence, we need to divide them into groups according to
+ GROUP_SIZE. */
+ number_of_groups = VEC_length (int, load_permutation) / group_size;
+
+ /* Reduction (there are no data-refs in the root). */
+ if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
+ {
+ int first_group_load_index;
+
+ /* Compare all the permutation sequences to the first one. */
+ for (i = 1; i < number_of_groups; i++)
+ {
+ k = 0;
+ for (j = i * group_size; j < i * group_size + group_size; j++)
+ {
+ next = VEC_index (int, load_permutation, j);
+ first_group_load_index = VEC_index (int, load_permutation, k);
+
+ if (next != first_group_load_index)
+ {
+ bad_permutation = true;
+ break;
+ }
+
+ k++;
+ }
+
+ if (bad_permutation)
+ break;
+ }
+
+ if (!bad_permutation)
+ {
+ /* This permutation is valid for reduction. Since the order of the
+ statements in the nodes is not important unless they are memory
+ accesses, we can rearrange the statements in all the nodes
+ according to the order of the loads. */
+ vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
+ load_permutation);
+ VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
+ return true;
+ }
+ }
+
/* FORNOW: the only supported permutation is 0..01..1.. of length equal to
GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
- well. */
+ well (unless it's reduction). */
if (VEC_length (int, load_permutation)
!= (unsigned int) (group_size * group_size))
return false;
@@ -896,17 +1015,28 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
slp_tree node = XNEW (struct _slp_tree);
unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
unsigned int unrolling_factor = 1, nunits;
- tree vectype, scalar_type;
+ tree vectype, scalar_type = NULL_TREE;
gimple next;
unsigned int vectorization_factor = 0;
- int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
+ int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
unsigned int max_nunits = 0;
VEC (int, heap) *load_permutation;
VEC (slp_tree, heap) *loads;
+ struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
+
+ if (dr)
+ {
+ scalar_type = TREE_TYPE (DR_REF (dr));
+ vectype = get_vectype_for_scalar_type (scalar_type);
+ group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
+ }
+ else
+ {
+ gcc_assert (loop_vinfo);
+ vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
+ group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
+ }
- scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (
- vinfo_for_stmt (stmt))));
- vectype = get_vectype_for_scalar_type (scalar_type);
if (!vectype)
{
if (vect_print_dump_info (REPORT_SLP))
@@ -914,6 +1044,7 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
}
+
return false;
}
@@ -938,11 +1069,29 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
/* Create a node (a root of the SLP tree) for the packed strided stores. */
SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
next = stmt;
- /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
- while (next)
+ if (dr)
{
- VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
- next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
+ /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
+ while (next)
+ {
+ VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
+ next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
+ }
+ }
+ else
+ {
+ /* Collect reduction statements. */
+ for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i,
+ next);
+ i++)
+ {
+ VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
+ if (vect_print_dump_info (REPORT_DETAILS))
+ {
+ fprintf (vect_dump, "pushing reduction into node: ");
+ print_gimple_stmt (vect_dump, next, 0, TDF_SLIM);
+ }
+ }
}
SLP_TREE_VEC_STMTS (node) = NULL;
@@ -1035,7 +1184,7 @@ bool
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
unsigned int i;
- VEC (gimple, heap) *strided_stores;
+ VEC (gimple, heap) *strided_stores, *reductions = NULL;
gimple store;
bool ok = false;
@@ -1043,10 +1192,14 @@ vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
fprintf (vect_dump, "=== vect_analyze_slp ===");
if (loop_vinfo)
- strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
+ {
+ strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
+ reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
+ }
else
strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
+ /* Find SLP sequences starting from groups of strided stores. */
for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store))
ok = true;
@@ -1059,6 +1212,12 @@ vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
return false;
}
+ /* Find SLP sequences starting from groups of reductions. */
+ if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo))
+ && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
+ VEC_index (gimple, reductions, 0)))
+ ok = true;
+
return true;
}
@@ -1120,7 +1279,10 @@ vect_detect_hybrid_slp_stmts (slp_tree node)
if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
&& !STMT_SLP_TYPE (stmt_vinfo)
&& (STMT_VINFO_RELEVANT (stmt_vinfo)
- || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo))))
+ || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
+ && !(gimple_code (use_stmt) == GIMPLE_PHI
+ && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt))
+ == vect_reduction_def))
vect_mark_slp_stmts (node, hybrid, i);
vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
@@ -1429,11 +1591,14 @@ vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
/* For constant and loop invariant defs of SLP_NODE this function returns
(vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
- stmts. NUMBER_OF_VECTORS is the number of vector defs to create. */
+ stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
+ REDUC_INDEX is the index of the reduction operand in the statements, unless
+ it is -1. */
static void
vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
- unsigned int op_num, unsigned int number_of_vectors)
+ unsigned int op_num, unsigned int number_of_vectors,
+ int reduc_index)
{
VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
gimple stmt = VEC_index (gimple, stmts, 0);
@@ -1449,6 +1614,50 @@ vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
int number_of_copies = 1;
VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
bool constant_p, is_store;
+ tree neutral_op = NULL;
+
+ if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
+ {
+ enum tree_code code = gimple_assign_rhs_code (stmt);
+ if (reduc_index == -1)
+ {
+ VEC_free (tree, heap, *vec_oprnds);
+ return;
+ }
+
+ op_num = reduc_index - 1;
+ op = gimple_op (stmt, op_num + 1);
+ /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
+ we need either neutral operands or the original operands. See
+ get_initial_def_for_reduction() for details. */
+ switch (code)
+ {
+ case WIDEN_SUM_EXPR:
+ case DOT_PROD_EXPR:
+ case PLUS_EXPR:
+ case MINUS_EXPR:
+ case BIT_IOR_EXPR:
+ case BIT_XOR_EXPR:
+ if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
+ neutral_op = build_real (TREE_TYPE (op), dconst0);
+ else
+ neutral_op = build_int_cst (TREE_TYPE (op), 0);
+
+ break;
+
+ case MULT_EXPR:
+ case BIT_AND_EXPR:
+ if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
+ neutral_op = build_real (TREE_TYPE (op), dconst1);
+ else
+ neutral_op = build_int_cst (TREE_TYPE (op), 1);
+
+ break;
+
+ default:
+ neutral_op = NULL;
+ }
+ }
if (STMT_VINFO_DATA_REF (stmt_vinfo))
{
@@ -1499,6 +1708,19 @@ vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
else
op = gimple_op (stmt, op_num + 1);
+ if (reduc_index != -1)
+ {
+ struct loop *loop = (gimple_bb (stmt))->loop_father;
+ gimple def_stmt = SSA_NAME_DEF_STMT (op);
+
+ gcc_assert (loop);
+ /* Get the def before the loop. */
+ op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
+ loop_preheader_edge (loop));
+ if (j != (number_of_copies - 1) && neutral_op)
+ op = neutral_op;
+ }
+
/* Create 'vect_ = {op0,op1,...,opn}'. */
t = tree_cons (NULL_TREE, op, t);
@@ -1536,8 +1758,25 @@ vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
to replicate the vectors. */
while (number_of_vectors > VEC_length (tree, *vec_oprnds))
{
- for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
- VEC_quick_push (tree, *vec_oprnds, vop);
+ tree neutral_vec = NULL;
+
+ if (neutral_op)
+ {
+ if (!neutral_vec)
+ {
+ t = NULL;
+ for (i = 0; i < (unsigned) nunits; i++)
+ t = tree_cons (NULL_TREE, neutral_op, t);
+ neutral_vec = build_vector (vector_type, t);
+ }
+
+ VEC_quick_push (tree, *vec_oprnds, neutral_vec);
+ }
+ else
+ {
+ for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
+ VEC_quick_push (tree, *vec_oprnds, vop);
+ }
}
}
@@ -1576,7 +1815,7 @@ vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
void
vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
- VEC (tree,heap) **vec_oprnds1)
+ VEC (tree,heap) **vec_oprnds1, int reduc_index)
{
gimple first_stmt;
enum tree_code code;
@@ -1607,19 +1846,26 @@ vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
*vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);
/* SLP_NODE corresponds either to a group of stores or to a group of
- unary/binary operations. We don't call this function for loads. */
- if (SLP_TREE_LEFT (slp_node))
+ unary/binary operations. We don't call this function for loads.
+ For reduction defs we call vect_get_constant_vectors(), since we are
+ looking for initial loop invariant values. */
+ if (SLP_TREE_LEFT (slp_node) && reduc_index == -1)
/* The defs are already vectorized. */
vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
else
/* Build vectors from scalar defs. */
- vect_get_constant_vectors (slp_node, vec_oprnds0, 0, number_of_vects);
+ vect_get_constant_vectors (slp_node, vec_oprnds0, 0, number_of_vects,
+ reduc_index);
if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
/* Since we don't call this function with loads, this is a group of
stores. */
return;
+ /* For reductions, we only need initial values. */
+ if (reduc_index != -1)
+ return;
+
code = gimple_assign_rhs_code (first_stmt);
if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
return;
@@ -1638,7 +1884,7 @@ vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
else
/* Build vectors from scalar defs. */
- vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects);
+ vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects, -1);
}
@@ -2027,22 +2273,7 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
si = gsi_for_stmt (stmt);
is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
- if (is_store)
- {
- if (DR_GROUP_FIRST_DR (stmt_info))
- /* If IS_STORE is TRUE, the vectorization of the
- interleaving chain was completed - free all the stores in
- the chain. */
- vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
- else
- /* FORNOW: SLP originates only from strided stores. */
- gcc_unreachable ();
-
- return true;
- }
-
- /* FORNOW: SLP originates only from strided stores. */
- return false;
+ return is_store;
}
@@ -2075,6 +2306,26 @@ vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
fprintf (vect_dump, "vectorizing stmts using SLP.");
}
+ for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
+ {
+ slp_tree root = SLP_INSTANCE_TREE (instance);
+ gimple store;
+ unsigned int j;
+ gimple_stmt_iterator gsi;
+
+ for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
+ && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
+ {
+ if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
+ break;
+
+ /* Free the attached stmt_vec_info and remove the stmt. */
+ gsi = gsi_for_stmt (store);
+ gsi_remove (&gsi, true);
+ free_stmt_vec_info (store);
+ }
+ }
+
return is_store;
}
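To close, a standalone sketch (plain C, not GCC internals; every name in
it is illustrative) of the neutral-element idea behind the new
vect_get_constant_vectors logic: extra vector copies of a reduction
operand are filled with the operation's identity value (0 for PLUS,
MINUS, BIT_IOR and BIT_XOR style reductions, 1 for MULT), so the padding
cannot change the final reduced result:

#include <stdio.h>

#define LANES 4

static int
reduce_plus_padded (const int *a, int n)
{
  /* Initial "vector" filled with the neutral element of PLUS (0),
     analogous to the initial/extra vector copies the patch builds.  */
  int lanes[LANES] = { 0, 0, 0, 0 };
  int i, l;

  /* Whole-vector steps.  */
  for (i = 0; i + LANES <= n; i += LANES)
    for (l = 0; l < LANES; l++)
      lanes[l] += a[i + l];

  /* Leftover elements; lanes never touched here still hold 0.  */
  for (; i < n; i++)
    lanes[i % LANES] += a[i];

  /* Final horizontal reduction is unaffected by the zero padding.  */
  return lanes[0] + lanes[1] + lanes[2] + lanes[3];
}

int
main (void)
{
  int a[] = { 1, 2, 3, 4, 5, 6, 7 };
  printf ("%d\n", reduce_plus_padded (a, 7));  /* prints 28 */
  return 0;
}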