diff options
Diffstat (limited to 'gcc/tree-vect-slp.c')
-rw-r--r-- | gcc/tree-vect-slp.c | 85 |
1 files changed, 84 insertions, 1 deletions
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index f65837df4fc..704f42fd886 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -1565,6 +1565,54 @@ vect_analyze_slp_cost (slp_instance instance, void *data) body_cost_vec.release (); } +/* Splits a group of stores, currently beginning at FIRST_STMT, into two groups: + one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing + the first GROUP1_SIZE stmts, since stores are consecutive), the second + containing the remainder. + Return the first stmt in the second group. */ + +static gimple * +vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size) +{ + stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt); + gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt); + gcc_assert (group1_size > 0); + int group2_size = GROUP_SIZE (first_vinfo) - group1_size; + gcc_assert (group2_size > 0); + GROUP_SIZE (first_vinfo) = group1_size; + + gimple *stmt = first_stmt; + for (unsigned i = group1_size; i > 1; i--) + { + stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)); + gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1); + } + /* STMT is now the last element of the first group. */ + gimple *group2 = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)); + GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0; + + GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size; + for (stmt = group2; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt))) + { + GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2; + gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1); + } + + /* For the second group, the GROUP_GAP is that before the original group, + plus skipping over the first vector. */ + GROUP_GAP (vinfo_for_stmt (group2)) = + GROUP_GAP (first_vinfo) + group1_size; + + /* GROUP_GAP of the first group now has to skip over the second group too. */ + GROUP_GAP (first_vinfo) += group2_size; + + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n", + group1_size, group2_size); + + return group2; +} + /* Analyze an SLP instance starting from a group of grouped stores. Call vect_build_slp_tree to build a tree of packed stmts if possible. Return FALSE if it's impossible to SLP any stmt in the loop. */ @@ -1580,7 +1628,7 @@ vect_analyze_slp_instance (vec_info *vinfo, tree vectype, scalar_type = NULL_TREE; gimple *next; unsigned int vectorization_factor = 0; - int i; + unsigned int i; unsigned int max_nunits = 0; vec<slp_tree> loads; struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)); @@ -1774,6 +1822,41 @@ vect_analyze_slp_instance (vec_info *vinfo, vect_free_slp_tree (node); loads.release (); + /* For basic block SLP, try to break the group up into multiples of the + vectorization factor. */ + if (is_a <bb_vec_info> (vinfo) + && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) + && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))) + { + /* We consider breaking the group only on VF boundaries from the existing + start. */ + for (i = 0; i < group_size; i++) + if (!matches[i]) break; + + if (i >= vectorization_factor && i < group_size) + { + /* Split into two groups at the first vector boundary before i. */ + gcc_assert ((vectorization_factor & (vectorization_factor - 1)) == 0); + unsigned group1_size = i & ~(vectorization_factor - 1); + + gimple *rest = vect_split_slp_store_group (stmt, group1_size); + bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size); + /* If the first non-match was in the middle of a vector, + skip the rest of that vector. */ + if (group1_size < i) + { + i = group1_size + vectorization_factor; + if (i < group_size) + rest = vect_split_slp_store_group (rest, vectorization_factor); + } + if (i < group_size) + res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size); + return res; + } + /* Even though the first vector did not all match, we might be able to SLP + (some) of the remainder. FORNOW ignore this possibility. */ + } + return false; } |