Diffstat (limited to 'gcc/tree-vect-loop-manip.c')
 gcc/tree-vect-loop-manip.c | 282 +++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 274 insertions(+), 8 deletions(-)
diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index 57aab1b764f..901113fcf03 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -369,6 +369,242 @@ vect_maybe_permute_loop_masks (gimple_seq *seq, rgroup_masks *dest_rgm,
return false;
}
+/* Helper for vect_set_speculative_masks. Set the masks in RGM directly
+ from the corresponding scalar values. RGM belongs to LOOP, which has
+ been vectorized according to LOOP_VINFO. NSCALARITERS_SKIP is the
+ number of scalar iterations that we should skip during the first
+ iteration of the vector loop (because the start point has been
+ brought forward by that amount to achieve alignment).
+
+ Add any new preheader statements to PREHEADER_SEQ and any new header
+ statements to HEADER_SEQ. */
+
+static void
+vect_set_speculative_masks_directly (struct loop *loop,
+ loop_vec_info loop_vinfo,
+ gimple_seq *preheader_seq,
+ gimple_seq *header_seq,
+ rgroup_masks *rgm,
+ tree nscalariters_skip)
+{
+ /* It doesn't make sense to align for speculation when we have a
+ capped VF. */
+ gcc_assert (!use_capped_vf (loop_vinfo));
+
+ tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
+ tree mask_type = rgm->mask_type;
+ poly_uint64 nscalars_per_mask = TYPE_VECTOR_SUBPARTS (mask_type);
+ unsigned int nscalars_per_iter = rgm->max_nscalars_per_iter;
+
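+ /* NSCALARITERS_SKIP counts scalar iterations; convert it to a count
+ of scalar values, since each iteration of this rgroup handles
+ NSCALARS_PER_ITER values. */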
+ tree nscalars_skip = nscalariters_skip;
+ if (nscalars_per_iter != 1)
+ {
+ tree factor = build_int_cst (compare_type, nscalars_per_iter);
+ nscalars_skip = gimple_build (preheader_seq, MULT_EXPR, compare_type,
+ nscalars_skip, factor);
+ }
+
+ tree full_mask = build_minus_one_cst (mask_type);
+ tree mask;
+ unsigned int i;
+ FOR_EACH_VEC_ELT (rgm->masks, i, mask)
+ {
+ /* Previous masks covered START scalars. This mask covers the
+ next batch. */
+ tree start = build_int_cst (compare_type, nscalars_per_mask * i);
+ tree init_mask = vect_gen_while_not (preheader_seq, mask_type,
+ start, nscalars_skip);
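+ /* INIT_MASK is true for lanes whose scalar index is at least
+ NSCALARS_SKIP; e.g. with 8-lane masks and NSCALARS_SKIP == 3, the
+ first mask starts as { 0, 0, 0, 1, 1, 1, 1, 1 }. */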
+
+ /* Always use a full mask for subsequent iterations of the loop. */
+ vect_set_loop_mask (loop, header_seq, mask, init_mask,
+ full_mask, NULL_TREE);
+ }
+}
+
+/* Set up the controlling masks for LOOP, which is a speculative loop that
+ has been vectorized according to LOOP_VINFO. */
+
+static void
+vect_set_speculative_masks (struct loop *loop, loop_vec_info loop_vinfo)
+{
+ gimple_seq preheader_seq = NULL;
+ gimple_seq header_seq = NULL;
+
+ vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
+ tree nscalariters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
+ rgroup_masks *rgm;
+ unsigned int i;
+ FOR_EACH_VEC_ELT (*masks, i, rgm)
+ if (!rgm->masks.is_empty ())
+ {
+ /* We shouldn't be using masks if there are no elements to skip
+ on the first iteration. */
+ gcc_assert (nscalariters_skip != NULL_TREE);
+
+ /* First try using permutes. */
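+ /* When NMASKS is even, the masks in RGM can be derived by permuting
+ the masks of the rgroup with NMASKS / 2 masks per iteration, if
+ that rgroup is populated (see vect_maybe_permute_loop_masks). */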
+ unsigned int nmasks = i + 1;
+ if ((nmasks & 1) == 0)
+ {
+ rgroup_masks *half_rgm = &(*masks)[nmasks / 2 - 1];
+ if (!half_rgm->masks.is_empty ()
+ && vect_maybe_permute_loop_masks (&header_seq, rgm, half_rgm))
+ continue;
+ }
+
+ vect_set_speculative_masks_directly (loop, loop_vinfo,
+ &preheader_seq, &header_seq,
+ rgm, nscalariters_skip);
+ }
+
+ /* Emit all accumulated statements. */
+ add_preheader_seq (loop, preheader_seq);
+ add_header_seq (loop, header_seq);
+}
+
+/* RGM belongs to the nonspeculative masks of LOOP_VINFO. Set up the masks
+ in RGM so that the active bits corresponding to the first NSCALARITERS
+ scalar iterations are true and every other bit is false. Add any new
+ statements before GSI. */
+
+static void
+vect_set_nonspeculative_masks_directly (loop_vec_info loop_vinfo,
+ gimple_stmt_iterator *gsi,
+ rgroup_masks *rgm, tree nscalariters)
+{
+ tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
+ tree mask_type = rgm->mask_type;
+ poly_uint64 nscalars_per_mask = TYPE_VECTOR_SUBPARTS (mask_type);
+ unsigned int nscalars_per_iter = rgm->max_nscalars_per_iter;
+
+ /* Calculate the number of scalars covered by NSCALARITERS scalar
+ iterations of this rgroup. */
+ gimple_seq seq = NULL;
+ tree nscalars = nscalariters;
+ if (nscalars_per_iter != 1)
+ nscalars = gimple_build (&seq, MULT_EXPR, compare_type, nscalars,
+ build_int_cst (compare_type, nscalars_per_iter));
+ if (seq)
+ gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
+
+ tree mask;
+ unsigned int i;
+ FOR_EACH_VEC_ELT (rgm->masks, i, mask)
+ {
+ /* Previous masks covered START scalars. This mask covers the
+ next batch. */
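+ /* E.g. with 8-lane masks, mask 0 covers scalar indices [0, 8),
+ mask 1 covers [8, 16), and so on; each lane is true only if its
+ scalar index is below NSCALARS. */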
+ tree start = build_int_cst (compare_type, nscalars_per_mask * i);
+ if (LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo))
+ {
+ /* First get a mask that ignores which lanes are active. */
+ tree temp = make_ssa_name (mask_type);
+ gcall *call = vect_gen_while (temp, start, nscalars);
+ gsi_insert_before (gsi, call, GSI_SAME_STMT);
+
+ /* Now AND the result with the active lanes. */
+ tree active
+ = vect_get_loop_mask (gsi, &LOOP_VINFO_MASKS (loop_vinfo),
+ rgm->masks.length (), mask_type, i);
+ gassign *assign = gimple_build_assign (mask, BIT_AND_EXPR,
+ temp, active);
+ gsi_insert_before (gsi, assign, GSI_SAME_STMT);
+ }
+ else
+ {
+ /* All lanes are active. */
+ gcall *call = vect_gen_while (mask, start, nscalars);
+ gsi_insert_before (gsi, call, GSI_SAME_STMT);
+ }
+ }
+}
+
+/* Set MASK to the mask of active elements up to and including the
+ first iteration for which the exit condition of LOOP_VINFO is true.
+ Insert any new statements before GSI. ALL_ACTIVE_P is true if we
+ should treat all elements as active, false if we should get the
+ mask of active elements from the main loop mask. */
+
+static void
+vect_add_break_after (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
+ tree mask, bool all_active_p)
+{
+ tree mask_type = TREE_TYPE (mask);
+
+ tree active;
+ if (all_active_p)
+ active = build_minus_one_cst (mask_type);
+ else
+ active = vect_get_loop_mask (gsi, &LOOP_VINFO_MASKS (loop_vinfo),
+ 1, mask_type, 0);
+
+ /* Break the mask after the first true exit condition. */
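+ /* E.g. if ACTIVE is { 1, 1, 1, 1 } and EXIT_MASK is { 0, 1, 0, 1 },
+ the result is { 1, 1, 0, 0 }: every lane after the first true exit
+ condition is cleared. */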
+ tree exit_mask = LOOP_VINFO_EXIT_TEST_MASK (loop_vinfo);
+ gcall *call = gimple_build_call_internal (IFN_BREAK_AFTER, 2,
+ active, exit_mask);
+ gimple_call_set_lhs (call, mask);
+ gsi_insert_before (gsi, call, GSI_SAME_STMT);
+}
+
+/* Set up the nonspeculative masks in LOOP_VINFO. Emit any new statements
+ before GSI. */
+
+static void
+vect_set_nonspeculative_masks (loop_vec_info loop_vinfo,
+ gimple_stmt_iterator *gsi)
+{
+ vec_niters_and_mask nim;
+ vec_loop_masks *masks = &LOOP_VINFO_NONSPECULATIVE_MASKS (loop_vinfo);
+ tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
+ tree niters = NULL_TREE;
+ rgroup_masks *rgm;
+ unsigned int i;
+ FOR_EACH_VEC_ELT (*masks, i, rgm)
+ if (!rgm->masks.is_empty ())
+ {
+ unsigned int nmasks = i + 1;
+
+ /* Try to set the mask directly with a BREAK_AFTER. */
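+ /* This works when the single mask has one lane per scalar
+ iteration. */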
+ if (nmasks == 1 && rgm->max_nscalars_per_iter == 1)
+ {
+ /* All elements are active unless we're peeling for
+ alignment. */
+ vect_add_break_after (loop_vinfo, gsi, rgm->masks[0],
+ !LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo));
+ continue;
+ }
+
+ /* Try using permutes. */
+ if ((nmasks & 1) == 0)
+ {
+ gimple_seq seq = NULL;
+ rgroup_masks *half_rgm = &(*masks)[nmasks / 2 - 1];
+ if (!half_rgm->masks.is_empty ()
+ && vect_maybe_permute_loop_masks (&seq, rgm, half_rgm))
+ {
+ gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
+ continue;
+ }
+ }
+
+ if (niters == NULL_TREE)
+ {
+ /* Get the mask of elements up to and including the first
+ iteration for which the exit condition is true.
+ Include any inactive starting elements at this stage. */
+ tree mask_type = vect_mask_type_for_speculation (loop_vinfo);
+ nim.mask = make_ssa_name (mask_type);
+ vect_add_break_after (loop_vinfo, gsi, nim.mask, true);
+
+ /* Convert the mask to a scalar count, then convert the
+ sizetype result to the mask comparison type. */
+ gimple_seq seq = NULL;
+ niters = vect_get_niters_from_mask (&seq, &nim);
+ niters = gimple_convert (&seq, compare_type, niters);
+ if (seq)
+ gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
+ }
+ vect_set_nonspeculative_masks_directly (loop_vinfo, gsi, rgm, niters);
+ }
+}
+
/* Helper for vect_set_loop_condition_masked. Generate definitions for
all the masks in RGM and return a mask that is nonzero when the loop
needs to iterate. Add any new preheader statements to PREHEADER_SEQ
@@ -939,11 +1175,29 @@ vect_set_loop_condition (struct loop *loop, loop_vec_info loop_vinfo,
tree niters, tree step, tree final_iv,
bool niters_maybe_zero)
{
- gcond *cond_stmt;
+ gcond *cond_stmt = NULL;
gcond *orig_cond = get_loop_exit_condition (loop);
gimple_stmt_iterator loop_cond_gsi = gsi_for_stmt (orig_cond);
+ bool masked_p = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
+ bool speculation_p
+ = (loop_vinfo && LOOP_VINFO_SPECULATIVE_EXECUTION (loop_vinfo));
- if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
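+ /* For speculative loops the iteration count is not known in advance,
+ so the exit test cannot be rewritten in terms of NITERS; the
+ original exit condition is kept and the nonspeculative statements
+ are masked instead. */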
+ if (speculation_p)
+ {
+ /* Set the masks that control statements that cannot be speculatively
+ executed. */
+ vect_set_nonspeculative_masks (loop_vinfo, &loop_cond_gsi);
+
+ /* ...then insert the nonspeculative statements themselves. */
+ gimple_seq late_seq = LOOP_VINFO_NONSPECULATIVE_SEQ (loop_vinfo);
+ if (late_seq)
+ gsi_insert_seq_before (&loop_cond_gsi, late_seq, GSI_SAME_STMT);
+
+ /* Set up the masks that control the speculative statements. */
+ if (masked_p)
+ vect_set_speculative_masks (loop, loop_vinfo);
+ }
+ else if (masked_p)
cond_stmt = vect_set_loop_condition_masked (loop, loop_vinfo, niters,
final_iv, niters_maybe_zero,
loop_cond_gsi);
@@ -952,11 +1206,14 @@ vect_set_loop_condition (struct loop *loop, loop_vec_info loop_vinfo,
final_iv, niters_maybe_zero,
loop_cond_gsi);
- /* Remove old loop exit test. */
- gsi_remove (&loop_cond_gsi, true);
- free_stmt_vec_info (orig_cond);
+ if (!speculation_p)
+ {
+ /* Remove old loop exit test. */
+ gsi_remove (&loop_cond_gsi, true);
+ free_stmt_vec_info (orig_cond);
+ }
- if (dump_enabled_p ())
+ if (dump_enabled_p () && cond_stmt)
{
dump_printf_loc (MSG_NOTE, vect_location, "New loop exit condition: ");
dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
@@ -1644,13 +1901,15 @@ vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
{
struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
tree var;
- tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
gimple_seq stmts = NULL, new_stmts = NULL;
tree iters, iters_name;
gimple *dr_stmt = DR_STMT (dr);
stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
unsigned int target_align = DR_TARGET_ALIGNMENT (dr);
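+ /* For speculative loops LOOP_VINFO_NITERS is null (the iteration
+ count is not known in advance), so use sizetype for the prolog
+ iteration count instead. */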
+ tree niters_type = (LOOP_VINFO_SPECULATIVE_EXECUTION (loop_vinfo)
+ ? size_type_node
+ : TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)));
if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
{
@@ -1829,6 +2088,12 @@ vect_prepare_for_masked_peels (loop_vec_info loop_vinfo)
tree
vect_build_loop_niters (loop_vec_info loop_vinfo, bool *new_var_p)
{
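+ /* Speculative loops have no precomputed iteration count; return null
+ and leave it to the caller to handle. */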
+ if (!LOOP_VINFO_NITERS (loop_vinfo))
+ {
+ gcc_assert (LOOP_VINFO_SPECULATIVE_EXECUTION (loop_vinfo));
+ return NULL;
+ }
+
tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
if (TREE_CODE (ni) == INTEGER_CST)
return ni;
@@ -2421,7 +2686,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
bool check_profitability, bool niters_no_overflow)
{
edge e, guard_e;
- tree type = TREE_TYPE (niters), guard_cond;
+ tree guard_cond;
basic_block guard_bb, guard_to;
profile_probability prob_prolog, prob_vector, prob_epilog;
int estimated_vf;
@@ -2469,6 +2734,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
/* Generate the number of iterations for the prolog loop. We do this here
so that we can also get the upper bound on the number of iterations. */
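+ /* NITERS can be null on entry for speculative loops, so defer
+ computing its type until this point. */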
+ tree type = TREE_TYPE (niters);
tree niters_prolog;
int bound_prolog = 0;
if (prolog_peeling)