author     hjagasia <hjagasia@138bc75d-0d04-0410-961f-82ee72b054a4>  2007-06-08 16:30:49 +0000
committer  hjagasia <hjagasia@138bc75d-0d04-0410-961f-82ee72b054a4>  2007-06-08 16:30:49 +0000
commit     867c03eb9cc3badac4834c0e6cab7b849db1f573 (patch)
tree       b261d3382cad73cc9bf2627a1ef52c49c881835e  /gcc/tree-vect-transform.c
parent     2d0ff363215b22ad82b36ffc5bdf65248aa727ae (diff)
download   gcc-867c03eb9cc3badac4834c0e6cab7b849db1f573.tar.gz
gcc/ChangeLog:
2007-06-08  Harsha Jagasia  <harsha.jagasia@amd.com>
            Tony Linthicum  <tony.linthicum@amd.com>

        * doc/extend.texi: Add fvect-cost-model flag.
        * common.opt (fvect-cost-model): New flag.
        * tree-vectorizer.c (new_stmt_vec_info): Initialize inside and
        outside cost fields in stmt_vec_info struct for STMT.
        * tree-vectorizer.h (stmt_vec_info): Define inside and outside cost
        fields in stmt_vec_info struct and access functions for the same.
        (TARG_COND_BRANCH_COST): Define cost of conditional branch.
        (TARG_VEC_STMT_COST): Define cost of any vector operation, excluding
        load, store and vector-to-scalar operation.
        (TARG_VEC_TO_SCALAR_COST): Define cost of vector-to-scalar operation.
        (TARG_VEC_LOAD_COST): Define cost of aligned vector load.
        (TARG_VEC_UNALIGNED_LOAD_COST): Define cost of misaligned vector
        load.
        (TARG_VEC_STORE_COST): Define cost of vector store.
        (vect_estimate_min_profitable_iters): Define new function.
        * tree-vect-analyze.c (vect_analyze_operations): Add a compile-time
        check to evaluate whether the loop iteration count is less than the
        minimum profitable iteration count determined by the cost model or
        the minimum vect loop bound defined by the user, whichever is more
        conservative.
        * tree-vect-transform.c (vect_do_peeling_for_loop_bound): Add a
        run-time check to evaluate whether the loop iteration count is less
        than the minimum profitable iteration count determined by the cost
        model or the minimum vect loop bound defined by the user, whichever
        is more conservative.
        (vect_estimate_min_profitable_iters): New function to estimate the
        minimum iteration count required for the vector version of the loop
        to be profitable over the scalar version.
        (vect_model_reduction_cost): New function.
        (vect_model_induction_cost): New function.
        (vect_model_simple_cost): New function.
        (vect_cost_strided_group_size): New function.
        (vect_model_store_cost): New function.
        (vect_model_load_cost): New function.
        (vectorizable_reduction): Call vect_model_reduction_cost during
        analysis phase.
        (vectorizable_induction): Call vect_model_induction_cost during
        analysis phase.
        (vectorizable_load): Call vect_model_load_cost during analysis
        phase.
        (vectorizable_store): Call vect_model_store_cost during analysis
        phase.
        (vectorizable_call, vectorizable_assignment, vectorizable_operation,
        vectorizable_promotion, vectorizable_demotion): Call
        vect_model_simple_cost during analysis phase.

gcc/testsuite/ChangeLog:

2007-06-08  Harsha Jagasia  <harsha.jagasia@amd.com>

        * gcc.dg/vect/costmodel: New directory.
        * gcc.dg/vect/costmodel/i386: New directory.
        * gcc.dg/vect/costmodel/i386/i386-costmodel-vect.exp: New testsuite.
        * gcc.dg/vect/costmodel/i386/costmodel-fast-math-vect-pr29925.c:
        New test.
        * gcc.dg/vect/costmodel/i386/costmodel-vect-31.c: New test.
        * gcc.dg/vect/costmodel/i386/costmodel-vect-33.c: New test.
        * gcc.dg/vect/costmodel/i386/costmodel-vect-68.c: New test.
        * gcc.dg/vect/costmodel/i386/costmodel-vect-reduc-1char.c: New test.
        * gcc.dg/vect/costmodel/x86_64: New directory.
        * gcc.dg/vect/costmodel/x86_64/x86_64-costmodel-vect.exp: New
        testsuite.
        * gcc.dg/vect/costmodel/x86_64/costmodel-fast-math-vect-pr29925.c:
        New test.
        * gcc.dg/vect/costmodel/x86_64/costmodel-vect-31.c: New test.
        * gcc.dg/vect/costmodel/x86_64/costmodel-vect-33.c: New test.
        * gcc.dg/vect/costmodel/x86_64/costmodel-vect-68.c: New test.
        * gcc.dg/vect/costmodel/x86_64/costmodel-vect-reduc-1char.c: New
        test.
        * gcc.dg/vect/costmodel/x86_64/costmodel-pr30843.c: New test.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@125575 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/tree-vect-transform.c')
-rw-r--r--  gcc/tree-vect-transform.c  547
1 file changed, 536 insertions(+), 11 deletions(-)
diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c
index e49fba116a8..00e55ed209a 100644
--- a/gcc/tree-vect-transform.c
+++ b/gcc/tree-vect-transform.c
@@ -74,6 +74,490 @@ static void vect_update_inits_of_drs (loop_vec_info, tree);
static int vect_min_worthwhile_factor (enum tree_code);
+/* Function vect_estimate_min_profitable_iters
+
+ Return the number of iterations required for the vector version of the
+ loop to be profitable relative to the cost of the scalar version of the
+ loop.
+
+ TODO: Take profile info into account, if available, before making
+ vectorization decisions. */
+
+int
+vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
+{
+ int i;
+ int min_profitable_iters;
+ int peel_iters_prologue;
+ int peel_iters_epilogue;
+ int vec_inside_cost = 0;
+ int vec_outside_cost = 0;
+ int scalar_single_iter_cost = 0;
+ int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
+ int nbbs = loop->num_nodes;
+
+ /* Cost model disabled. */
+ if (!flag_vect_cost_model)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "cost model disabled.");
+ return 0;
+ }
+
+ /* Requires loop versioning tests to handle misalignment.
+ FIXME: Make cost depend on number of stmts in may_misalign list. */
+
+ if (LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
+ {
+ vec_outside_cost += TARG_COND_BRANCH_COST;
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "cost model: Adding cost of checks for loop "
+ "versioning.\n");
+ }
+
+ /* Requires a prologue loop when peeling to handle misalignment. Add cost of
+ two guards, one for the peeled loop and one for the vector loop. */
+
+ peel_iters_prologue = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
+ if (peel_iters_prologue)
+ {
+ vec_outside_cost += 2 * TARG_COND_BRANCH_COST;
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "cost model: Adding cost of checks for "
+ "prologue.\n");
+ }
+
+ /* Requires an epilogue loop to finish up remaining iterations after vector
+ loop. Add cost of two guards, one for the peeled loop and one for the
+ vector loop. */
+
+ if ((peel_iters_prologue < 0)
+ || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+ || LOOP_VINFO_INT_NITERS (loop_vinfo) % vf)
+ {
+ vec_outside_cost += 2 * TARG_COND_BRANCH_COST;
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "cost model : Adding cost of checks for "
+ "epilogue.\n");
+ }
+
+ /* Count statements in scalar loop. Using this as scalar cost for a single
+ iteration for now.
+
+ TODO: Add outer loop support.
+
+ TODO: Consider assigning different costs to different scalar
+ statements. */
+
+ for (i = 0; i < nbbs; i++)
+ {
+ block_stmt_iterator si;
+ basic_block bb = bbs[i];
+
+ for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
+ {
+ tree stmt = bsi_stmt (si);
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ if (!STMT_VINFO_RELEVANT_P (stmt_info)
+ && !STMT_VINFO_LIVE_P (stmt_info))
+ continue;
+ scalar_single_iter_cost++;
+ vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info);
+ vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
+ }
+ }
+
+ /* Add additional cost for the peeled instructions in the prologue and
+ epilogue loops.
+
+ FORNOW: If we don't know the value of peel_iters for the prologue or
+ epilogue at compile time, we assume the worst.
+
+ TODO: Build an expression that represents peel_iters for prologue and
+ epilogue to be used in a run-time test. */
+
+ peel_iters_prologue = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
+
+ if (peel_iters_prologue < 0)
+ {
+ peel_iters_prologue = vf - 1;
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "cost model: "
+ "prologue peel iters set conservatively.");
+
+ /* If peeling for alignment is unknown, the loop bound of the main loop
+ becomes unknown. */
+ peel_iters_epilogue = vf - 1;
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "cost model: "
+ "epilogue peel iters set conservatively because "
+ "peeling for alignment is unknown .");
+ }
+ else
+ {
+ if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
+ {
+ peel_iters_epilogue = vf - 1;
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "cost model: "
+ "epilogue peel iters set conservatively because "
+ "loop iterations are unknown .");
+ }
+ else
+ peel_iters_epilogue =
+ (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_iters_prologue)
+ % vf;
+ }
+
+ vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
+ + (peel_iters_epilogue * scalar_single_iter_cost);
+
+ /* Calculate number of iterations required to make the vector version
+ profitable, relative to the loop bodies only. The following condition
+ must hold true: ((SIC*VF)-VIC)*niters > VOC*VF, where
+ SIC = scalar iteration cost, VIC = vector iteration cost,
+ VOC = vector outside cost and VF = vectorization factor. */
+
+ if ((scalar_single_iter_cost * vf) > vec_inside_cost)
+ {
+ if (vec_outside_cost == 0)
+ min_profitable_iters = 1;
+ else
+ {
+ min_profitable_iters = (vec_outside_cost * vf)
+ / ((scalar_single_iter_cost * vf)
+ - vec_inside_cost);
+
+ if ((scalar_single_iter_cost * vf * min_profitable_iters)
+ <= ((vec_inside_cost * min_profitable_iters)
+ + (vec_outside_cost * vf)))
+ min_profitable_iters++;
+ }
+ }
+ /* Vector version will never be profitable. */
+ else
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "cost model: vector iteration cost = %d "
+ "is divisible by scalar iteration cost = %d by a factor "
+ "greater than or equal to the vectorization factor = %d .",
+ vec_inside_cost, scalar_single_iter_cost, vf);
+ return -1;
+ }
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ {
+ fprintf (vect_dump, "Cost model analysis: \n");
+ fprintf (vect_dump, " Vector inside of loop cost: %d\n",
+ vec_inside_cost);
+ fprintf (vect_dump, " Vector outside of loop cost: %d\n",
+ vec_outside_cost);
+ fprintf (vect_dump, " Scalar cost: %d\n", scalar_single_iter_cost);
+ fprintf (vect_dump, " prologue iterations: %d\n",
+ peel_iters_prologue);
+ fprintf (vect_dump, " epilogue iterations: %d\n",
+ peel_iters_epilogue);
+ fprintf (vect_dump, " Calculated minimum iters for profitability: %d\n",
+ min_profitable_iters);
+ fprintf (vect_dump, " Actual minimum iters for profitability: %d\n",
+ min_profitable_iters < vf ? vf : min_profitable_iters);
+ }
+
+ return min_profitable_iters < vf ? vf : min_profitable_iters;
+}
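
The break-even arithmetic above can be exercised in isolation. The following
standalone sketch mirrors it under assumed costs (the TARG_* values are
target-defined in GCC; min_profitable_iters_example is an illustrative
helper, not part of this patch): solve ((SIC * VF) - VIC) * niters > VOC * VF
for niters, round up, and clamp to at least VF as the return statement does.

#include <stdio.h>

static int
min_profitable_iters_example (int sic, int vic, int voc, int vf)
{
  int saving = sic * vf - vic;  /* Saving per vector iteration.  */
  int iters;

  if (saving <= 0)
    return -1;                  /* Vector version never profitable.  */

  if (voc == 0)
    iters = 1;
  else
    {
      iters = (voc * vf) / saving;
      /* Integer division rounds down; bump until strictly cheaper.  */
      if (sic * vf * iters <= vic * iters + voc * vf)
        iters++;
    }

  return iters < vf ? vf : iters;
}

int
main (void)
{
  /* SIC = 4, VIC = 6, VOC = 20, VF = 4: a saving of 10 per vector
     iteration against an outside cost of 80, so 8 iterations break
     even and 9 are needed to profit (9 > VF, so 9 is returned).  */
  printf ("%d\n", min_profitable_iters_example (4, 6, 20, 4));  /* 9 */
  return 0;
}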
+
+
+/* TODO: There is a close dependency between the vect_model_*_cost and
+ vectorizable_* functions. Improve the design to avoid maintenance issues. */
+
+/* Function vect_model_reduction_cost.
+
+ Models cost for a reduction operation, including the vector ops
+ generated within the strip-mine loop, the initial definition before
+ the loop, and the epilogue code that must be generated. */
+
+static void
+vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
+ int ncopies)
+{
+ int outer_cost = 0;
+ enum tree_code code;
+ optab optab;
+ tree vectype;
+ tree orig_stmt;
+ tree reduction_op;
+ enum machine_mode mode;
+ tree operation = GIMPLE_STMT_OPERAND (STMT_VINFO_STMT (stmt_info), 1);
+ int op_type = TREE_CODE_LENGTH (TREE_CODE (operation));
+
+ /* Cost of reduction op inside loop. */
+ STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) += ncopies * TARG_VEC_STMT_COST;
+
+ reduction_op = TREE_OPERAND (operation, op_type-1);
+ vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
+ mode = TYPE_MODE (vectype);
+ orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+
+ if (!orig_stmt)
+ orig_stmt = STMT_VINFO_STMT (stmt_info);
+
+ code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
+
+ /* Add in cost for initial definition. */
+ outer_cost += TARG_VEC_STMT_COST;
+
+ /* Determine cost of epilogue code.
+
+ We have a reduction operator that will reduce the vector in one statement.
+ Also requires scalar extract. */
+
+ if (reduc_code < NUM_TREE_CODES)
+ outer_cost += TARG_VEC_STMT_COST + TARG_VEC_TO_SCALAR_COST;
+ else
+ {
+ int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
+ tree bitsize =
+ TYPE_SIZE (TREE_TYPE (GIMPLE_STMT_OPERAND (orig_stmt, 0)));
+ int element_bitsize = tree_low_cst (bitsize, 1);
+ int nelements = vec_size_in_bits / element_bitsize;
+
+ optab = optab_for_tree_code (code, vectype);
+
+ /* We have a whole vector shift available. */
+ if (!VECTOR_MODE_P (mode)
+ || optab->handlers[mode].insn_code == CODE_FOR_nothing)
+ /* Final reduction via vector shifts and the reduction operator. Also
+ requires scalar extract. */
+ outer_cost += ((exact_log2(nelements) * 2 + 1) * TARG_VEC_STMT_COST);
+ else
+ /* Use extracts and reduction op for final reduction. For N elements,
+ we have N extracts and N-1 reduction ops. */
+ outer_cost += ((nelements + nelements - 1) * TARG_VEC_STMT_COST);
+ }
+
+ STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "vect_model_reduction_cost: inside_cost = %d, "
+ "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
+ STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
+}
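
For intuition about the two epilogue strategies costed above, here is a rough
standalone comparison for an eight-element vector, assuming a unit
TARG_VEC_STMT_COST (an assumption for illustration; the real value is
target-defined):

#include <stdio.h>

int
main (void)
{
  int nelements = 8;
  int vec_stmt_cost = 1;  /* Assumed; target-defined in GCC.  */
  int log2_n = 3;         /* exact_log2 (8).  */

  /* Shift-based reduction: log2(N) shift/op pairs plus a scalar extract.  */
  int shift_cost = (log2_n * 2 + 1) * vec_stmt_cost;

  /* Extract-based reduction: N extracts plus N-1 reduction ops.  */
  int extract_cost = (nelements + nelements - 1) * vec_stmt_cost;

  printf ("shifts: %d, extracts: %d\n", shift_cost, extract_cost);  /* 7, 15 */
  return 0;
}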
+
+
+/* Function vect_model_induction_cost.
+
+ Models cost for induction operations. */
+
+static void
+vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
+{
+ /* Loop cost for vec_loop. */
+ STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST;
+ /* Prologue cost for vec_init and vec_step. */
+ STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = 2 * TARG_VEC_STMT_COST;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "vect_model_induction_cost: inside_cost = %d, "
+ "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
+ STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
+}
+
+
+/* Function vect_model_simple_cost.
+
+ Models cost for simple operations, i.e. those that only emit ncopies of a
+ single op. Right now, this does not account for multiple insns that could
+ be generated for the single vector op. We will handle that shortly. */
+
+static void
+vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies)
+{
+ STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
+ "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
+ STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
+}
+
+
+/* Function vect_cost_strided_group_size
+
+ For strided load or store, return the group_size only if it is the first
+ load or store of a group, else return 1. This ensures that group size is
+ only returned once per group. */
+
+static int
+vect_cost_strided_group_size (stmt_vec_info stmt_info)
+{
+ tree first_stmt = DR_GROUP_FIRST_DR (stmt_info);
+
+ if (first_stmt == STMT_VINFO_STMT (stmt_info))
+ return DR_GROUP_SIZE (stmt_info);
+
+ return 1;
+}
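
A toy model of this once-per-group rule, for illustration only (the struct
and helper are hypothetical stand-ins, not GCC types): with a group of four
strided accesses whose leader is the first, the reported sizes come out as
4, 1, 1, 1, so the whole-group overhead is charged exactly once.

#include <stdio.h>

struct toy_stmt { int id; int first_in_group; int group_size; };

static int
toy_group_size (const struct toy_stmt *s)
{
  /* Only the group leader reports the full group size.  */
  return s->id == s->first_in_group ? s->group_size : 1;
}

int
main (void)
{
  struct toy_stmt group[4]
    = { {0, 0, 4}, {1, 0, 4}, {2, 0, 4}, {3, 0, 4} };
  int i;

  for (i = 0; i < 4; i++)
    printf ("%d ", toy_group_size (&group[i]));  /* Prints "4 1 1 1".  */
  printf ("\n");
  return 0;
}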
+
+
+/* Function vect_model_store_cost
+
+ Models cost for stores. In the case of strided accesses, one access
+ has the overhead of the strided access attributed to it. */
+
+static void
+vect_model_store_cost (stmt_vec_info stmt_info, int ncopies)
+{
+ int cost = 0;
+ int group_size;
+
+ /* Strided access? */
+ if (DR_GROUP_FIRST_DR (stmt_info))
+ group_size = vect_cost_strided_group_size (stmt_info);
+ /* Not a strided access. */
+ else
+ group_size = 1;
+
+ /* Is this an access in a group of stores providing strided access?
+ If so, add in the cost of the permutes. */
+ if (group_size > 1)
+ {
+ /* Uses a high and low interleave operation for each needed permute. */
+ cost = ncopies * exact_log2(group_size) * group_size
+ * TARG_VEC_STMT_COST;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
+ group_size);
+
+ }
+
+ /* Costs of the stores. */
+ cost += ncopies * TARG_VEC_STORE_COST;
+
+ STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = cost;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
+ "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
+ STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
+}
+
+
+/* Function vect_model_load_cost
+
+ Models cost for loads. In the case of strided accesses, one access in the
+ group has the overhead of the strided access attributed to it. Since
+ unaligned accesses are supported for loads, we also account for the cost
+ of the access scheme chosen. */
+
+static void
+vect_model_load_cost (stmt_vec_info stmt_info, int ncopies)
+
+{
+ int inner_cost = 0;
+ int group_size;
+ int alignment_support_scheme;
+ tree first_stmt;
+ struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
+
+ /* Strided accesses? */
+ first_stmt = DR_GROUP_FIRST_DR (stmt_info);
+ if (first_stmt)
+ {
+ group_size = vect_cost_strided_group_size (stmt_info);
+ first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
+ }
+ /* Not a strided access. */
+ else
+ {
+ group_size = 1;
+ first_dr = dr;
+ }
+
+ alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
+
+ /* Is this an access in a group of loads providing strided access?
+ If so, add in the cost of the permutes. */
+ if (group_size > 1)
+ {
+ /* Uses even and odd extract operations for each needed permute. */
+ inner_cost = ncopies * exact_log2(group_size) * group_size
+ * TARG_VEC_STMT_COST;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
+ group_size);
+
+ }
+
+ /* The loads themselves. */
+ switch (alignment_support_scheme)
+ {
+ case dr_aligned:
+ {
+ inner_cost += ncopies * TARG_VEC_LOAD_COST;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "vect_model_load_cost: aligned.");
+
+ break;
+ }
+ case dr_unaligned_supported:
+ {
+ /* Here, we assign an additional cost for the unaligned load. */
+ inner_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
+ "hardware.");
+
+ break;
+ }
+ case dr_unaligned_software_pipeline:
+ {
+ int outer_cost = 0;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "vect_model_load_cost: unaligned software "
+ "pipelined.");
+
+ /* Unaligned software pipeline has a load of an address, an initial
+ load, and possibly a mask operation to "prime" the loop. However,
+ if this is an access in a group of loads providing strided
+ access, then the above cost should only be considered for one
+ access in the group. Inside the loop, there is a load op
+ and a realignment op. */
+
+ if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1)
+ {
+ outer_cost = 2 * TARG_VEC_STMT_COST;
+ if (targetm.vectorize.builtin_mask_for_load)
+ outer_cost += TARG_VEC_STMT_COST;
+ }
+
+ STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;
+
+ inner_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
+
+ break;
+ }
+
+ default:
+ gcc_unreachable ();
+ }
+
+ STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = inner_cost;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
+ "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
+ STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
+
+}
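
The permute term shared by vect_model_store_cost and vect_model_load_cost,
ncopies * log2 (group_size) * group_size vector statements, can be checked
with a small sketch (illustrative values; permute_cost_example and
log2_pow2 are stand-ins, not GCC functions):

#include <stdio.h>

/* log2 of a power of two, standing in for GCC's exact_log2.  */
static int
log2_pow2 (int n)
{
  int l = 0;
  while (n > 1)
    {
      n >>= 1;
      l++;
    }
  return l;
}

static int
permute_cost_example (int ncopies, int group_size, int vec_stmt_cost)
{
  return ncopies * log2_pow2 (group_size) * group_size * vec_stmt_cost;
}

int
main (void)
{
  /* One copy of a group of 4 strided accesses: log2(4) * 4 = 8
     interleave (store) or extract (load) operations.  */
  printf ("%d\n", permute_cost_example (1, 4, 1));  /* 8 */
  return 0;
}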
+
+
/* Function vect_get_new_vect_var.
Returns a name for a new variable. The current naming scheme appends the
@@ -1655,6 +2139,7 @@ vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
+ vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies);
return true;
}
@@ -1862,9 +2347,15 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS));
+ ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
+ / TYPE_VECTOR_SUBPARTS (vectype_out));
+
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "=== vectorizable_call ===");
+ vect_model_simple_cost (stmt_info, ncopies);
return true;
}
@@ -1873,8 +2364,6 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "transform operation.");
- ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
- / TYPE_VECTOR_SUBPARTS (vectype_out));
gcc_assert (ncopies >= 1);
/* Handle def. */
@@ -2302,6 +2791,9 @@ vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "=== vectorizable_assignment ===");
+ vect_model_simple_cost (stmt_info, ncopies);
return true;
}
@@ -2392,6 +2884,9 @@ vectorizable_induction (tree phi, block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "=== vectorizable_induction ===");
+ vect_model_induction_cost (stmt_info, ncopies);
return true;
}
@@ -2555,6 +3050,9 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "=== vectorizable_operation ===");
+ vect_model_simple_cost (stmt_info, ncopies);
return true;
}
@@ -2772,6 +3270,9 @@ vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "=== vectorizable_demotion ===");
+ vect_model_simple_cost (stmt_info, ncopies);
return true;
}
@@ -2932,6 +3433,9 @@ vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi,
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "=== vectorizable_promotion ===");
+ vect_model_simple_cost (stmt_info, 2 * ncopies);
return true;
}
@@ -3252,14 +3756,12 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
+ vect_model_store_cost (stmt_info, ncopies);
return true;
}
/** Transform. **/
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
-
if (strided_store)
{
first_stmt = DR_GROUP_FIRST_DR (stmt_info);
@@ -3284,6 +3786,9 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
group_size = 1;
}
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
+
dr_chain = VEC_alloc (tree, heap, group_size);
oprnds = VEC_alloc (tree, heap, group_size);
@@ -3915,14 +4420,15 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
+ vect_model_load_cost (stmt_info, ncopies);
return true;
}
- /** Transform. **/
-
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "transform load.");
+ /** Transform. **/
+
if (strided_load)
{
first_stmt = DR_GROUP_FIRST_DR (stmt_info);
@@ -4807,6 +5313,8 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
basic_block preheader;
int loop_num;
unsigned int th;
+ int min_scalar_loop_bound;
+ int min_profitable_iters;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
@@ -4822,11 +5330,28 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
&ratio_mult_vf_name, ratio);
loop_num = loop->num;
- /* Threshold for vectorized loop. */
- th = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)) *
- LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+
+ /* Analyze cost to set threshold for vectorized loop. */
+ min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
+
+ min_scalar_loop_bound = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND))
+ * LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+
+ /* Use the cost model only if it is more conservative than the
+ user-specified threshold. */
+
+ th = (unsigned) min_scalar_loop_bound;
+ if (min_profitable_iters
+ && (!min_scalar_loop_bound
+ || min_profitable_iters > min_scalar_loop_bound))
+ th = (unsigned) min_profitable_iters;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "vectorization may not be profitable.");
+
new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
- ratio_mult_vf_name, ni_name, false, th);
+ ratio_mult_vf_name, ni_name, false,
+ th);
gcc_assert (new_loop);
gcc_assert (loop_num == loop->num);
#ifdef ENABLE_CHECKING
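
The threshold selection in this last hunk amounts to taking the more
conservative (larger) of the user-specified bound and the cost-model
estimate. A minimal sketch of just that choice (vect_threshold_example is
an illustrative extraction, not a function in the patch):

static unsigned
vect_threshold_example (int min_scalar_loop_bound, int min_profitable_iters)
{
  unsigned th = (unsigned) min_scalar_loop_bound;

  /* Prefer the cost-model estimate only when it is stricter than the
     user-specified bound.  */
  if (min_profitable_iters
      && (!min_scalar_loop_bound
          || min_profitable_iters > min_scalar_loop_bound))
    th = (unsigned) min_profitable_iters;

  return th;
}

For example, a user bound of 8 with a cost-model estimate of 12 yields 12;
with an estimate of 4, the user bound of 8 stands.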