author     hjagasia <hjagasia@138bc75d-0d04-0410-961f-82ee72b054a4>   2007-06-08 16:30:49 +0000
committer  hjagasia <hjagasia@138bc75d-0d04-0410-961f-82ee72b054a4>   2007-06-08 16:30:49 +0000
commit     867c03eb9cc3badac4834c0e6cab7b849db1f573 (patch)
tree       b261d3382cad73cc9bf2627a1ef52c49c881835e /gcc/tree-vect-transform.c
parent     2d0ff363215b22ad82b36ffc5bdf65248aa727ae (diff)
download   gcc-867c03eb9cc3badac4834c0e6cab7b849db1f573.tar.gz
gcc/ChangeLog:
2007-06-08 Harsha Jagasia <harsha.jagasia@amd.com>
Tony Linthicum <tony.linthicum@amd.com>
* doc/extend.texi: Add fvect-cost-model flag.
* common.opt (fvect-cost-model): New flag.
* tree-vectorizer.c (new_stmt_vec_info): Initialize inside and outside
cost fields in stmt_vec_info struct for STMT.
* tree-vectorizer.h (stmt_vec_info): Define inside and outside cost
fields in stmt_vec_info struct and access functions for the same.
(TARG_COND_BRANCH_COST): Define cost of conditional branch.
(TARG_VEC_STMT_COST): Define cost of any vector operation, excluding
load, store and vector to scalar operation.
(TARG_VEC_TO_SCALAR_COST): Define cost of vector to scalar operation.
(TARG_VEC_LOAD_COST): Define cost of aligned vector load.
(TARG_VEC_UNALIGNED_LOAD_COST): Define cost of misaligned vector load.
(TARG_VEC_STORE_COST): Define cost of vector store.
(vect_estimate_min_profitable_iters): Define new function.
* tree-vect-analyze.c (vect_analyze_operations): Add a compile-time
check to evaluate if loop iterations are less than minimum profitable
iterations determined by cost model or minimum vect loop bound defined
by user, whichever is more conservative.
* tree-vect-transform.c (vect_do_peeling_for_loop_bound): Add a
run-time check to evaluate if loop iterations are less than minimum
profitable iterations determined by cost model or minimum vect loop
bound defined by user, whichever is more conservative.
(vect_estimate_min_profitable_iters): New function to estimate
minimum iterations required for the vector version of the loop to be
profitable over the scalar version.
(vect_model_reduction_cost): New function.
(vect_model_induction_cost): New function.
(vect_model_simple_cost): New function.
(vect_cost_strided_group_size): New function.
(vect_model_store_cost): New function.
(vect_model_load_cost): New function.
(vectorizable_reduction): Call vect_model_reduction_cost during
analysis phase.
(vectorizable_induction): Call vect_model_induction_cost during
analysis phase.
(vectorizable_load): Call vect_model_load_cost during analysis phase.
(vectorizable_store): Call vect_model_store_cost during analysis phase.
(vectorizable_call, vectorizable_assignment, vectorizable_operation,
vectorizable_promotion, vectorizable_demotion): Call
vect_model_simple_cost during analysis phase.
gcc/testsuite/ChangeLog:
2007-06-08 Harsha Jagasia <harsha.jagasia@amd.com>
* gcc.dg/vect/costmodel: New directory.
* gcc.dg/vect/costmodel/i386: New directory.
* gcc.dg/vect/costmodel/i386/i386-costmodel-vect.exp: New testsuite.
* gcc.dg/vect/costmodel/i386/costmodel-fast-math-vect-pr29925.c:
New test.
* gcc.dg/vect/costmodel/i386/costmodel-vect-31.c: New test.
* gcc.dg/vect/costmodel/i386/costmodel-vect-33.c: New test.
* gcc.dg/vect/costmodel/i386/costmodel-vect-68.c: New test.
* gcc.dg/vect/costmodel/i386/costmodel-vect-reduc-1char.c: New test.
* gcc.dg/vect/costmodel/x86_64: New directory.
* gcc.dg/vect/costmodel/x86_64/x86_64-costmodel-vect.exp:
New testsuite.
* gcc.dg/vect/costmodel/x86_64/costmodel-fast-math-vect-pr29925.c:
New test.
* gcc.dg/vect/costmodel/x86_64/costmodel-vect-31.c: New test.
* gcc.dg/vect/costmodel/x86_64/costmodel-vect-33.c: New test.
* gcc.dg/vect/costmodel/x86_64/costmodel-vect-68.c: New test.
* gcc.dg/vect/costmodel/x86_64/costmodel-vect-reduc-1char.c: New test.
* gcc.dg/vect/costmodel/x86_64/costmodel-pr30843.c: New test.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@125575 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/tree-vect-transform.c')
-rw-r--r--   gcc/tree-vect-transform.c   547
1 file changed, 536 insertions, 11 deletions
diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c
index e49fba116a8..00e55ed209a 100644
--- a/gcc/tree-vect-transform.c
+++ b/gcc/tree-vect-transform.c
@@ -74,6 +74,490 @@ static void vect_update_inits_of_drs (loop_vec_info, tree);
 static int vect_min_worthwhile_factor (enum tree_code);
 
+/* Function vect_estimate_min_profitable_iters
+
+   Return the number of iterations required for the vector version of the
+   loop to be profitable relative to the cost of the scalar version of the
+   loop.
+
+   TODO: Take profile info into account before making vectorization
+   decisions, if available.  */
+
+int
+vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
+{
+  int i;
+  int min_profitable_iters;
+  int peel_iters_prologue;
+  int peel_iters_epilogue;
+  int vec_inside_cost = 0;
+  int vec_outside_cost = 0;
+  int scalar_single_iter_cost = 0;
+  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
+  int nbbs = loop->num_nodes;
+
+  /* Cost model disabled.  */
+  if (!flag_vect_cost_model)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "cost model disabled.");
+      return 0;
+    }
+
+  /* Requires loop versioning tests to handle misalignment.
+     FIXME: Make cost depend on number of stmts in may_misalign list.  */
+
+  if (LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
+    {
+      vec_outside_cost += TARG_COND_BRANCH_COST;
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "cost model: Adding cost of checks for loop "
+                 "versioning.\n");
+    }
+
+  /* Requires a prologue loop when peeling to handle misalignment.  Add cost
+     of two guards, one for the peeled loop and one for the vector loop.  */
+
+  peel_iters_prologue = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
+  if (peel_iters_prologue)
+    {
+      vec_outside_cost += 2 * TARG_COND_BRANCH_COST;
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "cost model: Adding cost of checks for "
+                 "prologue.\n");
+    }
+
+  /* Requires an epilogue loop to finish up remaining iterations after vector
+     loop.  Add cost of two guards, one for the peeled loop and one for the
+     vector loop.  */
+
+  if ((peel_iters_prologue < 0)
+      || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vf)
+    {
+      vec_outside_cost += 2 * TARG_COND_BRANCH_COST;
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "cost model: Adding cost of checks for "
+                 "epilogue.\n");
+    }
+
+  /* Count statements in scalar loop.  Using this as scalar cost for a single
+     iteration for now.
+
+     TODO: Add outer loop support.
+
+     TODO: Consider assigning different costs to different scalar
+     statements.  */
+
+  for (i = 0; i < nbbs; i++)
+    {
+      block_stmt_iterator si;
+      basic_block bb = bbs[i];
+
+      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
+        {
+          tree stmt = bsi_stmt (si);
+          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+          if (!STMT_VINFO_RELEVANT_P (stmt_info)
+              && !STMT_VINFO_LIVE_P (stmt_info))
+            continue;
+          scalar_single_iter_cost++;
+          vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info);
+          vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
+        }
+    }
+
+  /* Add additional cost for the peeled instructions in prologue and epilogue
+     loop.
+
+     FORNOW: If we don't know the value of peel_iters for prologue or epilogue
+     at compile-time - we assume the worst.
+
+     TODO: Build an expression that represents peel_iters for prologue and
+     epilogue to be used in a run-time test.  */
+
+  peel_iters_prologue = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
+
+  if (peel_iters_prologue < 0)
+    {
+      peel_iters_prologue = vf - 1;
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "cost model: "
+                 "prologue peel iters set conservatively.");
+
+      /* If peeling for alignment is unknown, loop bound of main loop becomes
+         unknown.  */
+      peel_iters_epilogue = vf - 1;
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "cost model: "
+                 "epilogue peel iters set conservatively because "
+                 "peeling for alignment is unknown.");
+    }
+  else
+    {
+      if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
+        {
+          peel_iters_epilogue = vf - 1;
+          if (vect_print_dump_info (REPORT_DETAILS))
+            fprintf (vect_dump, "cost model: "
+                     "epilogue peel iters set conservatively because "
+                     "loop iterations are unknown.");
+        }
+      else
+        peel_iters_epilogue =
+          (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_iters_prologue) % vf;
+    }
+
+  vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
+                      + (peel_iters_epilogue * scalar_single_iter_cost);
+
+  /* Calculate number of iterations required to make the vector version
+     profitable, relative to the loop bodies only.  The following condition
+     must hold true: ((SIC*VF)-VIC)*niters > VOC*VF, where
+     SIC = scalar iteration cost, VIC = vector iteration cost,
+     VOC = vector outside cost and VF = vectorization factor.  */
+
+  if ((scalar_single_iter_cost * vf) > vec_inside_cost)
+    {
+      if (vec_outside_cost == 0)
+        min_profitable_iters = 1;
+      else
+        {
+          min_profitable_iters = (vec_outside_cost * vf)
+                                 / ((scalar_single_iter_cost * vf)
+                                    - vec_inside_cost);
+
+          if ((scalar_single_iter_cost * vf * min_profitable_iters)
+              <= ((vec_inside_cost * min_profitable_iters)
+                  + (vec_outside_cost * vf)))
+            min_profitable_iters++;
+        }
+    }
+  /* Vector version will never be profitable.  */
+  else
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "cost model: vector iteration cost = %d "
+                 "is divisible by scalar iteration cost = %d by a factor "
+                 "greater than or equal to the vectorization factor = %d.",
+                 vec_inside_cost, scalar_single_iter_cost, vf);
+      return -1;
+    }
+
+  if (vect_print_dump_info (REPORT_DETAILS))
+    {
+      fprintf (vect_dump, "Cost model analysis: \n");
+      fprintf (vect_dump, "  Vector inside of loop cost: %d\n",
+               vec_inside_cost);
+      fprintf (vect_dump, "  Vector outside of loop cost: %d\n",
+               vec_outside_cost);
+      fprintf (vect_dump, "  Scalar cost: %d\n", scalar_single_iter_cost);
+      fprintf (vect_dump, "  prologue iterations: %d\n",
+               peel_iters_prologue);
+      fprintf (vect_dump, "  epilogue iterations: %d\n",
+               peel_iters_epilogue);
+      fprintf (vect_dump, "  Calculated minimum iters for profitability: %d\n",
+               min_profitable_iters);
+      fprintf (vect_dump, "  Actual minimum iters for profitability: %d\n",
+               min_profitable_iters < vf ? vf : min_profitable_iters);
+    }
+
+  return min_profitable_iters < vf ? vf : min_profitable_iters;
+}
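
As a rough illustration of this bound, the sketch below plugs in made-up costs (SIC = 4, VIC = 6, VOC = 14, VF = 4). The real values come from the TARG_* cost macros and the per-stmt cost fields, so the constants here are purely hypothetical:

#include <stdio.h>

int
main (void)
{
  int scalar_single_iter_cost = 4;  /* SIC: assumed scalar cost per iteration.  */
  int vec_inside_cost = 6;          /* VIC: assumed cost of one vector iteration.  */
  int vec_outside_cost = 14;        /* VOC: assumed guard and peeling overhead.  */
  int vf = 4;                       /* VF: vectorization factor.  */

  /* Smallest niters with ((SIC * VF) - VIC) * niters > VOC * VF, rounding
     up the integer division exactly as the function above does.  */
  int min_profitable_iters = (vec_outside_cost * vf)
                             / ((scalar_single_iter_cost * vf)
                                - vec_inside_cost);
  if ((scalar_single_iter_cost * vf * min_profitable_iters)
      <= ((vec_inside_cost * min_profitable_iters)
          + (vec_outside_cost * vf)))
    min_profitable_iters++;

  /* The estimate is never allowed to drop below VF; prints 6 here.  */
  printf ("min profitable iters: %d\n",
          min_profitable_iters < vf ? vf : min_profitable_iters);
  return 0;
}

With these numbers the vector loop only pays off from 6 iterations up, and since 6 >= VF the function would return 6.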
+
+
+/* TODO: Close dependency between vect_model_*_cost and vectorizable_*
+   functions.  Design better to avoid maintenance issues.  */
+
+/* Function vect_model_reduction_cost.
+
+   Models cost for a reduction operation, including the vector ops
+   generated within the strip-mine loop, the initial definition before
+   the loop, and the epilogue code that must be generated.  */
+
+static void
+vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
+                           int ncopies)
+{
+  int outer_cost = 0;
+  enum tree_code code;
+  optab optab;
+  tree vectype;
+  tree orig_stmt;
+  tree reduction_op;
+  enum machine_mode mode;
+  tree operation = GIMPLE_STMT_OPERAND (STMT_VINFO_STMT (stmt_info), 1);
+  int op_type = TREE_CODE_LENGTH (TREE_CODE (operation));
+
+  /* Cost of reduction op inside loop.  */
+  STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) += ncopies * TARG_VEC_STMT_COST;
+
+  reduction_op = TREE_OPERAND (operation, op_type - 1);
+  vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
+  mode = TYPE_MODE (vectype);
+  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+
+  if (!orig_stmt)
+    orig_stmt = STMT_VINFO_STMT (stmt_info);
+
+  code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
+
+  /* Add in cost for initial definition.  */
+  outer_cost += TARG_VEC_STMT_COST;
+
+  /* Determine cost of epilogue code.
+
+     We have a reduction operator that will reduce the vector in one statement.
+     Also requires scalar extract.  */
+
+  if (reduc_code < NUM_TREE_CODES)
+    outer_cost += TARG_VEC_STMT_COST + TARG_VEC_TO_SCALAR_COST;
+  else
+    {
+      int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
+      tree bitsize =
+        TYPE_SIZE (TREE_TYPE (GIMPLE_STMT_OPERAND (orig_stmt, 0)));
+      int element_bitsize = tree_low_cst (bitsize, 1);
+      int nelements = vec_size_in_bits / element_bitsize;
+
+      optab = optab_for_tree_code (code, vectype);
+
+      /* We have a whole vector shift available.  */
+      if (!VECTOR_MODE_P (mode)
+          || optab->handlers[mode].insn_code == CODE_FOR_nothing)
+        /* Final reduction via vector shifts and the reduction operator.  Also
+           requires scalar extract.  */
+        outer_cost += ((exact_log2 (nelements) * 2 + 1) * TARG_VEC_STMT_COST);
+      else
+        /* Use extracts and reduction op for final reduction.  For N elements,
+           we have N extracts and N-1 reduction ops.  */
+        outer_cost += ((nelements + nelements - 1) * TARG_VEC_STMT_COST);
+    }
+
+  STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;
+
+  if (vect_print_dump_info (REPORT_DETAILS))
+    fprintf (vect_dump, "vect_model_reduction_cost: inside_cost = %d, "
+             "outside_cost = %d.", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
+             STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
+}
+
+
+/* Function vect_model_induction_cost.
+
+   Models cost for induction operations.  */
+
+static void
+vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
+{
+  /* loop cost for vec_loop.  */
+  STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST;
+  /* prologue cost for vec_init and vec_step.  */
+  STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = 2 * TARG_VEC_STMT_COST;
+
+  if (vect_print_dump_info (REPORT_DETAILS))
+    fprintf (vect_dump, "vect_model_induction_cost: inside_cost = %d, "
+             "outside_cost = %d.", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
+             STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
+}
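
The epilogue arithmetic in vect_model_reduction_cost can be checked by hand. A minimal sketch for a four-element vector, assuming a hypothetical unit TARG_VEC_STMT_COST (the real value is target-defined):

#include <stdio.h>

int
main (void)
{
  int targ_vec_stmt_cost = 1;  /* Assumed unit cost; target-defined in reality.  */
  int nelements = 4;           /* Elements per vector, e.g. four ints.  */
  int log2n = 2;               /* exact_log2 (nelements).  */

  /* Whole-vector-shift reduction: log2(N) shift/op pairs plus the final
     scalar extract.  */
  int shift_cost = (log2n * 2 + 1) * targ_vec_stmt_cost;

  /* Extract-based reduction: N extracts and N - 1 reduction ops.  */
  int extract_cost = (nelements + nelements - 1) * targ_vec_stmt_cost;

  /* Prints "shift 5, extract 7".  */
  printf ("shift %d, extract %d\n", shift_cost, extract_cost);
  return 0;
}

For four elements the two strategies cost 5 versus 7 statements under these assumed unit costs.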
+
+
+/* Function vect_model_simple_cost.
+
+   Models cost for simple operations, i.e. those that only emit ncopies of a
+   single op.  Right now, this does not account for multiple insns that could
+   be generated for the single vector op.  We will handle that shortly.  */
+
+static void
+vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies)
+{
+  STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST;
+
+  if (vect_print_dump_info (REPORT_DETAILS))
+    fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
+             "outside_cost = %d.", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
+             STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
+}
+
+
+/* Function vect_cost_strided_group_size
+
+   For strided load or store, return the group_size only if it is the first
+   load or store of a group, else return 1.  This ensures that group size is
+   only returned once per group.  */
+
+static int
+vect_cost_strided_group_size (stmt_vec_info stmt_info)
+{
+  tree first_stmt = DR_GROUP_FIRST_DR (stmt_info);
+
+  if (first_stmt == STMT_VINFO_STMT (stmt_info))
+    return DR_GROUP_SIZE (stmt_info);
+
+  return 1;
+}
+
+
+/* Function vect_model_store_cost
+
+   Models cost for stores.  In the case of strided accesses, one access
+   has the overhead of the strided access attributed to it.  */
+
+static void
+vect_model_store_cost (stmt_vec_info stmt_info, int ncopies)
+{
+  int cost = 0;
+  int group_size;
+
+  /* Strided access?  */
+  if (DR_GROUP_FIRST_DR (stmt_info))
+    group_size = vect_cost_strided_group_size (stmt_info);
+  /* Not a strided access.  */
+  else
+    group_size = 1;
+
+  /* Is this an access in a group of stores, which provide strided access?
+     If so, add in the cost of the permutes.  */
+  if (group_size > 1)
+    {
+      /* Uses a high and low interleave operation for each needed permute.  */
+      cost = ncopies * exact_log2 (group_size) * group_size
+             * TARG_VEC_STMT_COST;
+
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d.",
+                 group_size);
+    }
+
+  /* Costs of the stores.  */
+  cost += ncopies * TARG_VEC_STORE_COST;
+
+  STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = cost;
+
+  if (vect_print_dump_info (REPORT_DETAILS))
+    fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
+             "outside_cost = %d.", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
+             STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
+}
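
The interleave term in vect_model_store_cost grows as group_size * log2 (group_size) per vector copy. A small sketch with invented unit costs (TARG_VEC_STMT_COST = TARG_VEC_STORE_COST = 1 are assumptions, not real target values):

#include <stdio.h>

int
main (void)
{
  int ncopies = 2;             /* Vector stmts generated per scalar stmt.  */
  int group_size = 4;          /* Interleaved stores in the group.  */
  int log2_group_size = 2;     /* exact_log2 (group_size).  */
  int targ_vec_stmt_cost = 1;  /* Assumed unit costs for illustration.  */
  int targ_vec_store_cost = 1;

  int permute_cost = ncopies * log2_group_size * group_size
                     * targ_vec_stmt_cost;
  int inside_cost = permute_cost + ncopies * targ_vec_store_cost;

  /* Prints "permutes 16, inside-loop cost 18".  */
  printf ("permutes %d, inside-loop cost %d\n", permute_cost, inside_cost);
  return 0;
}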
+
+
+/* Function vect_model_load_cost
+
+   Models cost for loads.  In the case of strided accesses, the last access
+   has the overhead of the strided access attributed to it.  Since unaligned
+   accesses are supported for loads, we also account for the costs of the
+   access scheme chosen.  */
+
+static void
+vect_model_load_cost (stmt_vec_info stmt_info, int ncopies)
+{
+  int inner_cost = 0;
+  int group_size;
+  int alignment_support_cheme;
+  tree first_stmt;
+  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
+
+  /* Strided accesses?  */
+  first_stmt = DR_GROUP_FIRST_DR (stmt_info);
+  if (first_stmt)
+    {
+      group_size = vect_cost_strided_group_size (stmt_info);
+      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
+    }
+  /* Not a strided access.  */
+  else
+    {
+      group_size = 1;
+      first_dr = dr;
+    }
+
+  alignment_support_cheme = vect_supportable_dr_alignment (first_dr);
+
+  /* Is this an access in a group of loads providing strided access?
+     If so, add in the cost of the permutes.  */
+  if (group_size > 1)
+    {
+      /* Uses even and odd extract operations for each needed permute.  */
+      inner_cost = ncopies * exact_log2 (group_size) * group_size
+                   * TARG_VEC_STMT_COST;
+
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d.",
+                 group_size);
+    }
+
+  /* The loads themselves.  */
+  switch (alignment_support_cheme)
+    {
+    case dr_aligned:
+      {
+        inner_cost += ncopies * TARG_VEC_LOAD_COST;
+
+        if (vect_print_dump_info (REPORT_DETAILS))
+          fprintf (vect_dump, "vect_model_load_cost: aligned.");
+
+        break;
+      }
+    case dr_unaligned_supported:
+      {
+        /* Here, we assign an additional cost for the unaligned load.  */
+        inner_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST;
+
+        if (vect_print_dump_info (REPORT_DETAILS))
+          fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
+                   "hardware.");
+
+        break;
+      }
+    case dr_unaligned_software_pipeline:
+      {
+        int outer_cost = 0;
+
+        if (vect_print_dump_info (REPORT_DETAILS))
+          fprintf (vect_dump, "vect_model_load_cost: unaligned software "
+                   "pipelined.");
+
+        /* Unaligned software pipeline has a load of an address, an initial
+           load, and possibly a mask operation to "prime" the loop.  However,
+           if this is an access in a group of loads, which provide strided
+           access, then the above cost should only be considered for one
+           access in the group.  Inside the loop, there is a load op
+           and a realignment op.  */
+
+        if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1)
+          {
+            outer_cost = 2 * TARG_VEC_STMT_COST;
+            if (targetm.vectorize.builtin_mask_for_load)
+              outer_cost += TARG_VEC_STMT_COST;
+          }
+
+        STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;
+
+        inner_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
+
+        break;
+      }
+
+    default:
+      gcc_unreachable ();
+    }
+
+  STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = inner_cost;
+
+  if (vect_print_dump_info (REPORT_DETAILS))
+    fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
+             "outside_cost = %d.", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
+             STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
+
+}
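
Under invented per-copy costs, the three alignment schemes handled by the switch above compare as in this sketch (TARG_VEC_LOAD_COST = 1, TARG_VEC_UNALIGNED_LOAD_COST = 2, TARG_VEC_STMT_COST = 1 are assumptions, not the target's real values):

#include <stdio.h>

/* Stand-ins for the real dr_aligned / dr_unaligned_supported /
   dr_unaligned_software_pipeline values.  */
enum scheme { ALIGNED, UNALIGNED_SUPPORTED, UNALIGNED_SWP };

static int
inside_load_cost (enum scheme s, int ncopies)
{
  switch (s)
    {
    case ALIGNED:
      return ncopies * 1;        /* One plain vector load per copy.  */
    case UNALIGNED_SUPPORTED:
      return ncopies * 2;        /* Misaligned hardware load costs more.  */
    case UNALIGNED_SWP:
      return ncopies * (1 + 1);  /* A load plus a realignment op per copy.  */
    default:
      return 0;
    }
}

int
main (void)
{
  /* Prints "aligned 2, unaligned 4, software pipelined 4".  */
  printf ("aligned %d, unaligned %d, software pipelined %d\n",
          inside_load_cost (ALIGNED, 2),
          inside_load_cost (UNALIGNED_SUPPORTED, 2),
          inside_load_cost (UNALIGNED_SWP, 2));
  return 0;
}

The software-pipelined case additionally books outside-of-loop cost for the address load, initial load, and optional mask, which this per-iteration sketch leaves out.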
+
+
 /* Function vect_get_new_vect_var.
 
    Returns a name for a new variable.  The current naming scheme appends the
@@ -1655,6 +2139,7 @@ vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
   if (!vec_stmt) /* transformation not required.  */
     {
       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
+      vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies);
       return true;
     }
 
@@ -1862,9 +2347,15 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
 
   gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS));
 
+  ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
+             / TYPE_VECTOR_SUBPARTS (vectype_out));
+
   if (!vec_stmt) /* transformation not required.  */
     {
       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "=== vectorizable_call ===");
+      vect_model_simple_cost (stmt_info, ncopies);
       return true;
     }
 
@@ -1873,8 +2364,6 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
   if (vect_print_dump_info (REPORT_DETAILS))
     fprintf (vect_dump, "transform operation.");
 
-  ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
-             / TYPE_VECTOR_SUBPARTS (vectype_out));
   gcc_assert (ncopies >= 1);
 
   /* Handle def.  */
@@ -2302,6 +2791,9 @@ vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
   if (!vec_stmt) /* transformation not required.  */
     {
       STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "=== vectorizable_assignment ===");
+      vect_model_simple_cost (stmt_info, ncopies);
       return true;
     }
 
@@ -2392,6 +2884,9 @@ vectorizable_induction (tree phi, block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
   if (!vec_stmt) /* transformation not required.  */
     {
       STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "=== vectorizable_induction ===");
+      vect_model_induction_cost (stmt_info, ncopies);
       return true;
     }
 
@@ -2555,6 +3050,9 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
   if (!vec_stmt) /* transformation not required.  */
     {
       STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "=== vectorizable_operation ===");
+      vect_model_simple_cost (stmt_info, ncopies);
       return true;
     }
 
@@ -2772,6 +3270,9 @@ vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
   if (!vec_stmt) /* transformation not required.  */
     {
       STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "=== vectorizable_demotion ===");
+      vect_model_simple_cost (stmt_info, ncopies);
       return true;
     }
 
@@ -2932,6 +3433,9 @@ vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi,
   if (!vec_stmt) /* transformation not required.  */
     {
       STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "=== vectorizable_promotion ===");
+      vect_model_simple_cost (stmt_info, 2 * ncopies);
       return true;
     }
 
@@ -3252,14 +3756,12 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
   if (!vec_stmt) /* transformation not required.  */
     {
      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
+      vect_model_store_cost (stmt_info, ncopies);
       return true;
     }
 
   /** Transform.  **/
 
-  if (vect_print_dump_info (REPORT_DETAILS))
-    fprintf (vect_dump, "transform store. ncopies = %d", ncopies);
-
   if (strided_store)
     {
       first_stmt = DR_GROUP_FIRST_DR (stmt_info);
@@ -3284,6 +3786,9 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
       group_size = 1;
     }
 
+  if (vect_print_dump_info (REPORT_DETAILS))
+    fprintf (vect_dump, "transform store. ncopies = %d", ncopies);
+
   dr_chain = VEC_alloc (tree, heap, group_size);
   oprnds = VEC_alloc (tree, heap, group_size);
 
@@ -3915,14 +4420,15 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
   if (!vec_stmt) /* transformation not required.  */
     {
       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
+      vect_model_load_cost (stmt_info, ncopies);
       return true;
     }
 
-  /** Transform.  **/
-
   if (vect_print_dump_info (REPORT_DETAILS))
     fprintf (vect_dump, "transform load.");
 
+  /** Transform.  **/
+
   if (strided_load)
     {
       first_stmt = DR_GROUP_FIRST_DR (stmt_info);
@@ -4807,6 +5313,8 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
   basic_block preheader;
   int loop_num;
   unsigned int th;
+  int min_scalar_loop_bound;
+  int min_profitable_iters;
 
   if (vect_print_dump_info (REPORT_DETAILS))
     fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
@@ -4822,11 +5330,28 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
                                    &ratio_mult_vf_name, ratio);
 
   loop_num = loop->num;
-  /* Threshold for vectorized loop.  */
-  th = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)) *
-       LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+
+  /* Analyze cost to set threshold for vectorized loop.  */
+  min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
+
+  min_scalar_loop_bound = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND))
+                          * LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+
+  /* Use the cost model only if it is more conservative than the
+     user-specified threshold.  */
+
+  th = (unsigned) min_scalar_loop_bound;
+  if (min_profitable_iters
+      && (!min_scalar_loop_bound
+          || min_profitable_iters > min_scalar_loop_bound))
+    th = (unsigned) min_profitable_iters;
+
+  if (vect_print_dump_info (REPORT_DETAILS))
+    fprintf (vect_dump, "vectorization may not be profitable.");
+
   new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
-                                            ratio_mult_vf_name, ni_name, false, th);
+                                            ratio_mult_vf_name, ni_name, false,
+                                            th);
   gcc_assert (new_loop);
   gcc_assert (loop_num == loop->num);
 #ifdef ENABLE_CHECKING
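
Isolated from the surrounding function, the threshold choice above amounts to taking the more conservative (larger) of the user bound and the cost model's estimate; a standalone sketch:

#include <stdio.h>

/* The guard threshold reduced to a helper; a zero min_profitable_iters
   means the cost model is disabled.  */
static unsigned
loop_threshold (int min_scalar_loop_bound, int min_profitable_iters)
{
  unsigned th = (unsigned) min_scalar_loop_bound;
  if (min_profitable_iters
      && (!min_scalar_loop_bound
          || min_profitable_iters > min_scalar_loop_bound))
    th = (unsigned) min_profitable_iters;
  return th;
}

int
main (void)
{
  /* User bound 8 vs. cost-model estimates 6 and 12: prints "8 12".  */
  printf ("%u %u\n", loop_threshold (8, 6), loop_threshold (8, 12));
  return 0;
}

In the commit itself, min_scalar_loop_bound is PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND) scaled by the vectorization factor, and vect_estimate_min_profitable_iters returns 0 when -fvect-cost-model is off, so the user bound then decides on its own.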