2007-12-05 Harsha Jagasia <harsha.jagasia@amd.com>

* tree-vectorizer.c (slpeel_add_loop_guard): Gimplify the condition. (set_prologue_iterations): New. Set the prologue iterations to total number of scalar iterations if the cost model check indicates that scalar code should be generated. (slpeel_tree_peel_loop_to_edge): Add a new parameter and code for generating the cost condition for epilog. Call set_prologue_iterations to generate cost condition for prolog. (new_loop_vec_info): Initialize LOOP_VINFO_NITERS_UNCHANGED. * tree-vectorizer.h (LOOP_VINFO_NITERS_UNCHANGED): New. (slpeel_tree_peel_loop_to_edge): Update declaration. (set_prologue_iterations): New declaration. * tree-vect-analyze.c (vect_analyze_loop_form): Update LOOP_VINFO_NITERS_UNCHANGED. * tree-vect-transform.c (vect_estimate_min_profitable_iters): Add new parameter and code to check if run time cost model test is needed. Remove code that adds builtin vectorization cost to scalar outside cost for the run time cost model test. If run time cost model test is needed add the appropriate guard cost to the scalar outside cost. The guard cost depends on whether the guard is generated at versioning or at prolog generation or at epilog generation. Change cost model equation to include scalar outside cost. (conservative_cost_threshold): New. Return the less conservative profitability threshold between the cost model threshold and the user defined vectorization threshold. (vect_do_peeling_for_loop_bound): Call conservative_cost_threshold. (vect_do_peeling_for_alignment): Same. (vect_loop_versioning): Same. (vect_create_cond_for_align_checks): ANDs the cost model condition with the alignment condition. (vect_transform_loop): Call loop versioning only when there is a misalignment or an aliasing problem. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@130651 138bc75d-0d04-0410-961f-82ee72b054a4
author: spop <spop@138bc75d-0d04-0410-961f-82ee72b054a4> 2007-12-06 16:18:55 +0000
committer: spop <spop@138bc75d-0d04-0410-961f-82ee72b054a4> 2007-12-06 16:18:55 +0000
commit: 82ba6513749be5294e8b40323322a2add4681310 (patch)
tree: eabb8bb49cc28e95fea4f091f4b02dd6cac4ad30 /gcc/tree-vectorizer.c
parent: 8a58ed0af38bbe49d77d45122f8fc503d14ea055 (diff)
download: gcc-82ba6513749be5294e8b40323322a2add4681310.tar.gz
1 files changed, 206 insertions, 20 deletions
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 43b51a7da19..fcc74168c46 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -918,20 +918,29 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
 
 static edge
 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
-		        basic_block dom_bb)
+		       basic_block dom_bb)
 {
   block_stmt_iterator bsi;
   edge new_e, enter_e;
   tree cond_stmt;
+  tree gimplify_stmt_list;
 
   enter_e = EDGE_SUCC (guard_bb, 0);
   enter_e->flags &= ~EDGE_FALLTHRU;
   enter_e->flags |= EDGE_FALSE_VALUE;
   bsi = bsi_last (guard_bb);
 
+  cond =
+    force_gimple_operand (cond, &gimplify_stmt_list, true,
+			  NULL_TREE);
   cond_stmt = build3 (COND_EXPR, void_type_node, cond,
 		      NULL_TREE, NULL_TREE);
+  if (gimplify_stmt_list)
+    bsi_insert_after (&bsi, gimplify_stmt_list, BSI_NEW_STMT);
+
+  bsi = bsi_last (guard_bb);
   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
+
   /* Add new edge to connect guard block to the merge/loop-exit block.  */
   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
@@ -1007,12 +1016,89 @@ slpeel_verify_cfg_after_peeling (struct loop *first_loop,
 }
 #endif
 
+/* If the run time cost model check determines that vectorization is
+   not profitable and hence scalar loop should be generated then set
+   FIRST_NITERS to prologue peeled iterations. This will allow all the
+   iterations to be executed in the prologue peeled scalar loop.  */
+
+void
+set_prologue_iterations (basic_block bb_before_first_loop,
+			 tree first_niters,
+			 struct loop *loop,
+			 unsigned int th)
+{
+  edge e;
+  basic_block cond_bb, then_bb;
+  tree var, prologue_after_cost_adjust_name, stmt;
+  block_stmt_iterator bsi;
+  tree newphi;
+  edge e_true, e_false, e_fallthru;
+  tree cond_stmt;
+  tree gimplify_stmt_list;
+  tree cost_pre_condition = NULL_TREE;
+  tree scalar_loop_iters = 
+    LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop));
+
+  e = single_pred_edge (bb_before_first_loop);
+  cond_bb = split_edge(e);
+
+  e = single_pred_edge (bb_before_first_loop);
+  then_bb = split_edge(e);
+  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
+
+  e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
+				   EDGE_FALSE_VALUE);
+  set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);
+
+  e_true = EDGE_PRED (then_bb, 0);
+  e_true->flags &= ~EDGE_FALLTHRU;
+  e_true->flags |= EDGE_TRUE_VALUE;
+
+  e_fallthru = EDGE_SUCC (then_bb, 0);
+
+  cost_pre_condition =
+    build2 (LE_EXPR, boolean_type_node, scalar_loop_iters, 
+	    build_int_cst (TREE_TYPE (scalar_loop_iters), th));
+  cost_pre_condition =
+    force_gimple_operand (cost_pre_condition, &gimplify_stmt_list,
+			  true, NULL_TREE);
+  cond_stmt = build3 (COND_EXPR, void_type_node, cost_pre_condition,
+		      NULL_TREE, NULL_TREE);
+
+  bsi = bsi_last (cond_bb);
+  if (gimplify_stmt_list)
+    bsi_insert_after (&bsi, gimplify_stmt_list, BSI_NEW_STMT);
+
+  bsi = bsi_last (cond_bb);
+  bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
+					  
+  var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
+			"prologue_after_cost_adjust");
+  add_referenced_var (var);
+  prologue_after_cost_adjust_name = 
+    force_gimple_operand (scalar_loop_iters, &stmt, false, var);
+
+  bsi = bsi_last (then_bb);
+  if (stmt)
+    bsi_insert_after (&bsi, stmt, BSI_NEW_STMT);
+
+  newphi = create_phi_node (var, bb_before_first_loop);
+  add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru);
+  add_phi_arg (newphi, first_niters, e_false);
+
+  first_niters = PHI_RESULT (newphi);
+}
+
+
 /* Function slpeel_tree_peel_loop_to_edge.
 
    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
    that is placed on the entry (exit) edge E of LOOP. After this transformation
    we have two loops one after the other - first-loop iterates FIRST_NITERS
    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
+   If the cost model indicates that it is profitable to emit a scalar 
+   loop instead of the vector one, then the prolog (epilog) loop will iterate
+   for the entire unchanged scalar iterations of the loop.
 
    Input:
    - LOOP: the loop to be peeled.
@@ -1027,6 +1113,13 @@ slpeel_verify_cfg_after_peeling (struct loop *first_loop,
         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
         is false, the caller of this function may want to take care of this
         (this can be useful if we don't want new stmts added to first-loop).
+   - TH: cost model profitability threshold of iterations for vectorization.
+   - CHECK_PROFITABILITY: specify whether cost model check has not occured
+                          during versioning and hence needs to occur during
+			  prologue generation or whether cost model check 
+			  has not occured during prologue generation and hence
+			  needs to occur during epilogue generation.
+	    
 
    Output:
    The function returns a pointer to the new loop-copy, or NULL if it failed
@@ -1048,11 +1141,11 @@ struct loop*
 slpeel_tree_peel_loop_to_edge (struct loop *loop, 
 			       edge e, tree first_niters, 
 			       tree niters, bool update_first_loop_count,
-			       unsigned int th)
+			       unsigned int th, bool check_profitability)
 {
   struct loop *new_loop = NULL, *first_loop, *second_loop;
   edge skip_e;
-  tree pre_condition;
+  tree pre_condition = NULL_TREE;
   bitmap definitions;
   basic_block bb_before_second_loop, bb_after_second_loop;
   basic_block bb_before_first_loop;
@@ -1060,6 +1153,9 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop,
   basic_block new_exit_bb;
   edge exit_e = single_exit (loop);
   LOC loop_loc;
+  tree cost_pre_condition = NULL_TREE;
+  tree scalar_loop_iters = 
+    LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop));
   
   if (!slpeel_can_duplicate_loop_p (loop, e))
     return NULL;
@@ -1116,32 +1212,121 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop,
   rename_variables_in_loop (new_loop);
 
 
-  /* 2. Add the guard that controls whether the first loop is executed.
-        Resulting CFG would be:
+  /* 2.  Add the guard code in one of the following ways:
 
-        bb_before_first_loop:
-        if (FIRST_NITERS == 0) GOTO bb_before_second_loop
-                               GOTO first-loop
+     2.a Add the guard that controls whether the first loop is executed.
+         This occurs when this function is invoked for prologue or epilogiue
+	 generation and when the cost model check can be done at compile time.
 
-        first_loop:
-        do {
-        } while ...
+         Resulting CFG would be:
 
-        bb_before_second_loop:
+         bb_before_first_loop:
+         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
+                                GOTO first-loop
 
-        second_loop:
-        do {
-        } while ...
+         first_loop:
+         do {
+         } while ...
 
-        orig_exit_bb:
-   */
+         bb_before_second_loop:
+
+         second_loop:
+         do {
+         } while ...
+
+         orig_exit_bb:
+
+     2.b Add the cost model check that allows the prologue
+         to iterate for the entire unchanged scalar
+         iterations of the loop in the event that the cost
+         model indicates that the scalar loop is more
+         profitable than the vector one. This occurs when
+	 this function is invoked for prologue generation
+	 and the cost model check needs to be done at run
+	 time.
+
+         Resulting CFG after prologue peeling would be:
+
+         if (scalar_loop_iterations <= th)
+           FIRST_NITERS = scalar_loop_iterations
+
+         bb_before_first_loop:
+         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
+                                GOTO first-loop
+
+         first_loop:
+         do {
+         } while ...
+
+         bb_before_second_loop:
+
+         second_loop:
+         do {
+         } while ...
+
+         orig_exit_bb:
+
+     2.c Add the cost model check that allows the epilogue
+         to iterate for the entire unchanged scalar
+         iterations of the loop in the event that the cost
+         model indicates that the scalar loop is more
+         profitable than the vector one. This occurs when
+	 this function is invoked for epilogue generation
+	 and the cost model check needs to be done at run
+	 time.
+
+         Resulting CFG after prologue peeling would be:
+
+         bb_before_first_loop:
+         if ((scalar_loop_iterations <= th)
+             ||
+             FIRST_NITERS == 0) GOTO bb_before_second_loop
+                                GOTO first-loop
+
+         first_loop:
+         do {
+         } while ...
+
+         bb_before_second_loop:
+
+         second_loop:
+         do {
+         } while ...
+
+         orig_exit_bb:
+  */
 
   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
   bb_before_second_loop = split_edge (single_exit (first_loop));
 
-  pre_condition =
-    fold_build2 (LE_EXPR, boolean_type_node, first_niters, 
-	build_int_cst (TREE_TYPE (first_niters), th));
+  /* Epilogue peeling.  */
+  if (!update_first_loop_count)
+    {
+      pre_condition =
+	fold_build2 (LE_EXPR, boolean_type_node, first_niters, 
+		     build_int_cst (TREE_TYPE (first_niters), 0));
+      if (check_profitability)
+	{
+	    cost_pre_condition = 
+	    build2 (LE_EXPR, boolean_type_node, scalar_loop_iters, 
+		    build_int_cst (TREE_TYPE (scalar_loop_iters), th));
+  
+	  pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
+				       cost_pre_condition, pre_condition);
+	}
+    }
+
+  /* Prologue peeling.  */  
+  else
+    {
+      if (check_profitability)
+	set_prologue_iterations (bb_before_first_loop, first_niters,
+				 loop, th);
+
+      pre_condition =
+	fold_build2 (LE_EXPR, boolean_type_node, first_niters, 
+		     build_int_cst (TREE_TYPE (first_niters), 0));
+    }
 
   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
                                   bb_before_second_loop, bb_before_first_loop);
@@ -1468,6 +1653,7 @@ new_loop_vec_info (struct loop *loop)
 
   LOOP_VINFO_BBS (res) = bbs;
   LOOP_VINFO_NITERS (res) = NULL;
+  LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
   LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
   LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
author	spop <spop@138bc75d-0d04-0410-961f-82ee72b054a4>	2007-12-06 16:18:55 +0000
committer	spop <spop@138bc75d-0d04-0410-961f-82ee72b054a4>	2007-12-06 16:18:55 +0000
commit	82ba6513749be5294e8b40323322a2add4681310 (patch)
tree	eabb8bb49cc28e95fea4f091f4b02dd6cac4ad30 /gcc/tree-vectorizer.c
parent	8a58ed0af38bbe49d77d45122f8fc503d14ea055 (diff)
download	gcc-82ba6513749be5294e8b40323322a2add4681310.tar.gz