* tree-vectorizer.h (unknown_alignment_for_access_p): Replaced by

known_alignment_for_access_p.
(known_alignment_for_access_p): New.
(do_peeling_for_alignment): Field made int instead of bool and renamed
to peeling_for_alignment.
(LOOP_DO_PEELING_FOR_ALIGNMENT): Renamed to LOOP_PEELING_FOR_ALIGNMENT.
* tree-vect-analyze.c (vect_determine_vectorization_factor): New. This
functionality used to be in vect_analyze_operations.
(vect_analyze_operations): Code to determine vectorization factor was
moved to vect_determine_vectorization_factor.
(vect_enhance_data_refs_alignment): Update to correct alignment when it
is known instead of -1. Set LOOP_PEELING_FOR_ALIGNMENT to peeling
factor.
(vect_analyze_loop): Call vect_determine_vectorization_factor (used to
be part of vect_analyze_operations).
* tree-vectorizer.c (slpeel_tree_peel_loop_to_edge): Use fold when
creating the guard condition, as the number of iterations may be
constant.
(slpeel_tree_peel_loop_to_edge): Use new name of
LOOP_DO_PEELING_FOR_ALIGNMENT. Set it to 0 instead of false.
* tree-vect-transform.c (vect_gen_niters_for_prolog_loop): Handle known
alignment case more efficiently. Use LOOP_PEELING_FOR_ALIGNMENT.
(vect_do_peeling_for_alignment): Use fold.
(vect_transform_loop): Use new name of LOOP_DO_PEELING_FOR_ALIGNMENT.
(vect_update_inits_of_dr): Renamed to vect_update_init_of_dr.
(vect_update_inits_of_drs): Use new name of vect_update_inits_of_dr.
(vectorizable_store): Fix assertion to use == instead of =.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@96526 138bc75d-0d04-0410-961f-82ee72b054a4
author: dorit <dorit@138bc75d-0d04-0410-961f-82ee72b054a4> 2005-03-15 18:33:09 +0000
committer: dorit <dorit@138bc75d-0d04-0410-961f-82ee72b054a4> 2005-03-15 18:33:09 +0000
commit: 39b8f742ef14abba097084b567e57563e555d0df (patch)
tree: aa609c29139804fa11435795fdbdacaccff79d8f
parent: 0b503e11ea9748a69e1be0e927e8efdefc64f396 (diff)
download: gcc-39b8f742ef14abba097084b567e57563e555d0df.tar.gz
10 files changed, 504 insertions, 174 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 59d84facf02..0d3e60f7d3a 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,35 @@
+2005-03-15  Dorit Naishlos  <dorit@il.ibm.com>
+
+	* tree-vectorizer.h (unknown_alignment_for_access_p): Replaced by
+	known_alignment_for_access_p.
+	(known_alignment_for_access_p): New.
+	(do_peeling_for_alignment): Field made int instead of bool and renamed
+	to peeling_for_alignment.
+	(LOOP_DO_PEELING_FOR_ALIGNMENT): Renamed to LOOP_PEELING_FOR_ALIGNMENT.
+	* tree-vect-analyze.c (vect_determine_vectorization_factor): New. This
+	functionality used to be in vect_analyze_operations.
+	(vect_analyze_operations): Code to determine vectorization factor was
+	moved to vect_determine_vectorization_factor.
+	(vect_enhance_data_refs_alignment): Update to correct alignment when it
+	is known instead of -1.  Set LOOP_PEELING_FOR_ALIGNMENT to peeling
+	factor.
+	(vect_analyze_loop): Call vect_determine_vectorization_factor (used to
+	be part of vect_analyze_operations).
+	* tree-vectorizer.c (slpeel_tree_peel_loop_to_edge): Use fold when
+	creating the guard condition, as the number of iterations may be
+	constant.
+	(slpeel_tree_peel_loop_to_edge): Use new name of
+	LOOP_DO_PEELING_FOR_ALIGNMENT. Set it to 0 instead of false.
+	* tree-vect-transform.c (vect_gen_niters_for_prolog_loop): Handle known
+	alignment case more efficiently. Use LOOP_PEELING_FOR_ALIGNMENT.
+	(vect_do_peeling_for_alignment): Use fold.
+	(vect_transform_loop): Use new name of LOOP_DO_PEELING_FOR_ALIGNMENT.
+
+	(vect_update_inits_of_dr): Renamed to
+	vect_update_init_of_dr.
+	(vect_update_inits_of_drs): Use new name of vect_update_inits_of_dr.
+	(vectorizable_store): Fix assertion to use == instead of =.
+
 2005-03-15  Daniel Jacobowitz  <dan@codesourcery.com>
 
 	* config/arm/arm.h (CONDITIONAL_REGISTER_USAGE): Don't clear
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 6805e68ba8a..442e6c04461 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,11 @@
+2005-03-15  Dorit Naishlos  <dorit@il.ibm.com>
+
+	* gcc.dg/vect/vect-54.c: Now vectorizable on targets that don't support
+	misaligned accesses.
+	* gcc.dg/vect/vect-58.c: Likewise.
+	* gcc.dg/vect/vect-92.c: New.
+	* gcc.dg/vect/vect-93.c: New.
+
 2005-03-15  Feng Wang  <fengwang@nudt.edu.cn>
 
 	PR fortran/18827
diff --git a/gcc/testsuite/gcc.dg/vect/vect-54.c b/gcc/testsuite/gcc.dg/vect/vect-54.c
index b169bb33452..5ced09a3f63 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-54.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-54.c
@@ -50,6 +50,6 @@ int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */
-/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail vect_no_align } } } */
-/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-58.c b/gcc/testsuite/gcc.dg/vect/vect-58.c
index df814d9452d..275e465f882 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-58.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-58.c
@@ -51,6 +51,6 @@ int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */
-/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail vect_no_align } } } */
-/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-92.c b/gcc/testsuite/gcc.dg/vect/vect-92.c
new file mode 100644
index 00000000000..02efc6c4326
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-92.c
@@ -0,0 +1,90 @@
+/* { dg-require-effective-target vect_float } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 256
+
+typedef float afloat __attribute__ ((__aligned__(16)));
+
+/* known misalignment: same alignment  */
+
+int
+main1 (afloat * __restrict__ pa, afloat * __restrict__ pb, afloat * __restrict__ pc)
+{
+  int i;
+
+  for (i = 0; i < 5; i++)
+    {
+      pa[i+1] = pb[i+1] * pc[i+1];
+    }
+
+  /* check results:  */
+  for (i = 0; i < 5; i++)
+    {
+      if (pa[i+1] != (pb[i+1] * pc[i+1]))
+	abort ();
+    }
+
+  return 0;
+}
+
+int
+main2 (afloat * __restrict__ pa, afloat * __restrict__ pb, afloat * __restrict__ pc)
+{
+  int i;
+
+  for (i = 0; i < 6; i++)
+    {
+      pa[i+1] = pb[i+1] * pc[i+1];
+    }
+
+  /* check results:  */
+  for (i = 0; i < 6; i++)
+    {
+      if (pa[i+1] != (pb[i+1] * pc[i+1]))
+	abort ();
+    }
+
+  return 0;
+}
+
+int
+main3 (afloat * __restrict__ pa, afloat * __restrict__ pb, afloat * __restrict__ pc, int n)
+{
+  int i;
+
+  for (i = 0; i < n; i++)
+    {
+      pa[i+1] = pb[i+1] * pc[i+1];
+    }
+
+  /* check results:  */
+  for (i = 0; i < n; i++)
+    {
+      if (pa[i+1] != (pb[i+1] * pc[i+1]))
+	abort ();
+    }
+
+  return 0;
+}
+
+int main (void)
+{
+  int i;
+  afloat a[N];
+  afloat b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,48,51,54,57};
+  afloat c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19};
+
+  check_vect ();
+
+  main1 (a,b,c);
+  main2 (a,b,c);
+  main3 (a,b,c,N);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 3 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-93.c b/gcc/testsuite/gcc.dg/vect/vect-93.c
new file mode 100644
index 00000000000..fe3a81b1ba0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-93.c
@@ -0,0 +1,76 @@
+/* { dg-require-effective-target vect_float } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 3001
+
+typedef float afloat __attribute__ ((__aligned__(16)));
+
+int
+main1 (float *pa)
+{
+  int i;
+
+  for (i = 0; i < 3001; i++)
+    {
+      pa[i] = 2.0;
+    }
+
+  /* check results:  */
+  for (i = 0; i < 3001; i++)
+    {
+      if (pa[i] != 2.0)
+	abort ();
+    }
+
+  for (i = 1; i <= 10; i++)
+    {
+      pa[i] = 3.0;
+    }
+
+  /* check results:  */
+  for (i = 1; i <= 10; i++)
+    {
+      if (pa[i] != 3.0)
+	abort ();
+    }
+  
+  return 0;
+}
+
+int main (void)
+{
+  int i;
+  afloat a[N];
+  afloat b[N];
+
+  check_vect ();
+
+  /* from bzip2: */
+  for (i=0; i<N; i++) b[i] = i;
+  a[0] = 0;
+  for (i = 1; i <= 256; i++) a[i] = b[i-1];
+
+  /* check results:  */
+  for (i = 1; i <= 256; i++)
+    {
+      if (a[i] != i-1)
+	abort ();
+    }
+  if (a[0] != 0)
+    abort ();
+
+  main1 (a);
+
+  return 0;
+}
+
+/* in main1 */
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { target vect_no_align } } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 3 "vect" { xfail vect_no_align } } } */
+
+/* in main */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail vect_no_align } } } */
diff --git a/gcc/tree-vect-analyze.c b/gcc/tree-vect-analyze.c
index 2595e497723..d1b274b56ec 100644
--- a/gcc/tree-vect-analyze.c
+++ b/gcc/tree-vect-analyze.c
@@ -50,6 +50,7 @@ static bool vect_analyze_data_refs_alignment (loop_vec_info);
 static bool vect_compute_data_refs_alignment (loop_vec_info);
 static void vect_enhance_data_refs_alignment (loop_vec_info);
 static bool vect_analyze_operations (loop_vec_info);
+static bool vect_determine_vectorization_factor (loop_vec_info);
 
 /* Utility functions for the analyses.  */
 static bool exist_non_indexing_operands_for_use_p (tree, tree);
@@ -285,6 +286,150 @@ vect_analyze_offset_expr (tree expr,
 }
 
 
+/* Function vect_determine_vectorization_factor
+
+   Determine the vectorization factor (VF). VF is the number of data elements
+   that are operated upon in parallel in a single iteration of the vectorized
+   loop. For example, when vectorizing a loop that operates on 4-byte elements,
+   on a target with vector size (VS) of 16 bytes, the VF is set to 4, since 4
+   elements can fit in a single vector register.
+
+   We currently support vectorization of loops in which all types operated upon
+   are of the same size. Therefore this function currently sets VF according to
+   the size of the types operated upon, and fails if there are multiple sizes
+   in the loop.
+
+   VF is also the factor by which the loop iterations are strip-mined, e.g.:
+   original loop:
+        for (i=0; i<N; i++){
+          a[i] = b[i] + c[i];
+        }
+
+   vectorized loop:
+        for (i=0; i<N; i+=VF){
+          a[i:VF] = b[i:VF] + c[i:VF];
+        }
+*/
+
+static bool
+vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
+{
+  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
+  int nbbs = loop->num_nodes;
+  block_stmt_iterator si;
+  unsigned int vectorization_factor = 0;
+  int i;
+  tree scalar_type;
+
+  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+    fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
+
+  for (i = 0; i < nbbs; i++)
+    {
+      basic_block bb = bbs[i];
+
+      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
+        {
+          tree stmt = bsi_stmt (si);
+          unsigned int nunits;
+          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+          tree vectype;
+
+          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+            {
+              fprintf (vect_dump, "==> examining statement: ");
+              print_generic_expr (vect_dump, stmt, TDF_SLIM);
+            }
+
+          gcc_assert (stmt_info);
+          /* Skip stmts which do not need to be vectorized.  */
+          if (!STMT_VINFO_RELEVANT_P (stmt_info))
+            continue;
+
+          if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
+            {
+              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
+                                        LOOP_LOC (loop_vinfo)))
+                {
+                  fprintf (vect_dump, "not vectorized: vector stmt in loop:");
+                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
+                }
+              return false;
+            }
+
+          if (STMT_VINFO_DATA_REF (stmt_info))
+            scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
+          else if (TREE_CODE (stmt) == MODIFY_EXPR)
+            scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
+          else
+            scalar_type = TREE_TYPE (stmt);
+
+          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+            {
+              fprintf (vect_dump, "get vectype for scalar type:  ");
+              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
+            }
+
+          vectype = get_vectype_for_scalar_type (scalar_type);
+          if (!vectype)
+            {
+              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
+                                        LOOP_LOC (loop_vinfo)))
+                {
+                  fprintf (vect_dump, "not vectorized: unsupported data-type ");
+                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
+                }
+              return false;
+            }
+          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+            {
+              fprintf (vect_dump, "vectype: ");
+              print_generic_expr (vect_dump, vectype, TDF_SLIM);
+            }
+          STMT_VINFO_VECTYPE (stmt_info) = vectype;
+
+          nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
+          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+            fprintf (vect_dump, "nunits = %d", nunits);
+
+          if (vectorization_factor)
+            {
+              /* FORNOW: don't allow mixed units. 
+                 This restriction will be relaxed in the future.  */
+              if (nunits != vectorization_factor) 
+                {
+                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
+                                            LOOP_LOC (loop_vinfo)))
+                    fprintf (vect_dump, "not vectorized: mixed data-types");
+                  return false;
+                }
+            }
+          else
+            vectorization_factor = nunits;
+
+#ifdef ENABLE_CHECKING
+          gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
+                        * vectorization_factor == UNITS_PER_SIMD_WORD);
+#endif
+        }
+    }
+
+  /* TODO: Analyze cost. Decide whether it is worthwhile to vectorize.  */
+
+  if (vectorization_factor <= 1)
+    {
+      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
+                                LOOP_LOC (loop_vinfo)))
+        fprintf (vect_dump, "not vectorized: unsupported data-type");
+      return false;
+    }
+  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
+
+  return true;
+}
+
+
 /* Function vect_analyze_operations.
 
    Scan the loop stmts and make sure they are all vectorizable.  */
@@ -299,11 +444,13 @@ vect_analyze_operations (loop_vec_info loop_vinfo)
   unsigned int vectorization_factor = 0;
   int i;
   bool ok;
-  tree scalar_type;
 
   if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
     fprintf (vect_dump, "=== vect_analyze_operations ===");
 
+  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
+  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+
   for (i = 0; i < nbbs; i++)
     {
       basic_block bb = bbs[i];
@@ -311,9 +458,7 @@ vect_analyze_operations (loop_vec_info loop_vinfo)
       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
 	{
 	  tree stmt = bsi_stmt (si);
-	  unsigned int nunits;
 	  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-	  tree vectype;
 
 	  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
 	    {
@@ -337,49 +482,13 @@ vect_analyze_operations (loop_vec_info loop_vinfo)
 	      continue;
 	    }
 
-	  if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
-	    {
-	      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
-                                         LOOP_LOC (loop_vinfo)))
-		{
-                  fprintf (vect_dump, "not vectorized: vector stmt in loop:");
-		  print_generic_expr (vect_dump, stmt, TDF_SLIM);
-		}
-	      return false;
-	    }
-
-          if (STMT_VINFO_DATA_REF (stmt_info))
-            scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));    
-          else if (TREE_CODE (stmt) == MODIFY_EXPR)
-	    scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
-	  else
-	    scalar_type = TREE_TYPE (stmt);
-
-	  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
-	    {
-	      fprintf (vect_dump, "get vectype for scalar type:  ");
-	      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
-	    }
-
-	  vectype = get_vectype_for_scalar_type (scalar_type);
-	  if (!vectype)
-	    {
-	      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
-                                         LOOP_LOC (loop_vinfo)))
-		{
-                  fprintf (vect_dump,
-                           "not vectorized: unsupported data-type ");
-		  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
-		}
-	      return false;
-	    }
-
-	  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
-	    {
-	      fprintf (vect_dump, "vectype: ");
-	      print_generic_expr (vect_dump, vectype, TDF_SLIM);
-	    }
-	  STMT_VINFO_VECTYPE (stmt_info) = vectype;
+#ifdef ENABLE_CHECKING
+          if (STMT_VINFO_RELEVANT_P (stmt_info))
+            {
+              gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
+              gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
+            }
+#endif
 
 	  ok = (vectorizable_operation (stmt, NULL, NULL)
 		|| vectorizable_assignment (stmt, NULL, NULL)
@@ -396,44 +505,11 @@ vect_analyze_operations (loop_vec_info loop_vinfo)
 		}
 	      return false;
 	    }
-
-	  nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
-	  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
-	    fprintf (vect_dump, "nunits = %d", nunits);
-
-	  if (vectorization_factor)
-	    {
-	      /* FORNOW: don't allow mixed units.
-	         This restriction will be relaxed in the future.  */
-	      if (nunits != vectorization_factor)
-		{
-	          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
-                                             LOOP_LOC (loop_vinfo)))
-		    fprintf (vect_dump, "not vectorized: mixed data-types");
-		  return false;
-		}
-	    }
-	  else
-	    vectorization_factor = nunits;
-
-#ifdef ENABLE_CHECKING
-	  gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
-			* vectorization_factor == UNITS_PER_SIMD_WORD);
-#endif
 	}
     }
 
   /* TODO: Analyze cost. Decide if worth while to vectorize.  */
 
-  if (vectorization_factor <= 1)
-    {
-      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
-                                 LOOP_LOC (loop_vinfo)))
-        fprintf (vect_dump, "not vectorized: unsupported data-type");
-      return false;
-    }
-  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
-
   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
       && vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
     fprintf (vect_dump,
@@ -933,7 +1009,9 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
 {
   varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
   varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
-  unsigned int i;
+  varray_type datarefs;
+  struct data_reference *dr0 = NULL;
+  unsigned int i, j;
 
   /*
      This pass will require a cost model to guide it whether to apply peeling 
@@ -1036,26 +1114,15 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
 
   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
     {
-      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
-      if (!aligned_access_p (dr))
-        {
-          LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
-          LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
+      dr0 = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
+      if (!aligned_access_p (dr0))
+	{
+	  LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
+	  LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
 	  break;
-        }
+	}
     }
 
-  if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
-    {
-      if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
-	fprintf (vect_dump, "Peeling for alignment will not be applied.");
-      return;
-    }
-  else
-    if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
-      fprintf (vect_dump, "Peeling for alignment will be applied.");
-
-
   /* (1.2) Update the alignment info according to the peeling factor.
 	   If the misalignment of the DR we peel for is M, then the
 	   peeling factor is VF - M, and the misalignment of each access DR_i
@@ -1063,37 +1130,54 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
 	   If the misalignment of the DR we peel for is unknown, then the 
 	   misalignment of each access DR_i in the loop is also unknown.
 
-	   FORNOW: set the misalignment of the accesses to unknown even
-	           if the peeling factor is known at compile time.
+           TODO: - consider accesses that are known to have the same
+                   alignment, even if that alignment is unknown.  */
 
-	   TODO: - if the peeling factor is known at compile time, use that
-		   when updating the misalignment info of the loop DRs.
-		 - consider accesses that are known to have the same 
-		   alignment, even if that alignment is unknown.  */
-   
-  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
+  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
     {
-      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
-      if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
+      int mis;
+      int npeel = 0;
+
+      if (known_alignment_for_access_p (dr0))
 	{
-	  DR_MISALIGNMENT (dr) = 0;
-	  if (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo)))
-	    fprintf (vect_dump, "Alignment of access forced using peeling.");
+	  /* Since it's known at compile time, compute the number of iterations
+	     in the peeled loop (the peeling factor) for use in updating
+	     DR_MISALIGNMENT values.  The peeling factor is the vectorization
+	     factor minus the misalignment as an element count.  */
+	  mis = DR_MISALIGNMENT (dr0);
+	  mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
+	  npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
 	}
-      else
-	DR_MISALIGNMENT (dr) = -1;
-    }
-  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
-    {
-      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
-      if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
+
+      datarefs = loop_write_datarefs;
+      for (j = 0; j < 2; j++)
 	{
-	  DR_MISALIGNMENT (dr) = 0;
-	  if (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo)))
-	    fprintf (vect_dump, "Alignment of access forced using peeling.");
+	  for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
+	    {
+	      struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
+
+	      if (dr == dr0)
+		continue;
+	      if (known_alignment_for_access_p (dr)
+		  && DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr0))
+		DR_MISALIGNMENT (dr) = 0;
+	      else if (known_alignment_for_access_p (dr)
+		       && known_alignment_for_access_p (dr0))
+		{
+		  int drsize = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
+
+		  DR_MISALIGNMENT (dr) += npeel * drsize;
+		  DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
+		}
+	      else
+		DR_MISALIGNMENT (dr) = -1;
+	    }
+	  datarefs = loop_read_datarefs;
 	}
-      else
-	DR_MISALIGNMENT (dr) = -1;
+
+      DR_MISALIGNMENT (dr0) = 0;
+      if (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo)))
+	fprintf (vect_dump, "Alignment of access forced using peeling.");
     }
 }
 
@@ -2479,6 +2563,15 @@ vect_analyze_loop (struct loop *loop)
       return NULL;
     }
 
+  ok = vect_determine_vectorization_factor (loop_vinfo);
+  if (!ok)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
+        fprintf (vect_dump, "can't determine vectorization factor.");
+      destroy_loop_vec_info (loop_vinfo);
+      return NULL;
+    }
+
   /* Analyze the alignment of the data-refs in the loop.
      FORNOW: Only aligned accesses are handled.  */
 
diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c
index 1a82f288cb2..5dd9efecdbc 100644
--- a/gcc/tree-vect-transform.c
+++ b/gcc/tree-vect-transform.c
@@ -64,7 +64,7 @@ static void vect_generate_tmps_on_preheader
 static tree vect_build_loop_niters (loop_vec_info);
 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge); 
 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
-static void vect_update_inits_of_dr (struct data_reference *, tree niters);
+static void vect_update_init_of_dr (struct data_reference *, tree niters);
 static void vect_update_inits_of_drs (loop_vec_info, tree);
 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
 static void vect_do_peeling_for_loop_bound 
@@ -907,7 +907,7 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
 
   alignment_support_cheme = vect_supportable_dr_alignment (dr);
   gcc_assert (alignment_support_cheme);
-  gcc_assert (alignment_support_cheme = dr_aligned);  /* FORNOW */
+  gcc_assert (alignment_support_cheme == dr_aligned);  /* FORNOW */
 
   /* Handle use - get the vectorized def from the defining stmt.  */
   vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
@@ -1451,14 +1451,16 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
 
    Set the number of iterations for the loop represented by LOOP_VINFO
    to the minimum between LOOP_NITERS (the original iteration count of the loop)
-   and the misalignment of DR - the first data reference recorded in
+   and the misalignment of DR - the data reference recorded in
    LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of 
    this loop, the data reference DR will refer to an aligned location.
 
    The following computation is generated:
 
-   compute address misalignment in bytes:
-   addr_mis = addr & (vectype_size - 1)
+   If the misalignment of DR is known at compile time:
+     addr_mis = DR_MISALIGNMENT (dr)
+   Else, compute address misalignment in bytes:
+     addr_mis = addr & (vectype_size - 1)
 
    prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
    
@@ -1479,37 +1481,53 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
   stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
-  tree elem_misalign;
-  tree byte_misalign;
-  tree new_stmts = NULL_TREE;
-  tree start_addr = 
-	vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
-  tree ptr_type = TREE_TYPE (start_addr);
-  tree size = TYPE_SIZE (ptr_type);
-  tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
-  tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
   tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
   tree niters_type = TREE_TYPE (loop_niters);
-  tree elem_size_log = 
-	build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
-  tree vf_tree = build_int_cst (unsigned_type_node, vf);
 
   pe = loop_preheader_edge (loop); 
-  new_bb = bsi_insert_on_edge_immediate (pe, new_stmts); 
-  gcc_assert (!new_bb);
 
-  /* Create:  byte_misalign = addr & (vectype_size - 1)  */
-  byte_misalign = build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
+  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
+    {
+      int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
+      int element_size = vectype_align/vf;
+      int elem_misalign = byte_misalign / element_size;
 
-  /* Create:  elem_misalign = byte_misalign / element_size  */
-  elem_misalign = 
-	build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
+      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+        fprintf (vect_dump, "known alignment = %d.", byte_misalign);
+      iters = build_int_cst (niters_type, (vf - elem_misalign)&(vf-1));
+    }
+  else
+    {
+      tree new_stmts = NULL_TREE;
+      tree start_addr =
+        vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
+      tree ptr_type = TREE_TYPE (start_addr);
+      tree size = TYPE_SIZE (ptr_type);
+      tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
+      tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
+      tree elem_size_log =
+        build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
+      tree vf_tree = build_int_cst (unsigned_type_node, vf);
+      tree byte_misalign;
+      tree elem_misalign;
+
+      new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
+      gcc_assert (!new_bb);
   
-  /* Create:  (niters_type) (VF - elem_misalign)&(VF - 1)  */
-  iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
-  iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
-  iters = fold_convert (niters_type, iters);
+      /* Create:  byte_misalign = addr & (vectype_size - 1)  */
+      byte_misalign = 
+        build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
   
+      /* Create:  elem_misalign = byte_misalign / element_size  */
+      elem_misalign =
+        build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
+
+      /* Create:  (niters_type) (VF - elem_misalign)&(VF - 1)  */
+      iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
+      iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
+      iters = fold_convert (niters_type, iters);
+    }
+
   /* Create:  prolog_loop_niters = min (iters, loop_niters) */
   /* If the loop bound is known at compile time we already verified that it is
      greater than vf; since the misalignment ('iters') is at most vf, there's
@@ -1517,12 +1535,17 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
   if (TREE_CODE (loop_niters) != INTEGER_CST)
     iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
 
+  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+    {
+      fprintf (vect_dump, "niters for prolog loop: ");
+      print_generic_expr (vect_dump, iters, TDF_SLIM);
+    }
+
   var = create_tmp_var (niters_type, "prolog_loop_niters");
   add_referenced_tmp_var (var);
   iters_name = force_gimple_operand (iters, &stmt, false, var);
 
   /* Insert stmt on loop preheader edge.  */
-  pe = loop_preheader_edge (loop);
   if (stmt)
     {
       basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
@@ -1533,7 +1556,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
 }
 
 
-/* Function vect_update_inits_of_dr
+/* Function vect_update_init_of_dr
 
    NITERS iterations were peeled from LOOP.  DR represents a data reference
    in LOOP.  This function updates the information recorded in DR to
@@ -1541,7 +1564,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
    executed.  Specifically, it updates the OFFSET field of stmt_info.  */
 
 static void
-vect_update_inits_of_dr (struct data_reference *dr, tree niters)
+vect_update_init_of_dr (struct data_reference *dr, tree niters)
 {
   stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
   tree offset = STMT_VINFO_VECT_INIT_OFFSET (stmt_info);
@@ -1574,13 +1597,13 @@ vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
     {
       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
-      vect_update_inits_of_dr (dr, niters);
+      vect_update_init_of_dr (dr, niters);
     }
 
   for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
     {
       struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
-      vect_update_inits_of_dr (dr, niters);
+      vect_update_init_of_dr (dr, niters);
     }
 }
 
@@ -1618,8 +1641,8 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
 
   /* Update number of times loop executes.  */
   n_iters = LOOP_VINFO_NITERS (loop_vinfo);
-  LOOP_VINFO_NITERS (loop_vinfo) =
-    build2 (MINUS_EXPR, TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
+  LOOP_VINFO_NITERS (loop_vinfo) = fold (build2 (MINUS_EXPR,
+		TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop));
 
   /* Update the init conditions of the access functions of all data refs.  */
   vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
@@ -1656,7 +1679,7 @@ vect_transform_loop (loop_vec_info loop_vinfo,
   /* Peel the loop if there are data refs with unknown alignment.
      Only one data ref with unknown store is allowed.  */
 
-  if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
+  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
     vect_do_peeling_for_alignment (loop_vinfo, loops);
   
   /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 08a923ee6b8..82c108888ac 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -963,7 +963,7 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
   add_bb_to_loop (bb_before_second_loop, first_loop->outer);
 
   pre_condition =
-        build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node);
+    fold (build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node));
   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
                                   bb_before_second_loop, bb_before_first_loop);
   slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */,
@@ -1001,7 +1001,8 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
   bb_after_second_loop = split_edge (second_loop->single_exit);
   add_bb_to_loop (bb_after_second_loop, second_loop->outer);
 
-  pre_condition = build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
+  pre_condition = 
+	fold (build2 (EQ_EXPR, boolean_type_node, first_niters, niters));
   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
                                   bb_after_second_loop, bb_before_first_loop);
   slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */,
@@ -1213,7 +1214,7 @@ new_loop_vec_info (struct loop *loop)
   LOOP_VINFO_EXIT_COND (res) = NULL;
   LOOP_VINFO_NITERS (res) = NULL;
   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
-  LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
+  LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
   LOOP_VINFO_VECT_FACTOR (res) = 0;
   VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
 			   "loop_write_datarefs");
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index b761f4d0ef3..e32eed2364a 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -95,9 +95,15 @@ typedef struct _loop_vec_info {
   /* Unknown DRs according to which loop was peeled.  */
   struct data_reference *unaligned_dr;
 
-  /* If true, loop is peeled.
-   unaligned_drs show in this case DRs used for peeling.  */
-  bool do_peeling_for_alignment;
+  /* peeling_for_alignment indicates whether peeling for alignment will take
+     place, and what the peeling factor should be:
+     peeling_for_alignment = X means:
+        If X=0: Peeling for alignment will not be applied.
+        If X>0: Peel first X iterations.
+        If X=-1: Generate a runtime test to calculate the number of iterations
+                 to be peeled, using the dataref recorded in the field
+                 unaligned_dr.  */
+  int peeling_for_alignment;
 
   /* All data references in the loop that are being written to.  */
   varray_type data_ref_writes;
@@ -119,7 +125,7 @@ typedef struct _loop_vec_info {
 #define LOOP_VINFO_DATAREF_WRITES(L) (L)->data_ref_writes
 #define LOOP_VINFO_DATAREF_READS(L)  (L)->data_ref_reads
 #define LOOP_VINFO_INT_NITERS(L) (TREE_INT_CST_LOW ((L)->num_iters))
-#define LOOP_DO_PEELING_FOR_ALIGNMENT(L) (L)->do_peeling_for_alignment
+#define LOOP_PEELING_FOR_ALIGNMENT(L) (L)->peeling_for_alignment
 #define LOOP_VINFO_UNALIGNED_DR(L) (L)->unaligned_dr
 #define LOOP_VINFO_LOC(L)          (L)->loop_line_number
 
@@ -242,7 +248,8 @@ vinfo_for_stmt (tree stmt)
 /* Info on data references alignment.                              */
 /*-----------------------------------------------------------------*/
 
-/* The misalignment of the memory access in bytes.  */
+/* Reflects the actual misalignment of the first access in the vectorized
+   loop, taking into account peeling/versioning if applied.  */
 #define DR_MISALIGNMENT(DR)   (DR)->aux
 
 static inline bool
@@ -252,9 +259,9 @@ aligned_access_p (struct data_reference *data_ref_info)
 }
 
 static inline bool
-unknown_alignment_for_access_p (struct data_reference *data_ref_info)
+known_alignment_for_access_p (struct data_reference *data_ref_info)
 {
-  return (DR_MISALIGNMENT (data_ref_info) == -1);
+  return (DR_MISALIGNMENT (data_ref_info) != -1);
 }
 
 /* Perform signed modulo, always returning a non-negative value.  */
author	dorit <dorit@138bc75d-0d04-0410-961f-82ee72b054a4>	2005-03-15 18:33:09 +0000
committer	dorit <dorit@138bc75d-0d04-0410-961f-82ee72b054a4>	2005-03-15 18:33:09 +0000
commit	39b8f742ef14abba097084b567e57563e555d0df (patch)
tree	aa609c29139804fa11435795fdbdacaccff79d8f
parent	0b503e11ea9748a69e1be0e927e8efdefc64f396 (diff)
download	gcc-39b8f742ef14abba097084b567e57563e555d0df.tar.gz