summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/ChangeLog62
-rw-r--r--gcc/config/alpha/alpha.h3
-rw-r--r--gcc/config/bfin/bfin.h3
-rw-r--r--gcc/config/i386/i386.h2
-rw-r--r--gcc/config/ia64/ia64.h2
-rw-r--r--gcc/config/mips/mips.h2
-rw-r--r--gcc/config/rs6000/rs6000.h5
-rw-r--r--gcc/config/sparc/sparc.h2
-rw-r--r--gcc/defaults.h4
-rw-r--r--gcc/doc/tm.texi10
-rw-r--r--gcc/testsuite/ChangeLog11
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11.c32
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11a.c38
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11b.c33
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11c.c39
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c37
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/gen-vect-25.c52
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/gen-vect-26.c34
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/gen-vect-28.c35
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/gen-vect-32.c33
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-82.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-82_64.c5
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-83.c2
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-83_64.c5
-rw-r--r--gcc/tree-complex.c157
-rw-r--r--gcc/tree-optimize.c1
-rw-r--r--gcc/tree-vect-analyze.c9
-rw-r--r--gcc/tree-vect-transform.c36
-rw-r--r--gcc/tree-vectorizer.c17
29 files changed, 563 insertions, 110 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 368698a40b8..5a99ffa1aff 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,39 @@
+2005-04-27 Paolo Bonzini <bonzini@gnu.org>
+
+ * tree-complex.c (expand_vector_operation): New, extracted from
+ expand_vector_operations_1.
+ (tree_vec_extract): Build a NOP_EXPR.
+ (expand_vector_parallel): Do not care about returning the correct type.
+ (expand_vector_operations_1): Call expand_vector_operation.
+ Build the VIEW_CONVERT_EXPR on the left side of MODIFY_EXPRs.
+
+ * tree-complex.c (gate_expand_vector_operations): New.
+ (pass_lower_vector_ssa): Use it.
+ * tree-optimize.c (init_tree_optimization_passes): Include
+ pass_lower_vector_ssa.
+ * tree-vect-transform.c (vect_min_worthwhile_factor): New.
+ (vectorizable_operation): Use it.
+ * tree-vectorizer.c (get_vectype_for_scalar_type): Accept
+ integer modes for the vector type.
+
+ * defaults.h (UNITS_PER_SIMD_WORD): Default to UNITS_PER_WORD.
+ * tree-vect-analyze.c (vect_enhance_data_refs_alignment):
+ Do not cope with UNITS_PER_SIMD_WORD == 0.
+ * tree-vectorizer.c (get_vectype_for_scalar_type): Check
+ if the scalar type is not bigger than UNITS_PER_SIMD_WORD.
+ (vectorize_loops): Do not check that UNITS_PER_SIMD_WORD > 0.
+ * config/i386/i386.h (UNITS_PER_SIMD_WORD): Default to UNITS_PER_WORD.
+ * config/mips/mips.h (UNITS_PER_SIMD_WORD): Likewise.
+ * config/rs6000/rs6000.h (UNITS_PER_SIMD_WORD): Likewise.
+ * config/sparc/sparc.h (UNITS_PER_SIMD_WORD): Likewise.
+
+ * config/alpha/alpha.h (UNITS_PER_SIMD_WORD): Remove.
+ * config/bfin/bfin.h (UNITS_PER_SIMD_WORD): Remove.
+ * config/ia64/ia64.h (UNITS_PER_SIMD_WORD): Remove.
+
+ * doc/tm.texi (UNITS_PER_WORD): Rephrase more accurately.
+ (UNITS_PER_SIMD_WORD): New.
+
2005-04-27 Nathan Sidwell <nathan@codesourcery.com>
* config/ia64/ia64.c (ia64_encode_addr_area): Use gcc_assert and
@@ -29,17 +65,38 @@
(INDEX_REGISTER_P): New.
(BASE_REGISTER_P): New.
(indirectable_constant_address_p): New. Adapted from
+<<<<<<< ChangeLog
+ INDIRECTABLE_CONSTANT_ADDRESS_P in vax.h.
+ Use SYMBOL_REF_LOCAL_P.
+=======
INDIRECTABLE_CONSTANT_ADDRESS_P in vax.h. Use SYMBOL_REF_LOCAL_P.
+>>>>>>> 2.8478
(indirectable_address_p): New. Adapted from
INDIRECTABLE_ADDRESS_P in vax.h.
(nonindexed_address_p): New. Adapted from
+<<<<<<< ChangeLog
+ GO_IF_NONINDEXED_ADDRESS in vax.h.
+ (index_temp_p): New. Adapted from
+ INDEX_TERM_P in vax.h.
+ (reg_plus_index_p): New. Adapted from
+ GO_IF_REG_PLUS_INDEX in vax.h.
+=======
GO_IF_NONINDEXED_ADDRESS in vax.h.
(index_temp_p): New. Adapted from INDEX_TERM_P in vax.h.
(reg_plus_index_p): New. Adapted from GO_IF_REG_PLUS_INDEX in vax.h.
+>>>>>>> 2.8478
(legitimate_address_p): New. Adapted from
+<<<<<<< ChangeLog
+ GO_IF_LEGITIMATE_ADDRESS in vax.h
+=======
GO_IF_LEGITIMATE_ADDRESS in vax.h.
+>>>>>>> 2.8478
(vax_mode_dependent_address_p): New. Adapted from
+<<<<<<< ChangeLog
+ GO_IF_MODE_DEPENDENT_ADDRESS in vax.h
+=======
GO_IF_MODE_DEPENDENT_ADDRESS in vax.h.
+>>>>>>> 2.8478
* config/vax/vax.h (CONSTANT_ADDRESS_P): Use
legitimate_constant_address_p.
(CONSTANT_P): Use legitimate_constant_p.
@@ -48,8 +105,13 @@
(GO_IF_NONINDEXED_ADDRESS): Removed.
(INDEX_TEMP_P): Removed.
(GO_IF_REG_PLUS_INDEX): Removed.
+<<<<<<< ChangeLog
+ (GO_IF_LEGITIMATE_ADDRESS): Use legitimate_address_p.
+ Two definitions, depending on whether REG_OK_STRICT is defined.
+=======
(GO_IF_LEGITIMATE_ADDRESS): Use legitimate_address_p. Two
definitions, depending on whether REG_OK_STRICT is defined.
+>>>>>>> 2.8478
(GO_IF_MODE_DEPENDENT_ADDRESS): Use vax_mode_dependent_address_p.
Two definitions, depending on whether REG_OK_STRICT is defined.
* config/vax/vax-protos.h (legitimate_constant_address_p): Prototype
diff --git a/gcc/config/alpha/alpha.h b/gcc/config/alpha/alpha.h
index 780cf4af20d..97bd9efde6a 100644
--- a/gcc/config/alpha/alpha.h
+++ b/gcc/config/alpha/alpha.h
@@ -394,9 +394,6 @@ extern int alpha_tls_size;
#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) 1
-/* Our SIMD is all done on single integer registers. */
-#define UNITS_PER_SIMD_WORD UNITS_PER_WORD
-
/* Standard register usage. */
/* Number of actual hardware registers.
diff --git a/gcc/config/bfin/bfin.h b/gcc/config/bfin/bfin.h
index 512525afb81..608188ae882 100644
--- a/gcc/config/bfin/bfin.h
+++ b/gcc/config/bfin/bfin.h
@@ -773,9 +773,6 @@ do { \
/* Width of a word, in units (bytes). */
#define UNITS_PER_WORD 4
-/* Size of a vector for autovectorization. */
-#define UNITS_PER_SIMD_WORD 4
-
/* Width in bits of a pointer.
See also the macro `Pmode1' defined below. */
#define POINTER_SIZE 32
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index fc0596ba948..9b04fadbcca 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -836,7 +836,7 @@ do { \
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
place emms and femms instructions. */
-#define UNITS_PER_SIMD_WORD (TARGET_SSE ? 16 : 0)
+#define UNITS_PER_SIMD_WORD (TARGET_SSE ? 16 : UNITS_PER_WORD)
#define VALID_FP_MODE_P(MODE) \
((MODE) == SFmode || (MODE) == DFmode || (MODE) == XFmode \
diff --git a/gcc/config/ia64/ia64.h b/gcc/config/ia64/ia64.h
index 362064af9e2..4aca02471da 100644
--- a/gcc/config/ia64/ia64.h
+++ b/gcc/config/ia64/ia64.h
@@ -168,8 +168,6 @@ extern enum processor_type ia64_tune;
#define UNITS_PER_WORD 8
-#define UNITS_PER_SIMD_WORD UNITS_PER_WORD
-
#define POINTER_SIZE (TARGET_ILP32 ? 32 : 64)
/* A C expression whose value is zero if pointers that need to be extended
diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
index e2118e513bb..f81e7e97e23 100644
--- a/gcc/config/mips/mips.h
+++ b/gcc/config/mips/mips.h
@@ -966,7 +966,7 @@ extern const struct mips_cpu_info *mips_tune_info;
/* The number of bytes in a double. */
#define UNITS_PER_DOUBLE (TYPE_PRECISION (double_type_node) / BITS_PER_UNIT)
-#define UNITS_PER_SIMD_WORD (TARGET_PAIRED_SINGLE_FLOAT ? 8 : 0)
+#define UNITS_PER_SIMD_WORD (TARGET_PAIRED_SINGLE_FLOAT ? 8 : UNITS_PER_WORD)
/* Set the sizes of the core types. */
#define SHORT_TYPE_SIZE 16
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 137e6dd2c61..c2f78a576ae 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1043,8 +1043,9 @@ extern const char *rs6000_warn_altivec_long_switch;
|| (MODE) == V1DImode \
|| (MODE) == V2SImode)
-#define UNITS_PER_SIMD_WORD \
- (TARGET_ALTIVEC ? 16 : (TARGET_SPE ? 8 : 0) )
+#define UNITS_PER_SIMD_WORD \
+ (TARGET_ALTIVEC ? UNITS_PER_ALTIVEC_WORD \
+ : (TARGET_SPE ? UNITS_PER_SPE_WORD : UNITS_PER_WORD))
/* Value is TRUE if hard register REGNO can hold a value of
machine-mode MODE. */
diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h
index dbad1b91365..fddb2e0e0bb 100644
--- a/gcc/config/sparc/sparc.h
+++ b/gcc/config/sparc/sparc.h
@@ -588,7 +588,7 @@ extern struct sparc_cpu_select sparc_select[];
#define MIN_UNITS_PER_WORD 4
#endif
-#define UNITS_PER_SIMD_WORD (TARGET_VIS ? 8 : 0)
+#define UNITS_PER_SIMD_WORD (TARGET_VIS ? 8 : UNITS_PER_WORD)
/* Now define the sizes of the C data types. */
diff --git a/gcc/defaults.h b/gcc/defaults.h
index 28a0d83adb6..e4ef7f36246 100644
--- a/gcc/defaults.h
+++ b/gcc/defaults.h
@@ -702,8 +702,10 @@ do { fputs (integer_asm_op (POINTER_SIZE / BITS_PER_UNIT, TRUE), FILE); \
#define HAS_LONG_UNCOND_BRANCH 0
#endif
+/* By default, only attempt to parallelize bitwise operations, and
+ possibly adds/subtracts using bit-twiddling. */
#ifndef UNITS_PER_SIMD_WORD
-#define UNITS_PER_SIMD_WORD 0
+#define UNITS_PER_SIMD_WORD UNITS_PER_WORD
#endif
/* Determine whether __cxa_atexit, rather than atexit, is used to
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 771461618cc..514f59d5bbe 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -1076,7 +1076,8 @@ largest value that @code{BITS_PER_WORD} can have at run-time.
@end defmac
@defmac UNITS_PER_WORD
-Number of storage units in a word; normally 4.
+Number of storage units in a word; normally the size of a general-purpose
+register, a power of two from 1 to 8.
@end defmac
@defmac MIN_UNITS_PER_WORD
@@ -1085,6 +1086,13 @@ Minimum number of units in a word. If this is undefined, the default is
smallest value that @code{UNITS_PER_WORD} can have at run-time.
@end defmac
+@defmac UNITS_PER_SIMD_WORD
+Number of units in the vectors that the vectorizer can produce.
+The default is equal to @code{UNITS_PER_WORD}, because the vectorizer
+can do some transformations even in absence of specialized @acronym{SIMD}
+hardware.
+@end defmac
+
@defmac POINTER_SIZE
Width of a pointer, in bits. You must specify a value no wider than the
width of @code{Pmode}. If it is not equal to the width of @code{Pmode},
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index bd88d352ed9..0f69c320f7d 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,14 @@
+2005-04-27 Paolo Bonzini <bonzini@gnu.org>
+
+ * gcc.dg/tree-ssa/gen-vect-11.c, gcc.dg/tree-ssa/gen-vect-11a.c,
+ gcc.dg/tree-ssa/gen-vect-11b.c, gcc.dg/tree-ssa/gen-vect-11c.c,
+ gcc.dg/tree-ssa/gen-vect-2.c, gcc.dg/tree-ssa/gen-vect-25.c,
+ gcc.dg/tree-ssa/gen-vect-26.c, gcc.dg/tree-ssa/gen-vect-28.c,
+ gcc.dg/tree-ssa/gen-vect-32.c: New.
+ * gcc.dg/vect/vect-82.c, gcc.dg/vect/vect-83.c: Fix dg-final.
+ * gcc.dg/vect/vect-82_64.c, gcc.dg/vect/vect-83_64.c: Remove xfail,
+ don't run on PPC32.
+
2005-04-27 Joseph S. Myers <joseph@codesourcery.com>
PR c/21213
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11.c
new file mode 100644
index 00000000000..bc6c2869d75
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11.c
@@ -0,0 +1,32 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -ftree-vectorizer-verbose=3 -fdump-tree-vect-stats" } */
+
+#include <stdlib.h>
+
+#define N 16
+
+/* Add two N-element char arrays element by element, then re-check every
+   sum and abort on the first mismatch.  Returns 0 when all sums match.  */
+int main ()
+{
+ int i;
+ char ia[N];
+ char ic[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+ char ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+
+ /* Loop under test: char addition.  */
+ for (i = 0; i < N; i++)
+ {
+ ia[i] = ib[i] + ic[i];
+ }
+
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ if (ia[i] != ib[i] + ic[i])
+ abort ();
+ }
+
+ return 0;
+}
+
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11a.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11a.c
new file mode 100644
index 00000000000..75ec7ce8863
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11a.c
@@ -0,0 +1,38 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -ftree-vectorizer-verbose=3 -fdump-tree-vect-stats" } */
+
+#include <stdlib.h>
+
+#define N 16
+
+/* Element type for the test: short when long's maximum is 2147483647,
+   int otherwise (intended to be half as wide as long).  */
+#if __LONG_MAX__ == 2147483647
+typedef short half_word;
+#else
+typedef int half_word;
+#endif
+
+/* AND two N-element half_word arrays element by element, then re-check
+   every result and abort on the first mismatch.  Returns 0 on success.  */
+int main ()
+{
+ int i;
+ half_word ia[N];
+ half_word ic[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+ half_word ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+
+ /* Loop under test: bitwise AND.  */
+ for (i = 0; i < N; i++)
+ {
+ ia[i] = ib[i] & ic[i];
+ }
+
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ /* Parenthesize the AND: != binds tighter than &, so without the
+    parentheses the condition would be (ia[i] != ib[i]) & ic[i].  */
+ if (ia[i] != (ib[i] & ic[i]))
+ abort ();
+ }
+
+ return 0;
+}
+
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11b.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11b.c
new file mode 100644
index 00000000000..20833533468
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11b.c
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -ftree-vectorizer-verbose=3 -fdump-tree-vect-stats" } */
+
+#include <stdlib.h>
+
+#define N 16
+
+/* Multiply two N-element char arrays element by element, then re-check
+   every product (truncated to char) and abort on the first mismatch.
+   Returns 0 on success.  */
+int main ()
+{
+ int i;
+ char ia[N];
+ char ic[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+ char ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+
+ /* Not vectorizable, multiplication */
+ for (i = 0; i < N; i++)
+ {
+ ia[i] = ib[i] * ic[i];
+ }
+
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ /* The cast mirrors the truncation that happened when the product
+    was stored into the char array.  */
+ if (ia[i] != (char) (ib[i] * ic[i]))
+ abort ();
+ }
+
+ return 0;
+}
+
+
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11c.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11c.c
new file mode 100644
index 00000000000..8632ae42b3a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11c.c
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -ftree-vectorizer-verbose=3 -fdump-tree-vect-stats" } */
+
+#include <stdlib.h>
+
+#define N 16
+
+/* Element type for the test: short when long's maximum is 2147483647,
+   int otherwise (intended to be half as wide as long).  Use the
+   compiler-predefined __LONG_MAX__: LONG_MAX is only defined by
+   <limits.h>, which this file does not include, and an undefined
+   identifier evaluates to 0 in #if.  */
+#if __LONG_MAX__ == 2147483647
+typedef short half_word;
+#else
+typedef int half_word;
+#endif
+
+/* Add two N-element half_word arrays element by element, then re-check
+   every sum and abort on the first mismatch.  Returns 0 on success.  */
+int main ()
+{
+ int i;
+ half_word ia[N];
+ half_word ic[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+ half_word ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+
+ /* Not worthwhile, only 2 parts per int */
+ for (i = 0; i < N; i++)
+ {
+ ia[i] = ib[i] + ic[i];
+ }
+
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ if (ia[i] != ib[i] + ic[i])
+ abort ();
+ }
+
+ return 0;
+}
+
+
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c
new file mode 100644
index 00000000000..be89c268258
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c
@@ -0,0 +1,37 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -ftree-vectorizer-verbose=3 -fdump-tree-vect-stats" } */
+
+#include <stdlib.h>
+
+#define N 16
+
+/* Element type for the test: short when long's maximum is 2147483647,
+   int otherwise (intended to be half as wide as long).  */
+#if __LONG_MAX__ == 2147483647
+typedef short half_word;
+#else
+typedef int half_word;
+#endif
+
+/* Copy one N-element half_word array into another, then re-check every
+   element and abort on the first mismatch.  Returns 0 on success.  */
+int main ()
+{
+ half_word cb[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+ half_word ca[N];
+ int i;
+
+ /* Loop under test: plain element copy.  */
+ for (i = 0; i < N; i++)
+ {
+ ca[i] = cb[i];
+ }
+
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ if (ca[i] != cb[i])
+ abort ();
+ }
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-25.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-25.c
new file mode 100644
index 00000000000..1e0c2c11cf8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-25.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -ftree-vectorizer-verbose=3 -fdump-tree-vect-stats" } */
+
+#include <stdlib.h>
+
+#define N 128
+
+/* Element type for the test: short when long's maximum is 2147483647,
+   int otherwise (intended to be half as wide as long).  */
+#if __LONG_MAX__ == 2147483647
+typedef short half_word;
+#else
+typedef int half_word;
+#endif
+
+/* Fill one N-element half_word array with the parameter N_ARG (named n)
+   and a second one with the int read through P, checking each array
+   after it is filled and aborting on the first mismatch.  Returns 0 on
+   success.
+   NOTE(review): the parameters stand in for main's usual (argc, argv);
+   presumably they are used only as values the compiler cannot see at
+   compile time -- confirm.  */
+int main (int n, int *p)
+{
+ int i;
+ half_word ib[N];
+ half_word ia[N];
+ int k;
+
+ /* First loop under test: store the parameter n into every element.  */
+ for (i = 0; i < N; i++)
+ {
+ ia[i] = n;
+ }
+
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ if (ia[i] != n)
+ abort ();
+ }
+
+ /* Second loop under test: store a value loaded once through p.  */
+ k = *p;
+ for (i = 0; i < N; i++)
+ {
+ ib[i] = k;
+ }
+
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ if (ib[i] != k)
+ abort ();
+ }
+
+ return 0;
+}
+
+
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-26.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-26.c
new file mode 100644
index 00000000000..b90413aa4bf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-26.c
@@ -0,0 +1,34 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -ftree-vectorizer-verbose=3 -fdump-tree-vect-stats" } */
+
+#include <stdlib.h>
+
+#define N 128
+
+/* unaligned store. */
+
+/* Store the constant 5 into elements 1..N of an (N+1)-element char
+   array, then re-check those elements and abort on the first mismatch.
+   Returns 0 on success.  */
+int main ()
+{
+ int i;
+ char ia[N+1];
+
+ /* Loop under test: the first store goes to index 1, not index 0.  */
+ for (i = 1; i <= N; i++)
+ {
+ ia[i] = 5;
+ }
+
+ /* check results: */
+ for (i = 1; i <= N; i++)
+ {
+ if (ia[i] != 5)
+ abort ();
+ }
+
+ return 0;
+}
+
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-28.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-28.c
new file mode 100644
index 00000000000..0d017529357
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-28.c
@@ -0,0 +1,35 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -ftree-vectorizer-verbose=3 -fdump-tree-vect-stats" } */
+
+#include <stdlib.h>
+
+#define N 128
+#define OFF 3
+
+/* unaligned store. */
+
+/* Store the constant 5 into N consecutive elements of a char array
+   starting at index OFF_ARG (the parameter off), then re-check them and
+   abort on the first mismatch.  Returns 0 on success.
+   NOTE(review): the array has N+OFF elements, so the accesses stay in
+   bounds only while 0 <= off <= OFF; off takes the place of main's
+   usual first parameter -- confirm how the harness invokes the test.  */
+int main (int off)
+{
+ int i;
+ char ia[N+OFF];
+
+ /* Loop under test: store at an offset not known at compile time.  */
+ for (i = 0; i < N; i++)
+ {
+ ia[i+off] = 5;
+ }
+
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ if (ia[i+off] != 5)
+ abort ();
+ }
+
+ return 0;
+}
+
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-32.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-32.c
new file mode 100644
index 00000000000..681c7071685
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-32.c
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -ftree-vectorizer-verbose=3 -fdump-tree-vect-stats" } */
+
+#include <stdlib.h>
+
+#define N 16
+
+/* Store the constant 5 into every element of a char array that is a
+   member of a local struct, then re-check the elements and abort on the
+   first mismatch.  Returns 0 on success.  */
+int main ()
+{
+ struct {
+ char ca[N];
+ } s;
+ int i;
+
+ /* Loop under test: store through a struct member array.  */
+ for (i = 0; i < N; i++)
+ {
+ s.ca[i] = 5;
+ }
+
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ if (s.ca[i] != 5)
+ abort ();
+ }
+
+ return 0;
+}
+
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-82.c b/gcc/testsuite/gcc.dg/vect/vect-82.c
index ac682c7997b..aecd61970d5 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-82.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-82.c
@@ -32,5 +32,5 @@ int main (void)
return main1 ();
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-82_64.c b/gcc/testsuite/gcc.dg/vect/vect-82_64.c
index d6e97523695..a21ed1281fe 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-82_64.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-82_64.c
@@ -1,4 +1,5 @@
-/* { dg-do run { target powerpc*-*-* } } */
+/* { dg-do run { target { powerpc*-*-* && lp64 } } } */
+/* { dg-do compile { target { powerpc*-*-* && ilp32 } } } */
/* { dg-options "-O2 -ftree-vectorize -mpowerpc64 -fdump-tree-vect-stats -maltivec" } */
#include <stdarg.h>
@@ -33,5 +34,5 @@ int main (void)
return main1 ();
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-83.c b/gcc/testsuite/gcc.dg/vect/vect-83.c
index 7a23f5b4908..5c21cbbe341 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-83.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-83.c
@@ -32,5 +32,5 @@ int main (void)
return main1 ();
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-83_64.c b/gcc/testsuite/gcc.dg/vect/vect-83_64.c
index ea99a9e21be..b5f6f6f08a5 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-83_64.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-83_64.c
@@ -1,4 +1,5 @@
-/* { dg-do run { target powerpc*-*-* } } */
+/* { dg-do run { target { powerpc*-*-* && lp64 } } } */
+/* { dg-do compile { target { powerpc*-*-* && ilp32 } } } */
/* { dg-options "-O2 -ftree-vectorize -mpowerpc64 -fdump-tree-vect-stats -maltivec" } */
#include <stdarg.h>
@@ -33,5 +34,5 @@ int main (void)
return main1 ();
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/tree-complex.c b/gcc/tree-complex.c
index 18582c1b20b..98b6c561503 100644
--- a/gcc/tree-complex.c
+++ b/gcc/tree-complex.c
@@ -632,8 +632,13 @@ tree_vec_extract (block_stmt_iterator *bsi, tree type,
{
if (bitpos)
return gimplify_build3 (bsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
- else
+
+ /* Build a conversion; VIEW_CONVERT_EXPR is very expensive unless T will
+ anyway be stored in memory, so prefer NOP_EXPR. */
+ else if (TYPE_MODE (type) == BLKmode)
return gimplify_build1 (bsi, VIEW_CONVERT_EXPR, type, t);
+ else
+ return gimplify_build1 (bsi, NOP_EXPR, type, t);
}
static tree
@@ -783,7 +788,7 @@ expand_vector_parallel (block_stmt_iterator *bsi, elem_op_func f, tree type,
result = f (bsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
}
- return build1 (VIEW_CONVERT_EXPR, type, result);
+ return result;
}
/* Expand a vector operation to scalars; for integer types we can use
@@ -810,6 +815,60 @@ expand_vector_addition (block_stmt_iterator *bsi,
a, b, code);
}
+/* Expand the vector operation CODE, whose operands are the operands of
+   RHS, and return the tree produced by the chosen helper.  BSI, TYPE and
+   COMPUTE_TYPE are forwarded to the helpers.
+
+   When the mode of COMPUTE_TYPE is neither a vector integer nor a vector
+   float mode, PLUS/MINUS/NEGATE are handed to expand_vector_addition
+   (only if TYPE_TRAP_SIGNED (TYPE) is false) and the bitwise AND/IOR/XOR/
+   NOT codes are handed to expand_vector_parallel.  Every other case,
+   including the ones that break out of the switch, is handed to
+   expand_vector_piecewise, as a unary or binary operation according to
+   the code class of CODE.  */
+static tree
+expand_vector_operation (block_stmt_iterator *bsi, tree type, tree compute_type,
+ tree rhs, enum tree_code code)
+{
+ enum machine_mode compute_mode = TYPE_MODE (compute_type);
+
+ /* If the compute mode is not a vector mode (hence we are not decomposing
+ a BLKmode vector to smaller, hardware-supported vectors), we may want
+ to expand the operations in parallel. */
+ if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
+ && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT)
+ switch (code)
+ {
+ case PLUS_EXPR:
+ case MINUS_EXPR:
+ /* Trapping signed types fall through to the piecewise expansion.  */
+ if (!TYPE_TRAP_SIGNED (type))
+ return expand_vector_addition (bsi, do_binop, do_plus_minus, type,
+ TREE_OPERAND (rhs, 0),
+ TREE_OPERAND (rhs, 1), code);
+ break;
+
+ case NEGATE_EXPR:
+ /* Same trapping restriction as for addition and subtraction.  */
+ if (!TYPE_TRAP_SIGNED (type))
+ return expand_vector_addition (bsi, do_unop, do_negate, type,
+ TREE_OPERAND (rhs, 0),
+ NULL_TREE, code);
+ break;
+
+ case BIT_AND_EXPR:
+ case BIT_IOR_EXPR:
+ case BIT_XOR_EXPR:
+ return expand_vector_parallel (bsi, do_binop, type,
+ TREE_OPERAND (rhs, 0),
+ TREE_OPERAND (rhs, 1), code);
+
+ case BIT_NOT_EXPR:
+ return expand_vector_parallel (bsi, do_unop, type,
+ TREE_OPERAND (rhs, 0),
+ NULL_TREE, code);
+
+ default:
+ break;
+ }
+
+ /* Fallback: expand piece by piece using COMPUTE_TYPE.  */
+ if (TREE_CODE_CLASS (code) == tcc_unary)
+ return expand_vector_piecewise (bsi, do_unop, type, compute_type,
+ TREE_OPERAND (rhs, 0),
+ NULL_TREE, code);
+ else
+ return expand_vector_piecewise (bsi, do_binop, type, compute_type,
+ TREE_OPERAND (rhs, 0),
+ TREE_OPERAND (rhs, 1), code);
+}
+
/* Return a type for the widest vector mode whose components are of mode
INNER_MODE, or NULL_TREE if none is found. */
static tree
@@ -841,7 +900,7 @@ static void
expand_vector_operations_1 (block_stmt_iterator *bsi)
{
tree stmt = bsi_stmt (*bsi);
- tree *p_rhs, rhs, type, compute_type;
+ tree *p_lhs, *p_rhs, lhs, rhs, type, compute_type;
enum tree_code code;
enum machine_mode compute_mode;
optab op;
@@ -856,7 +915,9 @@ expand_vector_operations_1 (block_stmt_iterator *bsi)
/* FALLTHRU */
case MODIFY_EXPR:
+ p_lhs = &TREE_OPERAND (stmt, 0);
p_rhs = &TREE_OPERAND (stmt, 1);
+ lhs = *p_lhs;
rhs = *p_rhs;
break;
@@ -897,86 +958,48 @@ expand_vector_operations_1 (block_stmt_iterator *bsi)
compute_type = vector_compute_type;
}
- compute_mode = TYPE_MODE (compute_type);
-
/* If we are breaking a BLKmode vector into smaller pieces,
type_for_widest_vector_mode has already looked into the optab,
so skip these checks. */
if (compute_type == type)
{
+ compute_mode = TYPE_MODE (compute_type);
if ((GET_MODE_CLASS (compute_mode) == MODE_VECTOR_INT
|| GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FLOAT)
&& op != NULL
&& op->handlers[compute_mode].insn_code != CODE_FOR_nothing)
return;
else
- {
- /* There is no operation in hardware, so fall back to scalars. */
- compute_type = TREE_TYPE (type);
- compute_mode = TYPE_MODE (compute_type);
- }
+ /* There is no operation in hardware, so fall back to scalars. */
+ compute_type = TREE_TYPE (type);
}
- /* If the compute mode is not a vector mode (hence we are decomposing
- a BLKmode vector to smaller, hardware-supported vectors), we may
- want to expand the operations in parallel. */
- if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
- && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT)
- switch (code)
- {
- case PLUS_EXPR:
- case MINUS_EXPR:
- if (TYPE_TRAP_SIGNED (type))
- break;
-
- *p_rhs = expand_vector_addition (bsi, do_binop, do_plus_minus, type,
- TREE_OPERAND (rhs, 0),
- TREE_OPERAND (rhs, 1), code);
- mark_stmt_modified (bsi_stmt (*bsi));
- return;
-
- case NEGATE_EXPR:
- if (TYPE_TRAP_SIGNED (type))
- break;
-
- *p_rhs = expand_vector_addition (bsi, do_unop, do_negate, type,
- TREE_OPERAND (rhs, 0),
- NULL_TREE, code);
- mark_stmt_modified (bsi_stmt (*bsi));
- return;
-
- case BIT_AND_EXPR:
- case BIT_IOR_EXPR:
- case BIT_XOR_EXPR:
- *p_rhs = expand_vector_parallel (bsi, do_binop, type,
- TREE_OPERAND (rhs, 0),
- TREE_OPERAND (rhs, 1), code);
- mark_stmt_modified (bsi_stmt (*bsi));
- return;
-
- case BIT_NOT_EXPR:
- *p_rhs = expand_vector_parallel (bsi, do_unop, type,
- TREE_OPERAND (rhs, 0),
- NULL_TREE, code);
- mark_stmt_modified (bsi_stmt (*bsi));
- return;
-
- default:
- break;
- }
-
- if (TREE_CODE_CLASS (code) == tcc_unary)
- *p_rhs = expand_vector_piecewise (bsi, do_unop, type, compute_type,
- TREE_OPERAND (rhs, 0),
- NULL_TREE, code);
+ rhs = expand_vector_operation (bsi, type, compute_type, rhs, code);
+ if (lang_hooks.types_compatible_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
+ *p_rhs = rhs;
else
- *p_rhs = expand_vector_piecewise (bsi, do_binop, type, compute_type,
- TREE_OPERAND (rhs, 0),
- TREE_OPERAND (rhs, 1), code);
+ {
+ /* Build a conversion; VIEW_CONVERT_EXPR is very expensive unless T will
+ be stored in memory anyway, so prefer NOP_EXPR. Also, perform the
+ VIEW_CONVERT_EXPR on the left side of the assignment. */
+ if (TYPE_MODE (TREE_TYPE (rhs)) == BLKmode)
+ *p_lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (rhs), lhs);
+ else
+ *p_rhs = gimplify_build1 (bsi, NOP_EXPR, TREE_TYPE (lhs), rhs);
+ }
mark_stmt_modified (bsi_stmt (*bsi));
}
+/* Use this to lower vector operations introduced by the vectorizer,
+ if it may need the bit-twiddling tricks implemented in this file.
+ Gate function: returns true exactly when flag_tree_vectorize is
+ nonzero. */
+
+static bool
+gate_expand_vector_operations (void)
+{
+ return flag_tree_vectorize != 0;
+}
+
static void
expand_vector_operations (void)
{
@@ -1015,8 +1038,8 @@ tree_lower_operations (void)
struct tree_opt_pass pass_lower_vector_ssa =
{
- "vector", /* name */
- NULL, /* gate */
+ "veclower", /* name */
+ gate_expand_vector_operations, /* gate */
expand_vector_operations, /* execute */
NULL, /* sub */
NULL, /* next */
diff --git a/gcc/tree-optimize.c b/gcc/tree-optimize.c
index 40020ac30ab..e77c29c573a 100644
--- a/gcc/tree-optimize.c
+++ b/gcc/tree-optimize.c
@@ -437,6 +437,7 @@ init_tree_optimization_passes (void)
NEXT_PASS (pass_iv_canon);
NEXT_PASS (pass_if_conversion);
NEXT_PASS (pass_vectorize);
+ NEXT_PASS (pass_lower_vector_ssa);
NEXT_PASS (pass_complete_unroll);
NEXT_PASS (pass_iv_optimize);
NEXT_PASS (pass_loop_done);
diff --git a/gcc/tree-vect-analyze.c b/gcc/tree-vect-analyze.c
index 71f7663357e..1416bc435e1 100644
--- a/gcc/tree-vect-analyze.c
+++ b/gcc/tree-vect-analyze.c
@@ -1076,13 +1076,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
struct data_reference *dr0 = NULL;
unsigned int i, j;
- /* Sigh, a hack to make targets that do not define UNITS_PER_SIMD_WORD
- bootstrap. Copy UNITS_PER_SIMD_WORD to a local variable to avoid a
- "division by zero" error. This error would be issued because we
- we do "... % UNITS_PER_SIMD_WORD" below, and UNITS_PER_SIMD_WORD
- defaults to 0 if it is not defined by the target. */
- int units_per_simd_word = UNITS_PER_SIMD_WORD;
-
/*
This pass will require a cost model to guide it whether to apply peeling
or versioning or a combination of the two. For example, the scheme that
@@ -1237,7 +1230,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
int drsize = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
DR_MISALIGNMENT (dr) += npeel * drsize;
- DR_MISALIGNMENT (dr) %= units_per_simd_word;
+ DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
}
else
DR_MISALIGNMENT (dr) = -1;
diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c
index e51d56f7491..68222e95ce5 100644
--- a/gcc/tree-vect-transform.c
+++ b/gcc/tree-vect-transform.c
@@ -720,6 +720,32 @@ vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
}
+/* Function vect_min_worthwhile_factor.
+
+ For a loop where we could vectorize the operation indicated by CODE,
+ return the minimum vectorization factor that makes it worthwhile
+ to use generic vectors.
+
+ Returns 4 for PLUS_EXPR, MINUS_EXPR and NEGATE_EXPR, 2 for the bitwise
+ AND/IOR/XOR/NOT codes, and INT_MAX for every other code. */
+static int
+vect_min_worthwhile_factor (enum tree_code code)
+{
+ switch (code)
+ {
+ case PLUS_EXPR:
+ case MINUS_EXPR:
+ case NEGATE_EXPR:
+ return 4;
+
+ case BIT_AND_EXPR:
+ case BIT_IOR_EXPR:
+ case BIT_XOR_EXPR:
+ case BIT_NOT_EXPR:
+ return 2;
+
+ /* Any other operation gets the largest possible threshold.  */
+ default:
+ return INT_MAX;
+ }
+}
+
/* Function vectorizable_operation.
Check if STMT performs a binary or unary operation that can be vectorized.
@@ -792,6 +818,16 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
return false;
}
+ /* Worthwhile without SIMD support? */
+ if (!VECTOR_MODE_P (TYPE_MODE (vectype))
+ && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
+ < vect_min_worthwhile_factor (code))
+ {
+ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+ fprintf (vect_dump, "not worthwhile without SIMD support.");
+ return false;
+ }
+
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index ed95eba2d80..2285bb1cad5 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -1525,7 +1525,7 @@ get_vectype_for_scalar_type (tree scalar_type)
int nunits;
tree vectype;
- if (nbytes == 0)
+ if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD)
return NULL_TREE;
/* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
@@ -1548,11 +1548,9 @@ get_vectype_for_scalar_type (tree scalar_type)
print_generic_expr (vect_dump, vectype, TDF_SLIM);
}
- if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
+ if (!VECTOR_MODE_P (TYPE_MODE (vectype))
+ && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
{
- /* TODO: tree-complex.c sometimes can parallelize operations
- on generic vectors. We can vectorize the loop in that case,
- but then we should re-run the lowering pass. */
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
fprintf (vect_dump, "mode not supported by target.");
return NULL_TREE;
@@ -1733,15 +1731,6 @@ vectorize_loops (struct loops *loops)
/* Fix the verbosity level if not defined explicitly by the user. */
vect_set_dump_settings ();
- /* Does the target support SIMD? */
- /* FORNOW: until more sophisticated machine modelling is in place. */
- if (!UNITS_PER_SIMD_WORD)
- {
- if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
- fprintf (vect_dump, "vectorizer: target vector size is not defined.");
- return;
- }
-
/* ----------- Analyze loops. ----------- */
/* If some loop was duplicated, it gets bigger number