diff options
29 files changed, 563 insertions, 110 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 368698a40b8..5a99ffa1aff 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,39 @@ +2005-04-27 Paolo Bonzini <bonzini@gnu.org> + + * tree-complex.c (expand_vector_operation): New, extracted from + expand_vector_operations_1. + (tree_vec_extract): Build a NOP_EXPR. + (expand_vector_parallel): Do not care about returning the correct type. + (expand_vector_operations_1): Call expand_vector_operation. + Build the VIEW_CONVERT_EXPR on the left side of MODIFY_EXPRs. + + * tree-complex.c (gate_expand_vector_operations): New. + (pass_lower_vector_ssa): Use it. + * tree-optimize.c (init_tree_optimization_passes): Include + pass_lower_vector_ssa. + * tree-vect-transform.c (vect_min_worthwhile_factor): New. + (vectorizable_operation): Use it. + * tree-vectorizer.c (get_vectype_for_scalar_type): Accept + integer modes for the vector type. + + * defaults.h (UNITS_PER_SIMD_WORD): Default to UNITS_PER_WORD. + * tree-vect-analyze.c (vect_enhance_data_refs_alignment): + Do not cope with UNITS_PER_SIMD_WORD == 0. + * tree-vectorizer.c (get_vectype_for_scalar_type): Check + if the scalar type is not bigger than UNITS_PER_SIMD_WORD. + (vectorize_loops): Do not check that UNITS_PER_SIMD_WORD > 0. + * config/i386/i386.h (UNITS_PER_SIMD_WORD): Default to UNITS_PER_WORD. + * config/mips/mips.h (UNITS_PER_SIMD_WORD): Likewise. + * config/rs6000/rs6000.h (UNITS_PER_SIMD_WORD): Likewise. + * config/sparc/sparc.h (UNITS_PER_SIMD_WORD): Likewise. + + * config/alpha/alpha.h (UNITS_PER_SIMD_WORD): Remove. + * config/bfin/bfin.h (UNITS_PER_SIMD_WORD): Remove. + * config/ia64/ia64.h (UNITS_PER_SIMD_WORD): Remove. + + * doc/tm.texi (UNITS_PER_WORD): Rephrase more accurately. + (UNITS_PER_SIMD_WORD): New. + 2005-04-27 Nathan Sidwell <nathan@codesourcery.com> * config/ia64/ia64.c (ia64_encode_addr_area): Use gcc_assert and @@ -29,17 +65,38 @@ (INDEX_REGISTER_P): New. (BASE_REGISTER_P): New. (indirectable_constant_address_p): New. 
Adapted from +<<<<<<< ChangeLog + INDIRECTABLE_CONSTANT_ADDRESS_P in vax.h. + Use SYMBOL_REF_LOCAL_P. +======= INDIRECTABLE_CONSTANT_ADDRESS_P in vax.h. Use SYMBOL_REF_LOCAL_P. +>>>>>>> 2.8478 (indirectable_address_p): New. Adapted from INDIRECTABLE_ADDRESS_P in vax.h. (nonindexed_address_p): New. Adapted from +<<<<<<< ChangeLog + GO_IF_NONINDEXED_ADDRESS in vax.h. + (index_temp_p): New. Adapted from + INDEX_TERM_P in vax.h. + (reg_plus_index_p): New. Adapted from + GO_IF_REG_PLUS_INDEX in vax.h. +======= GO_IF_NONINDEXED_ADDRESS in vax.h. (index_temp_p): New. Adapted from INDEX_TERM_P in vax.h. (reg_plus_index_p): New. Adapted from GO_IF_REG_PLUS_INDEX in vax.h. +>>>>>>> 2.8478 (legitimate_address_p): New. Adapted from +<<<<<<< ChangeLog + GO_IF_LEGITIMATE_ADDRESS in vax.h +======= GO_IF_LEGITIMATE_ADDRESS in vax.h. +>>>>>>> 2.8478 (vax_mode_dependent_address_p): New. Adapted from +<<<<<<< ChangeLog + GO_IF_MODE_DEPENDENT_ADDRESS in vax.h +======= GO_IF_MODE_DEPENDENT_ADDRESS in vax.h. +>>>>>>> 2.8478 * config/vax/vax.h (CONSTANT_ADDRESS_P): Use legitimate_constant_address_p. (CONSTANT_P): Use legitimate_constant_p. @@ -48,8 +105,13 @@ (GO_IF_NONINDEXED_ADDRESS): Removed. (INDEX_TEMP_P): Removed. (GO_IF_REG_PLUS_INDEX): Removed. +<<<<<<< ChangeLog + (GO_IF_LEGITIMATE_ADDRESS): Use legitimate_address_p. + Two definitions, depending on whether REG_OK_STRICT is defined. +======= (GO_IF_LEGITIMATE_ADDRESS): Use legitimate_address_p. Two definitions, depending on whether REG_OK_STRICT is defined. +>>>>>>> 2.8478 (GO_IF_MODE_DEPENDENT_ADDRESS): Use vax_mode_dependent_address_p. Two definitions, depending on whether REG_OK_STRICT is defined. 
* config/vax/vax-protos.h (legitimate_constant_address_p): Prototype diff --git a/gcc/config/alpha/alpha.h b/gcc/config/alpha/alpha.h index 780cf4af20d..97bd9efde6a 100644 --- a/gcc/config/alpha/alpha.h +++ b/gcc/config/alpha/alpha.h @@ -394,9 +394,6 @@ extern int alpha_tls_size; #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) 1 -/* Our SIMD is all done on single integer registers. */ -#define UNITS_PER_SIMD_WORD UNITS_PER_WORD - /* Standard register usage. */ /* Number of actual hardware registers. diff --git a/gcc/config/bfin/bfin.h b/gcc/config/bfin/bfin.h index 512525afb81..608188ae882 100644 --- a/gcc/config/bfin/bfin.h +++ b/gcc/config/bfin/bfin.h @@ -773,9 +773,6 @@ do { \ /* Width of a word, in units (bytes). */ #define UNITS_PER_WORD 4 -/* Size of a vector for autovectorization. */ -#define UNITS_PER_SIMD_WORD 4 - /* Width in bits of a pointer. See also the macro `Pmode1' defined below. */ #define POINTER_SIZE 32 diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index fc0596ba948..9b04fadbcca 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -836,7 +836,7 @@ do { \ /* ??? No autovectorization into MMX or 3DNOW until we can reliably place emms and femms instructions. */ -#define UNITS_PER_SIMD_WORD (TARGET_SSE ? 16 : 0) +#define UNITS_PER_SIMD_WORD (TARGET_SSE ? 16 : UNITS_PER_WORD) #define VALID_FP_MODE_P(MODE) \ ((MODE) == SFmode || (MODE) == DFmode || (MODE) == XFmode \ diff --git a/gcc/config/ia64/ia64.h b/gcc/config/ia64/ia64.h index 362064af9e2..4aca02471da 100644 --- a/gcc/config/ia64/ia64.h +++ b/gcc/config/ia64/ia64.h @@ -168,8 +168,6 @@ extern enum processor_type ia64_tune; #define UNITS_PER_WORD 8 -#define UNITS_PER_SIMD_WORD UNITS_PER_WORD - #define POINTER_SIZE (TARGET_ILP32 ? 
32 : 64) /* A C expression whose value is zero if pointers that need to be extended diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h index e2118e513bb..f81e7e97e23 100644 --- a/gcc/config/mips/mips.h +++ b/gcc/config/mips/mips.h @@ -966,7 +966,7 @@ extern const struct mips_cpu_info *mips_tune_info; /* The number of bytes in a double. */ #define UNITS_PER_DOUBLE (TYPE_PRECISION (double_type_node) / BITS_PER_UNIT) -#define UNITS_PER_SIMD_WORD (TARGET_PAIRED_SINGLE_FLOAT ? 8 : 0) +#define UNITS_PER_SIMD_WORD (TARGET_PAIRED_SINGLE_FLOAT ? 8 : UNITS_PER_WORD) /* Set the sizes of the core types. */ #define SHORT_TYPE_SIZE 16 diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 137e6dd2c61..c2f78a576ae 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -1043,8 +1043,9 @@ extern const char *rs6000_warn_altivec_long_switch; || (MODE) == V1DImode \ || (MODE) == V2SImode) -#define UNITS_PER_SIMD_WORD \ - (TARGET_ALTIVEC ? 16 : (TARGET_SPE ? 8 : 0) ) +#define UNITS_PER_SIMD_WORD \ + (TARGET_ALTIVEC ? UNITS_PER_ALTIVEC_WORD \ + : (TARGET_SPE ? UNITS_PER_SPE_WORD : UNITS_PER_WORD)) /* Value is TRUE if hard register REGNO can hold a value of machine-mode MODE. */ diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h index dbad1b91365..fddb2e0e0bb 100644 --- a/gcc/config/sparc/sparc.h +++ b/gcc/config/sparc/sparc.h @@ -588,7 +588,7 @@ extern struct sparc_cpu_select sparc_select[]; #define MIN_UNITS_PER_WORD 4 #endif -#define UNITS_PER_SIMD_WORD (TARGET_VIS ? 8 : 0) +#define UNITS_PER_SIMD_WORD (TARGET_VIS ? 8 : UNITS_PER_WORD) /* Now define the sizes of the C data types. 
*/ diff --git a/gcc/defaults.h b/gcc/defaults.h index 28a0d83adb6..e4ef7f36246 100644 --- a/gcc/defaults.h +++ b/gcc/defaults.h @@ -702,8 +702,10 @@ do { fputs (integer_asm_op (POINTER_SIZE / BITS_PER_UNIT, TRUE), FILE); \ #define HAS_LONG_UNCOND_BRANCH 0 #endif +/* By default, only attempt to parallelize bitwise operations, and + possibly adds/subtracts using bit-twiddling. */ #ifndef UNITS_PER_SIMD_WORD -#define UNITS_PER_SIMD_WORD 0 +#define UNITS_PER_SIMD_WORD UNITS_PER_WORD #endif /* Determine whether __cxa_atexit, rather than atexit, is used to diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 771461618cc..514f59d5bbe 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -1076,7 +1076,8 @@ largest value that @code{BITS_PER_WORD} can have at run-time. @end defmac @defmac UNITS_PER_WORD -Number of storage units in a word; normally 4. +Number of storage units in a word; normally the size of a general-purpose +register, a power of two from 1 to 8. @end defmac @defmac MIN_UNITS_PER_WORD @@ -1085,6 +1086,13 @@ Minimum number of units in a word. If this is undefined, the default is smallest value that @code{UNITS_PER_WORD} can have at run-time. @end defmac +@defmac UNITS_PER_SIMD_WORD +Number of units in the vectors that the vectorizer can produce. +The default is equal to @code{UNITS_PER_WORD}, because the vectorizer +can do some transformations even in the absence of specialized @acronym{SIMD} +hardware. +@end defmac + @defmac POINTER_SIZE Width of a pointer, in bits. You must specify a value no wider than the width of @code{Pmode}. 
If it is not equal to the width of @code{Pmode}, diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index bd88d352ed9..0f69c320f7d 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,14 @@ +2005-04-27 Paolo Bonzini <bonzini@gnu.org> + + * gcc.dg/tree-ssa/gen-vect-11.c, gcc.dg/tree-ssa/gen-vect-11a.c, + gcc.dg/tree-ssa/gen-vect-11b.c, gcc.dg/tree-ssa/gen-vect-11c.c, + gcc.dg/tree-ssa/gen-vect-2.c, gcc.dg/tree-ssa/gen-vect-25.c, + gcc.dg/tree-ssa/gen-vect-26.c, gcc.dg/tree-ssa/gen-vect-28.c, + gcc.dg/tree-ssa/gen-vect-32.c: New. + * gcc.dg/vect/vect-82.c, gcc.dg/vect/vect-83.c: Fix dg-final. + * gcc.dg/vect/vect-82_64.c, gcc.dg/vect/vect-83_64.c: Remove xfail, + don't run on PPC32. + 2005-04-27 Joseph S. Myers <joseph@codesourcery.com> PR c/21213 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11.c new file mode 100644 index 00000000000..bc6c2869d75 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11.c @@ -0,0 +1,32 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-vectorize -ftree-vectorizer-verbose=3 -fdump-tree-vect-stats" } */ + +#include <stdlib.h> + +#define N 16 + +int main () +{ + int i; + char ia[N]; + char ic[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45}; + char ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45}; + + for (i = 0; i < N; i++) + { + ia[i] = ib[i] + ic[i]; + } + + /* check results: */ + for (i = 0; i < N; i++) + { + if (ia[i] != ib[i] + ic[i]) + abort (); + } + + return 0; +} + + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11a.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11a.c new file mode 100644 index 00000000000..75ec7ce8863 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11a.c @@ -0,0 +1,38 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-vectorize -ftree-vectorizer-verbose=3 
-fdump-tree-vect-stats" } */ + +#include <stdlib.h> + +#define N 16 + +#if __LONG_MAX__ == 2147483647 +typedef short half_word; +#else +typedef int half_word; +#endif + +int main () +{ + int i; + half_word ia[N]; + half_word ic[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45}; + half_word ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45}; + + for (i = 0; i < N; i++) + { + ia[i] = ib[i] & ic[i]; + } + + /* check results: */ + for (i = 0; i < N; i++) + { + if (ia[i] != (ib[i] & ic[i])) + abort (); + } + + return 0; +} + + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11b.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11b.c new file mode 100644 index 00000000000..20833533468 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11b.c @@ -0,0 +1,33 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-vectorize -ftree-vectorizer-verbose=3 -fdump-tree-vect-stats" } */ + +#include <stdlib.h> + +#define N 16 + +int main () +{ + int i; + char ia[N]; + char ic[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45}; + char ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45}; + + /* Not vectorizable, multiplication */ + for (i = 0; i < N; i++) + { + ia[i] = ib[i] * ic[i]; + } + + /* check results: */ + for (i = 0; i < N; i++) + { + if (ia[i] != (char) (ib[i] * ic[i])) + abort (); + } + + return 0; +} + + +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11c.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11c.c new file mode 100644 index 00000000000..8632ae42b3a --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11c.c @@ -0,0 +1,39 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-vectorize -ftree-vectorizer-verbose=3 -fdump-tree-vect-stats" } */ + +#include <stdlib.h> + +#define N 16 + +#if __LONG_MAX__ == 2147483647 
+typedef short half_word; +#else +typedef int half_word; +#endif + +int main () +{ + int i; + half_word ia[N]; + half_word ic[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45}; + half_word ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45}; + + /* Not worthwhile, only 2 parts per int */ + for (i = 0; i < N; i++) + { + ia[i] = ib[i] + ic[i]; + } + + /* check results: */ + for (i = 0; i < N; i++) + { + if (ia[i] != ib[i] + ic[i]) + abort (); + } + + return 0; +} + + +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c new file mode 100644 index 00000000000..be89c268258 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c @@ -0,0 +1,37 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-vectorize -ftree-vectorizer-verbose=3 -fdump-tree-vect-stats" } */ + +#include <stdlib.h> + +#define N 16 + +#if __LONG_MAX__ == 2147483647 +typedef short half_word; +#else +typedef int half_word; +#endif + +int main () +{ + half_word cb[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45}; + half_word ca[N]; + int i; + + for (i = 0; i < N; i++) + { + ca[i] = cb[i]; + } + + /* check results: */ + for (i = 0; i < N; i++) + { + if (ca[i] != cb[i]) + abort (); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-25.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-25.c new file mode 100644 index 00000000000..1e0c2c11cf8 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-25.c @@ -0,0 +1,52 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-vectorize -ftree-vectorizer-verbose=3 -fdump-tree-vect-stats" } */ + +#include <stdlib.h> + +#define N 128 + +#if __LONG_MAX__ == 
2147483647 +typedef short half_word; +#else +typedef int half_word; +#endif + +int main (int n, int *p) +{ + int i; + half_word ib[N]; + half_word ia[N]; + int k; + + for (i = 0; i < N; i++) + { + ia[i] = n; + } + + /* check results: */ + for (i = 0; i < N; i++) + { + if (ia[i] != n) + abort (); + } + + k = *p; + for (i = 0; i < N; i++) + { + ib[i] = k; + } + + /* check results: */ + for (i = 0; i < N; i++) + { + if (ib[i] != k) + abort (); + } + + return 0; +} + + +/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-26.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-26.c new file mode 100644 index 00000000000..b90413aa4bf --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-26.c @@ -0,0 +1,34 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-vectorize -ftree-vectorizer-verbose=3 -fdump-tree-vect-stats" } */ + +#include <stdlib.h> + +#define N 128 + +/* unaligned store. 
*/ + +int main () +{ + int i; + char ia[N+1]; + + for (i = 1; i <= N; i++) + { + ia[i] = 5; + } + + /* check results: */ + for (i = 1; i <= N; i++) + { + if (ia[i] != 5) + abort (); + } + + return 0; +} + + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-28.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-28.c new file mode 100644 index 00000000000..0d017529357 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-28.c @@ -0,0 +1,35 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-vectorize -ftree-vectorizer-verbose=3 -fdump-tree-vect-stats" } */ + +#include <stdlib.h> + +#define N 128 +#define OFF 3 + +/* unaligned store. */ + +int main (int off) +{ + int i; + char ia[N+OFF]; + + for (i = 0; i < N; i++) + { + ia[i+off] = 5; + } + + /* check results: */ + for (i = 0; i < N; i++) + { + if (ia[i+off] != 5) + abort (); + } + + return 0; +} + + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-32.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-32.c new file mode 100644 index 00000000000..681c7071685 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-32.c @@ -0,0 +1,33 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-vectorize -ftree-vectorizer-verbose=3 -fdump-tree-vect-stats" } */ + +#include <stdlib.h> + +#define N 16 + +int main () +{ + struct { + char ca[N]; + } s; + int i; + + for (i = 0; i < N; i++) + { + s.ca[i] = 5; + } + + /* check 
results: */ + for (i = 0; i < N; i++) + { + if (s.ca[i] != 5) + abort (); + } + + return 0; +} + + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-82.c b/gcc/testsuite/gcc.dg/vect/vect-82.c index ac682c7997b..aecd61970d5 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-82.c +++ b/gcc/testsuite/gcc.dg/vect/vect-82.c @@ -32,5 +32,5 @@ int main (void) return main1 (); } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-82_64.c b/gcc/testsuite/gcc.dg/vect/vect-82_64.c index d6e97523695..a21ed1281fe 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-82_64.c +++ b/gcc/testsuite/gcc.dg/vect/vect-82_64.c @@ -1,4 +1,5 @@ -/* { dg-do run { target powerpc*-*-* } } */ +/* { dg-do run { target { powerpc*-*-* && lp64 } } } */ +/* { dg-do compile { target { powerpc*-*-* && ilp32 } } } */ /* { dg-options "-O2 -ftree-vectorize -mpowerpc64 -fdump-tree-vect-stats -maltivec" } */ #include <stdarg.h> @@ -33,5 +34,5 @@ int main (void) return main1 (); } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-83.c b/gcc/testsuite/gcc.dg/vect/vect-83.c index 7a23f5b4908..5c21cbbe341 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-83.c +++ b/gcc/testsuite/gcc.dg/vect/vect-83.c @@ -32,5 +32,5 @@ int main (void) return main1 (); } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ /* { dg-final { cleanup-tree-dump 
"vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-83_64.c b/gcc/testsuite/gcc.dg/vect/vect-83_64.c index ea99a9e21be..b5f6f6f08a5 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-83_64.c +++ b/gcc/testsuite/gcc.dg/vect/vect-83_64.c @@ -1,4 +1,5 @@ -/* { dg-do run { target powerpc*-*-* } } */ +/* { dg-do run { target { powerpc*-*-* && lp64 } } } */ +/* { dg-do compile { target { powerpc*-*-* && ilp32 } } } */ /* { dg-options "-O2 -ftree-vectorize -mpowerpc64 -fdump-tree-vect-stats -maltivec" } */ #include <stdarg.h> @@ -33,5 +34,5 @@ int main (void) return main1 (); } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/tree-complex.c b/gcc/tree-complex.c index 18582c1b20b..98b6c561503 100644 --- a/gcc/tree-complex.c +++ b/gcc/tree-complex.c @@ -632,8 +632,13 @@ tree_vec_extract (block_stmt_iterator *bsi, tree type, { if (bitpos) return gimplify_build3 (bsi, BIT_FIELD_REF, type, t, bitsize, bitpos); - else + + /* Build a conversion; VIEW_CONVERT_EXPR is very expensive unless T will + anyway be stored in memory, so prefer NOP_EXPR. 
*/ + else if (TYPE_MODE (type) == BLKmode) return gimplify_build1 (bsi, VIEW_CONVERT_EXPR, type, t); + else + return gimplify_build1 (bsi, NOP_EXPR, type, t); } static tree @@ -783,7 +788,7 @@ expand_vector_parallel (block_stmt_iterator *bsi, elem_op_func f, tree type, result = f (bsi, compute_type, a, b, NULL_TREE, NULL_TREE, code); } - return build1 (VIEW_CONVERT_EXPR, type, result); + return result; } /* Expand a vector operation to scalars; for integer types we can use @@ -810,6 +815,60 @@ expand_vector_addition (block_stmt_iterator *bsi, a, b, code); } +static tree +expand_vector_operation (block_stmt_iterator *bsi, tree type, tree compute_type, + tree rhs, enum tree_code code) +{ + enum machine_mode compute_mode = TYPE_MODE (compute_type); + + /* If the compute mode is not a vector mode (hence we are not decomposing + a BLKmode vector to smaller, hardware-supported vectors), we may want + to expand the operations in parallel. */ + if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT + && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT) + switch (code) + { + case PLUS_EXPR: + case MINUS_EXPR: + if (!TYPE_TRAP_SIGNED (type)) + return expand_vector_addition (bsi, do_binop, do_plus_minus, type, + TREE_OPERAND (rhs, 0), + TREE_OPERAND (rhs, 1), code); + break; + + case NEGATE_EXPR: + if (!TYPE_TRAP_SIGNED (type)) + return expand_vector_addition (bsi, do_unop, do_negate, type, + TREE_OPERAND (rhs, 0), + NULL_TREE, code); + break; + + case BIT_AND_EXPR: + case BIT_IOR_EXPR: + case BIT_XOR_EXPR: + return expand_vector_parallel (bsi, do_binop, type, + TREE_OPERAND (rhs, 0), + TREE_OPERAND (rhs, 1), code); + + case BIT_NOT_EXPR: + return expand_vector_parallel (bsi, do_unop, type, + TREE_OPERAND (rhs, 0), + NULL_TREE, code); + + default: + break; + } + + if (TREE_CODE_CLASS (code) == tcc_unary) + return expand_vector_piecewise (bsi, do_unop, type, compute_type, + TREE_OPERAND (rhs, 0), + NULL_TREE, code); + else + return expand_vector_piecewise (bsi, do_binop, type, 
compute_type, + TREE_OPERAND (rhs, 0), + TREE_OPERAND (rhs, 1), code); +} + /* Return a type for the widest vector mode whose components are of mode INNER_MODE, or NULL_TREE if none is found. */ static tree @@ -841,7 +900,7 @@ static void expand_vector_operations_1 (block_stmt_iterator *bsi) { tree stmt = bsi_stmt (*bsi); - tree *p_rhs, rhs, type, compute_type; + tree *p_lhs, *p_rhs, lhs, rhs, type, compute_type; enum tree_code code; enum machine_mode compute_mode; optab op; @@ -856,7 +915,9 @@ expand_vector_operations_1 (block_stmt_iterator *bsi) /* FALLTHRU */ case MODIFY_EXPR: + p_lhs = &TREE_OPERAND (stmt, 0); p_rhs = &TREE_OPERAND (stmt, 1); + lhs = *p_lhs; rhs = *p_rhs; break; @@ -897,86 +958,48 @@ expand_vector_operations_1 (block_stmt_iterator *bsi) compute_type = vector_compute_type; } - compute_mode = TYPE_MODE (compute_type); - /* If we are breaking a BLKmode vector into smaller pieces, type_for_widest_vector_mode has already looked into the optab, so skip these checks. */ if (compute_type == type) { + compute_mode = TYPE_MODE (compute_type); if ((GET_MODE_CLASS (compute_mode) == MODE_VECTOR_INT || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FLOAT) && op != NULL && op->handlers[compute_mode].insn_code != CODE_FOR_nothing) return; else - { - /* There is no operation in hardware, so fall back to scalars. */ - compute_type = TREE_TYPE (type); - compute_mode = TYPE_MODE (compute_type); - } + /* There is no operation in hardware, so fall back to scalars. */ + compute_type = TREE_TYPE (type); } - /* If the compute mode is not a vector mode (hence we are decomposing - a BLKmode vector to smaller, hardware-supported vectors), we may - want to expand the operations in parallel. 
*/ - if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT - && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT) - switch (code) - { - case PLUS_EXPR: - case MINUS_EXPR: - if (TYPE_TRAP_SIGNED (type)) - break; - - *p_rhs = expand_vector_addition (bsi, do_binop, do_plus_minus, type, - TREE_OPERAND (rhs, 0), - TREE_OPERAND (rhs, 1), code); - mark_stmt_modified (bsi_stmt (*bsi)); - return; - - case NEGATE_EXPR: - if (TYPE_TRAP_SIGNED (type)) - break; - - *p_rhs = expand_vector_addition (bsi, do_unop, do_negate, type, - TREE_OPERAND (rhs, 0), - NULL_TREE, code); - mark_stmt_modified (bsi_stmt (*bsi)); - return; - - case BIT_AND_EXPR: - case BIT_IOR_EXPR: - case BIT_XOR_EXPR: - *p_rhs = expand_vector_parallel (bsi, do_binop, type, - TREE_OPERAND (rhs, 0), - TREE_OPERAND (rhs, 1), code); - mark_stmt_modified (bsi_stmt (*bsi)); - return; - - case BIT_NOT_EXPR: - *p_rhs = expand_vector_parallel (bsi, do_unop, type, - TREE_OPERAND (rhs, 0), - NULL_TREE, code); - mark_stmt_modified (bsi_stmt (*bsi)); - return; - - default: - break; - } - - if (TREE_CODE_CLASS (code) == tcc_unary) - *p_rhs = expand_vector_piecewise (bsi, do_unop, type, compute_type, - TREE_OPERAND (rhs, 0), - NULL_TREE, code); + rhs = expand_vector_operation (bsi, type, compute_type, rhs, code); + if (lang_hooks.types_compatible_p (TREE_TYPE (lhs), TREE_TYPE (rhs))) + *p_rhs = rhs; else - *p_rhs = expand_vector_piecewise (bsi, do_binop, type, compute_type, - TREE_OPERAND (rhs, 0), - TREE_OPERAND (rhs, 1), code); + { + /* Build a conversion; VIEW_CONVERT_EXPR is very expensive unless T will + be stored in memory anyway, so prefer NOP_EXPR. Also, perform the + VIEW_CONVERT_EXPR on the left side of the assignment. 
*/ + if (TYPE_MODE (TREE_TYPE (rhs)) == BLKmode) + *p_lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (rhs), lhs); + else + *p_rhs = gimplify_build1 (bsi, NOP_EXPR, TREE_TYPE (lhs), rhs); + } mark_stmt_modified (bsi_stmt (*bsi)); } +/* Use this to lower vector operations introduced by the vectorizer, + if it may need the bit-twiddling tricks implemented in this file. */ + +static bool +gate_expand_vector_operations (void) +{ + return flag_tree_vectorize != 0; +} + static void expand_vector_operations (void) { @@ -1015,8 +1038,8 @@ tree_lower_operations (void) struct tree_opt_pass pass_lower_vector_ssa = { - "vector", /* name */ - NULL, /* gate */ + "veclower", /* name */ + gate_expand_vector_operations, /* gate */ expand_vector_operations, /* execute */ NULL, /* sub */ NULL, /* next */ diff --git a/gcc/tree-optimize.c b/gcc/tree-optimize.c index 40020ac30ab..e77c29c573a 100644 --- a/gcc/tree-optimize.c +++ b/gcc/tree-optimize.c @@ -437,6 +437,7 @@ init_tree_optimization_passes (void) NEXT_PASS (pass_iv_canon); NEXT_PASS (pass_if_conversion); NEXT_PASS (pass_vectorize); + NEXT_PASS (pass_lower_vector_ssa); NEXT_PASS (pass_complete_unroll); NEXT_PASS (pass_iv_optimize); NEXT_PASS (pass_loop_done); diff --git a/gcc/tree-vect-analyze.c b/gcc/tree-vect-analyze.c index 71f7663357e..1416bc435e1 100644 --- a/gcc/tree-vect-analyze.c +++ b/gcc/tree-vect-analyze.c @@ -1076,13 +1076,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) struct data_reference *dr0 = NULL; unsigned int i, j; - /* Sigh, a hack to make targets that do not define UNITS_PER_SIMD_WORD - bootstrap. Copy UNITS_PER_SIMD_WORD to a local variable to avoid a - "division by zero" error. This error would be issued because we - we do "... % UNITS_PER_SIMD_WORD" below, and UNITS_PER_SIMD_WORD - defaults to 0 if it is not defined by the target. 
*/ - int units_per_simd_word = UNITS_PER_SIMD_WORD; - /* This pass will require a cost model to guide it whether to apply peeling or versioning or a combination of the two. For example, the scheme that @@ -1237,7 +1230,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) int drsize = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr)))); DR_MISALIGNMENT (dr) += npeel * drsize; - DR_MISALIGNMENT (dr) %= units_per_simd_word; + DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD; } else DR_MISALIGNMENT (dr) = -1; diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c index e51d56f7491..68222e95ce5 100644 --- a/gcc/tree-vect-transform.c +++ b/gcc/tree-vect-transform.c @@ -720,6 +720,32 @@ vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) } +/* Function vect_min_worthwhile_factor. + + For a loop where we could vectorize the operation indicated by CODE, + return the minimum vectorization factor that makes it worthwhile + to use generic vectors. */ +static int +vect_min_worthwhile_factor (enum tree_code code) +{ + switch (code) + { + case PLUS_EXPR: + case MINUS_EXPR: + case NEGATE_EXPR: + return 4; + + case BIT_AND_EXPR: + case BIT_IOR_EXPR: + case BIT_XOR_EXPR: + case BIT_NOT_EXPR: + return 2; + + default: + return INT_MAX; + } +} + /* Function vectorizable_operation. Check if STMT performs a binary or unary operation that can be vectorized. @@ -792,6 +818,16 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) return false; } + /* Worthwhile without SIMD support? */ + if (!VECTOR_MODE_P (TYPE_MODE (vectype)) + && LOOP_VINFO_VECT_FACTOR (loop_vinfo) + < vect_min_worthwhile_factor (code)) + { + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "not worthwhile without SIMD support."); + return false; + } + if (!vec_stmt) /* transformation not required. 
*/ { STMT_VINFO_TYPE (stmt_info) = op_vec_info_type; diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c index ed95eba2d80..2285bb1cad5 100644 --- a/gcc/tree-vectorizer.c +++ b/gcc/tree-vectorizer.c @@ -1525,7 +1525,7 @@ get_vectype_for_scalar_type (tree scalar_type) int nunits; tree vectype; - if (nbytes == 0) + if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD) return NULL_TREE; /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD) @@ -1548,11 +1548,9 @@ get_vectype_for_scalar_type (tree scalar_type) print_generic_expr (vect_dump, vectype, TDF_SLIM); } - if (!VECTOR_MODE_P (TYPE_MODE (vectype))) + if (!VECTOR_MODE_P (TYPE_MODE (vectype)) + && !INTEGRAL_MODE_P (TYPE_MODE (vectype))) { - /* TODO: tree-complex.c sometimes can parallelize operations - on generic vectors. We can vectorize the loop in that case, - but then we should re-run the lowering pass. */ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) fprintf (vect_dump, "mode not supported by target."); return NULL_TREE; @@ -1733,15 +1731,6 @@ vectorize_loops (struct loops *loops) /* Fix the verbosity level if not defined explicitly by the user. */ vect_set_dump_settings (); - /* Does the target support SIMD? */ - /* FORNOW: until more sophisticated machine modelling is in place. */ - if (!UNITS_PER_SIMD_WORD) - { - if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) - fprintf (vect_dump, "vectorizer: target vector size is not defined."); - return; - } - /* ----------- Analyze loops. ----------- */ /* If some loop was duplicated, it gets bigger number |