diff options
author | dorit <dorit@138bc75d-0d04-0410-961f-82ee72b054a4> | 2007-05-03 12:54:45 +0000 |
---|---|---|
committer | dorit <dorit@138bc75d-0d04-0410-961f-82ee72b054a4> | 2007-05-03 12:54:45 +0000 |
commit | 30f263a419dd1cf9804dea818dab0c2c40fda265 (patch) | |
tree | c155e715aab82e74ee4d9c068a74667321236329 | |
parent | 6b75e1228cecab24ff5650ca0e98b8f7aae8d5ad (diff) | |
download | gcc-30f263a419dd1cf9804dea818dab0c2c40fda265.tar.gz |
PR tree-optimization/31699
* tree-vect-analyze.c (vect_update_misalignment_for_peel): Remove wrong
code.
(vect_enhance_data_refs_alignment): Compute peel amount using
TYPE_VECTOR_SUBPARTS instead of vf.
* tree-vect-transform.c (vect_gen_niters_for_prolog_loop): Likewise.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@124375 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- | gcc/ChangeLog | 9 |
-rw-r--r-- | gcc/testsuite/ChangeLog | 17 |
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/pr31699.c | 35 |
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-floatint-conversion-1.c | 2 |
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-intfloat-conversion-1.c | 2 |
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-iv-4.c | 2 |
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c | 23 |
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-multitypes-11.c | 45 |
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c | 18 |
-rw-r--r-- | gcc/testsuite/lib/target-supports.exp | 43 |
-rw-r--r-- | gcc/tree-vect-analyze.c | 22 |
-rw-r--r-- | gcc/tree-vect-transform.c | 23 |
12 files changed, 196 insertions, 45 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c1438c6ed7e..e39d94e7423 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2007-05-03 Dorit Nuzman <dorit@il.ibm.com> + + PR tree-optimization/31699 + * tree-vect-analyze.c (vect_update_misalignment_for_peel): Remove wrong + code. + (vect_enhance_data_refs_alignment): Compute peel amount using + TYPE_VECTOR_SUBPARTS instead of vf. + * tree-vect-transform.c (vect_gen_niters_for_prolog_loop): Likewise. + 2007-05-02 Brooks Moses <brooks.moses@codesourcery.com> PR bootstrap/31776 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index d8444cd286a..0d00f9ba7d5 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,18 @@ +2007-05-03 Dorit Nuzman <dorit@il.ibm.com> + + PR tree-optimization/31699 + * lib/target-supports.exp (check_effective_target_vect_intfloat_cvt): + New. + (check_effective_target_vect_floatint_cvt): New. + * gcc.dg/vect/vect-floatint-conversion-1.c: Use new keyword instead + of specific targets. + * gcc.dg/vect/vect-intfloat-conversion-1.c: Likewise. + * gcc.dg/vect/vect-multitypes-1.c: One less loop gets vectorized. + * gcc.dg/vect/vect-multitypes-4.c: Likewise. + * gcc.dg/vect/vect-iv-4.c: Likewise. + * gcc.dg/vect/vect-multitypes-11.c: New. + * gcc.dg/vect/pr31699.c: New. + 2007-05-02 Geoffrey Keating <geoffk@apple.com> * gcc.c-torture/compile-limits-stringlit.c: Reduce size of string. @@ -2399,7 +2414,7 @@ Dorit Nuzman <dorit@il.ibm.com> * gcc.dg/vect/vect-intfloat-conversion-1.c: New test. - * gcc.dg/vect/vect-intfloat-conversion-1.c: New test. + * gcc.dg/vect/vect-floatint-conversion-1.c: New test. * gcc.dg/vect/vect-93.c: Another loop gets vectorized on powerpc. * gcc.dg/vect/vect-113.c: Likewise. 
diff --git a/gcc/testsuite/gcc.dg/vect/pr31699.c b/gcc/testsuite/gcc.dg/vect/pr31699.c new file mode 100644 index 00000000000..86099924387 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr31699.c @@ -0,0 +1,35 @@ +/* { dg-require-effective-target vect_double } */ + +#include <stdlib.h> +#include <stdarg.h> +#include "tree-vect.h" + +float x[256]; + +void foo(void) +{ + double *z = malloc (sizeof(double) * 256); + + int i; + for (i=0; i<256; ++i) + z[i] = x[i] + 1.0f; +} + + +int main() +{ + int i; + + check_vect (); + + for (i = 0; i < 256; i++) + x[i] = (float) i; + + foo(); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_intfloat_cvt } } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-floatint-conversion-1.c b/gcc/testsuite/gcc.dg/vect/vect-floatint-conversion-1.c index 95ffc684ec4..dd845bf057d 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-floatint-conversion-1.c +++ b/gcc/testsuite/gcc.dg/vect/vect-floatint-conversion-1.c @@ -36,5 +36,5 @@ main (void) return main1 (); } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target i?86-*-* x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_floatint_cvt } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-intfloat-conversion-1.c b/gcc/testsuite/gcc.dg/vect/vect-intfloat-conversion-1.c index 1369f2b8608..85a993a34c8 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-intfloat-conversion-1.c +++ b/gcc/testsuite/gcc.dg/vect/vect-intfloat-conversion-1.c @@ -34,5 +34,5 @@ int main (void) return main1 (); } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target powerpc*-*-* i?86-*-* x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { 
target vect_intfloat_cvt } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-iv-4.c b/gcc/testsuite/gcc.dg/vect/vect-iv-4.c index 8dd39b1aded..0a3c44579ed 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-iv-4.c +++ b/gcc/testsuite/gcc.dg/vect/vect-iv-4.c @@ -40,5 +40,5 @@ int main (void) return main1 (); } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_pack_trunc } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c b/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c index 2b884011952..e1cbafa56a1 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c +++ b/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c @@ -14,10 +14,9 @@ int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45, /* Current peeling-for-alignment scheme will consider the 'sa[i+7]' access for peeling, and therefore will examine the option of - using a peeling factor = VF-7%VF. This will result in a peeling factor 1, + using a peeling factor = V-7%V = 1,3 for V=8,4 respectively, which will also align the access to 'ia[i+3]', and the loop could be - vectorized on all targets that support unaligned loads. - */ + vectorized on all targets that support unaligned loads. */ int main1 (int n) { @@ -43,17 +42,16 @@ int main1 (int n) /* Current peeling-for-alignment scheme will consider the 'ia[i+3]' access for peeling, and therefore will examine the option of - using a peeling factor = VF-3%VF. This will result in a peeling factor - 5 if VF=8, or 1 if VF=4,2. In either case, this will also align the access - to 'sa[i+3]', and the loop could be vectorized on targets that support - unaligned loads. */ + using a peeling factor = (V-3)%V = 1 for V=2,4. + This will not align the access 'sa[i+3]' (for which we need to + peel 5 iterations), so the loop can not be vectorized. 
*/ int main2 (int n) { int i; /* Multiple types with different sizes, used in independent - copmutations. Vectorizable. */ + copmutations. */ for (i = 0; i < n; i++) { ia[i+3] = ib[i]; @@ -80,8 +78,11 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail vect_no_align } } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail vect_no_align } } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { xfail vect_no_align } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail vect_no_align } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-multitypes-11.c b/gcc/testsuite/gcc.dg/vect/vect-multitypes-11.c new file mode 100644 index 00000000000..26bba0b1f24 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-multitypes-11.c @@ -0,0 +1,45 @@ +/* { dg-require-effective-target vect_int } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 64 + +short x[N] __attribute__ ((__aligned__(16))); + +int +foo (int len, int *z) { + int i; + + for (i=0; i<len; i++) { + z[i] = x[i]; + } +} + + +int main (void) +{ + short i; + int z[N+4]; + + check_vect (); + + for (i=0; i<N; i++) { + x[i] = i; + } + + foo (N,z+2); + + for (i=0; i<N; i++) { + if (z[i+2] != x[i]) + abort (); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times 
"vectorized 1 loops" 2 "vect" { target vect_unpack } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! vect_unpack } } } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { target { vect_no_align && vect_unpack } } } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ + diff --git a/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c b/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c index 63f244d06b7..b5a55cba762 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c +++ b/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c @@ -20,8 +20,7 @@ unsigned int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45, access for peeling, and therefore will examine the option of using a peeling factor = VF-7%VF. This will result in a peeling factor 1, which will also align the access to 'ia[i+3]', and the loop could be - vectorized on all targets that support unaligned loads. - */ + vectorized on all targets that support unaligned loads. */ int main1 (int n) { @@ -48,9 +47,9 @@ int main1 (int n) /* Current peeling-for-alignment scheme will consider the 'ia[i+3]' access for peeling, and therefore will examine the option of using a peeling factor = VF-3%VF. This will result in a peeling factor - 5 if VF=8, or 1 if VF=4,2. In either case, this will also align the access - to 'sa[i+3]', and the loop could be vectorized on targets that support - unaligned loads. */ + 1 if VF=4,2. This will not align the access to 'sa[i+3]', for which we + need to peel 5,1 iterations for VF=4,2 respectively, so the loop can not + be vectorized. 
*/ int main2 (int n) { @@ -84,8 +83,11 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail vect_no_align } } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail vect_no_align } } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 8 "vect" { xfail vect_no_align } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 8 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { xfail vect_no_align } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index a8112782419..9ada7f18706 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -1368,6 +1368,49 @@ proc check_effective_target_vect_int { } { return $et_vect_int_saved } +# Return 1 if the target supports int->float conversion +# + +proc check_effective_target_vect_intfloat_cvt { } { + global et_vect_intfloat_cvt_saved + + if [info exists et_vect_intfloat_cvt_saved] { + verbose "check_effective_target_vect_intfloat_cvt: using cached result" 2 + } else { + set et_vect_intfloat_cvt_saved 0 + if { [istarget i?86-*-*] + || [istarget powerpc*-*-*] + || [istarget x86_64-*-*] } { + set et_vect_intfloat_cvt_saved 1 + } + } + + verbose "check_effective_target_vect_intfloat_cvt: returning $et_vect_intfloat_cvt_saved" 2 + return $et_vect_intfloat_cvt_saved +} + + +# 
Return 1 if the target supports float->int conversion +# + +proc check_effective_target_vect_floatint_cvt { } { + global et_vect_floatint_cvt_saved + + if [info exists et_vect_floatint_cvt_saved] { + verbose "check_effective_target_vect_floatint_cvt: using cached result" 2 + } else { + set et_vect_floatint_cvt_saved 0 + if { [istarget i?86-*-*] + || [istarget x86_64-*-*] } { + set et_vect_floatint_cvt_saved 1 + } + } + + verbose "check_effective_target_vect_floatint_cvt: returning $et_vect_floatint_cvt_saved" 2 + return $et_vect_floatint_cvt_saved +} + + # Return 1 is this is an arm target using 32-bit instructions proc check_effective_target_arm32 { } { global et_arm32_saved diff --git a/gcc/tree-vect-analyze.c b/gcc/tree-vect-analyze.c index c195c7ab323..3a4841d2c05 100644 --- a/gcc/tree-vect-analyze.c +++ b/gcc/tree-vect-analyze.c @@ -1258,15 +1258,6 @@ vect_update_misalignment_for_peel (struct data_reference *dr, if (DR_GROUP_FIRST_DR (peel_stmt_info)) dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info); - if (known_alignment_for_access_p (dr) - && known_alignment_for_access_p (dr_peel) - && (DR_MISALIGNMENT (dr) / dr_size == - DR_MISALIGNMENT (dr_peel) / dr_peel_size)) - { - DR_MISALIGNMENT (dr) = 0; - return; - } - /* It can be assumed that the data refs with the same alignment as dr_peel are aligned in the vector loop. */ same_align_drs @@ -1507,7 +1498,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) the prolog loop ({VF - misalignment}), is a multiple of the number of the interleaved accesses. */ int elem_size, mis_in_elements; - int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + tree vectype = STMT_VINFO_VECTYPE (stmt_info); + int nelements = TYPE_VECTOR_SUBPARTS (vectype); /* FORNOW: handle only known alignment. 
*/ if (!known_alignment_for_access_p (dr)) @@ -1516,10 +1508,10 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) break; } - elem_size = UNITS_PER_SIMD_WORD / vf; + elem_size = UNITS_PER_SIMD_WORD / nelements; mis_in_elements = DR_MISALIGNMENT (dr) / elem_size; - if ((vf - mis_in_elements) % DR_GROUP_SIZE (stmt_info)) + if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info)) { do_peeling = false; break; @@ -1541,6 +1533,10 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) { int mis; int npeel = 0; + tree stmt = DR_STMT (dr0); + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + tree vectype = STMT_VINFO_VECTYPE (stmt_info); + int nelements = TYPE_VECTOR_SUBPARTS (vectype); if (known_alignment_for_access_p (dr0)) { @@ -1550,7 +1546,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) factor minus the misalignment as an element count. */ mis = DR_MISALIGNMENT (dr0); mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0)))); - npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis; + npeel = nelements - mis; /* For interleaved data access every iteration accesses all the members of the group, therefore we divide the number of iterations diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c index 161e82d3314..4775e2cd582 100644 --- a/gcc/tree-vect-transform.c +++ b/gcc/tree-vect-transform.c @@ -4786,13 +4786,17 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio) prolog_niters = min ( LOOP_NITERS , (VF/group_size - addr_mis/elem_size)&(VF/group_size-1) ) where group_size is the size of the interleaved group. -*/ + + The above formulas assume that VF == number of elements in the vector. This + may not hold when there are multiple-types in the loop. + In this case, for some data-references in the loop the VF does not represent + the number of elements that fit in the vector. Therefore, instead of VF we + use TYPE_VECTOR_SUBPARTS. 
*/ static tree vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters) { struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo); - int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); tree var, stmt; tree iters, iters_name; @@ -4805,6 +4809,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters) tree niters_type = TREE_TYPE (loop_niters); int group_size = 1; int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr)))); + int nelements = TYPE_VECTOR_SUBPARTS (vectype); if (DR_GROUP_FIRST_DR (stmt_info)) { @@ -4825,7 +4830,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters) if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "known alignment = %d.", byte_misalign); iters = build_int_cst (niters_type, - (vf - elem_misalign)&(vf/group_size-1)); + (nelements - elem_misalign)&(nelements/group_size-1)); } else { @@ -4837,9 +4842,9 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters) tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1); tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1); tree elem_size_log = - build_int_cst (type, exact_log2 (vectype_align/vf)); - tree vf_minus_1 = build_int_cst (type, vf - 1); - tree vf_tree = build_int_cst (type, vf); + build_int_cst (type, exact_log2 (vectype_align/nelements)); + tree nelements_minus_1 = build_int_cst (type, nelements - 1); + tree nelements_tree = build_int_cst (type, nelements); tree byte_misalign; tree elem_misalign; @@ -4854,9 +4859,9 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters) elem_misalign = fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log); - /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */ - iters = fold_build2 (MINUS_EXPR, type, vf_tree, elem_misalign); - iters = fold_build2 (BIT_AND_EXPR, type, iters, vf_minus_1); + /* Create: (niters_type) 
(nelements - elem_misalign)&(nelements - 1) */ + iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign); + iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1); iters = fold_convert (niters_type, iters); }