summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author dorit <dorit@138bc75d-0d04-0410-961f-82ee72b054a4> 2007-05-03 12:54:45 +0000
committer dorit <dorit@138bc75d-0d04-0410-961f-82ee72b054a4> 2007-05-03 12:54:45 +0000
commit 30f263a419dd1cf9804dea818dab0c2c40fda265 (patch)
tree c155e715aab82e74ee4d9c068a74667321236329
parent 6b75e1228cecab24ff5650ca0e98b8f7aae8d5ad (diff)
download gcc-30f263a419dd1cf9804dea818dab0c2c40fda265.tar.gz
PR tree-optimization/31699
* tree-vect-analyze.c (vect_update_misalignment_for_peel): Remove wrong code.
(vect_enhance_data_refs_alignment): Compute peel amount using TYPE_VECTOR_SUBPARTS instead of vf.
* tree-vect-transform.c (vect_gen_niters_for_prolog_loop): Likewise.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@124375 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- gcc/ChangeLog 9
-rw-r--r-- gcc/testsuite/ChangeLog 17
-rw-r--r-- gcc/testsuite/gcc.dg/vect/pr31699.c 35
-rw-r--r-- gcc/testsuite/gcc.dg/vect/vect-floatint-conversion-1.c 2
-rw-r--r-- gcc/testsuite/gcc.dg/vect/vect-intfloat-conversion-1.c 2
-rw-r--r-- gcc/testsuite/gcc.dg/vect/vect-iv-4.c 2
-rw-r--r-- gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c 23
-rw-r--r-- gcc/testsuite/gcc.dg/vect/vect-multitypes-11.c 45
-rw-r--r-- gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c 18
-rw-r--r-- gcc/testsuite/lib/target-supports.exp 43
-rw-r--r-- gcc/tree-vect-analyze.c 22
-rw-r--r-- gcc/tree-vect-transform.c 23
12 files changed, 196 insertions, 45 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c1438c6ed7e..e39d94e7423 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2007-05-03 Dorit Nuzman <dorit@il.ibm.com>
+
+ PR tree-optimization/31699
+ * tree-vect-analyze.c (vect_update_misalignment_for_peel): Remove wrong
+ code.
+ (vect_enhance_data_refs_alignment): Compute peel amount using
+ TYPE_VECTOR_SUBPARTS instead of vf.
+ * tree-vect-transform.c (vect_gen_niters_for_prolog_loop): Likewise.
+
2007-05-02 Brooks Moses <brooks.moses@codesourcery.com>
PR bootstrap/31776
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index d8444cd286a..0d00f9ba7d5 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,18 @@
+2007-05-03 Dorit Nuzman <dorit@il.ibm.com>
+
+ PR tree-optimization/31699
+ * lib/target-supports.exp (check_effective_target_vect_intfloat_cvt):
+ New.
+ (check_effective_target_vect_floatint_cvt): New.
+ * gcc.dg/vect/vect-floatint-conversion-1.c: Use new keyword instead
+ of specific targets.
+ * gcc.dg/vect/vect-intfloat-conversion-1.c: Likewise.
+ * gcc.dg/vect/vect-multitypes-1.c: One less loop gets vectorized.
+ * gcc.dg/vect/vect-multitypes-4.c: Likewise.
+ * gcc.dg/vect/vect-iv-4.c: Likewise.
+ * gcc.dg/vect/vect-multitypes-11.c: New.
+ * gcc.dg/vect/pr31699.c: New.
+
2007-05-02 Geoffrey Keating <geoffk@apple.com>
* gcc.c-torture/compile-limits-stringlit.c: Reduce size of string.
@@ -2399,7 +2414,7 @@
Dorit Nuzman <dorit@il.ibm.com>
* gcc.dg/vect/vect-intfloat-conversion-1.c: New test.
- * gcc.dg/vect/vect-intfloat-conversion-1.c: New test.
+ * gcc.dg/vect/vect-floatint-conversion-1.c: New test.
* gcc.dg/vect/vect-93.c: Another loop gets vectorized on powerpc.
* gcc.dg/vect/vect-113.c: Likewise.
diff --git a/gcc/testsuite/gcc.dg/vect/pr31699.c b/gcc/testsuite/gcc.dg/vect/pr31699.c
new file mode 100644
index 00000000000..86099924387
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr31699.c
@@ -0,0 +1,35 @@
+/* { dg-require-effective-target vect_double } */
+
+#include <stdlib.h>
+#include <stdarg.h>
+#include "tree-vect.h"
+
+float x[256];
+
+void foo(void)
+{
+ double *z = malloc (sizeof(double) * 256);
+
+ int i;
+ for (i=0; i<256; ++i)
+ z[i] = x[i] + 1.0f;
+}
+
+
+int main()
+{
+ int i;
+
+ check_vect ();
+
+ for (i = 0; i < 256; i++)
+ x[i] = (float) i;
+
+ foo();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_intfloat_cvt } } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-floatint-conversion-1.c b/gcc/testsuite/gcc.dg/vect/vect-floatint-conversion-1.c
index 95ffc684ec4..dd845bf057d 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-floatint-conversion-1.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-floatint-conversion-1.c
@@ -36,5 +36,5 @@ main (void)
return main1 ();
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target i?86-*-* x86_64-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_floatint_cvt } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-intfloat-conversion-1.c b/gcc/testsuite/gcc.dg/vect/vect-intfloat-conversion-1.c
index 1369f2b8608..85a993a34c8 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-intfloat-conversion-1.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-intfloat-conversion-1.c
@@ -34,5 +34,5 @@ int main (void)
return main1 ();
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target powerpc*-*-* i?86-*-* x86_64-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_intfloat_cvt } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-iv-4.c b/gcc/testsuite/gcc.dg/vect/vect-iv-4.c
index 8dd39b1aded..0a3c44579ed 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-iv-4.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-iv-4.c
@@ -40,5 +40,5 @@ int main (void)
return main1 ();
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_pack_trunc } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c b/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c
index 2b884011952..e1cbafa56a1 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c
@@ -14,10 +14,9 @@ int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,
/* Current peeling-for-alignment scheme will consider the 'sa[i+7]'
access for peeling, and therefore will examine the option of
- using a peeling factor = VF-7%VF. This will result in a peeling factor 1,
+ using a peeling factor = V-7%V = 1,3 for V=8,4 respectively,
which will also align the access to 'ia[i+3]', and the loop could be
- vectorized on all targets that support unaligned loads.
- */
+ vectorized on all targets that support unaligned loads. */
int main1 (int n)
{
@@ -43,17 +42,16 @@ int main1 (int n)
/* Current peeling-for-alignment scheme will consider the 'ia[i+3]'
access for peeling, and therefore will examine the option of
- using a peeling factor = VF-3%VF. This will result in a peeling factor
- 5 if VF=8, or 1 if VF=4,2. In either case, this will also align the access
- to 'sa[i+3]', and the loop could be vectorized on targets that support
- unaligned loads. */
+ using a peeling factor = (V-3)%V = 1 for V=2,4.
+ This will not align the access 'sa[i+3]' (for which we need to
+ peel 5 iterations), so the loop can not be vectorized. */
int main2 (int n)
{
int i;
/* Multiple types with different sizes, used in independent
- copmutations. Vectorizable. */
+ copmutations. */
for (i = 0; i < n; i++)
{
ia[i+3] = ib[i];
@@ -80,8 +78,11 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail vect_no_align } } } */
-/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail vect_no_align } } } */
-/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { xfail vect_no_align } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail vect_no_align } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-multitypes-11.c b/gcc/testsuite/gcc.dg/vect/vect-multitypes-11.c
new file mode 100644
index 00000000000..26bba0b1f24
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-multitypes-11.c
@@ -0,0 +1,45 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 64
+
+short x[N] __attribute__ ((__aligned__(16)));
+
+int
+foo (int len, int *z) {
+ int i;
+
+ for (i=0; i<len; i++) {
+ z[i] = x[i];
+ }
+}
+
+
+int main (void)
+{
+ short i;
+ int z[N+4];
+
+ check_vect ();
+
+ for (i=0; i<N; i++) {
+ x[i] = i;
+ }
+
+ foo (N,z+2);
+
+ for (i=0; i<N; i++) {
+ if (z[i+2] != x[i])
+ abort ();
+ }
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_unpack } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! vect_unpack } } } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { target { vect_no_align && vect_unpack } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c b/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c
index 63f244d06b7..b5a55cba762 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c
@@ -20,8 +20,7 @@ unsigned int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,
access for peeling, and therefore will examine the option of
using a peeling factor = VF-7%VF. This will result in a peeling factor 1,
which will also align the access to 'ia[i+3]', and the loop could be
- vectorized on all targets that support unaligned loads.
- */
+ vectorized on all targets that support unaligned loads. */
int main1 (int n)
{
@@ -48,9 +47,9 @@ int main1 (int n)
/* Current peeling-for-alignment scheme will consider the 'ia[i+3]'
access for peeling, and therefore will examine the option of
using a peeling factor = VF-3%VF. This will result in a peeling factor
- 5 if VF=8, or 1 if VF=4,2. In either case, this will also align the access
- to 'sa[i+3]', and the loop could be vectorized on targets that support
- unaligned loads. */
+ 1 if VF=4,2. This will not align the access to 'sa[i+3]', for which we
+ need to peel 5,1 iterations for VF=4,2 respectively, so the loop can not
+ be vectorized. */
int main2 (int n)
{
@@ -84,8 +83,11 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail vect_no_align } } } */
-/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail vect_no_align } } } */
-/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 8 "vect" { xfail vect_no_align } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 8 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { xfail vect_no_align } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index a8112782419..9ada7f18706 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -1368,6 +1368,49 @@ proc check_effective_target_vect_int { } {
return $et_vect_int_saved
}
+# Return 1 if the target supports int->float conversion
+#
+
+proc check_effective_target_vect_intfloat_cvt { } {
+ global et_vect_intfloat_cvt_saved
+
+ if [info exists et_vect_intfloat_cvt_saved] {
+ verbose "check_effective_target_vect_intfloat_cvt: using cached result" 2
+ } else {
+ set et_vect_intfloat_cvt_saved 0
+ if { [istarget i?86-*-*]
+ || [istarget powerpc*-*-*]
+ || [istarget x86_64-*-*] } {
+ set et_vect_intfloat_cvt_saved 1
+ }
+ }
+
+ verbose "check_effective_target_vect_intfloat_cvt: returning $et_vect_intfloat_cvt_saved" 2
+ return $et_vect_intfloat_cvt_saved
+}
+
+
+# Return 1 if the target supports float->int conversion
+#
+
+proc check_effective_target_vect_floatint_cvt { } {
+ global et_vect_floatint_cvt_saved
+
+ if [info exists et_vect_floatint_cvt_saved] {
+ verbose "check_effective_target_vect_floatint_cvt: using cached result" 2
+ } else {
+ set et_vect_floatint_cvt_saved 0
+ if { [istarget i?86-*-*]
+ || [istarget x86_64-*-*] } {
+ set et_vect_floatint_cvt_saved 1
+ }
+ }
+
+ verbose "check_effective_target_vect_floatint_cvt: returning $et_vect_floatint_cvt_saved" 2
+ return $et_vect_floatint_cvt_saved
+}
+
+
# Return 1 is this is an arm target using 32-bit instructions
proc check_effective_target_arm32 { } {
global et_arm32_saved
diff --git a/gcc/tree-vect-analyze.c b/gcc/tree-vect-analyze.c
index c195c7ab323..3a4841d2c05 100644
--- a/gcc/tree-vect-analyze.c
+++ b/gcc/tree-vect-analyze.c
@@ -1258,15 +1258,6 @@ vect_update_misalignment_for_peel (struct data_reference *dr,
if (DR_GROUP_FIRST_DR (peel_stmt_info))
dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
- if (known_alignment_for_access_p (dr)
- && known_alignment_for_access_p (dr_peel)
- && (DR_MISALIGNMENT (dr) / dr_size ==
- DR_MISALIGNMENT (dr_peel) / dr_peel_size))
- {
- DR_MISALIGNMENT (dr) = 0;
- return;
- }
-
/* It can be assumed that the data refs with the same alignment as dr_peel
are aligned in the vector loop. */
same_align_drs
@@ -1507,7 +1498,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
the prolog loop ({VF - misalignment}), is a multiple of the
number of the interleaved accesses. */
int elem_size, mis_in_elements;
- int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ int nelements = TYPE_VECTOR_SUBPARTS (vectype);
/* FORNOW: handle only known alignment. */
if (!known_alignment_for_access_p (dr))
@@ -1516,10 +1508,10 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
break;
}
- elem_size = UNITS_PER_SIMD_WORD / vf;
+ elem_size = UNITS_PER_SIMD_WORD / nelements;
mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
- if ((vf - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
+ if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
{
do_peeling = false;
break;
@@ -1541,6 +1533,10 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
int mis;
int npeel = 0;
+ tree stmt = DR_STMT (dr0);
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ int nelements = TYPE_VECTOR_SUBPARTS (vectype);
if (known_alignment_for_access_p (dr0))
{
@@ -1550,7 +1546,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
factor minus the misalignment as an element count. */
mis = DR_MISALIGNMENT (dr0);
mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
- npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
+ npeel = nelements - mis;
/* For interleaved data access every iteration accesses all the
members of the group, therefore we divide the number of iterations
diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c
index 161e82d3314..4775e2cd582 100644
--- a/gcc/tree-vect-transform.c
+++ b/gcc/tree-vect-transform.c
@@ -4786,13 +4786,17 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
prolog_niters = min ( LOOP_NITERS ,
(VF/group_size - addr_mis/elem_size)&(VF/group_size-1) )
where group_size is the size of the interleaved group.
-*/
+
+ The above formulas assume that VF == number of elements in the vector. This
+ may not hold when there are multiple-types in the loop.
+ In this case, for some data-references in the loop the VF does not represent
+ the number of elements that fit in the vector. Therefore, instead of VF we
+ use TYPE_VECTOR_SUBPARTS. */
static tree
vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
{
struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
- int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
tree var, stmt;
tree iters, iters_name;
@@ -4805,6 +4809,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
tree niters_type = TREE_TYPE (loop_niters);
int group_size = 1;
int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
+ int nelements = TYPE_VECTOR_SUBPARTS (vectype);
if (DR_GROUP_FIRST_DR (stmt_info))
{
@@ -4825,7 +4830,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "known alignment = %d.", byte_misalign);
iters = build_int_cst (niters_type,
- (vf - elem_misalign)&(vf/group_size-1));
+ (nelements - elem_misalign)&(nelements/group_size-1));
}
else
{
@@ -4837,9 +4842,9 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
tree elem_size_log =
- build_int_cst (type, exact_log2 (vectype_align/vf));
- tree vf_minus_1 = build_int_cst (type, vf - 1);
- tree vf_tree = build_int_cst (type, vf);
+ build_int_cst (type, exact_log2 (vectype_align/nelements));
+ tree nelements_minus_1 = build_int_cst (type, nelements - 1);
+ tree nelements_tree = build_int_cst (type, nelements);
tree byte_misalign;
tree elem_misalign;
@@ -4854,9 +4859,9 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
elem_misalign =
fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
- /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
- iters = fold_build2 (MINUS_EXPR, type, vf_tree, elem_misalign);
- iters = fold_build2 (BIT_AND_EXPR, type, iters, vf_minus_1);
+ /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
+ iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
+ iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
iters = fold_convert (niters_type, iters);
}