-rw-r--r--  gcc/ChangeLog                           |  12
-rw-r--r--  gcc/testsuite/ChangeLog                 |  12
-rw-r--r--  gcc/testsuite/gcc.dg/vect/pr43432.c     |  15
-rw-r--r--  gcc/testsuite/gcc.dg/vect/slp-perm-8.c  |   4
-rw-r--r--  gcc/testsuite/gcc.dg/vect/slp-perm-9.c  |   2
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-114.c    |   3
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-15.c     |   2
-rw-r--r--  gcc/testsuite/lib/target-supports.exp   |  45
-rw-r--r--  gcc/tree-vect-data-refs.c               |   8
-rw-r--r--  gcc/tree-vect-stmts.c                   | 106
10 files changed, 202 insertions, 7 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 87577389a82..dd29405e0ac 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,15 @@
+2010-09-17  Michael Matz  <matz@suse.de>
+
+	PR tree-optimization/43432
+	* tree-vect-data-refs.c (vect_analyze_data_ref_access):
+	Accept backwards consecutive accesses.
+	(vect_create_data_ref_ptr): If step is negative generate
+	decreasing IVs.
+	* tree-vect-stmts.c (vectorizable_store): Reject negative steps.
+	(perm_mask_for_reverse, reverse_vec_elements): New functions.
+	(vectorizable_load): Handle loads with negative steps when easily
+	possible.
+
 2010-09-03  Jan Hubicka  <jh@suse.cz>
 
 	* lto-cgraph.c (compute_ltrans_boundary): Use const_value_known.
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 43ea0539dd5..55f05f4c6f5 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,15 @@
+2010-09-17  Michael Matz  <matz@suse.de>
+
+	PR tree-optimization/43432
+	* lib/target-supports.exp (check_effective_target_vect_perm_byte,
+	check_effective_target_vect_perm_short): New predicates.
+	(check_effective_target_vect_perm): Include x86_64.
+	* gcc.dg/vect/pr43432.c: New test.
+	* gcc.dg/vect/vect-114.c: Adjust.
+	* gcc.dg/vect/vect-15.c: Ditto.
+	* gcc.dg/vect/slp-perm-8.c: Use new predicate.
+	* gcc.dg/vect/slp-perm-9.c: Ditto.
+
 2010-09-17  Nicola Pero  <nicola.pero@meta-innovation.com>
 
 	PR testsuite/45692
diff --git a/gcc/testsuite/gcc.dg/vect/pr43432.c b/gcc/testsuite/gcc.dg/vect/pr43432.c
new file mode 100644
index 00000000000..3070318e148
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr43432.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_float } */
+/* { dg-options "-O3 -ffast-math -fdump-tree-vect-details" } */
+
+
+void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1,
+int len){
+    int i;
+    src1 += len-1;
+    for(i=0; i<len; i++)
+        dst[i] = src0[i] * src1[-i];
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_perm } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-8.c b/gcc/testsuite/gcc.dg/vect/slp-perm-8.c
index 28c3529c514..deec308449b 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-perm-8.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-perm-8.c
@@ -53,7 +53,7 @@ int main (int argc, const char* argv[])
 
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_perm } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_perm_byte } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm_byte } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-9.c b/gcc/testsuite/gcc.dg/vect/slp-perm-9.c
index 5c5176e716b..f4387299622 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-perm-9.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-perm-9.c
@@ -54,7 +54,7 @@ int main (int argc, const char* argv[])
 }
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "permutation requires at least three vectors" 1 "vect" { target vect_perm } } } */
+/* { dg-final { scan-tree-dump-times "permutation requires at least three vectors" 1 "vect" { target vect_perm_short } } } */
 /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-114.c b/gcc/testsuite/gcc.dg/vect/vect-114.c
index 035c9a89c2a..f9132acd06d 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-114.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-114.c
@@ -34,6 +34,7 @@ int main (void)
   return main1 ();
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! vect_perm } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-15.c b/gcc/testsuite/gcc.dg/vect/vect-15.c
index b2ae9c8471d..87853c1248e 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-15.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-15.c
@@ -35,5 +35,5 @@ int main (void)
   return main1 ();
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 864f14e049d..c03b9f68e8e 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -2425,7 +2425,8 @@ proc check_effective_target_vect_perm { } {
     } else {
 	set et_vect_perm_saved 0
 	if { [istarget powerpc*-*-*]
-	     || [istarget spu-*-*] } {
+	     || [istarget spu-*-*]
+	     || [istarget x86_64-*-*] } {
 	    set et_vect_perm_saved 1
 	}
     }
@@ -2433,6 +2434,48 @@ proc check_effective_target_vect_perm { } {
     return $et_vect_perm_saved
 }
 
+# Return 1 if the target plus current options supports vector permutation
+# on byte-sized elements, 0 otherwise.
+#
+# This won't change for different subtargets so cache the result.
+
+proc check_effective_target_vect_perm_byte { } {
+    global et_vect_perm_byte
+
+    if [info exists et_vect_perm_byte_saved] {
+	verbose "check_effective_target_vect_perm_byte: using cached result" 2
+    } else {
+	set et_vect_perm_byte_saved 0
+	if { [istarget powerpc*-*-*]
+	     || [istarget spu-*-*] } {
+	    set et_vect_perm_byte_saved 1
+	}
+    }
+    verbose "check_effective_target_vect_perm_byte: returning $et_vect_perm_byte_saved" 2
+    return $et_vect_perm_byte_saved
+}
+
+# Return 1 if the target plus current options supports vector permutation
+# on short-sized elements, 0 otherwise.
+#
+# This won't change for different subtargets so cache the result.
+
+proc check_effective_target_vect_perm_short { } {
+    global et_vect_perm_short
+
+    if [info exists et_vect_perm_short_saved] {
+	verbose "check_effective_target_vect_perm_short: using cached result" 2
+    } else {
+	set et_vect_perm_short_saved 0
+	if { [istarget powerpc*-*-*]
+	     || [istarget spu-*-*] } {
+	    set et_vect_perm_short_saved 1
+	}
+    }
+    verbose "check_effective_target_vect_perm_short: returning $et_vect_perm_short_saved" 2
+    return $et_vect_perm_short_saved
+}
+
 # Return 1 if the target plus current options supports a vector
 # widening summation of *short* args into *int* result, 0 otherwise.
 #
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index 0d1e3381738..71825137cd7 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -2287,7 +2287,9 @@ vect_analyze_data_ref_access (struct data_reference *dr)
     }
 
   /* Consecutive?  */
-  if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
+  if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
+      || (dr_step < 0
+	  && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
     {
       /* Mark that it is not interleaving.  */
       DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
@@ -2970,6 +2972,7 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
   tree vptr;
   gimple_stmt_iterator incr_gsi;
   bool insert_after;
+  bool negative;
   tree indx_before_incr, indx_after_incr;
   gimple incr;
   tree step;
@@ -3002,6 +3005,7 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
     *inv_p = true;
   else
     *inv_p = false;
+  negative = tree_int_cst_compare (step, size_zero_node) < 0;
 
   /* Create an expression for the first address accessed by this load
      in LOOP.  */
@@ -3160,6 +3164,8 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
      LOOP is zero.  In this case the step here is also zero.  */
   if (*inv_p)
     step = size_zero_node;
+  else if (negative)
+    step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
 
   standard_iv_increment_position (loop, &incr_gsi, &insert_after);
 
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 3db0ec1b850..f562ed2d528 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -3145,6 +3145,13 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   if (!STMT_VINFO_DATA_REF (stmt_info))
     return false;
 
+  if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+	fprintf (vect_dump, "negative step for store.");
+      return false;
+    }
+
   if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
     {
       strided_store = true;
@@ -3425,6 +3432,68 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   return true;
 }
 
+/* Given a vector type VECTYPE returns a builtin DECL to be used
+   for vector permutation and stores a mask into *MASK that implements
+   reversal of the vector elements.  If that is impossible to do
+   returns NULL (and *MASK is unchanged).  */
+
+static tree
+perm_mask_for_reverse (tree vectype, tree *mask)
+{
+  tree builtin_decl;
+  tree mask_element_type, mask_type;
+  tree mask_vec = NULL;
+  int i;
+  int nunits;
+  if (!targetm.vectorize.builtin_vec_perm)
+    return NULL;
+
+  builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
+						     &mask_element_type);
+  if (!builtin_decl || !mask_element_type)
+    return NULL;
+
+  mask_type = get_vectype_for_scalar_type (mask_element_type);
+  nunits = TYPE_VECTOR_SUBPARTS (vectype);
+  if (TYPE_VECTOR_SUBPARTS (vectype) != TYPE_VECTOR_SUBPARTS (mask_type))
+    return NULL;
+
+  for (i = 0; i < nunits; i++)
+    mask_vec = tree_cons (NULL, build_int_cst (mask_element_type, i), mask_vec);
+  mask_vec = build_vector (mask_type, mask_vec);
+
+  if (!targetm.vectorize.builtin_vec_perm_ok (vectype, mask_vec))
+    return NULL;
+  if (mask)
+    *mask = mask_vec;
+  return builtin_decl;
+}
+
+/* Given a vector variable X, that was generated for the scalar LHS of
+   STMT, generate instructions to reverse the vector elements of X,
+   insert them a *GSI and return the permuted vector variable.  */
+
+static tree
+reverse_vec_elements (tree x, gimple stmt, gimple_stmt_iterator *gsi)
+{
+  tree vectype = TREE_TYPE (x);
+  tree mask_vec, builtin_decl;
+  tree perm_dest, data_ref;
+  gimple perm_stmt;
+
+  builtin_decl = perm_mask_for_reverse (vectype, &mask_vec);
+
+  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
+
+  /* Generate the permute statement.  */
+  perm_stmt = gimple_build_call (builtin_decl, 3, x, x, mask_vec);
+  data_ref = make_ssa_name (perm_dest, perm_stmt);
+  gimple_call_set_lhs (perm_stmt, data_ref);
+  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
+
+  return data_ref;
+}
+
 /* vectorizable_load.
 
    Check if STMT reads a non scalar data-ref (array/pointer/structure) that
@@ -3467,6 +3536,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   gimple first_stmt;
   tree scalar_type;
   bool inv_p;
+  bool negative;
   bool compute_in_loop = false;
   struct loop *at_loop;
   int vec_num;
@@ -3529,6 +3599,14 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   if (!STMT_VINFO_DATA_REF (stmt_info))
     return false;
 
+  negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
+  if (negative && ncopies > 1)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+	fprintf (vect_dump, "multiple types with negative step.");
+      return false;
+    }
+
   scalar_type = TREE_TYPE (DR_REF (dr));
 
   mode = TYPE_MODE (vectype);
@@ -3563,6 +3641,25 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
       return false;
     }
 
+  if (negative)
+    {
+      gcc_assert (!strided_load);
+      alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
+      if (alignment_support_scheme != dr_aligned
+	  && alignment_support_scheme != dr_unaligned_supported)
+	{
+	  if (vect_print_dump_info (REPORT_DETAILS))
+	    fprintf (vect_dump, "negative step but alignment required.");
+	  return false;
+	}
+      if (!perm_mask_for_reverse (vectype, NULL))
+	{
+	  if (vect_print_dump_info (REPORT_DETAILS))
+	    fprintf (vect_dump, "negative step and reversing not supported.");
+	  return false;
+	}
+    }
+
   if (!vec_stmt) /* transformation not required.  */
     {
       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
@@ -3737,6 +3834,9 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   else
     at_loop = loop;
 
+  if (negative)
+    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
+
   prev_stmt_info = NULL;
   for (j = 0; j < ncopies; j++)
     {
@@ -3925,6 +4025,12 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
	      gcc_unreachable (); /* FORNOW. */
	    }
 
+	  if (negative)
+	    {
+	      new_temp = reverse_vec_elements (new_temp, stmt, gsi);
+	      new_stmt = SSA_NAME_DEF_STMT (new_temp);
+	    }
+
	  /* Collect vector loads and later create their permutation in
	     vect_transform_strided_load ().  */
	  if (strided_load || slp_perm)
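
As a rough illustration (not part of the commit), the sketch below spells out in plain scalar C what the new negative-step path in vectorizable_load arranges for the loop in gcc.dg/vect/pr43432.c: the data-ref pointer is biased by -(VF-1) elements so one contiguous vector load covers the right addresses, and the loaded elements are then reversed with the permute built by perm_mask_for_reverse / reverse_vec_elements. The vector factor of 4 and the name vector_fmul_reverse_sketch are assumptions made for the example; arrays of four floats stand in for vector registers. Negative-step stores are not handled this way; vectorizable_store simply rejects them.

/* Illustration only -- not generated by the patch.  Conceptual expansion
   of the vectorized loop for dst[i] = src0[i] * src1[-i], assuming a
   vector factor (VF) of 4.  */

void
vector_fmul_reverse_sketch (float *dst, const float *src0,
                            const float *src1, int len)
{
  int i, k;
  src1 += len - 1;

  for (i = 0; i + 4 <= len; i += 4)
    {
      float a[4], b[4], r[4];

      /* Ordinary consecutive vector load from src0.  */
      for (k = 0; k < 4; k++)
        a[k] = src0[i + k];

      /* Negative step: the access pointer is offset by -(VF-1) = -3
         elements, so one contiguous load covers src1[-i-3 .. -i] ...  */
      for (k = 0; k < 4; k++)
        b[k] = src1[-i - 3 + k];

      /* ... and the loaded elements are reversed before use (the permute
         emitted by reverse_vec_elements).  */
      for (k = 0; k < 4; k++)
        r[k] = a[k] * b[3 - k];

      /* Consecutive vector store to dst.  */
      for (k = 0; k < 4; k++)
        dst[i + k] = r[k];
    }

  /* Scalar epilogue for the leftover iterations.  */
  for (; i < len; i++)
    dst[i] = src0[i] * src1[-i];
}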