diff options
author | dorit <dorit@138bc75d-0d04-0410-961f-82ee72b054a4> | 2005-03-15 18:33:09 +0000 |
---|---|---|
committer | dorit <dorit@138bc75d-0d04-0410-961f-82ee72b054a4> | 2005-03-15 18:33:09 +0000 |
commit | 39b8f742ef14abba097084b567e57563e555d0df (patch) | |
tree | aa609c29139804fa11435795fdbdacaccff79d8f | |
parent | 0b503e11ea9748a69e1be0e927e8efdefc64f396 (diff) | |
download | gcc-39b8f742ef14abba097084b567e57563e555d0df.tar.gz |
* tree-vectorizer.h (unknown_alignment_for_access_p): Replaced by
known_alignment_for_access_p.
(known_alignment_for_access_p): New.
(do_peeling_for_alignment): Field made int instead of bool and renamed
to peeling_for_alignment.
(LOOP_DO_PEELING_FOR_ALIGNMENT): Renamed to LOOP_PEELING_FOR_ALIGNMENT.
* tree-vect-analyze.c (vect_determine_vectorization_factor): New. This
functionality used to be in vect_analyze_operations.
(vect_analyze_operations): Code to determine vectorization factor was
moved to vect_determine_vectorization_factor.
(vect_enhance_data_refs_alignment): Update DR_MISALIGNMENT to the
correct value when it is known, instead of setting it to -1 (unknown).
Set LOOP_PEELING_FOR_ALIGNMENT to peeling
factor.
(vect_analyze_loop): Call vect_determine_vectorization_factor (used to
be part of vect_analyze_operations).
* tree-vectorizer.c (slpeel_tree_peel_loop_to_edge): Use fold when
creating the guard condition, as the number of iterations may be
constant.
(new_loop_vec_info): Use new name of
LOOP_DO_PEELING_FOR_ALIGNMENT. Set it to 0 instead of false.
* tree-vect-transform.c (vect_gen_niters_for_prolog_loop): Handle known
alignment case more efficiently. Use LOOP_PEELING_FOR_ALIGNMENT.
(vect_do_peeling_for_alignment): Use fold.
(vect_transform_loop): Use new name of LOOP_DO_PEELING_FOR_ALIGNMENT.
(vect_update_inits_of_dr): Renamed to
vect_update_init_of_dr.
(vect_update_inits_of_drs): Call vect_update_init_of_dr, the new name of vect_update_inits_of_dr.
(vectorizable_store): Fix assertion to use == instead of =.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@96526 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- | gcc/ChangeLog | 32 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 8 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-54.c | 6 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-58.c | 6 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-92.c | 90 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-93.c | 76 | ||||
-rw-r--r-- | gcc/tree-vect-analyze.c | 337 | ||||
-rw-r--r-- | gcc/tree-vect-transform.c | 95 | ||||
-rw-r--r-- | gcc/tree-vectorizer.c | 7 | ||||
-rw-r--r-- | gcc/tree-vectorizer.h | 21 |
10 files changed, 504 insertions, 174 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 59d84facf02..0d3e60f7d3a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,35 @@ +2005-03-15 Dorit Naishlos <dorit@il.ibm.com> + + * tree-vectorizer.h (unknown_alignment_for_access_p): Replaced by + known_alignment_for_access_p. + (known_alignment_for_access_p): New. + (do_peeling_for_alignment): Field made int instead of bool and renamed + to peeling_for_alignment. + (LOOP_DO_PEELING_FOR_ALIGNMENT): Renamed to LOOP_PEELING_FOR_ALIGNMENT. + * tree-vect-analyze.c (vect_determine_vectorization_factor): New. This + functionality used to be in vect_analyze_operations. + (vect_analyze_operations): Code to determine vectorization factor was + moved to vect_determine_vectorization_factor. + (vect_enhance_data_refs_alignment): Update to correct alignment when it + is known instead of -1. Set LOOP_PEELING_FOR_ALIGNMENT to peeling + factor. + (vect_analyze_loop): Call vect_determine_vectorization_factor (used to + be part of vect_analyze_operations). + * tree-vectorizer.c (slpeel_tree_peel_loop_to_edge): Use fold when + creating the guard condition, as the number of iterations may be + constant. + (slpeel_tree_peel_loop_to_edge): Use new name of + LOOP_DO_PEELING_FOR_ALIGNMENT. Set it to 0 instead of false. + * tree-vect-transform.c (vect_gen_niters_for_prolog_loop): Handle known + alignment case more efficiently. Use LOOP_PEELING_FOR_ALIGNMENT. + (vect_do_peeling_for_alignment): Use fold. + (vect_transform_loop): Use new name of LOOP_DO_PEELING_FOR_ALIGNMENT. + + (vect_update_inits_of_dr): Renamed to + vect_update_init_of_dr. + (vect_update_inits_of_drs): Use new name of vect_update_inits_of_dr. + (vectorizable_store): Fix assertion to use == instead of =. 
+ 2005-03-15 Daniel Jacobowitz <dan@codesourcery.com> * config/arm/arm.h (CONDITIONAL_REGISTER_USAGE): Don't clear diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 6805e68ba8a..442e6c04461 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,11 @@ +2005-03-15 Dorit Naishlos <dorit@il.ibm.com> + + * gcc.dg/vect/vect-54.c: Now vectorizable on targets that don't support + misaligned accesses. + * gcc.dg/vect/vect-58.c: Likewise. + * gcc.dg/vect/vect-92.c: New. + * gcc.dg/vect/vect-93.c: New. + 2005-03-15 Feng Wang <fengwang@nudt.edu.cn> PR fortran/18827 diff --git a/gcc/testsuite/gcc.dg/vect/vect-54.c b/gcc/testsuite/gcc.dg/vect/vect-54.c index b169bb33452..5ced09a3f63 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-54.c +++ b/gcc/testsuite/gcc.dg/vect/vect-54.c @@ -50,6 +50,6 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail vect_no_align } } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-58.c b/gcc/testsuite/gcc.dg/vect/vect-58.c index df814d9452d..275e465f882 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-58.c +++ b/gcc/testsuite/gcc.dg/vect/vect-58.c @@ -51,6 +51,6 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */ -/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail vect_no_align } } } */ -/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 
"vect" { xfail vect_no_align } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-92.c b/gcc/testsuite/gcc.dg/vect/vect-92.c new file mode 100644 index 00000000000..02efc6c4326 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-92.c @@ -0,0 +1,90 @@ +/* { dg-require-effective-target vect_float } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 256 + +typedef float afloat __attribute__ ((__aligned__(16))); + +/* known misalignment: same alignment */ + +int +main1 (afloat * __restrict__ pa, afloat * __restrict__ pb, afloat * __restrict__ pc) +{ + int i; + + for (i = 0; i < 5; i++) + { + pa[i+1] = pb[i+1] * pc[i+1]; + } + + /* check results: */ + for (i = 0; i < 5; i++) + { + if (pa[i+1] != (pb[i+1] * pc[i+1])) + abort (); + } + + return 0; +} + +int +main2 (afloat * __restrict__ pa, afloat * __restrict__ pb, afloat * __restrict__ pc) +{ + int i; + + for (i = 0; i < 6; i++) + { + pa[i+1] = pb[i+1] * pc[i+1]; + } + + /* check results: */ + for (i = 0; i < 6; i++) + { + if (pa[i+1] != (pb[i+1] * pc[i+1])) + abort (); + } + + return 0; +} + +int +main3 (afloat * __restrict__ pa, afloat * __restrict__ pb, afloat * __restrict__ pc, int n) +{ + int i; + + for (i = 0; i < n; i++) + { + pa[i+1] = pb[i+1] * pc[i+1]; + } + + /* check results: */ + for (i = 0; i < n; i++) + { + if (pa[i+1] != (pb[i+1] * pc[i+1])) + abort (); + } + + return 0; +} + +int main (void) +{ + int i; + afloat a[N]; + afloat b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,48,51,54,57}; + afloat c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19}; + + check_vect (); + + main1 (a,b,c); + main2 (a,b,c); + main3 (a,b,c,N); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */ +/* { 
dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 3 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-93.c b/gcc/testsuite/gcc.dg/vect/vect-93.c new file mode 100644 index 00000000000..fe3a81b1ba0 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-93.c @@ -0,0 +1,76 @@ +/* { dg-require-effective-target vect_float } */ + +#include <stdarg.h> +#include "tree-vect.h" + +#define N 3001 + +typedef float afloat __attribute__ ((__aligned__(16))); + +int +main1 (float *pa) +{ + int i; + + for (i = 0; i < 3001; i++) + { + pa[i] = 2.0; + } + + /* check results: */ + for (i = 0; i < 3001; i++) + { + if (pa[i] != 2.0) + abort (); + } + + for (i = 1; i <= 10; i++) + { + pa[i] = 3.0; + } + + /* check results: */ + for (i = 1; i <= 10; i++) + { + if (pa[i] != 3.0) + abort (); + } + + return 0; +} + +int main (void) +{ + int i; + afloat a[N]; + afloat b[N]; + + check_vect (); + + /* from bzip2: */ + for (i=0; i<N; i++) b[i] = i; + a[0] = 0; + for (i = 1; i <= 256; i++) a[i] = b[i-1]; + + /* check results: */ + for (i = 1; i <= 256; i++) + { + if (a[i] != i-1) + abort (); + } + if (a[0] != 0) + abort (); + + main1 (a); + + return 0; +} + +/* in main1 */ +/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { target vect_no_align } } } */ +/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 3 "vect" { xfail vect_no_align } } } */ + +/* in main */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */ +/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail vect_no_align } } } */ diff --git a/gcc/tree-vect-analyze.c b/gcc/tree-vect-analyze.c index 2595e497723..d1b274b56ec 100644 --- a/gcc/tree-vect-analyze.c +++ b/gcc/tree-vect-analyze.c @@ -50,6 +50,7 
@@ static bool vect_analyze_data_refs_alignment (loop_vec_info); static bool vect_compute_data_refs_alignment (loop_vec_info); static void vect_enhance_data_refs_alignment (loop_vec_info); static bool vect_analyze_operations (loop_vec_info); +static bool vect_determine_vectorization_factor (loop_vec_info); /* Utility functions for the analyses. */ static bool exist_non_indexing_operands_for_use_p (tree, tree); @@ -285,6 +286,150 @@ vect_analyze_offset_expr (tree expr, } +/* Function vect_determine_vectorization_factor + + Determine the vectorization factor (VF). VF is the number of data elements + that are operated upon in parallel in a single iteration of the vectorized + loop. For example, when vectorizing a loop that operates on 4byte elements, + on a target with vector size (VS) 16byte, the VF is set to 4, since 4 + elements can fit in a single vector register. + + We currently support vectorization of loops in which all types operated upon + are of the same size. Therefore this function currently sets VF according to + the size of the types operated upon, and fails if there are multiple sizes + in the loop. 
+ + VF is also the factor by which the loop iterations are strip-mined, e.g.: + original loop: + for (i=0; i<N; i++){ + a[i] = b[i] + c[i]; + } + + vectorized loop: + for (i=0; i<N; i+=VF){ + a[i:VF] = b[i:VF] + c[i:VF]; + } +*/ + +static bool +vect_determine_vectorization_factor (loop_vec_info loop_vinfo) +{ + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); + int nbbs = loop->num_nodes; + block_stmt_iterator si; + unsigned int vectorization_factor = 0; + int i; + tree scalar_type; + + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "=== vect_determine_vectorization_factor ==="); + + for (i = 0; i < nbbs; i++) + { + basic_block bb = bbs[i]; + + for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) + { + tree stmt = bsi_stmt (si); + unsigned int nunits; + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + tree vectype; + + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + { + fprintf (vect_dump, "==> examining statement: "); + print_generic_expr (vect_dump, stmt, TDF_SLIM); + } + + gcc_assert (stmt_info); + /* skip stmts which do not need to be vectorized. 
*/ + if (!STMT_VINFO_RELEVANT_P (stmt_info)) + continue; + + if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt)))) + { + if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, + LOOP_LOC (loop_vinfo))) + { + fprintf (vect_dump, "not vectorized: vector stmt in loop:"); + print_generic_expr (vect_dump, stmt, TDF_SLIM); + } + return false; + } + + if (STMT_VINFO_DATA_REF (stmt_info)) + scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info))); + else if (TREE_CODE (stmt) == MODIFY_EXPR) + scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0)); + else + scalar_type = TREE_TYPE (stmt); + + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + { + fprintf (vect_dump, "get vectype for scalar type: "); + print_generic_expr (vect_dump, scalar_type, TDF_SLIM); + } + + vectype = get_vectype_for_scalar_type (scalar_type); + if (!vectype) + { + if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, + LOOP_LOC (loop_vinfo))) + { + fprintf (vect_dump, "not vectorized: unsupported data-type "); + print_generic_expr (vect_dump, scalar_type, TDF_SLIM); + } + return false; + } + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + { + fprintf (vect_dump, "vectype: "); + print_generic_expr (vect_dump, vectype, TDF_SLIM); + } + STMT_VINFO_VECTYPE (stmt_info) = vectype; + + nunits = GET_MODE_NUNITS (TYPE_MODE (vectype)); + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "nunits = %d", nunits); + + if (vectorization_factor) + { + /* FORNOW: don't allow mixed units. + This restriction will be relaxed in the future. */ + if (nunits != vectorization_factor) + { + if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, + LOOP_LOC (loop_vinfo))) + fprintf (vect_dump, "not vectorized: mixed data-types"); + return false; + } + } + else + vectorization_factor = nunits; + +#ifdef ENABLE_CHECKING + gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type)) + * vectorization_factor == UNITS_PER_SIMD_WORD); +#endif + } + } + + /* TODO: Analyze cost. 
Decide if worth while to vectorize. */ + + if (vectorization_factor <= 1) + { + if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, + LOOP_LOC (loop_vinfo))) + fprintf (vect_dump, "not vectorized: unsupported data-type"); + return false; + } + LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; + + return true; +} + + /* Function vect_analyze_operations. Scan the loop stmts and make sure they are all vectorizable. */ @@ -299,11 +444,13 @@ vect_analyze_operations (loop_vec_info loop_vinfo) unsigned int vectorization_factor = 0; int i; bool ok; - tree scalar_type; if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) fprintf (vect_dump, "=== vect_analyze_operations ==="); + gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo)); + vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + for (i = 0; i < nbbs; i++) { basic_block bb = bbs[i]; @@ -311,9 +458,7 @@ vect_analyze_operations (loop_vec_info loop_vinfo) for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) { tree stmt = bsi_stmt (si); - unsigned int nunits; stmt_vec_info stmt_info = vinfo_for_stmt (stmt); - tree vectype; if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) { @@ -337,49 +482,13 @@ vect_analyze_operations (loop_vec_info loop_vinfo) continue; } - if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt)))) - { - if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, - LOOP_LOC (loop_vinfo))) - { - fprintf (vect_dump, "not vectorized: vector stmt in loop:"); - print_generic_expr (vect_dump, stmt, TDF_SLIM); - } - return false; - } - - if (STMT_VINFO_DATA_REF (stmt_info)) - scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info))); - else if (TREE_CODE (stmt) == MODIFY_EXPR) - scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0)); - else - scalar_type = TREE_TYPE (stmt); - - if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) - { - fprintf (vect_dump, "get vectype for scalar type: "); - print_generic_expr (vect_dump, scalar_type, TDF_SLIM); - } - - vectype = get_vectype_for_scalar_type 
(scalar_type); - if (!vectype) - { - if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, - LOOP_LOC (loop_vinfo))) - { - fprintf (vect_dump, - "not vectorized: unsupported data-type "); - print_generic_expr (vect_dump, scalar_type, TDF_SLIM); - } - return false; - } - - if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) - { - fprintf (vect_dump, "vectype: "); - print_generic_expr (vect_dump, vectype, TDF_SLIM); - } - STMT_VINFO_VECTYPE (stmt_info) = vectype; +#ifdef ENABLE_CHECKING + if (STMT_VINFO_RELEVANT_P (stmt_info)) + { + gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt)))); + gcc_assert (STMT_VINFO_VECTYPE (stmt_info)); + } +#endif ok = (vectorizable_operation (stmt, NULL, NULL) || vectorizable_assignment (stmt, NULL, NULL) @@ -396,44 +505,11 @@ vect_analyze_operations (loop_vec_info loop_vinfo) } return false; } - - nunits = GET_MODE_NUNITS (TYPE_MODE (vectype)); - if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) - fprintf (vect_dump, "nunits = %d", nunits); - - if (vectorization_factor) - { - /* FORNOW: don't allow mixed units. - This restriction will be relaxed in the future. */ - if (nunits != vectorization_factor) - { - if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, - LOOP_LOC (loop_vinfo))) - fprintf (vect_dump, "not vectorized: mixed data-types"); - return false; - } - } - else - vectorization_factor = nunits; - -#ifdef ENABLE_CHECKING - gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type)) - * vectorization_factor == UNITS_PER_SIMD_WORD); -#endif } } /* TODO: Analyze cost. Decide if worth while to vectorize. 
*/ - if (vectorization_factor <= 1) - { - if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, - LOOP_LOC (loop_vinfo))) - fprintf (vect_dump, "not vectorized: unsupported data-type"); - return false; - } - LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; - if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) fprintf (vect_dump, @@ -933,7 +1009,9 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) { varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo); varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo); - unsigned int i; + varray_type datarefs; + struct data_reference *dr0 = NULL; + unsigned int i, j; /* This pass will require a cost model to guide it whether to apply peeling @@ -1036,26 +1114,15 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++) { - struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i); - if (!aligned_access_p (dr)) - { - LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr; - LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true; + dr0 = VARRAY_GENERIC_PTR (loop_write_datarefs, i); + if (!aligned_access_p (dr0)) + { + LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0; + LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0); break; - } + } } - if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo)) - { - if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo))) - fprintf (vect_dump, "Peeling for alignment will not be applied."); - return; - } - else - if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo))) - fprintf (vect_dump, "Peeling for alignment will be applied."); - - /* (1.2) Update the alignment info according to the peeling factor. 
If the misalignment of the DR we peel for is M, then the peeling factor is VF - M, and the misalignment of each access DR_i @@ -1063,37 +1130,54 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) If the misalignment of the DR we peel for is unknown, then the misalignment of each access DR_i in the loop is also unknown. - FORNOW: set the misalignment of the accesses to unknown even - if the peeling factor is known at compile time. + TODO: - consider accesses that are known to have the same + alignment, even if that alignment is unknown. */ - TODO: - if the peeling factor is known at compile time, use that - when updating the misalignment info of the loop DRs. - - consider accesses that are known to have the same - alignment, even if that alignment is unknown. */ - - for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++) + if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)) { - struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i); - if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo)) + int mis; + int npeel = 0; + + if (known_alignment_for_access_p (dr0)) { - DR_MISALIGNMENT (dr) = 0; - if (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo))) - fprintf (vect_dump, "Alignment of access forced using peeling."); + /* Since it's known at compile time, compute the number of iterations + in the peeled loop (the peeling factor) for use in updating + DR_MISALIGNMENT values. The peeling factor is the vectorization + factor minus the misalignment as an element count. 
*/ + mis = DR_MISALIGNMENT (dr0); + mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0)))); + npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis; } - else - DR_MISALIGNMENT (dr) = -1; - } - for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++) - { - struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i); - if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo)) + + datarefs = loop_write_datarefs; + for (j = 0; j < 2; j++) { - DR_MISALIGNMENT (dr) = 0; - if (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo))) - fprintf (vect_dump, "Alignment of access forced using peeling."); + for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++) + { + struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i); + + if (dr == dr0) + continue; + if (known_alignment_for_access_p (dr) + && DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr0)) + DR_MISALIGNMENT (dr) = 0; + else if (known_alignment_for_access_p (dr) + && known_alignment_for_access_p (dr0)) + { + int drsize = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr)))); + + DR_MISALIGNMENT (dr) += npeel * drsize; + DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD; + } + else + DR_MISALIGNMENT (dr) = -1; + } + datarefs = loop_read_datarefs; } - else - DR_MISALIGNMENT (dr) = -1; + + DR_MISALIGNMENT (dr0) = 0; + if (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo))) + fprintf (vect_dump, "Alignment of access forced using peeling."); } } @@ -2479,6 +2563,15 @@ vect_analyze_loop (struct loop *loop) return NULL; } + ok = vect_determine_vectorization_factor (loop_vinfo); + if (!ok) + { + if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo))) + fprintf (vect_dump, "can't determine vectorization factor."); + destroy_loop_vec_info (loop_vinfo); + return NULL; + } + /* Analyze the alignment of the data-refs in the loop. FORNOW: Only aligned accesses are handled. 
*/ diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c index 1a82f288cb2..5dd9efecdbc 100644 --- a/gcc/tree-vect-transform.c +++ b/gcc/tree-vect-transform.c @@ -64,7 +64,7 @@ static void vect_generate_tmps_on_preheader static tree vect_build_loop_niters (loop_vec_info); static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge); static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree); -static void vect_update_inits_of_dr (struct data_reference *, tree niters); +static void vect_update_init_of_dr (struct data_reference *, tree niters); static void vect_update_inits_of_drs (loop_vec_info, tree); static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *); static void vect_do_peeling_for_loop_bound @@ -907,7 +907,7 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) alignment_support_cheme = vect_supportable_dr_alignment (dr); gcc_assert (alignment_support_cheme); - gcc_assert (alignment_support_cheme = dr_aligned); /* FORNOW */ + gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */ /* Handle use - get the vectorized def from the defining stmt. */ vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt); @@ -1451,14 +1451,16 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio, Set the number of iterations for the loop represented by LOOP_VINFO to the minimum between LOOP_NITERS (the original iteration count of the loop) - and the misalignment of DR - the first data reference recorded in + and the misalignment of DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of this loop, the data reference DR will refer to an aligned location. 
The following computation is generated: - compute address misalignment in bytes: - addr_mis = addr & (vectype_size - 1) + If the misalignment of DR is known at compile time: + addr_mis = int mis = DR_MISALIGNMENT (dr); + Else, compute address misalignment in bytes: + addr_mis = addr & (vectype_size - 1) prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) ) @@ -1479,37 +1481,53 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters) stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt); tree vectype = STMT_VINFO_VECTYPE (stmt_info); int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT; - tree elem_misalign; - tree byte_misalign; - tree new_stmts = NULL_TREE; - tree start_addr = - vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE); - tree ptr_type = TREE_TYPE (start_addr); - tree size = TYPE_SIZE (ptr_type); - tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1); - tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1); tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1); tree niters_type = TREE_TYPE (loop_niters); - tree elem_size_log = - build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf)); - tree vf_tree = build_int_cst (unsigned_type_node, vf); pe = loop_preheader_edge (loop); - new_bb = bsi_insert_on_edge_immediate (pe, new_stmts); - gcc_assert (!new_bb); - /* Create: byte_misalign = addr & (vectype_size - 1) */ - byte_misalign = build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1); + if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0) + { + int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo); + int element_size = vectype_align/vf; + int elem_misalign = byte_misalign / element_size; - /* Create: elem_misalign = byte_misalign / element_size */ - elem_misalign = - build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log); + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "known alignment = %d.", 
byte_misalign); + iters = build_int_cst (niters_type, (vf - elem_misalign)&(vf-1)); + } + else + { + tree new_stmts = NULL_TREE; + tree start_addr = + vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE); + tree ptr_type = TREE_TYPE (start_addr); + tree size = TYPE_SIZE (ptr_type); + tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1); + tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1); + tree elem_size_log = + build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf)); + tree vf_tree = build_int_cst (unsigned_type_node, vf); + tree byte_misalign; + tree elem_misalign; + + new_bb = bsi_insert_on_edge_immediate (pe, new_stmts); + gcc_assert (!new_bb); - /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */ - iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign); - iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1); - iters = fold_convert (niters_type, iters); + /* Create: byte_misalign = addr & (vectype_size - 1) */ + byte_misalign = + build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1); + /* Create: elem_misalign = byte_misalign / element_size */ + elem_misalign = + build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log); + + /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */ + iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign); + iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1); + iters = fold_convert (niters_type, iters); + } + /* Create: prolog_loop_niters = min (iters, loop_niters) */ /* If the loop bound is known at compile time we already verified that it is greater than vf; since the misalignment ('iters') is at most vf, there's @@ -1517,12 +1535,17 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters) if (TREE_CODE (loop_niters) != INTEGER_CST) iters = build2 (MIN_EXPR, niters_type, iters, loop_niters); + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) 
+ { + fprintf (vect_dump, "niters for prolog loop: "); + print_generic_expr (vect_dump, iters, TDF_SLIM); + } + var = create_tmp_var (niters_type, "prolog_loop_niters"); add_referenced_tmp_var (var); iters_name = force_gimple_operand (iters, &stmt, false, var); /* Insert stmt on loop preheader edge. */ - pe = loop_preheader_edge (loop); if (stmt) { basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt); @@ -1533,7 +1556,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters) } -/* Function vect_update_inits_of_dr +/* Function vect_update_init_of_dr NITERS iterations were peeled from LOOP. DR represents a data reference in LOOP. This function updates the information recorded in DR to @@ -1541,7 +1564,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters) executed. Specifically, it updates the OFFSET field of stmt_info. */ static void -vect_update_inits_of_dr (struct data_reference *dr, tree niters) +vect_update_init_of_dr (struct data_reference *dr, tree niters) { stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr)); tree offset = STMT_VINFO_VECT_INIT_OFFSET (stmt_info); @@ -1574,13 +1597,13 @@ vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters) for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++) { struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i); - vect_update_inits_of_dr (dr, niters); + vect_update_init_of_dr (dr, niters); } for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++) { struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i); - vect_update_inits_of_dr (dr, niters); + vect_update_init_of_dr (dr, niters); } } @@ -1618,8 +1641,8 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops) /* Update number of times loop executes. 
*/ n_iters = LOOP_VINFO_NITERS (loop_vinfo); - LOOP_VINFO_NITERS (loop_vinfo) = - build2 (MINUS_EXPR, TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop); + LOOP_VINFO_NITERS (loop_vinfo) = fold (build2 (MINUS_EXPR, + TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop)); /* Update the init conditions of the access functions of all data refs. */ vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop); @@ -1656,7 +1679,7 @@ vect_transform_loop (loop_vec_info loop_vinfo, /* Peel the loop if there are data refs with unknown alignment. Only one data ref with unknown store is allowed. */ - if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo)) + if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)) vect_do_peeling_for_alignment (loop_vinfo, loops); /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c index 08a923ee6b8..82c108888ac 100644 --- a/gcc/tree-vectorizer.c +++ b/gcc/tree-vectorizer.c @@ -963,7 +963,7 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops, add_bb_to_loop (bb_before_second_loop, first_loop->outer); pre_condition = - build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node); + fold (build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node)); skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition, bb_before_second_loop, bb_before_first_loop); slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */, @@ -1001,7 +1001,8 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops, bb_after_second_loop = split_edge (second_loop->single_exit); add_bb_to_loop (bb_after_second_loop, second_loop->outer); - pre_condition = build2 (EQ_EXPR, boolean_type_node, first_niters, niters); + pre_condition = + fold (build2 (EQ_EXPR, boolean_type_node, first_niters, niters)); skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, bb_after_second_loop, bb_before_first_loop); slpeel_update_phi_nodes_for_guard 
(skip_e, second_loop, false /* exit-phis */, @@ -1213,7 +1214,7 @@ new_loop_vec_info (struct loop *loop) LOOP_VINFO_EXIT_COND (res) = NULL; LOOP_VINFO_NITERS (res) = NULL; LOOP_VINFO_VECTORIZABLE_P (res) = 0; - LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false; + LOOP_PEELING_FOR_ALIGNMENT (res) = 0; LOOP_VINFO_VECT_FACTOR (res) = 0; VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20, "loop_write_datarefs"); diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index b761f4d0ef3..e32eed2364a 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -95,9 +95,15 @@ typedef struct _loop_vec_info { /* Unknown DRs according to which loop was peeled. */ struct data_reference *unaligned_dr; - /* If true, loop is peeled. - unaligned_drs show in this case DRs used for peeling. */ - bool do_peeling_for_alignment; + /* peeling_for_alignment indicates whether peeling for alignment will take + place, and what the peeling factor should be: + peeling_for_alignment = X means: + If X=0: Peeling for alignment will not be applied. + If X>0: Peel first X iterations. + If X=-1: Generate a runtime test to calculate the number of iterations + to be peeled, using the dataref recorded in the field + unaligned_dr. */ + int peeling_for_alignment; /* All data references in the loop that are being written to. */ varray_type data_ref_writes; @@ -119,7 +125,7 @@ typedef struct _loop_vec_info { #define LOOP_VINFO_DATAREF_WRITES(L) (L)->data_ref_writes #define LOOP_VINFO_DATAREF_READS(L) (L)->data_ref_reads #define LOOP_VINFO_INT_NITERS(L) (TREE_INT_CST_LOW ((L)->num_iters)) -#define LOOP_DO_PEELING_FOR_ALIGNMENT(L) (L)->do_peeling_for_alignment +#define LOOP_PEELING_FOR_ALIGNMENT(L) (L)->peeling_for_alignment #define LOOP_VINFO_UNALIGNED_DR(L) (L)->unaligned_dr #define LOOP_VINFO_LOC(L) (L)->loop_line_number @@ -242,7 +248,8 @@ vinfo_for_stmt (tree stmt) /* Info on data references alignment. 
*/ /*-----------------------------------------------------------------*/ -/* The misalignment of the memory access in bytes. */ +/* Reflects actual alignment of first access in the vectorized loop, + taking into account peeling/versioning if applied. */ #define DR_MISALIGNMENT(DR) (DR)->aux static inline bool @@ -252,9 +259,9 @@ aligned_access_p (struct data_reference *data_ref_info) } static inline bool -unknown_alignment_for_access_p (struct data_reference *data_ref_info) +known_alignment_for_access_p (struct data_reference *data_ref_info) { - return (DR_MISALIGNMENT (data_ref_info) == -1); + return (DR_MISALIGNMENT (data_ref_info) != -1); } /* Perform signed modulo, always returning a non-negative value. */ |