summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordorit <dorit@138bc75d-0d04-0410-961f-82ee72b054a4>2007-07-12 12:17:03 +0000
committerdorit <dorit@138bc75d-0d04-0410-961f-82ee72b054a4>2007-07-12 12:17:03 +0000
commita28df51d87c891ef08e81de8082353eb3e83df35 (patch)
tree85414467a7c6b6b6755c5dbe54511f8981c7db54
parent7ba2cc333dd01467a2a66705460d9cce9740d813 (diff)
downloadgcc-a28df51d87c891ef08e81de8082353eb3e83df35.tar.gz
2007-07-12 Dorit Nuzman <dorit@il.ibm.com>
* target.h (builtin_vectorization_cost): Add new target builtin. * target-def.h (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): New. * tree-vectorizer.h (TARG_SCALAR_STMT_COST): New. (TARG_SCALAR_LOAD_COST, TARG_SCALAR_STORE_COST): New. * tree-vect-analyze.c (vect_analyze_slp_instance): Initisliaze uninitialized variables. * tree-vect-transform.c (cost_for_stmt): New function. (vect_estimate_min_profitable_iters): Call cost_for_stmt instead of using cost 1 for all scalar stmts. Be less conservative when estimating the number of prologue/epulogue iterations. Call targetm.vectorize.builtin_vectorization_cost. Return min_profitable_iters-1. (vect_model_reduction_cost): Use TARG_SCALAR_TO_VEC_COST for initialization cost instead of TARG_VEC_STMT_COST. Use TARG_VEC_TO_SCALAR_COST instead of TARG_VEC_STMT_COST for reduction epilogue code. Fix epilogue cost computation. * config/spu/spu.c (spu_builtin_vectorization_cost): New. (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): Implement. * config/spu/spu.h (TARG_COND_BRANCH_COST, TARG_SCALAR_STMT_COST): (TARG_SCALAR_LOAD_COST, TARG_SCALAR_STORE_COST, TARG_VEC_STMT_COST): (TARG_VEC_TO_SCALAR_COST, TARG_SCALAR_TO_VEC, TARG_VEC_LOAD_COST): (TARG_VEC_UNALIGNED_LOAD_COST, TARG_VEC_STORE_COST): Define. 2007-07-12 Dorit Nuzman <dorit@il.ibm.com> * gcc.dg/vect/costmodel/ppc/costmodel-vect-reduc-1char.c: Loops now get vectorized. * gcc.dg/vect/costmodel/i386/costmodel-vect-reduc-1char.c: Loops now get vectorized. * gcc.dg/vect/costmodel/spu/spu-costmodel-vect.exp: New. * gcc.dg/vect/costmodel/spu/costmodel-fast-math-vect-pr29925.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-31a.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-31b.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-31c.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-31d.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-iv-9.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-33.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-76a.c: New. 
* gcc.dg/vect/costmodel/spu/costmodel-vect-76b.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-76c.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-68a.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-68b.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-68c.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-68d.c: New. * lib/target-supports.exp (check_effective_target_vect_int_mul): Add spu. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@126584 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r--gcc/ChangeLog25
-rw-r--r--gcc/config/spu/spu.c19
-rw-r--r--gcc/config/spu/spu.h46
-rw-r--r--gcc/target-def.h4
-rw-r--r--gcc/target.h4
-rw-r--r--gcc/testsuite/ChangeLog24
-rw-r--r--gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-reduc-1char.c4
-rw-r--r--gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-reduc-1char.c4
-rw-r--r--gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-fast-math-vect-pr29925.c39
-rw-r--r--gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31a.c51
-rw-r--r--gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31b.c50
-rw-r--r--gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31c.c50
-rw-r--r--gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31d.c51
-rw-r--r--gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-33.c40
-rw-r--r--gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68a.c49
-rw-r--r--gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68b.c49
-rw-r--r--gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68c.c49
-rw-r--r--gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68d.c50
-rw-r--r--gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-76a.c47
-rw-r--r--gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-76b.c47
-rw-r--r--gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-76c.c47
-rw-r--r--gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-iv-9.c38
-rw-r--r--gcc/testsuite/gcc.dg/vect/costmodel/spu/spu-costmodel-vect.exp69
-rw-r--r--gcc/testsuite/lib/target-supports.exp1
-rw-r--r--gcc/tree-vect-transform.c83
-rw-r--r--gcc/tree-vectorizer.h15
26 files changed, 937 insertions, 18 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 13c869f8fcc..620775cb09d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,28 @@
+2007-07-12 Dorit Nuzman <dorit@il.ibm.com>
+
+ * target.h (builtin_vectorization_cost): Add new target builtin.
+ * target-def.h (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): New.
+ * tree-vectorizer.h (TARG_SCALAR_STMT_COST): New.
+ (TARG_SCALAR_LOAD_COST, TARG_SCALAR_STORE_COST): New.
+ * tree-vect-analyze.c (vect_analyze_slp_instance): Initialize
+ uninitialized variables.
+ * tree-vect-transform.c (cost_for_stmt): New function.
+ (vect_estimate_min_profitable_iters): Call cost_for_stmt instead of
+ using cost 1 for all scalar stmts. Be less conservative when
+ estimating the number of prologue/epilogue iterations. Call
+ targetm.vectorize.builtin_vectorization_cost. Return
+ min_profitable_iters-1.
+ (vect_model_reduction_cost): Use TARG_SCALAR_TO_VEC_COST for
+ initialization cost instead of TARG_VEC_STMT_COST. Use
+ TARG_VEC_TO_SCALAR_COST instead of TARG_VEC_STMT_COST for reduction
+ epilogue code. Fix epilogue cost computation.
+ * config/spu/spu.c (spu_builtin_vectorization_cost): New.
+ (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): Implement.
+ * config/spu/spu.h (TARG_COND_BRANCH_COST, TARG_SCALAR_STMT_COST):
+ (TARG_SCALAR_LOAD_COST, TARG_SCALAR_STORE_COST, TARG_VEC_STMT_COST):
+ (TARG_VEC_TO_SCALAR_COST, TARG_SCALAR_TO_VEC_COST, TARG_VEC_LOAD_COST):
+ (TARG_VEC_UNALIGNED_LOAD_COST, TARG_VEC_STORE_COST): Define.
+
2007-07-12 Richard Guenther <rguenther@suse.de>
* gimplify.c (gimplify_conversion): Make sure that the result
diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c
index 7e28167c251..f96326842b1 100644
--- a/gcc/config/spu/spu.c
+++ b/gcc/config/spu/spu.c
@@ -133,6 +133,7 @@ static void spu_encode_section_info (tree, rtx, int);
static tree spu_builtin_mul_widen_even (tree);
static tree spu_builtin_mul_widen_odd (tree);
static tree spu_builtin_mask_for_load (void);
+static int spu_builtin_vectorization_cost (bool);
extern const char *reg_names[];
rtx spu_compare_op0, spu_compare_op1;
@@ -261,6 +262,9 @@ const struct attribute_spec spu_attribute_table[];
#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
+
struct gcc_target targetm = TARGET_INITIALIZER;
void
@@ -5191,6 +5195,21 @@ spu_builtin_mask_for_load (void)
return d->fndecl;
}
+/* Implement targetm.vectorize.builtin_vectorization_cost. */
+static int
+spu_builtin_vectorization_cost (bool runtime_test)
+{
+ /* If the branch of the runtime test is taken - i.e. - the vectorized
+ version is skipped - this incurs a misprediction cost (because the
+ vectorized version is expected to be the fall-through). So we subtract
+ the latency of a mispredicted branch from the costs that are incurred
+ when the vectorized version is executed. */
+ if (runtime_test)
+ return -19;
+ else
+ return 0;
+}
+
void
spu_init_expanders (void)
{
diff --git a/gcc/config/spu/spu.h b/gcc/config/spu/spu.h
index 1f1547218a8..b8af6b2dc8c 100644
--- a/gcc/config/spu/spu.h
+++ b/gcc/config/spu/spu.h
@@ -542,6 +542,52 @@ targetm.resolve_overloaded_builtin = spu_resolve_overloaded_builtin; \
do { if (LOG!=0) fprintf (FILE, "\t.align\t%d\n", (LOG)); } while (0)
+/* Model costs for the vectorizer. */
+
+/* Cost of conditional branch. */
+#ifndef TARG_COND_BRANCH_COST
+#define TARG_COND_BRANCH_COST 6
+#endif
+
+/* Cost of any scalar operation, excluding load and store. */
+#ifndef TARG_SCALAR_STMT_COST
+#define TARG_SCALAR_STMT_COST 1
+#endif
+
+/* Cost of scalar load. */
+#undef TARG_SCALAR_LOAD_COST
+#define TARG_SCALAR_LOAD_COST 2 /* load + rotate */
+
+/* Cost of scalar store. */
+#undef TARG_SCALAR_STORE_COST
+#define TARG_SCALAR_STORE_COST 10
+
+/* Cost of any vector operation, excluding load, store,
+ or vector to scalar operation. */
+#undef TARG_VEC_STMT_COST
+#define TARG_VEC_STMT_COST 1
+
+/* Cost of vector to scalar operation. */
+#undef TARG_VEC_TO_SCALAR_COST
+#define TARG_VEC_TO_SCALAR_COST 1
+
+/* Cost of scalar to vector operation. */
+#undef TARG_SCALAR_TO_VEC_COST
+#define TARG_SCALAR_TO_VEC_COST 1
+
+/* Cost of aligned vector load. */
+#undef TARG_VEC_LOAD_COST
+#define TARG_VEC_LOAD_COST 1
+
+/* Cost of misaligned vector load. */
+#undef TARG_VEC_UNALIGNED_LOAD_COST
+#define TARG_VEC_UNALIGNED_LOAD_COST 2
+
+/* Cost of vector store. */
+#undef TARG_VEC_STORE_COST
+#define TARG_VEC_STORE_COST 1
+
+
/* Misc */
#define CASE_VECTOR_MODE SImode
diff --git a/gcc/target-def.h b/gcc/target-def.h
index 31cb8f85882..8942de74f96 100644
--- a/gcc/target-def.h
+++ b/gcc/target-def.h
@@ -356,6 +356,7 @@ Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
default_builtin_vectorized_conversion
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN 0
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD 0
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST 0
#define TARGET_VECTORIZE \
{ \
@@ -363,7 +364,8 @@ Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION, \
TARGET_VECTORIZE_BUILTIN_CONVERSION, \
TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN, \
- TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD \
+ TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD, \
+ TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
}
#define TARGET_DEFAULT_TARGET_FLAGS 0
diff --git a/gcc/target.h b/gcc/target.h
index 89ad0df087f..56c99bf14b2 100644
--- a/gcc/target.h
+++ b/gcc/target.h
@@ -413,6 +413,10 @@ struct gcc_target
element-by-element products for the odd elements. */
tree (* builtin_mul_widen_even) (tree);
tree (* builtin_mul_widen_odd) (tree);
+
+ /* Returns the cost to be added to the overheads involved with
+ executing the vectorized version of a loop. */
+ int (*builtin_vectorization_cost) (bool);
} vectorize;
/* The initial value of target_flags. */
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 22fc219cda3..a74a74f8ad9 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,27 @@
+2007-07-12 Dorit Nuzman <dorit@il.ibm.com>
+
+ * gcc.dg/vect/costmodel/ppc/costmodel-vect-reduc-1char.c: Loops now
+ get vectorized.
+ * gcc.dg/vect/costmodel/i386/costmodel-vect-reduc-1char.c: Loops
+ now get vectorized.
+ * gcc.dg/vect/costmodel/spu/spu-costmodel-vect.exp: New.
+ * gcc.dg/vect/costmodel/spu/costmodel-fast-math-vect-pr29925.c: New.
+ * gcc.dg/vect/costmodel/spu/costmodel-vect-31a.c: New.
+ * gcc.dg/vect/costmodel/spu/costmodel-vect-31b.c: New.
+ * gcc.dg/vect/costmodel/spu/costmodel-vect-31c.c: New.
+ * gcc.dg/vect/costmodel/spu/costmodel-vect-31d.c: New.
+ * gcc.dg/vect/costmodel/spu/costmodel-vect-iv-9.c: New.
+ * gcc.dg/vect/costmodel/spu/costmodel-vect-33.c: New.
+ * gcc.dg/vect/costmodel/spu/costmodel-vect-76a.c: New.
+ * gcc.dg/vect/costmodel/spu/costmodel-vect-76b.c: New.
+ * gcc.dg/vect/costmodel/spu/costmodel-vect-76c.c: New.
+ * gcc.dg/vect/costmodel/spu/costmodel-vect-68a.c: New.
+ * gcc.dg/vect/costmodel/spu/costmodel-vect-68b.c: New.
+ * gcc.dg/vect/costmodel/spu/costmodel-vect-68c.c: New.
+ * gcc.dg/vect/costmodel/spu/costmodel-vect-68d.c: New.
+ * lib/target-supports.exp (check_effective_target_vect_int_mult):
+ Add spu.
+
2007-07-12 Jakub Jelinek <jakub@redhat.com>
PR c++/30854
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-reduc-1char.c b/gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-reduc-1char.c
index cf5becc4908..55334fdd20d 100644
--- a/gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-reduc-1char.c
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-reduc-1char.c
@@ -46,6 +46,6 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_int_max } } } */
-/* { dg-final { scan-tree-dump-times "vectorization not profitable" 2 "vect" { xfail vect_no_int_max } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail vect_no_int_max } } } */
+/* { dg-final { scan-tree-dump-times "vectorization not profitable" 0 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-reduc-1char.c b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-reduc-1char.c
index cf5becc4908..55334fdd20d 100644
--- a/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-reduc-1char.c
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-reduc-1char.c
@@ -46,6 +46,6 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_int_max } } } */
-/* { dg-final { scan-tree-dump-times "vectorization not profitable" 2 "vect" { xfail vect_no_int_max } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail vect_no_int_max } } } */
+/* { dg-final { scan-tree-dump-times "vectorization not profitable" 0 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-fast-math-vect-pr29925.c b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-fast-math-vect-pr29925.c
new file mode 100644
index 00000000000..9347d05ea2d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-fast-math-vect-pr29925.c
@@ -0,0 +1,39 @@
+/* { dg-require-effective-target vect_float } */
+
+#include <stdlib.h>
+#include "../../tree-vect.h"
+
+void interp_pitch(float *exc, float *interp, int pitch, int len)
+{
+ int i,k;
+ int maxj;
+
+ maxj=3;
+ for (i=0;i<len;i++)
+ {
+ float tmp = 0;
+ for (k=0;k<7;k++)
+ {
+ tmp += exc[i-pitch+k+maxj-6];
+ }
+ interp[i] = tmp;
+ }
+}
+
+int main()
+{
+ float *exc = calloc(126,sizeof(float));
+ float *interp = calloc(80,sizeof(float));
+ int pitch = -35;
+
+ check_vect ();
+
+ interp_pitch(exc, interp, pitch, 80);
+ free(exc);
+ free(interp);
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorization not profitable" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31a.c b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31a.c
new file mode 100644
index 00000000000..272b3f0d733
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31a.c
@@ -0,0 +1,51 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "../../tree-vect.h"
+
+#define N 32
+
+struct t{
+ int k[N];
+ int l;
+};
+
+struct s{
+ char a; /* aligned */
+ char b[N-1]; /* unaligned (offset 1B) */
+ char c[N]; /* aligned (offset NB) */
+ struct t d; /* aligned (offset 2NB) */
+ struct t e; /* unaligned (offset 2N+4N+4 B) */
+};
+
+int main1 ()
+{
+ int i;
+ struct s tmp;
+
+ /* unaligned */
+ for (i = 0; i < N/2; i++)
+ {
+ tmp.b[i] = 5;
+ }
+
+ /* check results: */
+ for (i = 0; i <N/2; i++)
+ {
+ if (tmp.b[i] != 5)
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ return main1 ();
+}
+
+/* { dg-final { scan-tree-dump-times "vectorization not profitable" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31b.c b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31b.c
new file mode 100644
index 00000000000..b3224f943d6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31b.c
@@ -0,0 +1,50 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "../../tree-vect.h"
+
+#define N 32
+
+struct t{
+ int k[N];
+ int l;
+};
+
+struct s{
+ char a; /* aligned */
+ char b[N-1]; /* unaligned (offset 1B) */
+ char c[N]; /* aligned (offset NB) */
+ struct t d; /* aligned (offset 2NB) */
+ struct t e; /* unaligned (offset 2N+4N+4 B) */
+};
+
+int main1 ()
+{
+ int i;
+ struct s tmp;
+
+ /* aligned */
+ for (i = 0; i < N/2; i++)
+ {
+ tmp.c[i] = 6;
+ }
+
+ /* check results: */
+ for (i = 0; i <N/2; i++)
+ {
+ if (tmp.c[i] != 6)
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ return main1 ();
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31c.c b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31c.c
new file mode 100644
index 00000000000..9dcd09aba42
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31c.c
@@ -0,0 +1,50 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "../../tree-vect.h"
+
+#define N 32
+
+struct t{
+ int k[N];
+ int l;
+};
+
+struct s{
+ char a; /* aligned */
+ char b[N-1]; /* unaligned (offset 1B) */
+ char c[N]; /* aligned (offset NB) */
+ struct t d; /* aligned (offset 2NB) */
+ struct t e; /* unaligned (offset 2N+4N+4 B) */
+};
+
+int main1 ()
+{
+ int i;
+ struct s tmp;
+
+ /* aligned */
+ for (i = 0; i < N/2; i++)
+ {
+ tmp.d.k[i] = 7;
+ }
+
+ /* check results: */
+ for (i = 0; i <N/2; i++)
+ {
+ if (tmp.d.k[i] != 7)
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ return main1 ();
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31d.c b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31d.c
new file mode 100644
index 00000000000..736804fd020
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31d.c
@@ -0,0 +1,51 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "../../tree-vect.h"
+
+#define N 32
+
+struct t{
+ int k[N];
+ int l;
+};
+
+struct s{
+ char a; /* aligned */
+ char b[N-1]; /* unaligned (offset 1B) */
+ char c[N]; /* aligned (offset NB) */
+ struct t d; /* aligned (offset 2NB) */
+ struct t e; /* unaligned (offset 2N+4N+4 B) */
+};
+
+int main1 ()
+{
+ int i;
+ struct s tmp;
+
+ /* unaligned */
+ for (i = 0; i < N/2; i++)
+ {
+ tmp.e.k[i] = 8;
+ }
+
+ /* check results: */
+ for (i = 0; i <N/2; i++)
+ {
+ if (tmp.e.k[i] != 8)
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ return main1 ();
+}
+
+/* { dg-final { scan-tree-dump-times "vectorization not profitable" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-33.c b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-33.c
new file mode 100644
index 00000000000..df92ceb9e81
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-33.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "../../tree-vect.h"
+
+#define N 16
+struct test {
+ char ca[N];
+};
+
+extern struct test s;
+
+int main1 ()
+{
+ int i;
+
+ for (i = 0; i < N; i++)
+ {
+ s.ca[i] = 5;
+ }
+
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ if (s.ca[i] != 5)
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ return main1 ();
+}
+
+/* { dg-final { scan-tree-dump-times "vectorization not profitable" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68a.c b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68a.c
new file mode 100644
index 00000000000..d0d40ac338a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68a.c
@@ -0,0 +1,49 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "../../tree-vect.h"
+
+#define N 32
+
+struct s{
+ int m;
+ int n[N][N][N];
+};
+
+struct test1{
+ struct s a; /* array a.n is unaligned */
+ int b;
+ int c;
+ struct s e; /* array e.n is aligned */
+};
+
+int main1 ()
+{
+ int i,j;
+ struct test1 tmp1;
+
+ /* 1. unaligned */
+ for (i = 0; i < N; i++)
+ {
+ tmp1.a.n[1][2][i] = 5;
+ }
+
+ /* check results: */
+ for (i = 0; i <N; i++)
+ {
+ if (tmp1.a.n[1][2][i] != 5)
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ return main1 ();
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68b.c b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68b.c
new file mode 100644
index 00000000000..4e52af8b851
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68b.c
@@ -0,0 +1,49 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "../../tree-vect.h"
+
+#define N 32
+
+struct s{
+ int m;
+ int n[N][N][N];
+};
+
+struct test1{
+ struct s a; /* array a.n is unaligned */
+ int b;
+ int c;
+ struct s e; /* array e.n is aligned */
+};
+
+int main1 ()
+{
+ int i,j;
+ struct test1 tmp1;
+
+ /* 2. aligned */
+ for (i = 3; i < N-1; i++)
+ {
+ tmp1.a.n[1][2][i] = 6;
+ }
+
+ /* check results: */
+ for (i = 3; i < N-1; i++)
+ {
+ if (tmp1.a.n[1][2][i] != 6)
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ return main1 ();
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68c.c b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68c.c
new file mode 100644
index 00000000000..58c5e9fdbe5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68c.c
@@ -0,0 +1,49 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "../../tree-vect.h"
+
+#define N 32
+
+struct s{
+ int m;
+ int n[N][N][N];
+};
+
+struct test1{
+ struct s a; /* array a.n is unaligned */
+ int b;
+ int c;
+ struct s e; /* array e.n is aligned */
+};
+
+int main1 ()
+{
+ int i,j;
+ struct test1 tmp1;
+
+ /* 3. aligned */
+ for (i = 0; i < N; i++)
+ {
+ tmp1.e.n[1][2][i] = 7;
+ }
+
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ if (tmp1.e.n[1][2][i] != 7)
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ return main1 ();
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68d.c b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68d.c
new file mode 100644
index 00000000000..9cec936333a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68d.c
@@ -0,0 +1,50 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "../../tree-vect.h"
+
+#define N 20
+
+struct s{
+ int m;
+ int n[N][N][N];
+};
+
+struct test1{
+ struct s a; /* array a.n is unaligned */
+ int b;
+ int c;
+ struct s e; /* array e.n is aligned */
+};
+
+int main1 ()
+{
+ int i,j;
+ struct test1 tmp1;
+
+ /* 4. unaligned */
+ for (i = 3; i < N-3; i++)
+ {
+ tmp1.e.n[1][2][i] = 8;
+ }
+
+ /* check results: */
+ for (i = 3; i <N-3; i++)
+ {
+ if (tmp1.e.n[1][2][i] != 8)
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ return main1 ();
+}
+
+/* { dg-final { scan-tree-dump-times "vectorization not profitable" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-76a.c b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-76a.c
new file mode 100644
index 00000000000..41fe3aebf90
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-76a.c
@@ -0,0 +1,47 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "../../tree-vect.h"
+
+#define N 8
+#define OFF 4
+
+/* Check handling of accesses for which the "initial condition" -
+ the expression that represents the first location accessed - is
+ more involved than just an ssa_name. */
+
+int ib[N+OFF] __attribute__ ((__aligned__(16))) = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10};
+
+int main1 (int *pib)
+{
+ int i;
+ int ia[N+OFF];
+ int ic[N+OFF] = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10};
+
+ for (i = OFF; i < N; i++)
+ {
+ ia[i] = pib[i - OFF];
+ }
+
+
+ /* check results: */
+ for (i = OFF; i < N; i++)
+ {
+ if (ia[i] != pib[i - OFF])
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 (&ib[OFF]);
+ return 0;
+}
+
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-76b.c b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-76b.c
new file mode 100644
index 00000000000..71f3977b16a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-76b.c
@@ -0,0 +1,47 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "../../tree-vect.h"
+
+#define N 8
+#define OFF 4
+
+/* Check handling of accesses for which the "initial condition" -
+ the expression that represents the first location accessed - is
+ more involved than just an ssa_name. */
+
+int ib[N+OFF] __attribute__ ((__aligned__(16))) = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10};
+
+int main1 (int *pib)
+{
+ int i;
+ int ia[N+OFF];
+ int ic[N+OFF] = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10};
+
+ for (i = OFF; i < N; i++)
+ {
+ pib[i - OFF] = ic[i];
+ }
+
+
+ /* check results: */
+ for (i = OFF; i < N; i++)
+ {
+ if (pib[i - OFF] != ic[i])
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 (&ib[OFF]);
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorization not profitable" 0 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-76c.c b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-76c.c
new file mode 100644
index 00000000000..f6127ba873b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-76c.c
@@ -0,0 +1,47 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "../../tree-vect.h"
+
+#define N 8
+#define OFF 4
+
+/* Check handling of accesses for which the "initial condition" -
+ the expression that represents the first location accessed - is
+ more involved than just an ssa_name. */
+
+int ib[N+OFF] __attribute__ ((__aligned__(16))) = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10};
+
+int main1 (int *pib)
+{
+ int i;
+ int ia[N+OFF];
+ int ic[N+OFF] = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10};
+
+ for (i = OFF; i < N; i++)
+ {
+ ia[i] = ic[i - OFF];
+ }
+
+
+ /* check results: */
+ for (i = OFF; i < N; i++)
+ {
+ if (ia[i] != ic[i - OFF])
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 (&ib[OFF]);
+ return 0;
+}
+
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-iv-9.c b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-iv-9.c
new file mode 100644
index 00000000000..e01b67e48e1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-iv-9.c
@@ -0,0 +1,38 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "../../tree-vect.h"
+
+#define N 26
+int a[N];
+
+int main1 (int X)
+{
+ int s = X;
+ int i;
+
+ /* vectorization of reduction with induction. */
+ for (i = 0; i < N; i++)
+ s += (i + a[i]);
+
+ return s;
+}
+
+int main (void)
+{
+ int s, i;
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ a[i] = 2*i;
+
+ s = main1 (3);
+ if (s != 978)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_int_mult } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target {! vect_int_mult } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/spu/spu-costmodel-vect.exp b/gcc/testsuite/gcc.dg/vect/costmodel/spu/spu-costmodel-vect.exp
new file mode 100644
index 00000000000..fe7439a5463
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/spu/spu-costmodel-vect.exp
@@ -0,0 +1,69 @@
+# Copyright (C) 1997, 2004, 2005, 2006 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# GCC testsuite that uses the `dg.exp' driver.
+
+# Load support procs.
+load_lib gcc-dg.exp
+
+# Exit immediately if this isn't an spu target.
+if { ![istarget spu*-*-*] } then {
+ return
+}
+
+
+# Set up flags used for tests that don't specify options.
+set DEFAULT_VECTCFLAGS ""
+
+# These flags are used for all targets.
+lappend DEFAULT_VECTCFLAGS "-O2" "-ftree-vectorize" "-fvect-cost-model"
+
+# If the target system supports vector instructions, the default action
+# for a test is 'run', otherwise it's 'compile'. Save current default.
+# Executing vector instructions on a system without hardware vector support
+# is also disabled by a call to check_vect, but disabling execution here is
+# more efficient.
+global dg-do-what-default
+set save-dg-do-what-default ${dg-do-what-default}
+
+set dg-do-what-default run
+
+# Initialize `dg'.
+dg-init
+
+lappend DEFAULT_VECTCFLAGS "-fdump-tree-vect-details"
+
+# Main loop.
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/costmodel-pr*.\[cS\]]] \
+ "" $DEFAULT_VECTCFLAGS
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/costmodel-vect-*.\[cS\]]] \
+ "" $DEFAULT_VECTCFLAGS
+
+#### Tests with special options
+global SAVED_DEFAULT_VECTCFLAGS
+set SAVED_DEFAULT_VECTCFLAGS $DEFAULT_VECTCFLAGS
+
+# -ffast-math tests
+set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
+lappend DEFAULT_VECTCFLAGS "-ffast-math"
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/costmodel-fast-math-vect*.\[cS\]]] \
+ "" $DEFAULT_VECTCFLAGS
+
+# Clean up.
+set dg-do-what-default ${save-dg-do-what-default}
+
+# All done.
+dg-finish
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 9ada7f18706..ccb6356ccf9 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -2039,6 +2039,7 @@ proc check_effective_target_vect_int_mult { } {
} else {
set et_vect_int_mult_saved 0
if { [istarget powerpc*-*-*]
+ || [istarget spu-*-*]
|| [istarget i?86-*-*]
|| [istarget x86_64-*-*] } {
set et_vect_int_mult_saved 1
diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c
index 8a6e08624de..193c54961cd 100644
--- a/gcc/tree-vect-transform.c
+++ b/gcc/tree-vect-transform.c
@@ -74,6 +74,34 @@ static void vect_update_inits_of_drs (loop_vec_info, tree);
static int vect_min_worthwhile_factor (enum tree_code);
+static int
+cost_for_stmt (tree stmt) /* Return the scalar cost of STMT, chosen by its STMT_VINFO_TYPE.  */
+{
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+
+ switch (STMT_VINFO_TYPE (stmt_info))
+ {
+ case load_vec_info_type:
+ return TARG_SCALAR_LOAD_COST;
+ case store_vec_info_type:
+ return TARG_SCALAR_STORE_COST;
+ case op_vec_info_type: /* All remaining classified kinds share the generic scalar stmt cost.  */
+ case condition_vec_info_type:
+ case assignment_vec_info_type:
+ case reduc_vec_info_type:
+ case induc_vec_info_type:
+ case type_promotion_vec_info_type:
+ case type_demotion_vec_info_type:
+ case type_conversion_vec_info_type:
+ case call_vec_info_type:
+ return TARG_SCALAR_STMT_COST;
+ case undef_vec_info_type: /* An unclassified stmt is treated as unreachable, like any unknown kind.  */
+ default:
+ gcc_unreachable ();
+ }
+}
+
+
/* Function vect_estimate_min_profitable_iters
Return the number of iterations required for the vector version of the
@@ -138,7 +166,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
if (!STMT_VINFO_RELEVANT_P (stmt_info)
&& !STMT_VINFO_LIVE_P (stmt_info))
continue;
- scalar_single_iter_cost++;
+ scalar_single_iter_cost += cost_for_stmt (stmt);
vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info);
vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
}
@@ -148,7 +176,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
loop.
FORNOW: If we dont know the value of peel_iters for prologue or epilogue
- at compile-time - we assume the worst.
+ at compile-time - we assume it's (vf-1)/2 (the worst would be vf-1).
TODO: Build an expression that represents peel_iters for prologue and
epilogue to be used in a run-time test. */
@@ -157,17 +185,17 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
if (byte_misalign < 0)
{
- peel_iters_prologue = vf - 1;
+ peel_iters_prologue = (vf - 1)/2;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "cost model: "
- "prologue peel iters set conservatively.");
+ "prologue peel iters set to (vf-1)/2.");
/* If peeling for alignment is unknown, loop bound of main loop becomes
unknown. */
- peel_iters_epilogue = vf - 1;
+ peel_iters_epilogue = (vf - 1)/2;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "cost model: "
- "epilogue peel iters set conservatively because "
+ "epilogue peel iters set to (vf-1)/2 because "
"peeling for alignment is unknown .");
}
else
@@ -186,10 +214,10 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
{
- peel_iters_epilogue = vf - 1;
+ peel_iters_epilogue = (vf - 1)/2;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "cost model: "
- "epilogue peel iters set conservatively because "
+ "epilogue peel iters set to (vf-1)/2 because "
"loop iterations are unknown .");
}
else
@@ -229,6 +257,26 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
+ (peel_iters_epilogue * scalar_single_iter_cost);
+ /* Allow targets to add additional (outside-of-loop) costs. FORNOW, the only
+ information we provide for the target is whether testing against the
+ threshold involves a runtime test. */
+ if (targetm.vectorize.builtin_vectorization_cost)
+ {
+ bool runtime_test = false;
+
+ /* If the number of iterations is unknown, or the
+ peeling-for-misalignment amount is unknown, we will have to generate
+ a runtime test to test the loop count against the threshold. */
+ if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+ || (byte_misalign < 0))
+ runtime_test = true;
+ vec_outside_cost +=
+ targetm.vectorize.builtin_vectorization_cost (runtime_test);
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "cost model : Adding target out-of-loop cost = %d",
+ targetm.vectorize.builtin_vectorization_cost (runtime_test));
+ }
+
/* Calculate number of iterations required to make the vector version
profitable, relative to the loop bodies only. The following condition
must hold true: ((SIC*VF)-VIC)*niters > VOC*VF, where
@@ -280,7 +328,14 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
min_profitable_iters < vf ? vf : min_profitable_iters);
}
- return min_profitable_iters < vf ? vf : min_profitable_iters;
+ min_profitable_iters =
+ min_profitable_iters < vf ? vf : min_profitable_iters;
+
+ /* Because the condition we create is:
+ if (niters <= min_profitable_iters)
+ then skip the vectorized loop. */
+ min_profitable_iters--;
+ return min_profitable_iters;
}
@@ -321,7 +376,7 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
/* Add in cost for initial definition. */
- outer_cost += TARG_VEC_STMT_COST;
+ outer_cost += TARG_SCALAR_TO_VEC_COST;
/* Determine cost of epilogue code.
@@ -341,11 +396,13 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
optab = optab_for_tree_code (code, vectype);
/* We have a whole vector shift available. */
- if (!VECTOR_MODE_P (mode)
- || optab->handlers[mode].insn_code == CODE_FOR_nothing)
+ if (VECTOR_MODE_P (mode)
+ && optab->handlers[mode].insn_code != CODE_FOR_nothing
+ && vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
/* Final reduction via vector shifts and the reduction operator. Also
requires scalar extract. */
- outer_cost += ((exact_log2(nelements) * 2 + 1) * TARG_VEC_STMT_COST);
+ outer_cost += ((exact_log2(nelements) * 2) * TARG_VEC_STMT_COST
+ + TARG_VEC_TO_SCALAR_COST);
else
/* Use extracts and reduction op for final reduction. For N elements,
we have N extracts and N-1 reduction ops. */
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index e5957ca4a3d..440bb3dce53 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -326,6 +326,21 @@ typedef struct _stmt_vec_info {
#define TARG_COND_BRANCH_COST 3
#endif
+/* Cost of any scalar operation, excluding load and store. */
+#ifndef TARG_SCALAR_STMT_COST
+#define TARG_SCALAR_STMT_COST 1
+#endif
+
+/* Cost of scalar load. */
+#ifndef TARG_SCALAR_LOAD_COST
+#define TARG_SCALAR_LOAD_COST 1
+#endif
+
+/* Cost of scalar store. */
+#ifndef TARG_SCALAR_STORE_COST
+#define TARG_SCALAR_STORE_COST 1
+#endif
+
/* Cost of any vector operation, excluding load, store or vector to scalar
operation. */
#ifndef TARG_VEC_STMT_COST