summaryrefslogtreecommitdiff
path: root/gcc/testsuite
diff options
context:
space:
mode:
authordavidxl <davidxl@138bc75d-0d04-0410-961f-82ee72b054a4>2010-07-28 19:13:11 +0000
committerdavidxl <davidxl@138bc75d-0d04-0410-961f-82ee72b054a4>2010-07-28 19:13:11 +0000
commit66368bf4bc95af685d7ce30ef5833f4e6079c81b (patch)
treea565fa6fb23aca1951c811e6fa17593d1d4ca811 /gcc/testsuite
parent8637d6a214e5baf0867c1fa7906f7c3f43c15ac8 (diff)
downloadgcc-66368bf4bc95af685d7ce30ef5833f4e6079c81b.tar.gz
IVOPT performance tuning patch. The main problem is a variant of maximal weight
bipartite matching/assignment problem -- i.e., there is an additional global cost function. The complexity of the algorighm to find the optimial solution > O(n^2). The existing algorithm in gcc tries to find the solution in 3 stages: 1) Find the initial solution set (dynamic programing style) 2) Extend the solution set 3) Prune the soultion set. The problem is that in step 1, the initial set tends to be too large so that the final solution is very likely local optimal. This patch addresses the problem and sees very large SPEC improvements. Another area of problem is that ivopts often creates loop invariant expressions, and such expressions increase register pressure which is not counted. This is addressed in this patch. The third main problem is the profile data is not considered in cost computation The forth problem is that loop invariant comptuation's cost is not properly adjusted. There are more tuning opportuties, namely: 1) Do not check ivs dependency during ivs set pruning (this improves deallII 8% on core2) 2) Unconditionally consider all important candidates in partial set expansion (in addition to the extended solutino based on selected candidates) 3) revisit the two stage initial set computation. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@162653 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/testsuite')
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ivopt_1.c18
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ivopt_2.c17
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ivopt_3.c20
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ivopt_4.c19
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ivopt_infer_1.c25
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ivopt_infer_2.c25
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_1.c24
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_2.c25
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_3.c22
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_4.c25
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ivopts-3.c2
11 files changed, 221 insertions, 1 deletions
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ivopt_1.c b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_1.c
new file mode 100644
index 00000000000..60baa4bd336
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */
+/* { dg-options "-O2 -m64 -fdump-tree-ivopts" } */
+#define TYPE char*
+
+/* Testing that only one induction variable is selected after IVOPT on
+ the given target instead of 3. */
+void foo (int i_width, TYPE dst, TYPE src1, TYPE src2)
+{
+ int x;
+ for( x = 0; x < i_width; x++ )
+ {
+ dst[x] = ( src1[x] + src2[x] + 1 ) >> 1;
+ }
+}
+
+
+/* { dg-final { scan-tree-dump-times "PHI <ivtmp" 1 "ivopts"} } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ivopt_2.c b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_2.c
new file mode 100644
index 00000000000..ba87b502cd3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_2.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */
+/* { dg-options "-O2 -m64 -fdump-tree-ivopts" } */
+
+#define TYPE char*
+
+/* Testing on the given target, only one iv candidate instead of 3. */
+void foo (int i_width, TYPE dst, TYPE src1, TYPE src2)
+{
+ int x;
+ for( x = 0; x < i_width; x++ )
+ {
+ *dst++ = ( *src1++ + *src2++ + 1 ) >> 1;
+ }
+}
+
+/* { dg-final { scan-tree-dump-times "PHI <ivtmp" 1 "ivopts"} } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ivopt_3.c b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_3.c
new file mode 100644
index 00000000000..ae4185a7a71
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_3.c
@@ -0,0 +1,20 @@
+/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */
+/* { dg-options "-O2 -m64 -fdump-tree-ivopts" } */
+
+#define TYPE char*
+
+/* Make sure only 1 iv candidate is selected after IVOPT. */
+void foo (int i_width, char* dst, char* src1, char* src2)
+{
+ int x;
+ for( x = 0; x < i_width; x++ )
+ {
+ *((TYPE)dst) = ( *((TYPE)src1) + *((TYPE)src2) + 1 ) >> 1;
+ dst+=sizeof(TYPE);
+ src1+=sizeof(TYPE);
+ src2+=sizeof(TYPE);
+ }
+}
+
+/* { dg-final { scan-tree-dump-times "PHI <ivtmp" 1 "ivopts"} } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ivopt_4.c b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_4.c
new file mode 100644
index 00000000000..570664c9228
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_4.c
@@ -0,0 +1,19 @@
+/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */
+/* { dg-options "-O2 -m64 -fdump-tree-ivopts" } */
+
+#ifndef TYPE
+#define TYPE char*
+#endif
+
+/* Make sure only 1 iv candidate is selected. */
+void foo (int i_width, TYPE dst, TYPE src1, TYPE src2)
+{
+ TYPE dstn= dst + i_width;
+ for( ; dst < dstn; )
+ {
+ *dst++ = ( *src1++ + *src2++ + 1 ) >> 1;
+ }
+}
+
+/* { dg-final { scan-tree-dump-times "PHI <ivtmp" 1 "ivopts"} } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ivopt_infer_1.c b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_infer_1.c
new file mode 100644
index 00000000000..076f5118e99
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_infer_1.c
@@ -0,0 +1,25 @@
+/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */
+/* { dg-options "-O2 -m64 -fdump-tree-ivopts-details" } */
+
+#ifndef TYPE
+#define TYPE char*
+#endif
+
+int a[400];
+
+/* Testing inferred loop iteration from array -- exit test can be replaced. */
+void foo (int i_width, TYPE dst, TYPE src1, TYPE src2)
+{
+ TYPE dstn= dst + i_width;
+ TYPE dst0 = dst;
+ unsigned long long i = 0;
+ for( ; dst <= dstn; )
+ {
+ dst0[i] = ( src1[i] + src2[i] + 1 +a[i]) >> 1;
+ dst++;
+ i += 16;
+ }
+}
+
+/* { dg-final { scan-tree-dump-times "Replacing" 1 "ivopts"} } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ivopt_infer_2.c b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_infer_2.c
new file mode 100644
index 00000000000..4b7e197dd04
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_infer_2.c
@@ -0,0 +1,25 @@
+/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */
+/* { dg-options "-O2 -m64 -fdump-tree-ivopts-details" } */
+
+#ifndef TYPE
+#define TYPE char*
+#endif
+
+extern int a[];
+
+/* Can not infer loop iteration from array -- exit test can not be replaced. */
+void foo (int i_width, TYPE dst, TYPE src1, TYPE src2)
+{
+ TYPE dstn= dst + i_width;
+ TYPE dst0 = dst;
+ unsigned long long i = 0;
+ for( ; dst <= dstn; )
+ {
+ dst0[i] = ( src1[i] + src2[i] + 1 +a[i]) >> 1;
+ dst++;
+ i += 16;
+ }
+}
+
+/* { dg-final { scan-tree-dump-times "Replacing" 0 "ivopts"} } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_1.c b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_1.c
new file mode 100644
index 00000000000..4e19dfd01e5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */
+/* { dg-options "-O2 -m64 -fdump-tree-ivopts-details" } */
+
+/* The test 'if (p2 > p_limit2)' can be replaced, so iv p2 can be
+ * eliminated. */
+long foo(long* p, long* p2, int N1, int N2)
+{
+ int i = 0;
+ long* p_limit = p + N1;
+ long* p_limit2 = p2 + N2;
+ long s = 0;
+ while (p <= p_limit)
+ {
+ p++;
+ p2++;
+ if (p2 > p_limit2)
+ break;
+ s += (*p);
+ }
+ return s;
+}
+
+/* { dg-final { scan-tree-dump-times "Replacing" 1 "ivopts"} } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_2.c b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_2.c
new file mode 100644
index 00000000000..5e38df67be4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_2.c
@@ -0,0 +1,25 @@
+/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */
+/* { dg-options "-O2 -m64 -fdump-tree-ivopts-details" } */
+
+/* Exit tests 'i < N1' and 'p2 > p_limit2' can be replaced, so
+ * two ivs i and p2 can be eliminate. */
+long foo(long* p, long* p2, int N1, int N2)
+{
+ int i = 0;
+ long* p_limit2 = p2 + N2;
+ long s = 0;
+ while (i < N1)
+ {
+ p++;
+ p2++;
+ i++;
+ if (p2 > p_limit2)
+ break;
+ s += (*p);
+ }
+
+ return s;
+}
+
+/* { dg-final { scan-tree-dump-times "Replacing" 2 "ivopts"} } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_3.c b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_3.c
new file mode 100644
index 00000000000..dc78a43f73f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_3.c
@@ -0,0 +1,22 @@
+/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */
+/* { dg-options "-O2 -m64 -fdump-tree-ivopts-details" } */
+
+/* iv p2 can be eliminated. */
+long foo(long* p, long* p2, int N1, int N2)
+{
+ unsigned long i = 0;
+ long* p_limit2 = p2 + N2;
+ long s = 0;
+ while (i < N1)
+ {
+ p2++;
+ i++;
+ if (p2 > p_limit2)
+ break;
+ s += p[i];
+ }
+ return s;
+}
+
+/* { dg-final { scan-tree-dump-times "Replacing" 1 "ivopts"} } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_4.c b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_4.c
new file mode 100644
index 00000000000..d2aa78d61e3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_4.c
@@ -0,0 +1,25 @@
+
+/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */
+/* { dg-options "-O2 -m64 -fdump-tree-ivopts-details" } */
+
+/* iv i's step 16 so its period is smaller than the max iterations
+ * i.e. replacing if (p2 > p_limit2) with testing of i may result in
+ * overflow. */
+long foo(long* p, long* p2, int N1, int N2)
+{
+ unsigned long i = 0;
+ long* p_limit2 = p2 + N2;
+ long s = 0;
+ while (i < N1)
+ {
+ p2++;
+ i += 16;
+ if (p2 > p_limit2)
+ break;
+ s += p[i];
+ }
+ return s;
+}
+
+/* { dg-final { scan-tree-dump-times "Replacing" 0 "ivopts"} } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ivopts-3.c b/gcc/testsuite/gcc.dg/tree-ssa/ivopts-3.c
index 7c4236b1cc7..202ad1f4e2f 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ivopts-3.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ivopts-3.c
@@ -8,5 +8,5 @@ void main (void)
f2 ();
}
-/* { dg-final { scan-tree-dump-times "!= 0" 4 "ivopts" } } */
+/* { dg-final { scan-tree-dump-times "!= 0" 5 "ivopts" } } */
/* { dg-final { cleanup-tree-dump "ivopts" } } */