Diffstat (limited to 'gcc')
-rw-r--r--   gcc/ChangeLog                                             25
-rw-r--r--   gcc/Makefile.in                                            2
-rw-r--r--   gcc/testsuite/ChangeLog                                    5
-rw-r--r--   gcc/testsuite/gfortran.dg/vect/fast-math-mgrid-resid.f    44
-rw-r--r--   gcc/tree-data-ref.c                                       32
-rw-r--r--   gcc/tree-predcom.c                                        24
-rw-r--r--   gcc/tree-vect-data-refs.c                                 19
-rw-r--r--   gcc/tree-vect-loop-manip.c                                22
8 files changed, 154 insertions, 19 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 1b5a0d3ec26..ca67d10d031 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,10 +1,29 @@
+2010-01-19 Michael Matz <matz@suse.de>
+
+ PR tree-optimization/41783
+ * tree-data-ref.c (toplevel): Include flags.h.
+ (dump_data_dependence_relation): Also dump the inputs if the
+ result will be unknown.
+ (split_constant_offset_1): Look through some conversions.
+ * tree-predcom.c (determine_roots_comp): Restart a new chain if
+ the offset from last element is too large.
+ (ref_at_iteration): Deal also with MISALIGNED_INDIRECT_REF.
+ (reassociate_to_the_same_stmt): Handle vector registers.
+ * tree-vect-data-refs.c (vect_equal_offsets): Handle unary operations
+ (e.g. conversions).
+ * tree-vect-loop-manip.c (vect_gen_niters_for_prolog_loop): Add
+ wide_prolog_niters argument, emit widening instructions.
+ (vect_do_peeling_for_alignment): Adjust caller, use widened
+ variant of the iteration count.
+ * Makefile.in (tree-data-ref.o): Add $(FLAGS_H).
+
2010-01-19 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
- PR target/38697
- * config/arm/neon-testgen.m (emit_automatics): New parameter
+ PR target/38697
+ * config/arm/neon-testgen.m (emit_automatics): New parameter
features. Adjust for Fixed_return_reg feature.
(test_intrinsic): Call emit_automatics with new feature.
- * config/arm/neon.ml: Update copyright years.
+ * config/arm/neon.ml: Update copyright years.
(features): New Fixed_return_reg feature.
(ops): Update feature for Vget_low.
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index d6a57c459e7..7c08ea2a4de 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -2548,7 +2548,7 @@ tree-scalar-evolution.o: tree-scalar-evolution.c $(CONFIG_H) $(SYSTEM_H) \
$(TIMEVAR_H) $(CFGLOOP_H) $(SCEV_H) $(TREE_PASS_H) $(FLAGS_H) \
gt-tree-scalar-evolution.h
tree-data-ref.o: tree-data-ref.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
- $(GGC_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) \
+ $(GGC_H) $(FLAGS_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) \
$(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \
$(TREE_DATA_REF_H) $(TREE_PASS_H) langhooks.h
sese.o: sese.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 8c02782de0e..58666a60a77 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2010-01-19 Michael Matz <matz@suse.de>
+
+ PR tree-optimization/41783
+ * gfortran.dg/vect/fast-math-mgrid-resid.f: New.
+
2010-01-19 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
PR target/38697.
diff --git a/gcc/testsuite/gfortran.dg/vect/fast-math-mgrid-resid.f b/gcc/testsuite/gfortran.dg/vect/fast-math-mgrid-resid.f
new file mode 100644
index 00000000000..2d1844b367a
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/vect/fast-math-mgrid-resid.f
@@ -0,0 +1,44 @@
+! { dg-do compile }
+! { dg-require-effective-target vect_double }
+! { dg-options "-O3 -ffast-math -fpredictive-commoning -ftree-vectorize -fdump-tree-optimized" }
+
+******* RESID COMPUTES THE RESIDUAL: R = V - AU
+*
+* THIS SIMPLE IMPLEMENTATION COSTS 27A + 4M PER RESULT, WHERE
+* A AND M DENOTE THE COSTS OF ADDITION (OR SUBTRACTION) AND
+* MULTIPLICATION, RESPECTIVELY. BY USING SEVERAL TWO-DIMENSIONAL
+* BUFFERS ONE CAN REDUCE THIS COST TO 13A + 4M IN THE GENERAL
+* CASE, OR 10A + 3M WHEN THE COEFFICIENT A(1) IS ZERO.
+*
+ SUBROUTINE RESID(U,V,R,N,A)
+ INTEGER N
+ REAL*8 U(N,N,N),V(N,N,N),R(N,N,N),A(0:3)
+ INTEGER I3, I2, I1
+C
+ DO 600 I3=2,N-1
+ DO 600 I2=2,N-1
+ DO 600 I1=2,N-1
+ 600 R(I1,I2,I3)=V(I1,I2,I3)
+ > -A(0)*( U(I1, I2, I3 ) )
+ > -A(1)*( U(I1-1,I2, I3 ) + U(I1+1,I2, I3 )
+ > + U(I1, I2-1,I3 ) + U(I1, I2+1,I3 )
+ > + U(I1, I2, I3-1) + U(I1, I2, I3+1) )
+ > -A(2)*( U(I1-1,I2-1,I3 ) + U(I1+1,I2-1,I3 )
+ > + U(I1-1,I2+1,I3 ) + U(I1+1,I2+1,I3 )
+ > + U(I1, I2-1,I3-1) + U(I1, I2+1,I3-1)
+ > + U(I1, I2-1,I3+1) + U(I1, I2+1,I3+1)
+ > + U(I1-1,I2, I3-1) + U(I1-1,I2, I3+1)
+ > + U(I1+1,I2, I3-1) + U(I1+1,I2, I3+1) )
+ > -A(3)*( U(I1-1,I2-1,I3-1) + U(I1+1,I2-1,I3-1)
+ > + U(I1-1,I2+1,I3-1) + U(I1+1,I2+1,I3-1)
+ > + U(I1-1,I2-1,I3+1) + U(I1+1,I2-1,I3+1)
+ > + U(I1-1,I2+1,I3+1) + U(I1+1,I2+1,I3+1) )
+C
+ RETURN
+ END
+! we want to check that predictive commoning did something on the
+! vectorized loop, which means we have to have exactly 13 vector
+! additions.
+! { dg-final { scan-tree-dump-times "vect_var\[^\\n\]*\\+ " 13 "optimized" } }
+! { dg-final { cleanup-tree-dump "vect" } }
+! { dg-final { cleanup-tree-dump "optimized" } }
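The expected count of 13 follows from the kernel's own header comment: with two-dimensional buffering the stencil costs 13A + 4M per result instead of the naive 27A + 4M, and once the loop is vectorized each of those additions is carried out on a vect_var temporary. So if predictive commoning fired on the vectorized loop, the optimized dump should contain exactly 13 vector additions, which is what the scan-tree-dump-times directive checks.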
diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
index dbdd3237d5e..9f5a623b2fa 100644
--- a/gcc/tree-data-ref.c
+++ b/gcc/tree-data-ref.c
@@ -79,6 +79,7 @@ along with GCC; see the file COPYING3. If not see
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
+#include "flags.h"
#include "tree.h"
/* These RTL headers are needed for basic-block.h. */
@@ -380,6 +381,19 @@ dump_data_dependence_relation (FILE *outf,
if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
{
+ if (ddr)
+ {
+ dra = DDR_A (ddr);
+ drb = DDR_B (ddr);
+ if (dra)
+ dump_data_reference (outf, dra);
+ else
+ fprintf (outf, " (nil)\n");
+ if (drb)
+ dump_data_reference (outf, drb);
+ else
+ fprintf (outf, " (nil)\n");
+ }
fprintf (outf, " (don't know)\n)\n");
return;
}
@@ -631,6 +645,24 @@ split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
return split_constant_offset_1 (type, var0, subcode, var1, var, off);
}
+ CASE_CONVERT:
+ {
+ /* We must not introduce undefined overflow, and we must not change the value.
+ Hence we're okay if the inner type doesn't overflow to start with
+ (pointer or signed), the outer type also is an integer or pointer
+ and the outer precision is at least as large as the inner. */
+ tree itype = TREE_TYPE (op0);
+ if ((POINTER_TYPE_P (itype)
+ || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
+ && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
+ && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
+ {
+ split_constant_offset (op0, &var0, off);
+ *var = fold_convert (type, var0);
+ return true;
+ }
+ return false;
+ }
default:
return false;
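The new CASE_CONVERT case only looks through a conversion when that can neither change the value nor introduce undefined overflow; this is presumably also why flags.h is now included at the top level, since TYPE_OVERFLOW_UNDEFINED consults flag_wrapv. A hypothetical source-level fragment, not taken from the patch, shows the kind of address computation the splitter can now handle:

    /* Hypothetical example: with a 32-bit signed index i, the address of
       the load below is computed roughly as  a + (sizetype)(i + 1) * 8.
       Signed int overflow is undefined and sizetype is at least as wide
       as int, so split_constant_offset can now peel the +1 out from under
       the widening conversion, leaving a variable part (sizetype)i * 8
       and a constant offset of 8.  */
    double
    load_next (double *a, int i)
    {
      return a[i + 1];
    }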
diff --git a/gcc/tree-predcom.c b/gcc/tree-predcom.c
index 78d45b88364..f31c3920366 100644
--- a/gcc/tree-predcom.c
+++ b/gcc/tree-predcom.c
@@ -1180,6 +1180,7 @@ determine_roots_comp (struct loop *loop,
unsigned i;
dref a;
chain_p chain = NULL;
+ double_int last_ofs = double_int_zero;
/* Invariants are handled specially. */
if (comp->comp_step == RS_INVARIANT)
@@ -1194,13 +1195,20 @@ determine_roots_comp (struct loop *loop,
for (i = 0; VEC_iterate (dref, comp->refs, i, a); i++)
{
- if (!chain || !DR_IS_READ (a->ref))
+ if (!chain || !DR_IS_READ (a->ref)
+ || double_int_ucmp (uhwi_to_double_int (MAX_DISTANCE),
+ double_int_add (a->offset,
+ double_int_neg (last_ofs))) <= 0)
{
if (nontrivial_chain_p (chain))
- VEC_safe_push (chain_p, heap, *chains, chain);
+ {
+ add_looparound_copies (loop, chain);
+ VEC_safe_push (chain_p, heap, *chains, chain);
+ }
else
release_chain (chain);
chain = make_rooted_chain (a);
+ last_ofs = a->offset;
continue;
}
@@ -1338,9 +1346,11 @@ ref_at_iteration (struct loop *loop, tree ref, int iter)
else if (!INDIRECT_REF_P (ref))
return unshare_expr (ref);
- if (TREE_CODE (ref) == INDIRECT_REF)
+ if (INDIRECT_REF_P (ref))
{
- ret = build1 (INDIRECT_REF, TREE_TYPE (ref), NULL_TREE);
+ /* Take care for INDIRECT_REF and MISALIGNED_INDIRECT_REF at
+ the same time. */
+ ret = copy_node (ref);
idx = TREE_OPERAND (ref, 0);
idx_p = &TREE_OPERAND (ret, 0);
}
@@ -2205,11 +2215,17 @@ reassociate_to_the_same_stmt (tree name1, tree name2)
/* Insert the new statement combining NAME1 and NAME2 before S1, and
combine it with the rhs of S1. */
var = create_tmp_var (type, "predreastmp");
+ if (TREE_CODE (type) == COMPLEX_TYPE
+ || TREE_CODE (type) == VECTOR_TYPE)
+ DECL_GIMPLE_REG_P (var) = 1;
add_referenced_var (var);
new_name = make_ssa_name (var, NULL);
new_stmt = gimple_build_assign_with_ops (code, new_name, name1, name2);
var = create_tmp_var (type, "predreastmp");
+ if (TREE_CODE (type) == COMPLEX_TYPE
+ || TREE_CODE (type) == VECTOR_TYPE)
+ DECL_GIMPLE_REG_P (var) = 1;
add_referenced_var (var);
tmp_name = make_ssa_name (var, NULL);
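The determine_roots_comp change caps how far apart the references gathered into one chain may be: once a reference's offset is MAX_DISTANCE or more beyond the current chain root, the finished chain (now including its loop-around copies) is pushed and a fresh chain is rooted at that reference. A made-up C loop, not from the patch or the PR, illustrates two groups of reads that would end up in separate chains:

    /* Illustrative only; the constant 1000 merely stands for any gap
       larger than MAX_DISTANCE.  The reads a[i]/a[i + 1] and
       a[i + 1000]/a[i + 1001] have the same evolution, but the large gap
       makes determine_roots_comp start a second chain instead of
       stretching the first one across the whole distance.  */
    void
    two_groups (double *restrict a, double *restrict b, int n)
    {
      int i;
      for (i = 0; i < n; i++)
        b[i] = a[i] + a[i + 1] + a[i + 1000] + a[i + 1001];
    }

The ref_at_iteration hunk copies the original node so that a MISALIGNED_INDIRECT_REF keeps its tree code (and extra alignment operand) instead of being rebuilt as a plain INDIRECT_REF, and the reassociate_to_the_same_stmt hunks mark complex or vector temporaries with DECL_GIMPLE_REG_P, presumably so they can stay in SSA form rather than being treated as memory.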
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index 89918535372..37ae9b5e3bf 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -294,7 +294,7 @@ vect_update_interleaving_chain (struct data_reference *drb,
static bool
vect_equal_offsets (tree offset1, tree offset2)
{
- bool res0, res1;
+ bool res;
STRIP_NOPS (offset1);
STRIP_NOPS (offset2);
@@ -303,16 +303,19 @@ vect_equal_offsets (tree offset1, tree offset2)
return true;
if (TREE_CODE (offset1) != TREE_CODE (offset2)
- || !BINARY_CLASS_P (offset1)
- || !BINARY_CLASS_P (offset2))
+ || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
return false;
- res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
- TREE_OPERAND (offset2, 0));
- res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
- TREE_OPERAND (offset2, 1));
+ res = vect_equal_offsets (TREE_OPERAND (offset1, 0),
+ TREE_OPERAND (offset2, 0));
- return (res0 && res1);
+ if (!res || !BINARY_CLASS_P (offset1))
+ return res;
+
+ res = vect_equal_offsets (TREE_OPERAND (offset1, 1),
+ TREE_OPERAND (offset2, 1));
+
+ return res;
}
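The rewritten comparison now also descends through unary expression nodes, such as the conversions that the tree-data-ref.c change above can presumably leave embedded in DR_OFFSET, and compares the second operand only when the node really is binary. A small stand-alone analogue of the recursion shape, using a toy expression type rather than GCC trees:

    #include <stdbool.h>

    /* Toy expression node, purely illustrative: LEAF has no operands,
       UNARY uses opnd[0], BINARY uses opnd[0] and opnd[1].  */
    enum kind { LEAF, UNARY, BINARY };

    struct expr
    {
      enum kind kind;
      int code;
      struct expr *opnd[2];
    };

    static bool
    equal_offsets (struct expr *a, struct expr *b)
    {
      bool res;

      if (a == b)
        return true;
      if (a->code != b->code || a->kind == LEAF)
        return false;

      res = equal_offsets (a->opnd[0], b->opnd[0]);
      if (!res || a->kind != BINARY)
        return res;          /* a unary node is decided by operand 0 */

      return equal_offsets (a->opnd[1], b->opnd[1]);
    }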
diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index a0d3ce57cab..f4056b05891 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -1961,7 +1961,8 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
use TYPE_VECTOR_SUBPARTS. */
static tree
-vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
+vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters,
+ tree *wide_prolog_niters)
{
struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
@@ -2045,6 +2046,19 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
add_referenced_var (var);
stmts = NULL;
iters_name = force_gimple_operand (iters, &stmts, false, var);
+ if (types_compatible_p (sizetype, niters_type))
+ *wide_prolog_niters = iters_name;
+ else
+ {
+ gimple_seq seq = NULL;
+ tree wide_iters = fold_convert (sizetype, iters);
+ var = create_tmp_var (sizetype, "prolog_loop_niters");
+ add_referenced_var (var);
+ *wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
+ var);
+ if (seq)
+ gimple_seq_add_seq (&stmts, seq);
+ }
/* Insert stmt on loop preheader edge. */
if (stmts)
@@ -2115,6 +2129,7 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
tree niters_of_prolog_loop, ni_name;
tree n_iters;
+ tree wide_prolog_niters;
struct loop *new_loop;
unsigned int th = 0;
int min_profitable_iters;
@@ -2125,7 +2140,8 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
initialize_original_copy_tables ();
ni_name = vect_build_loop_niters (loop_vinfo, NULL);
- niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
+ niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name,
+ &wide_prolog_niters);
/* Get profitability threshold for vectorized loop. */
@@ -2150,7 +2166,7 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
/* Update the init conditions of the access functions of all data refs. */
- vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
+ vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);
/* After peeling we have to reset scalar evolution analyzer. */
scev_reset ();
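The peeling changes keep the prolog iteration count in two forms: the original value in the loop's narrow niters type for the loop-bound bookkeeping, and a sizetype copy (wide_prolog_niters) that is handed to vect_update_inits_of_drs, so the offset folded into the data-reference init expressions is already pointer-width and cannot be truncated. A rough stand-alone sketch of that step, with plain C types standing in for niters_type and sizetype and names that are illustrative rather than the vectorizer's API:

    #include <stddef.h>

    /* Compute how many scalar iterations must be peeled so that the
       misaligned access becomes aligned (vf is assumed to be a power of
       two), returning the narrow count for the loop bound and storing a
       widened copy for the address arithmetic.  */
    static unsigned int
    prolog_niters (unsigned int elem_misalign, unsigned int vf,
                   size_t *wide_prolog_niters)
    {
      unsigned int iters = (vf - elem_misalign) & (vf - 1);

      *wide_prolog_niters = (size_t) iters;  /* widened copy for DR inits */
      return iters;                          /* narrow copy for the bound */
    }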