author    rakdver <rakdver@138bc75d-0d04-0410-961f-82ee72b054a4>  2007-06-10 20:39:22 +0000
committer rakdver <rakdver@138bc75d-0d04-0410-961f-82ee72b054a4>  2007-06-10 20:39:22 +0000
commit    5b5037b32317ffd475a733d701c4ad7f90592d7b (patch)
tree      aa6fde364b39cc3fab0e7d98faf468b947ade3d2 /gcc/tree-ssa-loop-prefetch.c
parent    5f414da7343d7257f96b6abb4e3b269af30a01b6 (diff)
download  gcc-5b5037b32317ffd475a733d701c4ad7f90592d7b.tar.gz
* tree-data-ref.c (dr_analyze_alias): Handle case smt is NULL.
* tree-predcom.c (mark_virtual_ops_for_renaming): Exported.
* tree-ssa-loop-prefetch.c: Include optabs.h.
(FENCE_FOLLOWING_MOVNT): New macro.
(struct mem_ref): Add independent_p and storent_p fields.
(record_ref): Initialize the new fields.
(gather_memory_references_ref): Return true if the reference
could be analysed.
(gather_memory_references): Check whether all memory accesses
in the loop were recorded.
(should_issue_prefetch_p): Return false for nontemporal stores.
(nontemporal_store_p, mark_nontemporal_store, emit_mfence_after_loop,
may_use_storent_in_loop_p, mark_nontemporal_stores): New functions.
(determine_loop_nest_reuse): Detect independent memory references.
(loop_prefetch_arrays): Call mark_nontemporal_stores.
* tree-flow.h (mark_virtual_ops_for_renaming): Declare.
* Makefile.in (tree-ssa-loop-prefetch.o): Add OPTABS_H dependency.
* config/i386/i386.h (x86_mfence): Declare.
(FENCE_FOLLOWING_MOVNT): Return x86_mfence.
* config/i386/i386.c (x86_mfence): New variable.
(ix86_init_mmx_sse_builtins): Initialize x86_mfence.
* tree-pretty-print.c (dump_generic_node): Mark nontemporal stores.
* optabs.c (init_optabs): Initialize storent_optab.
* optabs.h (enum optab_index): Add OTI_storent.
(storent_optab): Declare.
* genopinit.c (optabs): Add initialization for storent_optab.
* tree.h (MOVE_NONTEMPORAL): New macro.
* expr.c (expand_assignment, store_expr, store_constructor_field,
store_constructor, store_field, expand_expr_real_1): Propagate
nontemporality of the expanded store.
(emit_storent_insn): New function.
* expr.h (expand_assignment, store_expr): Declarations changed.
* function.c (assign_parm_setup_reg): Pass false as nontemporality
to expand_assignment.
* stmt.c (expand_asm_expr): Ditto.
* calls.c (initialize_argument_information): Pass false as
nontemporality to store_expr.
* config/i386/sse.md (storentv4sf, storentv2df, storentv2di,
storentsi): New.
* gcc.dg/tree-ssa/prefetch-7.c: New test.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@125604 138bc75d-0d04-0410-961f-82ee72b054a4
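To make the effect concrete, here is a hypothetical loop of the kind this patch targets (the contents of the new gcc.dg/tree-ssa/prefetch-7.c test are not reproduced on this page), sketched under the assumption that it is compiled with -fprefetch-loop-arrays on a target that provides storent patterns:

/* Hypothetical example, not the actual test.  Each a[i] is written
   once and never reused inside the loop, and the stores are
   independent of every other memory reference in the loop, so the
   pass may emit them as nontemporal stores (e.g. movnti on x86 via
   the new storentsi pattern) and follow the loop with a fence where
   the target requires one.  */
void
clear_array (int *a, long n)
{
  long i;

  for (i = 0; i < n; i++)
    a[i] = 0;
}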
Diffstat (limited to 'gcc/tree-ssa-loop-prefetch.c')
-rw-r--r--  gcc/tree-ssa-loop-prefetch.c | 225
1 file changed, 204 insertions, 21 deletions
diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c
index 64f45a856ed..2424c4a1832 100644
--- a/gcc/tree-ssa-loop-prefetch.c
+++ b/gcc/tree-ssa-loop-prefetch.c
@@ -47,6 +47,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
#include "langhooks.h"
#include "tree-inline.h"
#include "tree-data-ref.h"
+#include "optabs.h"
/* This pass inserts prefetch instructions to optimize cache usage during
accesses to arrays in loops. It processes loops sequentially and:
@@ -177,6 +178,13 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
is accessed several times in a single iteration of the loop. */
#define NONTEMPORAL_FRACTION 16
+/* In case we have to emit a memory fence instruction after the loop that
+ uses nontemporal stores, this defines the builtin to use. */
+
+#ifndef FENCE_FOLLOWING_MOVNT
+#define FENCE_FOLLOWING_MOVNT NULL_TREE
+#endif
+
/* The group of references between which reuse may occur. */
struct mem_ref_group
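Per the ChangeLog above, the i386 port overrides this NULL_TREE default so that a fence follows the movnt stores. A minimal sketch of that override; the i386.h hunk itself is not shown on this page, so the exact wording is assumed:

/* Sketch of the config/i386/i386.h override from this patch.
   x86_mfence is the builtin decl that ix86_init_mmx_sse_builtins
   initializes, so build_function_call_expr can emit a call to it.  */
#define FENCE_FOLLOWING_MOVNT x86_mfence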
@@ -198,7 +206,6 @@ struct mem_ref
tree stmt; /* Statement in which the reference appears. */
tree mem; /* The reference. */
HOST_WIDE_INT delta; /* Constant offset of the reference. */
- bool write_p; /* Is it a write? */
struct mem_ref_group *group; /* The group of references it belongs to. */
unsigned HOST_WIDE_INT prefetch_mod;
/* Prefetch only each PREFETCH_MOD-th
@@ -208,8 +215,13 @@ struct mem_ref
iterations. */
unsigned reuse_distance; /* The amount of data accessed before the first
reuse of this value. */
- bool issue_prefetch_p; /* Should we really issue the prefetch? */
struct mem_ref *next; /* The next reference in the group. */
+ unsigned write_p : 1; /* Is it a write? */
+ unsigned independent_p : 1; /* True if the reference is independent of
+ all other references inside the loop. */
+ unsigned issue_prefetch_p : 1; /* Should we really issue the prefetch? */
+ unsigned storent_p : 1; /* True if we changed the store to a
+ nontemporal one. */
};
/* Dumps information about reference REF to FILE. */
@@ -302,6 +314,8 @@ record_ref (struct mem_ref_group *group, tree stmt, tree mem,
(*aref)->issue_prefetch_p = false;
(*aref)->group = group;
(*aref)->next = NULL;
+ (*aref)->independent_p = false;
+ (*aref)->storent_p = false;
if (dump_file && (dump_flags & TDF_DETAILS))
dump_mem_ref (dump_file, *aref);
@@ -434,9 +448,10 @@ analyze_ref (struct loop *loop, tree *ref_p, tree *base,
}
/* Record a memory reference REF to the list REFS. The reference occurs in
- LOOP in statement STMT and it is write if WRITE_P. */
+ LOOP in statement STMT and it is a write if WRITE_P. Returns true if the
+ reference was recorded, false otherwise. */
-static void
+static bool
gather_memory_references_ref (struct loop *loop, struct mem_ref_group **refs,
tree ref, bool write_p, tree stmt)
{
@@ -445,26 +460,31 @@ gather_memory_references_ref (struct loop *loop, struct mem_ref_group **refs,
struct mem_ref_group *agrp;
if (!analyze_ref (loop, &ref, &base, &step, &delta, stmt))
- return;
+ return false;
/* Now we know that REF = &BASE + STEP * iter + DELTA, where DELTA and STEP
are integer constants. */
agrp = find_or_create_group (refs, base, step);
record_ref (agrp, stmt, ref, delta, write_p);
+
+ return true;
}
-/* Record the suitable memory references in LOOP. */
+/* Record the suitable memory references in LOOP. NO_OTHER_REFS is set to
+ true if there are no other memory references inside the loop. */
static struct mem_ref_group *
-gather_memory_references (struct loop *loop)
+gather_memory_references (struct loop *loop, bool *no_other_refs)
{
basic_block *body = get_loop_body_in_dom_order (loop);
basic_block bb;
unsigned i;
block_stmt_iterator bsi;
- tree stmt, lhs, rhs;
+ tree stmt, lhs, rhs, call;
struct mem_ref_group *refs = NULL;
+ *no_other_refs = true;
+
/* Scan the loop body in order, so that the former references precede the
later ones. */
for (i = 0; i < loop->num_nodes; i++)
@@ -476,16 +496,26 @@ gather_memory_references (struct loop *loop)
for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
{
stmt = bsi_stmt (bsi);
+ call = get_call_expr_in (stmt);
+ if (call && !(call_expr_flags (call) & ECF_CONST))
+ *no_other_refs = false;
+
if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
- continue;
+ {
+ if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
+ *no_other_refs = false;
+ continue;
+ }
lhs = GIMPLE_STMT_OPERAND (stmt, 0);
rhs = GIMPLE_STMT_OPERAND (stmt, 1);
if (REFERENCE_CLASS_P (rhs))
- gather_memory_references_ref (loop, &refs, rhs, false, stmt);
+ *no_other_refs &= gather_memory_references_ref (loop, &refs,
+ rhs, false, stmt);
if (REFERENCE_CLASS_P (lhs))
- gather_memory_references_ref (loop, &refs, lhs, true, stmt);
+ *no_other_refs &= gather_memory_references_ref (loop, &refs,
+ lhs, true, stmt);
}
}
free (body);
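The scan above records both operands of each GIMPLE_MODIFY_STMT and clears *NO_OTHER_REFS as soon as it meets a non-const call or an unanalyzable statement with virtual operands. A hypothetical loop showing both cases (bar and the array names are illustrative only):

/* Hypothetical illustration of the *no_other_refs bookkeeping.  */
for (i = 0; i < n; i++)
  {
    a[i] = b[i];   /* both references analyzed and recorded */
    bar ();        /* non-const call: it may read or write memory,
                      so *no_other_refs becomes false and no store
                      in this loop can be marked independent_p */
  }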
@@ -746,6 +776,10 @@ should_issue_prefetch_p (struct mem_ref *ref)
if (ref->prefetch_before != PREFETCH_ALL)
return false;
+ /* Do not prefetch nontemporal stores. */
+ if (ref->storent_p)
+ return false;
+
return true;
}
@@ -884,6 +918,130 @@ issue_prefetches (struct mem_ref_group *groups,
issue_prefetch_ref (ref, unroll_factor, ahead);
}
+/* Returns true if REF is a memory write for which a nontemporal store insn
+ can be used. */
+
+static bool
+nontemporal_store_p (struct mem_ref *ref)
+{
+ enum machine_mode mode;
+ enum insn_code code;
+
+ /* REF must be a write that is not reused. We require it to be independent
+ of all other memory references in the loop, as the nontemporal stores may
+ be reordered with respect to other memory references. */
+ if (!ref->write_p
+ || !ref->independent_p
+ || ref->reuse_distance < L2_CACHE_SIZE_BYTES)
+ return false;
+
+ /* Check that we have the storent instruction for the mode. */
+ mode = TYPE_MODE (TREE_TYPE (ref->mem));
+ if (mode == BLKmode)
+ return false;
+
+ code = storent_optab->handlers[mode].insn_code;
+ return code != CODE_FOR_nothing;
+}
+
+/* If REF is a nontemporal store, we mark the corresponding modify statement
+ and return true. Otherwise, we return false. */
+
+static bool
+mark_nontemporal_store (struct mem_ref *ref)
+{
+ if (!nontemporal_store_p (ref))
+ return false;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Marked reference %p as a nontemporal store.\n",
+ (void *) ref);
+
+ MOVE_NONTEMPORAL (ref->stmt) = true;
+ ref->storent_p = true;
+
+ return true;
+}
+
+/* Issue a memory fence instruction after LOOP. */
+
+static void
+emit_mfence_after_loop (struct loop *loop)
+{
+ VEC (edge, heap) *exits = get_loop_exit_edges (loop);
+ edge exit;
+ tree call;
+ block_stmt_iterator bsi;
+ unsigned i;
+
+ for (i = 0; VEC_iterate (edge, exits, i, exit); i++)
+ {
+ call = build_function_call_expr (FENCE_FOLLOWING_MOVNT, NULL_TREE);
+
+ if (!single_pred_p (exit->dest)
+ /* If possible, we prefer not to insert the fence on other paths
+ in the CFG. */
+ && !(exit->flags & EDGE_ABNORMAL))
+ split_loop_exit_edge (exit);
+ bsi = bsi_after_labels (exit->dest);
+
+ bsi_insert_before (&bsi, call, BSI_NEW_STMT);
+ mark_virtual_ops_for_renaming (call);
+ }
+
+ VEC_free (edge, heap, exits);
+ update_ssa (TODO_update_ssa_only_virtuals);
+}
+
+/* Returns true if we can use storent in LOOP, false otherwise. */
+
+static bool
+may_use_storent_in_loop_p (struct loop *loop)
+{
+ bool ret = true;
+
+ if (loop->inner != NULL)
+ return false;
+
+ /* If we must issue an mfence insn after using storent, check that there
+ is a suitable place for it at each of the loop exits. */
+ if (FENCE_FOLLOWING_MOVNT != NULL_TREE)
+ {
+ VEC (edge, heap) *exits = get_loop_exit_edges (loop);
+ unsigned i;
+ edge exit;
+
+ for (i = 0; VEC_iterate (edge, exits, i, exit); i++)
+ if ((exit->flags & EDGE_ABNORMAL)
+ && exit->dest == EXIT_BLOCK_PTR)
+ ret = false;
+
+ VEC_free (edge, heap, exits);
+ }
+
+ return ret;
+}
+
+/* Marks nontemporal stores in LOOP. GROUPS contains the description of memory
+ references in the loop. */
+
+static void
+mark_nontemporal_stores (struct loop *loop, struct mem_ref_group *groups)
+{
+ struct mem_ref *ref;
+ bool any = false;
+
+ if (!may_use_storent_in_loop_p (loop))
+ return;
+
+ for (; groups; groups = groups->next)
+ for (ref = groups->refs; ref; ref = ref->next)
+ any |= mark_nontemporal_store (ref);
+
+ if (any && FENCE_FOLLOWING_MOVNT != NULL_TREE)
+ emit_mfence_after_loop (loop);
+}
+
/* Determines whether we can profitably unroll LOOP FACTOR times, and if
this is the case, fill in DESC by the description of number of
iterations. */
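The fence matters because nontemporal stores are weakly ordered: they can become globally visible after later ordinary stores. A hypothetical consumer pattern illustrating the hazard that emit_mfence_after_loop prevents (done and v are illustrative names):

for (i = 0; i < n; i++)
  a[i] = v;        /* may be emitted as movnt* stores */
/* mfence emitted on each loop exit edge by emit_mfence_after_loop */
*done = 1;         /* ordinary store; without the fence, another CPU
                      polling *done could still observe stale a[] */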
@@ -1115,16 +1273,18 @@ self_reuse_distance (data_reference_p dr, unsigned *loop_sizes, unsigned n,
}
/* Determines the distance until the first reuse of each reference in REFS
- in the loop nest of LOOP. */
+ in the loop nest of LOOP. NO_OTHER_REFS is true if there are no other
+ memory references in the loop. */
static void
-determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs)
+determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs,
+ bool no_other_refs)
{
struct loop *nest, *aloop;
VEC (data_reference_p, heap) *datarefs = NULL;
VEC (ddr_p, heap) *dependences = NULL;
struct mem_ref_group *gr;
- struct mem_ref *ref;
+ struct mem_ref *ref, *refb;
VEC (loop_p, heap) *vloops = NULL;
unsigned *loop_data_size;
unsigned i, j, n;
@@ -1188,6 +1348,8 @@ determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs)
dr->aux = ref;
VEC_safe_push (data_reference_p, heap, datarefs, dr);
}
+ else
+ no_other_refs = false;
}
for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
@@ -1196,6 +1358,9 @@ determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs)
ref = dr->aux;
if (ref->reuse_distance > dist)
ref->reuse_distance = dist;
+
+ if (no_other_refs)
+ ref->independent_p = true;
}
compute_all_dependences (datarefs, &dependences, vloops, true);
@@ -1205,12 +1370,18 @@ determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs)
if (DDR_ARE_DEPENDENT (dep) == chrec_known)
continue;
+ ref = DDR_A (dep)->aux;
+ refb = DDR_B (dep)->aux;
+
if (DDR_ARE_DEPENDENT (dep) == chrec_dont_know
|| DDR_NUM_DIST_VECTS (dep) == 0)
{
/* If the dependence cannot be analysed, assume that there might be
a reuse. */
dist = 0;
+
+ ref->independent_p = false;
+ refb->independent_p = false;
}
else
{
@@ -1228,6 +1399,18 @@ determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs)
adist = volume_of_dist_vector (DDR_DIST_VECT (dep, j),
loop_data_size, n);
+ /* If this is a dependence in the innermost loop (i.e., the
+ distances in all superloops are zero) and it is not
+ the trivial self-dependence with distance zero, record that
+ the references are not completely independent. */
+ if (lambda_vector_zerop (DDR_DIST_VECT (dep, j), n - 1)
+ && (ref != refb
+ || DDR_DIST_VECT (dep, j)[n-1] != 0))
+ {
+ ref->independent_p = false;
+ refb->independent_p = false;
+ }
+
/* Ignore accesses closer than
L1_CACHE_SIZE_BYTES / NONTEMPORAL_FRACTION,
so that we use nontemporal prefetches e.g. if single memory
@@ -1241,12 +1424,10 @@ determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs)
}
}
- ref = DDR_A (dep)->aux;
- if (ref->reuse_distance > dist)
- ref->reuse_distance = dist;
- ref = DDR_B (dep)->aux;
if (ref->reuse_distance > dist)
ref->reuse_distance = dist;
+ if (refb->reuse_distance > dist)
+ refb->reuse_distance = dist;
}
free_dependence_relations (dependences);
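A hypothetical loop showing the innermost-loop test above at work: the pair (a[i], a[i-1]) has a distance vector that is zero in all superloops and one in the innermost loop, which is not the trivial self-dependence with distance zero, so both references lose independent_p and the store cannot become nontemporal:

for (i = 1; i < n; i++)
  a[i] = a[i - 1] + 1;   /* distance-1 dependence in the innermost loop */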
@@ -1273,7 +1454,7 @@ loop_prefetch_arrays (struct loop *loop)
unsigned ahead, ninsns, time, unroll_factor;
HOST_WIDE_INT est_niter;
struct tree_niter_desc desc;
- bool unrolled = false;
+ bool unrolled = false, no_other_refs;
if (!maybe_hot_bb_p (loop->header))
{
@@ -1283,7 +1464,7 @@ loop_prefetch_arrays (struct loop *loop)
}
/* Step 1: gather the memory references. */
- refs = gather_memory_references (loop);
+ refs = gather_memory_references (loop, &no_other_refs);
/* Step 2: estimate the reuse effects. */
prune_by_reuse (refs);
@@ -1291,7 +1472,7 @@ loop_prefetch_arrays (struct loop *loop)
if (!anything_to_prefetch_p (refs))
goto fail;
- determine_loop_nest_reuse (loop, refs);
+ determine_loop_nest_reuse (loop, refs, no_other_refs);
/* Step 3: determine the ahead and unroll factor. */
@@ -1313,6 +1494,8 @@ loop_prefetch_arrays (struct loop *loop)
goto fail;
}
+ mark_nontemporal_stores (loop, refs);
+
ninsns = tree_num_loop_insns (loop, &eni_size_weights);
unroll_factor = determine_unroll_factor (loop, refs, ninsns, &desc,
est_niter);