From e17cf2c868cffd155cdc64936ec737cfc8339e01 Mon Sep 17 00:00:00 2001 From: gshobaki Date: Thu, 13 Aug 2009 21:37:24 +0000 Subject: 2009-08-13 Ghassan Shobaki * tree-ssa-loop-prefetch.c (prune_ref_by_group_reuse): Enhance probabilistic analysis for long-stride pruning. (compute_miss_rate): New function to compute the probability that two memory references access different cache lines. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@150726 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/tree-ssa-loop-prefetch.c | 79 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 64 insertions(+), 15 deletions(-) (limited to 'gcc/tree-ssa-loop-prefetch.c') diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index b4797076768..60f5a2f9b0d 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -593,6 +593,45 @@ ddown (HOST_WIDE_INT x, unsigned HOST_WIDE_INT by) return (x + by - 1) / by; } +/* Given a CACHE_LINE_SIZE and two inductive memory references + with a common STEP greater than CACHE_LINE_SIZE and an address + difference DELTA, compute the probability that they will fall + in different cache lines. DISTINCT_ITERS is the number of + distinct iterations after which the pattern repeats itself. + ALIGN_UNIT is the unit of alignment in bytes. */ + +static int +compute_miss_rate (unsigned HOST_WIDE_INT cache_line_size, + HOST_WIDE_INT step, HOST_WIDE_INT delta, + unsigned HOST_WIDE_INT distinct_iters, + int align_unit) +{ + unsigned align, iter; + int total_positions, miss_positions, miss_rate; + int address1, address2, cache_line1, cache_line2; + + total_positions = 0; + miss_positions = 0; + + /* Iterate through all possible alignments of the first + memory reference within its cache line. */ + for (align = 0; align < cache_line_size; align += align_unit) + + /* Iterate through all distinct iterations. 
*/ + for (iter = 0; iter < distinct_iters; iter++) + { + address1 = align + step * iter; + address2 = address1 + delta; + cache_line1 = address1 / cache_line_size; + cache_line2 = address2 / cache_line_size; + total_positions += 1; + if (cache_line1 != cache_line2) + miss_positions += 1; + } + miss_rate = 1000 * miss_positions / total_positions; + return miss_rate; +} + /* Prune the prefetch candidate REF using the reuse with BY. If BY_IS_BEFORE is true, BY is before REF in the loop. */ @@ -606,6 +645,11 @@ prune_ref_by_group_reuse (struct mem_ref *ref, struct mem_ref *by, HOST_WIDE_INT delta = delta_b - delta_r; HOST_WIDE_INT hit_from; unsigned HOST_WIDE_INT prefetch_before, prefetch_block; + int miss_rate; + HOST_WIDE_INT reduced_step; + unsigned HOST_WIDE_INT reduced_prefetch_block; + tree ref_type; + int align_unit; if (delta == 0) { @@ -667,25 +711,29 @@ prune_ref_by_group_reuse (struct mem_ref *ref, struct mem_ref *by, return; } - /* A more complicated case. First let us ensure that size of cache line - and step are coprime (here we assume that PREFETCH_BLOCK is a power - of two. */ + /* A more complicated case with step > prefetch_block. First reduce + the ratio between the step and the cache line size to its simplest + terms. The resulting denominator will then represent the number of + distinct iterations after which each address will go back to its + initial location within the cache line. This computation assumes + that PREFETCH_BLOCK is a power of two. */ prefetch_block = PREFETCH_BLOCK; - while ((step & 1) == 0 - && prefetch_block > 1) + reduced_prefetch_block = prefetch_block; + reduced_step = step; + while ((reduced_step & 1) == 0 + && reduced_prefetch_block > 1) { - step >>= 1; - prefetch_block >>= 1; - delta >>= 1; + reduced_step >>= 1; + reduced_prefetch_block >>= 1; } - /* Now step > prefetch_block, and step and prefetch_block are coprime. - Determine the probability that the accesses hit the same cache line. 
*/ - prefetch_before = delta / step; delta %= step; - if ((unsigned HOST_WIDE_INT) delta - <= (prefetch_block * ACCEPTABLE_MISS_RATE / 1000)) + ref_type = TREE_TYPE (ref->mem); + align_unit = TYPE_ALIGN (ref_type) / 8; + miss_rate = compute_miss_rate(prefetch_block, step, delta, + reduced_prefetch_block, align_unit); + if (miss_rate <= ACCEPTABLE_MISS_RATE) { if (prefetch_before < ref->prefetch_before) ref->prefetch_before = prefetch_before; @@ -696,8 +744,9 @@ prune_ref_by_group_reuse (struct mem_ref *ref, struct mem_ref *by, /* Try also the following iteration. */ prefetch_before++; delta = step - delta; - if ((unsigned HOST_WIDE_INT) delta - <= (prefetch_block * ACCEPTABLE_MISS_RATE / 1000)) + miss_rate = compute_miss_rate(prefetch_block, step, delta, + reduced_prefetch_block, align_unit); + if (miss_rate <= ACCEPTABLE_MISS_RATE) { if (prefetch_before < ref->prefetch_before) ref->prefetch_before = prefetch_before; -- cgit v1.2.1 From 48e1416a24d50cacbb2a5e06a9ee61dd8cbee313 Mon Sep 17 00:00:00 2001 From: hjl Date: Wed, 25 Nov 2009 10:55:54 +0000 Subject: Remove trailing white spaces. 2009-11-25 H.J. Lu * alias.c: Remove trailing white spaces. * alloc-pool.c: Likewise. * alloc-pool.h: Likewise. * attribs.c: Likewise. * auto-inc-dec.c: Likewise. * basic-block.h: Likewise. * bb-reorder.c: Likewise. * bt-load.c: Likewise. * builtins.c: Likewise. * builtins.def: Likewise. * c-common.c: Likewise. * c-common.h: Likewise. * c-cppbuiltin.c: Likewise. * c-decl.c: Likewise. * c-format.c: Likewise. * c-lex.c: Likewise. * c-omp.c: Likewise. * c-opts.c: Likewise. * c-parser.c: Likewise. * c-pretty-print.c: Likewise. * c-tree.h: Likewise. * c-typeck.c: Likewise. * caller-save.c: Likewise. * calls.c: Likewise. * cfg.c: Likewise. * cfganal.c: Likewise. * cfgexpand.c: Likewise. * cfghooks.c: Likewise. * cfghooks.h: Likewise. * cfglayout.c: Likewise. * cfgloop.c: Likewise. * cfgloop.h: Likewise. * cfgloopmanip.c: Likewise. * cfgrtl.c: Likewise. * cgraph.c: Likewise. 
* cgraph.h: Likewise. * cgraphbuild.c: Likewise. * cgraphunit.c: Likewise. * cif-code.def: Likewise. * collect2.c: Likewise. * combine.c: Likewise. * convert.c: Likewise. * coverage.c: Likewise. * crtstuff.c: Likewise. * cse.c: Likewise. * cselib.c: Likewise. * dbgcnt.c: Likewise. * dbgcnt.def: Likewise. * dbgcnt.h: Likewise. * dbxout.c: Likewise. * dce.c: Likewise. * ddg.c: Likewise. * ddg.h: Likewise. * defaults.h: Likewise. * df-byte-scan.c: Likewise. * df-core.c: Likewise. * df-problems.c: Likewise. * df-scan.c: Likewise. * df.h: Likewise. * dfp.c: Likewise. * diagnostic.c: Likewise. * diagnostic.h: Likewise. * dominance.c: Likewise. * domwalk.c: Likewise. * double-int.c: Likewise. * double-int.h: Likewise. * dse.c: Likewise. * dwarf2asm.c: Likewise. * dwarf2asm.h: Likewise. * dwarf2out.c: Likewise. * ebitmap.c: Likewise. * ebitmap.h: Likewise. * emit-rtl.c: Likewise. * et-forest.c: Likewise. * except.c: Likewise. * except.h: Likewise. * expmed.c: Likewise. * expr.c: Likewise. * expr.h: Likewise. * final.c: Likewise. * flags.h: Likewise. * fold-const.c: Likewise. * function.c: Likewise. * function.h: Likewise. * fwprop.c: Likewise. * gcc.c: Likewise. * gcov-dump.c: Likewise. * gcov-io.c: Likewise. * gcov-io.h: Likewise. * gcov.c: Likewise. * gcse.c: Likewise. * genattr.c: Likewise. * genattrtab.c: Likewise. * genautomata.c: Likewise. * genchecksum.c: Likewise. * genconfig.c: Likewise. * genflags.c: Likewise. * gengtype-parse.c: Likewise. * gengtype.c: Likewise. * gengtype.h: Likewise. * genmddeps.c: Likewise. * genmodes.c: Likewise. * genopinit.c: Likewise. * genpreds.c: Likewise. * gensupport.c: Likewise. * ggc-common.c: Likewise. * ggc-page.c: Likewise. * ggc-zone.c: Likewise. * ggc.h: Likewise. * gimple-iterator.c: Likewise. * gimple-low.c: Likewise. * gimple-pretty-print.c: Likewise. * gimple.c: Likewise. * gimple.def: Likewise. * gimple.h: Likewise. * gimplify.c: Likewise. * graphds.c: Likewise. * graphite-clast-to-gimple.c: Likewise. 
* gthr-nks.h: Likewise. * gthr-posix.c: Likewise. * gthr-posix.h: Likewise. * gthr-posix95.h: Likewise. * gthr-single.h: Likewise. * gthr-tpf.h: Likewise. * gthr-vxworks.h: Likewise. * gthr.h: Likewise. * haifa-sched.c: Likewise. * hard-reg-set.h: Likewise. * hooks.c: Likewise. * hooks.h: Likewise. * hosthooks.h: Likewise. * hwint.h: Likewise. * ifcvt.c: Likewise. * incpath.c: Likewise. * init-regs.c: Likewise. * integrate.c: Likewise. * ipa-cp.c: Likewise. * ipa-inline.c: Likewise. * ipa-prop.c: Likewise. * ipa-pure-const.c: Likewise. * ipa-reference.c: Likewise. * ipa-struct-reorg.c: Likewise. * ipa-struct-reorg.h: Likewise. * ipa-type-escape.c: Likewise. * ipa-type-escape.h: Likewise. * ipa-utils.c: Likewise. * ipa-utils.h: Likewise. * ipa.c: Likewise. * ira-build.c: Likewise. * ira-color.c: Likewise. * ira-conflicts.c: Likewise. * ira-costs.c: Likewise. * ira-emit.c: Likewise. * ira-int.h: Likewise. * ira-lives.c: Likewise. * ira.c: Likewise. * jump.c: Likewise. * lambda-code.c: Likewise. * lambda-mat.c: Likewise. * lambda-trans.c: Likewise. * lambda.h: Likewise. * langhooks.c: Likewise. * lcm.c: Likewise. * libgcov.c: Likewise. * lists.c: Likewise. * loop-doloop.c: Likewise. * loop-init.c: Likewise. * loop-invariant.c: Likewise. * loop-iv.c: Likewise. * loop-unroll.c: Likewise. * lower-subreg.c: Likewise. * lto-cgraph.c: Likewise. * lto-compress.c: Likewise. * lto-opts.c: Likewise. * lto-section-in.c: Likewise. * lto-section-out.c: Likewise. * lto-streamer-in.c: Likewise. * lto-streamer-out.c: Likewise. * lto-streamer.c: Likewise. * lto-streamer.h: Likewise. * lto-symtab.c: Likewise. * lto-wpa-fixup.c: Likewise. * matrix-reorg.c: Likewise. * mcf.c: Likewise. * mode-switching.c: Likewise. * modulo-sched.c: Likewise. * omega.c: Likewise. * omega.h: Likewise. * omp-low.c: Likewise. * optabs.c: Likewise. * optabs.h: Likewise. * opts-common.c: Likewise. * opts.c: Likewise. * params.def: Likewise. * params.h: Likewise. * passes.c: Likewise. * plugin.c: Likewise. 
* postreload-gcse.c: Likewise. * postreload.c: Likewise. * predict.c: Likewise. * predict.def: Likewise. * pretty-print.c: Likewise. * pretty-print.h: Likewise. * print-rtl.c: Likewise. * print-tree.c: Likewise. * profile.c: Likewise. * read-rtl.c: Likewise. * real.c: Likewise. * recog.c: Likewise. * reg-stack.c: Likewise. * regcprop.c: Likewise. * reginfo.c: Likewise. * regmove.c: Likewise. * regrename.c: Likewise. * regs.h: Likewise. * regstat.c: Likewise. * reload.c: Likewise. * reload1.c: Likewise. * resource.c: Likewise. * rtl.c: Likewise. * rtl.def: Likewise. * rtl.h: Likewise. * rtlanal.c: Likewise. * sbitmap.c: Likewise. * sched-deps.c: Likewise. * sched-ebb.c: Likewise. * sched-int.h: Likewise. * sched-rgn.c: Likewise. * sched-vis.c: Likewise. * sdbout.c: Likewise. * sel-sched-dump.c: Likewise. * sel-sched-dump.h: Likewise. * sel-sched-ir.c: Likewise. * sel-sched-ir.h: Likewise. * sel-sched.c: Likewise. * sel-sched.h: Likewise. * sese.c: Likewise. * sese.h: Likewise. * simplify-rtx.c: Likewise. * stack-ptr-mod.c: Likewise. * stmt.c: Likewise. * stor-layout.c: Likewise. * store-motion.c: Likewise. * stringpool.c: Likewise. * stub-objc.c: Likewise. * sync-builtins.def: Likewise. * target-def.h: Likewise. * target.h: Likewise. * targhooks.c: Likewise. * targhooks.h: Likewise. * timevar.c: Likewise. * tlink.c: Likewise. * toplev.c: Likewise. * toplev.h: Likewise. * tracer.c: Likewise. * tree-affine.c: Likewise. * tree-affine.h: Likewise. * tree-browser.def: Likewise. * tree-call-cdce.c: Likewise. * tree-cfg.c: Likewise. * tree-cfgcleanup.c: Likewise. * tree-chrec.c: Likewise. * tree-chrec.h: Likewise. * tree-complex.c: Likewise. * tree-data-ref.c: Likewise. * tree-data-ref.h: Likewise. * tree-dfa.c: Likewise. * tree-dump.c: Likewise. * tree-dump.h: Likewise. * tree-eh.c: Likewise. * tree-flow-inline.h: Likewise. * tree-flow.h: Likewise. * tree-if-conv.c: Likewise. * tree-inline.c: Likewise. * tree-into-ssa.c: Likewise. * tree-loop-distribution.c: Likewise. 
* tree-loop-linear.c: Likewise. * tree-mudflap.c: Likewise. * tree-nested.c: Likewise. * tree-nomudflap.c: Likewise. * tree-nrv.c: Likewise. * tree-object-size.c: Likewise. * tree-optimize.c: Likewise. * tree-outof-ssa.c: Likewise. * tree-parloops.c: Likewise. * tree-pass.h: Likewise. * tree-phinodes.c: Likewise. * tree-predcom.c: Likewise. * tree-pretty-print.c: Likewise. * tree-profile.c: Likewise. * tree-scalar-evolution.c: Likewise. * tree-ssa-address.c: Likewise. * tree-ssa-alias.c: Likewise. * tree-ssa-ccp.c: Likewise. * tree-ssa-coalesce.c: Likewise. * tree-ssa-copy.c: Likewise. * tree-ssa-copyrename.c: Likewise. * tree-ssa-dce.c: Likewise. * tree-ssa-dom.c: Likewise. * tree-ssa-dse.c: Likewise. * tree-ssa-forwprop.c: Likewise. * tree-ssa-ifcombine.c: Likewise. * tree-ssa-live.c: Likewise. * tree-ssa-live.h: Likewise. * tree-ssa-loop-ch.c: Likewise. * tree-ssa-loop-im.c: Likewise. * tree-ssa-loop-ivcanon.c: Likewise. * tree-ssa-loop-ivopts.c: Likewise. * tree-ssa-loop-manip.c: Likewise. * tree-ssa-loop-niter.c: Likewise. * tree-ssa-loop-prefetch.c: Likewise. * tree-ssa-loop-unswitch.c: Likewise. * tree-ssa-loop.c: Likewise. * tree-ssa-math-opts.c: Likewise. * tree-ssa-operands.c: Likewise. * tree-ssa-operands.h: Likewise. * tree-ssa-phiopt.c: Likewise. * tree-ssa-phiprop.c: Likewise. * tree-ssa-pre.c: Likewise. * tree-ssa-propagate.c: Likewise. * tree-ssa-reassoc.c: Likewise. * tree-ssa-sccvn.c: Likewise. * tree-ssa-sink.c: Likewise. * tree-ssa-structalias.c: Likewise. * tree-ssa-ter.c: Likewise. * tree-ssa-threadedge.c: Likewise. * tree-ssa-threadupdate.c: Likewise. * tree-ssa-uncprop.c: Likewise. * tree-ssa.c: Likewise. * tree-ssanames.c: Likewise. * tree-switch-conversion.c: Likewise. * tree-tailcall.c: Likewise. * tree-vect-data-refs.c: Likewise. * tree-vect-generic.c: Likewise. * tree-vect-loop-manip.c: Likewise. * tree-vect-loop.c: Likewise. * tree-vect-patterns.c: Likewise. * tree-vect-slp.c: Likewise. * tree-vect-stmts.c: Likewise. 
* tree-vectorizer.c: Likewise. * tree-vectorizer.h: Likewise. * tree-vrp.c: Likewise. * tree.c: Likewise. * tree.def: Likewise. * tree.h: Likewise. * treestruct.def: Likewise. * unwind-compat.c: Likewise. * unwind-dw2-fde-glibc.c: Likewise. * unwind-dw2.c: Likewise. * value-prof.c: Likewise. * value-prof.h: Likewise. * var-tracking.c: Likewise. * varasm.c: Likewise. * varpool.c: Likewise. * vec.c: Likewise. * vec.h: Likewise. * vmsdbgout.c: Likewise. * web.c: Likewise. * xcoffout.c: Likewise. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@154645 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/tree-ssa-loop-prefetch.c | 112 +++++++++++++++++++++---------------------- 1 file changed, 56 insertions(+), 56 deletions(-) (limited to 'gcc/tree-ssa-loop-prefetch.c') diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index 60f5a2f9b0d..2769c04ce0b 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -1,18 +1,18 @@ /* Array prefetching. Copyright (C) 2005, 2007, 2008 Free Software Foundation, Inc. - + This file is part of GCC. - + GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. - + GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - + You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see . */ @@ -99,12 +99,12 @@ along with GCC; see the file COPYING3. If not see while still within this bound (starting with those with lowest prefetch_mod, since they are responsible for most of the cache misses). 
- + 5) We unroll and peel loops so that we are able to satisfy PREFETCH_MOD and PREFETCH_BEFORE requirements (within some bounds), and to avoid prefetching nonaccessed memory. TODO -- actually implement peeling. - + 6) We actually emit the prefetch instructions. ??? Perhaps emit the prefetch instructions with guards in cases where 5) was not sufficient to satisfy the constraints? @@ -114,18 +114,18 @@ along with GCC; see the file COPYING3. If not see model has two heuristcs: 1. A heuristic that determines whether the given loop has enough CPU ops that can be overlapped with cache missing memory ops. - If not, the loop won't benefit from prefetching. This is implemented - by requirung the ratio between the instruction count and the mem ref + If not, the loop won't benefit from prefetching. This is implemented + by requirung the ratio between the instruction count and the mem ref count to be above a certain minimum. 2. A heuristic that disables prefetching in a loop with an unknown trip - count if the prefetching cost is above a certain limit. The relative + count if the prefetching cost is above a certain limit. The relative prefetching cost is estimated by taking the ratio between the prefetch count and the total intruction count (this models the I-cache cost). The limits used in these heuristics are defined as parameters with - reasonable default values. Machine-specific default values will be + reasonable default values. Machine-specific default values will be added later. 
- + Some other TODO: -- write and use more general reuse analysis (that could be also used in other cache aimed loop optimizations) @@ -451,7 +451,7 @@ analyze_ref (struct loop *loop, tree *ref_p, tree *base, off = DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)); bit_offset = TREE_INT_CST_LOW (off); gcc_assert (bit_offset % BITS_PER_UNIT == 0); - + *delta += bit_offset / BITS_PER_UNIT; } @@ -593,15 +593,15 @@ ddown (HOST_WIDE_INT x, unsigned HOST_WIDE_INT by) return (x + by - 1) / by; } -/* Given a CACHE_LINE_SIZE and two inductive memory references - with a common STEP greater than CACHE_LINE_SIZE and an address - difference DELTA, compute the probability that they will fall - in different cache lines. DISTINCT_ITERS is the number of - distinct iterations after which the pattern repeats itself. +/* Given a CACHE_LINE_SIZE and two inductive memory references + with a common STEP greater than CACHE_LINE_SIZE and an address + difference DELTA, compute the probability that they will fall + in different cache lines. DISTINCT_ITERS is the number of + distinct iterations after which the pattern repeats itself. ALIGN_UNIT is the unit of alignment in bytes. */ static int -compute_miss_rate (unsigned HOST_WIDE_INT cache_line_size, +compute_miss_rate (unsigned HOST_WIDE_INT cache_line_size, HOST_WIDE_INT step, HOST_WIDE_INT delta, unsigned HOST_WIDE_INT distinct_iters, int align_unit) @@ -612,7 +612,7 @@ compute_miss_rate (unsigned HOST_WIDE_INT cache_line_size, total_positions = 0; miss_positions = 0; - + /* Iterate through all possible alignments of the first memory reference within its cache line. */ for (align = 0; align < cache_line_size; align += align_unit) @@ -657,7 +657,7 @@ prune_ref_by_group_reuse (struct mem_ref *ref, struct mem_ref *by, former. */ if (by_is_before) ref->prefetch_before = 0; - + return; } @@ -711,11 +711,11 @@ prune_ref_by_group_reuse (struct mem_ref *ref, struct mem_ref *by, return; } - /* A more complicated case with step > prefetch_block. 
First reduce + /* A more complicated case with step > prefetch_block. First reduce the ratio between the step and the cache line size to its simplest - terms. The resulting denominator will then represent the number of - distinct iterations after which each address will go back to its - initial location within the cache line. This computation assumes + terms. The resulting denominator will then represent the number of + distinct iterations after which each address will go back to its + initial location within the cache line. This computation assumes that PREFETCH_BLOCK is a power of two. */ prefetch_block = PREFETCH_BLOCK; reduced_prefetch_block = prefetch_block; @@ -731,7 +731,7 @@ prune_ref_by_group_reuse (struct mem_ref *ref, struct mem_ref *by, delta %= step; ref_type = TREE_TYPE (ref->mem); align_unit = TYPE_ALIGN (ref_type) / 8; - miss_rate = compute_miss_rate(prefetch_block, step, delta, + miss_rate = compute_miss_rate(prefetch_block, step, delta, reduced_prefetch_block, align_unit); if (miss_rate <= ACCEPTABLE_MISS_RATE) { @@ -744,9 +744,9 @@ prune_ref_by_group_reuse (struct mem_ref *ref, struct mem_ref *by, /* Try also the following iteration. */ prefetch_before++; delta = step - delta; - miss_rate = compute_miss_rate(prefetch_block, step, delta, + miss_rate = compute_miss_rate(prefetch_block, step, delta, reduced_prefetch_block, align_unit); - if (miss_rate <= ACCEPTABLE_MISS_RATE) + if (miss_rate <= ACCEPTABLE_MISS_RATE) { if (prefetch_before < ref->prefetch_before) ref->prefetch_before = prefetch_before; @@ -1314,7 +1314,7 @@ self_reuse_distance (data_reference_p dr, unsigned *loop_sizes, unsigned n, know its stride. 
*/ while (handled_component_p (ref) && TREE_CODE (ref) != ARRAY_REF) ref = TREE_OPERAND (ref, 0); - + if (TREE_CODE (ref) == ARRAY_REF) { stride = TYPE_SIZE_UNIT (TREE_TYPE (ref)); @@ -1457,7 +1457,7 @@ determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs, /* If the dependence cannot be analyzed, assume that there might be a reuse. */ dist = 0; - + ref->independent_p = false; refb->independent_p = false; } @@ -1525,14 +1525,14 @@ determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs, /* Do a cost-benefit analysis to determine if prefetching is profitable for the current loop given the following parameters: AHEAD: the iteration ahead distance, - EST_NITER: the estimated trip count, + EST_NITER: the estimated trip count, NINSNS: estimated number of instructions in the loop, PREFETCH_COUNT: an estimate of the number of prefetches MEM_REF_COUNT: total number of memory references in the loop. */ -static bool -is_loop_prefetching_profitable (unsigned ahead, HOST_WIDE_INT est_niter, - unsigned ninsns, unsigned prefetch_count, +static bool +is_loop_prefetching_profitable (unsigned ahead, HOST_WIDE_INT est_niter, + unsigned ninsns, unsigned prefetch_count, unsigned mem_ref_count) { int insn_to_mem_ratio, insn_to_prefetch_ratio; @@ -1540,41 +1540,41 @@ is_loop_prefetching_profitable (unsigned ahead, HOST_WIDE_INT est_niter, if (mem_ref_count == 0) return false; - /* Prefetching improves performance by overlapping cache missing - memory accesses with CPU operations. If the loop does not have - enough CPU operations to overlap with memory operations, prefetching - won't give a significant benefit. One approximate way of checking - this is to require the ratio of instructions to memory references to + /* Prefetching improves performance by overlapping cache missing + memory accesses with CPU operations. If the loop does not have + enough CPU operations to overlap with memory operations, prefetching + won't give a significant benefit. 
One approximate way of checking + this is to require the ratio of instructions to memory references to be above a certain limit. This approximation works well in practice. TODO: Implement a more precise computation by estimating the time for each CPU or memory op in the loop. Time estimates for memory ops should account for cache misses. */ - insn_to_mem_ratio = ninsns / mem_ref_count; + insn_to_mem_ratio = ninsns / mem_ref_count; if (insn_to_mem_ratio < PREFETCH_MIN_INSN_TO_MEM_RATIO) return false; /* Profitability of prefetching is highly dependent on the trip count. - For a given AHEAD distance, the first AHEAD iterations do not benefit - from prefetching, and the last AHEAD iterations execute useless + For a given AHEAD distance, the first AHEAD iterations do not benefit + from prefetching, and the last AHEAD iterations execute useless prefetches. So, if the trip count is not large enough relative to AHEAD, prefetching may cause serious performance degradation. To avoid this - problem when the trip count is not known at compile time, we + problem when the trip count is not known at compile time, we conservatively skip loops with high prefetching costs. For now, only - the I-cache cost is considered. The relative I-cache cost is estimated + the I-cache cost is considered. The relative I-cache cost is estimated by taking the ratio between the number of prefetches and the total number of instructions. Since we are using integer arithmetic, we - compute the reciprocal of this ratio. + compute the reciprocal of this ratio. TODO: Account for loop unrolling, which may reduce the costs of - shorter stride prefetches. Note that not accounting for loop + shorter stride prefetches. Note that not accounting for loop unrolling over-estimates the cost and hence gives more conservative results. 
*/ if (est_niter < 0) { - insn_to_prefetch_ratio = ninsns / prefetch_count; + insn_to_prefetch_ratio = ninsns / prefetch_count; return insn_to_prefetch_ratio >= MIN_INSN_TO_PREFETCH_RATIO; } - + if (est_niter <= (HOST_WIDE_INT) ahead) { if (dump_file && (dump_flags & TDF_DETAILS)) @@ -1626,19 +1626,19 @@ loop_prefetch_arrays (struct loop *loop) the loop body. */ time = tree_num_loop_insns (loop, &eni_time_weights); ahead = (PREFETCH_LATENCY + time - 1) / time; - est_niter = estimated_loop_iterations_int (loop, false); + est_niter = estimated_loop_iterations_int (loop, false); ninsns = tree_num_loop_insns (loop, &eni_size_weights); unroll_factor = determine_unroll_factor (loop, refs, ninsns, &desc, est_niter); if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Ahead %d, unroll factor %d, trip count " + fprintf (dump_file, "Ahead %d, unroll factor %d, trip count " HOST_WIDE_INT_PRINT_DEC "\n" - "insn count %d, mem ref count %d, prefetch count %d\n", - ahead, unroll_factor, est_niter, - ninsns, mem_ref_count, prefetch_count); + "insn count %d, mem ref count %d, prefetch count %d\n", + ahead, unroll_factor, est_niter, + ninsns, mem_ref_count, prefetch_count); - if (!is_loop_prefetching_profitable (ahead, est_niter, ninsns, + if (!is_loop_prefetching_profitable (ahead, est_niter, ninsns, prefetch_count, mem_ref_count)) goto fail; @@ -1693,10 +1693,10 @@ tree_ssa_prefetch_arrays (void) fprintf (dump_file, " L1 cache size: %d lines, %d kB\n", L1_CACHE_SIZE_BYTES / L1_CACHE_LINE_SIZE, L1_CACHE_SIZE); fprintf (dump_file, " L1 cache line size: %d\n", L1_CACHE_LINE_SIZE); - fprintf (dump_file, " L2 cache size: %d kB\n", L2_CACHE_SIZE); - fprintf (dump_file, " min insn-to-prefetch ratio: %d \n", + fprintf (dump_file, " L2 cache size: %d kB\n", L2_CACHE_SIZE); + fprintf (dump_file, " min insn-to-prefetch ratio: %d \n", MIN_INSN_TO_PREFETCH_RATIO); - fprintf (dump_file, " min insn-to-mem ratio: %d \n", + fprintf (dump_file, " min insn-to-mem ratio: %d \n", 
PREFETCH_MIN_INSN_TO_MEM_RATIO); fprintf (dump_file, "\n"); } -- cgit v1.2.1 From 3f9da5596a39ec1c557502bf3f559aaeaf48fc0d Mon Sep 17 00:00:00 2001 From: steven Date: Fri, 30 Apr 2010 11:58:49 +0000 Subject: gcc/ChangeLog: * toplev.c: Include varray.h for statistics dumping. * tree.h: Do not declare varray_head_tag. * tree-into-ssa.c, tree-ssa-uninit.c, tree-phinodes.c, omega.c, regs.h, lto-cgraph.c, tree-ssa-loop-ivopts.c, tree-nomudflap.c, c-objc-common.c, lto-streamer-out.c, tree-ssa-propagate.c, gimple-low.c, c-semantics.c, dwarf2out.c, lto-streamer-in.c, lto-section-in.c, alias.c, tree-if-conv.c, gimplify.c, ggc-zone.c, tree-ssa.c, tree-ssa-loop-prefetch.c, integrate.h, c-gimplify.c, c-common.c, c-common.h, reg-stack.c, basic-block.h, tree-ssa-structalias.c, lto-section-out.c, tree-ssanames.c: Do not include varray.h. * Makefile.in: Update for abovementioned changes. objc/ChangeLog: * objc-act.c: Do not include varray.h. objcp/ChangeLog: * objcp-decl.c: Do not include varray.h. cp/ChangeLog: * optimize.c, parser.c,mangle.c, cp-tree.h: DO not include varray.h. * Make-lang.in: Don't include varray.h dependency in CXX_TREE_H. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@158933 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/tree-ssa-loop-prefetch.c | 1 - 1 file changed, 1 deletion(-) (limited to 'gcc/tree-ssa-loop-prefetch.c') diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index 2769c04ce0b..3377eeb1dbb 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -32,7 +32,6 @@ along with GCC; see the file COPYING3. If not see #include "tree-dump.h" #include "timevar.h" #include "cfgloop.h" -#include "varray.h" #include "expr.h" #include "tree-pass.h" #include "ggc.h" -- cgit v1.2.1 From 3665c1ba3f3c8290fde59822a52ce6e8324d4612 Mon Sep 17 00:00:00 2001 From: spop Date: Fri, 7 May 2010 16:15:45 +0000 Subject: Dump a diagnostic info when the insn-to-mem ratio is too small. 
2010-05-07 Changpeng Fang * tree-ssa-loop-prefetch.c (is_loop_prefetching_profitable): Dump a diagnostic info when the insn-to-mem ratio is too small. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@159161 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/tree-ssa-loop-prefetch.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'gcc/tree-ssa-loop-prefetch.c') diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index 3377eeb1dbb..6f879887c9f 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -1551,7 +1551,13 @@ is_loop_prefetching_profitable (unsigned ahead, HOST_WIDE_INT est_niter, insn_to_mem_ratio = ninsns / mem_ref_count; if (insn_to_mem_ratio < PREFETCH_MIN_INSN_TO_MEM_RATIO) - return false; + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, + "Not prefetching -- instruction to memory reference ratio (%d) too small\n", + insn_to_mem_ratio); + return false; + } /* Profitability of prefetching is highly dependent on the trip count. For a given AHEAD distance, the first AHEAD iterations do not benefit -- cgit v1.2.1 From 3fa57e8490fea744e154c99d247822c1a44dd475 Mon Sep 17 00:00:00 2001 From: spop Date: Fri, 7 May 2010 16:15:52 +0000 Subject: Account for loop unrolling in the insn-to-prefetch ratio heuristic. 2010-05-07 Changpeng Fang * tree-ssa-loop-prefetch.c (is_loop_prefetching_profitable): Account for loop unrolling in the insn-to-prefetch ratio heuristic. (loop_prefetch_arrays): Pass to is_loop_prefetching_profitable the unroll_factor. 
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@159162 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/tree-ssa-loop-prefetch.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'gcc/tree-ssa-loop-prefetch.c') diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index 6f879887c9f..38d8f233655 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -1532,7 +1532,7 @@ determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs, static bool is_loop_prefetching_profitable (unsigned ahead, HOST_WIDE_INT est_niter, unsigned ninsns, unsigned prefetch_count, - unsigned mem_ref_count) + unsigned mem_ref_count, unsigned unroll_factor) { int insn_to_mem_ratio, insn_to_prefetch_ratio; @@ -1570,13 +1570,18 @@ is_loop_prefetching_profitable (unsigned ahead, HOST_WIDE_INT est_niter, by taking the ratio between the number of prefetches and the total number of instructions. Since we are using integer arithmetic, we compute the reciprocal of this ratio. - TODO: Account for loop unrolling, which may reduce the costs of - shorter stride prefetches. Note that not accounting for loop - unrolling over-estimates the cost and hence gives more conservative - results. */ + (unroll_factor * ninsns) is used to estimate the number of instructions in + the unrolled loop. This implementation is a bit simplistic -- the number + of issued prefetch instructions is also affected by unrolling. So, + prefetch_mod and the unroll factor should be taken into account when + determining prefetch_count. Also, the number of insns of the unrolled + loop will usually be significantly smaller than the number of insns of the + original loop * unroll_factor (at least the induction variable increases + and the exit branches will get eliminated), so it might be better to use + tree_estimate_loop_size + estimated_unrolled_size. 
*/ if (est_niter < 0) { - insn_to_prefetch_ratio = ninsns / prefetch_count; + insn_to_prefetch_ratio = (unroll_factor * ninsns) / prefetch_count; return insn_to_prefetch_ratio >= MIN_INSN_TO_PREFETCH_RATIO; } @@ -1643,8 +1648,8 @@ loop_prefetch_arrays (struct loop *loop) ahead, unroll_factor, est_niter, ninsns, mem_ref_count, prefetch_count); - if (!is_loop_prefetching_profitable (ahead, est_niter, ninsns, - prefetch_count, mem_ref_count)) + if (!is_loop_prefetching_profitable (ahead, est_niter, ninsns, prefetch_count, + mem_ref_count, unroll_factor)) goto fail; mark_nontemporal_stores (loop, refs); -- cgit v1.2.1 From e20bb1260c7364d3a71b1f8f7e6e5ac2c3fe23d8 Mon Sep 17 00:00:00 2001 From: spop Date: Fri, 7 May 2010 17:26:02 +0000 Subject: Define the TRIP_COUNT_TO_AHEAD_RATIO heuristic. 2010-05-07 Changpeng Fang * tree-ssa-loop-prefetch.c (TRIP_COUNT_TO_AHEAD_RATIO): New. (is_loop_prefetching_profitable): Do not insert prefetches when the trip count is not at least TRIP_COUNT_TO_AHEAD_RATIO times the prefetch ahead distance. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@159163 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/tree-ssa-loop-prefetch.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'gcc/tree-ssa-loop-prefetch.c') diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index 38d8f233655..74976167f4a 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -199,6 +199,18 @@ along with GCC; see the file COPYING3. If not see #define FENCE_FOLLOWING_MOVNT NULL_TREE #endif +/* It is not profitable to prefetch when the trip count is not at + least TRIP_COUNT_TO_AHEAD_RATIO times the prefetch ahead distance. + For example, in a loop with a prefetch ahead distance of 10, + supposing that TRIP_COUNT_TO_AHEAD_RATIO is equal to 4, it is + profitable to prefetch when the trip count is greater or equal to + 40. In that case, 30 out of the 40 iterations will benefit from + prefetching. 
*/ + +#ifndef TRIP_COUNT_TO_AHEAD_RATIO +#define TRIP_COUNT_TO_AHEAD_RATIO 4 +#endif + /* The group of references between that reuse may occur. */ struct mem_ref_group @@ -1585,7 +1597,7 @@ is_loop_prefetching_profitable (unsigned ahead, HOST_WIDE_INT est_niter, return insn_to_prefetch_ratio >= MIN_INSN_TO_PREFETCH_RATIO; } - if (est_niter <= (HOST_WIDE_INT) ahead) + if (est_niter < (HOST_WIDE_INT) (TRIP_COUNT_TO_AHEAD_RATIO * ahead)) { if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, -- cgit v1.2.1 From 5d68c00f1c9af084246849a78843cb36b7c695da Mon Sep 17 00:00:00 2001 From: krebbel Date: Tue, 11 May 2010 07:28:42 +0000 Subject: 2010-05-11 Christian Borntraeger * tree-ssa-loop-prefetch.c: Add debug for dropped prefetches. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@159256 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/tree-ssa-loop-prefetch.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'gcc/tree-ssa-loop-prefetch.c') diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index 74976167f4a..2fc901d907b 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -860,11 +860,20 @@ should_issue_prefetch_p (struct mem_ref *ref) /* For now do not issue prefetches for only first few of the iterations. */ if (ref->prefetch_before != PREFETCH_ALL) - return false; + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Ignoring %p due to prefetch_before\n", + (void *) ref); + return false; + } /* Do not prefetch nontemporal stores. 
*/ if (ref->storent_p) - return false; + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Ignoring nontemporal store %p\n", (void *) ref); + return false; + } return true; } -- cgit v1.2.1 From 8234f09041f74605ace7f36a41ddca962d33a18e Mon Sep 17 00:00:00 2001 From: krebbel Date: Tue, 11 May 2010 07:29:59 +0000 Subject: 2010-05-11 Christian Borntraeger * tree-ssa-loop-prefetch.c (prune_ref_by_group_reuse): Reset prefetch_before to PREFETCH_ALL if two accesses "meet" beyond cache size. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@159257 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/tree-ssa-loop-prefetch.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'gcc/tree-ssa-loop-prefetch.c') diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index 2fc901d907b..daf9956d3bc 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -716,6 +716,9 @@ prune_ref_by_group_reuse (struct mem_ref *ref, struct mem_ref *by, hit_from = ddown (delta_b, PREFETCH_BLOCK) * PREFETCH_BLOCK; prefetch_before = (hit_from - delta_r + step - 1) / step; + /* Do not reduce prefetch_before if we meet beyond cache size. */ + if (prefetch_before > abs (L2_CACHE_SIZE_BYTES / step)) + prefetch_before = PREFETCH_ALL; if (prefetch_before < ref->prefetch_before) ref->prefetch_before = prefetch_before; @@ -746,6 +749,9 @@ prune_ref_by_group_reuse (struct mem_ref *ref, struct mem_ref *by, reduced_prefetch_block, align_unit); if (miss_rate <= ACCEPTABLE_MISS_RATE) { + /* Do not reduce prefetch_before if we meet beyond cache size.
*/ + if (prefetch_before > L2_CACHE_SIZE_BYTES / PREFETCH_BLOCK) + prefetch_before = PREFETCH_ALL; if (prefetch_before < ref->prefetch_before) ref->prefetch_before = prefetch_before; -- cgit v1.2.1 From 3a4137294458e201f42049e4696ee86d1fd7cb4a Mon Sep 17 00:00:00 2001 From: pthaugen Date: Fri, 14 May 2010 15:48:51 +0000 Subject: * tree-ssa-loop-prefetch.c (prune_ref_by_group_reuse): Cast abs() result to unsigned. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@159397 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/tree-ssa-loop-prefetch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc/tree-ssa-loop-prefetch.c') diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index daf9956d3bc..f828ed3f1a1 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -717,7 +717,7 @@ prune_ref_by_group_reuse (struct mem_ref *ref, struct mem_ref *by, prefetch_before = (hit_from - delta_r + step - 1) / step; /* Do not reduce prefetch_before if we meet beyond cache size. */ - if (prefetch_before > abs (L2_CACHE_SIZE_BYTES / step)) + if (prefetch_before > (unsigned) abs (L2_CACHE_SIZE_BYTES / step)) prefetch_before = PREFETCH_ALL; if (prefetch_before < ref->prefetch_before) ref->prefetch_before = prefetch_before; -- cgit v1.2.1 From c0a0de5e914f37a95c9d67f1428d4b21c5f8da9d Mon Sep 17 00:00:00 2001 From: spop Date: Mon, 17 May 2010 22:33:53 +0000 Subject: Define PREFETCH_MOD_TO_UNROLL_FACTOR_RATIO heuristic. 2010-05-17 Changpeng Fang * tree-ssa-loop-prefetch.c (PREFETCH_MOD_TO_UNROLL_FACTOR_RATIO): New. (schedule_prefetches): Do not generate a prefetch if the unroll factor is far from what is required by the prefetch.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@159514 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/tree-ssa-loop-prefetch.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'gcc/tree-ssa-loop-prefetch.c') diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index f828ed3f1a1..4889604b6ab 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -225,6 +225,17 @@ struct mem_ref_group #define PREFETCH_ALL (~(unsigned HOST_WIDE_INT) 0) +/* Do not generate a prefetch if the unroll factor is significantly less + than what is required by the prefetch. This is to avoid redundant + prefetches. For example, if prefetch_mod is 16 and unroll_factor is + 1, this means prefetching requires unrolling the loop 16 times, but + the loop is not going to be unrolled. In this case (ratio = 16), + prefetching is not likely to be beneficial. */ + +#ifndef PREFETCH_MOD_TO_UNROLL_FACTOR_RATIO +#define PREFETCH_MOD_TO_UNROLL_FACTOR_RATIO 8 +#endif + /* The memory reference. */ struct mem_ref @@ -921,6 +932,12 @@ schedule_prefetches (struct mem_ref_group *groups, unsigned unroll_factor, if (!should_issue_prefetch_p (ref)) continue; + /* The loop is far from being sufficiently unrolled for this + prefetch. Do not generate prefetch to avoid many redundant + prefetches. */ + if (ref->prefetch_mod / unroll_factor > PREFETCH_MOD_TO_UNROLL_FACTOR_RATIO) + continue; + /* If we need to prefetch the reference each PREFETCH_MOD iterations, and we unroll the loop UNROLL_FACTOR times, we need to insert ceil (UNROLL_FACTOR / PREFETCH_MOD) instructions in each -- cgit v1.2.1 From 016efb936c2cf2bb300fb31f1c0dfccbaddec697 Mon Sep 17 00:00:00 2001 From: spop Date: Mon, 17 May 2010 22:34:03 +0000 Subject: Also apply the insn to prefetch ratio heuristic to loops with known trip count. 2010-05-17 Changpeng Fang * doc/invoke.texi: Update documentation for min-insn-to-prefetch-ratio.
* tree-ssa-loop-prefetch.c (is_loop_prefetching_profitable): Also apply the insn to prefetch ratio heuristic to loops with known trip count. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@159515 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/tree-ssa-loop-prefetch.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'gcc/tree-ssa-loop-prefetch.c') diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index 4889604b6ab..4d85f545567 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -1603,17 +1603,9 @@ is_loop_prefetching_profitable (unsigned ahead, HOST_WIDE_INT est_niter, return false; } - /* Profitability of prefetching is highly dependent on the trip count. - For a given AHEAD distance, the first AHEAD iterations do not benefit - from prefetching, and the last AHEAD iterations execute useless - prefetches. So, if the trip count is not large enough relative to AHEAD, - prefetching may cause serious performance degradation. To avoid this - problem when the trip count is not known at compile time, we - conservatively skip loops with high prefetching costs. For now, only - the I-cache cost is considered. The relative I-cache cost is estimated - by taking the ratio between the number of prefetches and the total - number of instructions. Since we are using integer arithmetic, we - compute the reciprocal of this ratio. + /* Prefetching most likely causes performance degradation when the instruction + to prefetch ratio is too small. Too many prefetch instructions in a loop + may reduce the I-cache performance. (unroll_factor * ninsns) is used to estimate the number of instructions in the unrolled loop. This implementation is a bit simplistic -- the number of issued prefetch instructions is also affected by unrolling. 
So, @@ -1623,12 +1615,21 @@ is_loop_prefetching_profitable (unsigned ahead, HOST_WIDE_INT est_niter, original loop * unroll_factor (at least the induction variable increases and the exit branches will get eliminated), so it might be better to use tree_estimate_loop_size + estimated_unrolled_size. */ - if (est_niter < 0) + insn_to_prefetch_ratio = (unroll_factor * ninsns) / prefetch_count; + if (insn_to_prefetch_ratio < MIN_INSN_TO_PREFETCH_RATIO) { - insn_to_prefetch_ratio = (unroll_factor * ninsns) / prefetch_count; - return insn_to_prefetch_ratio >= MIN_INSN_TO_PREFETCH_RATIO; + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, + "Not prefetching -- instruction to prefetch ratio (%d) too small\n", + insn_to_prefetch_ratio); + return false; } + /* Could not do further estimation if the trip count is unknown. Just assume + prefetching is profitable. Too aggressive??? */ + if (est_niter < 0) + return true; + if (est_niter < (HOST_WIDE_INT) (TRIP_COUNT_TO_AHEAD_RATIO * ahead)) { if (dump_file && (dump_flags & TDF_DETAILS)) -- cgit v1.2.1 From 81d2a38fdf952e032ef4e411a398563bf0b19bab Mon Sep 17 00:00:00 2001 From: krebbel Date: Wed, 19 May 2010 10:36:40 +0000 Subject: 2010-05-19 Christian Borntraeger * tree-ssa-loop-prefetch.c (mem_ref_group, ar_data): Change step to tree. (dump_mem_ref): Adopt debug code to handle a tree as step. This also checks for a constant int vs. non-constant but loop-invariant steps. (find_or_create_group): Change the sort algorithm to only consider steps that are constant ints. (idx_analyze_ref): Adopt code to handle a tree instead of a HOST_WIDE_INT for step. (gather_memory_references_ref): Handle tree instead of int and be prepared to see a NULL_TREE. (prune_ref_by_self_reuse, prune_ref_by_group_reuse): Do not prune prefetches if the step cannot be calculated at compile time. (issue_prefetch_ref): Issue prefetches for non-constant but loop-invariant steps. 
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@159557 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/tree-ssa-loop-prefetch.c | 107 ++++++++++++++++++++++++++++++------------- 1 file changed, 75 insertions(+), 32 deletions(-) (limited to 'gcc/tree-ssa-loop-prefetch.c') diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index 4d85f545567..becde8914e1 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -216,7 +216,7 @@ along with GCC; see the file COPYING3. If not see struct mem_ref_group { tree base; /* Base of the reference. */ - HOST_WIDE_INT step; /* Step of the reference. */ + tree step; /* Step of the reference. */ struct mem_ref *refs; /* References in the group. */ struct mem_ref_group *next; /* Next group of references. */ }; @@ -271,7 +271,10 @@ dump_mem_ref (FILE *file, struct mem_ref *ref) fprintf (file, " group %p (base ", (void *) ref->group); print_generic_expr (file, ref->group->base, TDF_SLIM); fprintf (file, ", step "); - fprintf (file, HOST_WIDE_INT_PRINT_DEC, ref->group->step); + if (cst_and_fits_in_hwi (ref->group->step)) + fprintf (file, HOST_WIDE_INT_PRINT_DEC, int_cst_value (ref->group->step)); + else + print_generic_expr (file, ref->group->step, TDF_TREE); fprintf (file, ")\n"); fprintf (file, " delta "); @@ -287,19 +290,20 @@ dump_mem_ref (FILE *file, struct mem_ref *ref) exist. */ static struct mem_ref_group * -find_or_create_group (struct mem_ref_group **groups, tree base, - HOST_WIDE_INT step) +find_or_create_group (struct mem_ref_group **groups, tree base, tree step) { struct mem_ref_group *group; for (; *groups; groups = &(*groups)->next) { - if ((*groups)->step == step + if (operand_equal_p ((*groups)->step, step, 0) && operand_equal_p ((*groups)->base, base, 0)) return *groups; - /* Keep the list of groups sorted by decreasing step. */ - if ((*groups)->step < step) + /* If step is an integer constant, keep the list of groups sorted + by decreasing step. 
*/ + if (cst_and_fits_in_hwi ((*groups)->step) && cst_and_fits_in_hwi (step) + && int_cst_value ((*groups)->step) < int_cst_value (step)) break; } @@ -384,7 +388,7 @@ struct ar_data { struct loop *loop; /* Loop of the reference. */ gimple stmt; /* Statement of the reference. */ - HOST_WIDE_INT *step; /* Step of the memory reference. */ + tree *step; /* Step of the memory reference. */ HOST_WIDE_INT *delta; /* Offset of the memory reference. */ }; @@ -396,7 +400,7 @@ idx_analyze_ref (tree base, tree *index, void *data) { struct ar_data *ar_data = (struct ar_data *) data; tree ibase, step, stepsize; - HOST_WIDE_INT istep, idelta = 0, imult = 1; + HOST_WIDE_INT idelta = 0, imult = 1; affine_iv iv; if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF @@ -404,15 +408,11 @@ idx_analyze_ref (tree base, tree *index, void *data) return false; if (!simple_iv (ar_data->loop, loop_containing_stmt (ar_data->stmt), - *index, &iv, false)) + *index, &iv, true)) return false; ibase = iv.base; step = iv.step; - if (!cst_and_fits_in_hwi (step)) - return false; - istep = int_cst_value (step); - if (TREE_CODE (ibase) == POINTER_PLUS_EXPR && cst_and_fits_in_hwi (TREE_OPERAND (ibase, 1))) { @@ -425,6 +425,12 @@ idx_analyze_ref (tree base, tree *index, void *data) ibase = build_int_cst (TREE_TYPE (ibase), 0); } + if (*ar_data->step == NULL_TREE) + *ar_data->step = step; + else + *ar_data->step = fold_build2 (PLUS_EXPR, sizetype, + fold_convert (sizetype, *ar_data->step), + fold_convert (sizetype, step)); if (TREE_CODE (base) == ARRAY_REF) { stepsize = array_ref_element_size (base); @@ -432,11 +438,12 @@ idx_analyze_ref (tree base, tree *index, void *data) return false; imult = int_cst_value (stepsize); - istep *= imult; + *ar_data->step = fold_build2 (MULT_EXPR, sizetype, + fold_convert (sizetype, *ar_data->step), + fold_convert (sizetype, step)); idelta *= imult; } - *ar_data->step += istep; *ar_data->delta += idelta; *index = ibase; @@ -450,7 +457,7 @@ idx_analyze_ref (tree base, tree 
*index, void *data) static bool analyze_ref (struct loop *loop, tree *ref_p, tree *base, - HOST_WIDE_INT *step, HOST_WIDE_INT *delta, + tree *step, HOST_WIDE_INT *delta, gimple stmt) { struct ar_data ar_data; @@ -458,7 +465,7 @@ analyze_ref (struct loop *loop, tree *ref_p, tree *base, HOST_WIDE_INT bit_offset; tree ref = *ref_p; - *step = 0; + *step = NULL_TREE; *delta = 0; /* First strip off the component references. Ignore bitfields. */ @@ -493,8 +500,8 @@ static bool gather_memory_references_ref (struct loop *loop, struct mem_ref_group **refs, tree ref, bool write_p, gimple stmt) { - tree base; - HOST_WIDE_INT step, delta; + tree base, step; + HOST_WIDE_INT delta; struct mem_ref_group *agrp; if (get_base_address (ref) == NULL) @@ -502,6 +509,9 @@ gather_memory_references_ref (struct loop *loop, struct mem_ref_group **refs, if (!analyze_ref (loop, &ref, &base, &step, &delta, stmt)) return false; + /* If analyze_ref fails the default is a NULL_TREE. We can stop here. */ + if (step == NULL_TREE) + return false; /* Now we know that REF = &BASE + STEP * iter + DELTA, where DELTA and STEP are integer constants. */ @@ -576,8 +586,16 @@ gather_memory_references (struct loop *loop, bool *no_other_refs, unsigned *ref_ static void prune_ref_by_self_reuse (struct mem_ref *ref) { - HOST_WIDE_INT step = ref->group->step; - bool backward = step < 0; + HOST_WIDE_INT step; + bool backward; + + /* If the step size is non constant, we cannot calculate prefetch_mod. 
*/ + if (!cst_and_fits_in_hwi (ref->group->step)) + return; + + step = int_cst_value (ref->group->step); + + backward = step < 0; if (step == 0) { @@ -661,8 +679,8 @@ static void prune_ref_by_group_reuse (struct mem_ref *ref, struct mem_ref *by, bool by_is_before) { - HOST_WIDE_INT step = ref->group->step; - bool backward = step < 0; + HOST_WIDE_INT step; + bool backward; HOST_WIDE_INT delta_r = ref->delta, delta_b = by->delta; HOST_WIDE_INT delta = delta_b - delta_r; HOST_WIDE_INT hit_from; @@ -673,6 +691,16 @@ prune_ref_by_group_reuse (struct mem_ref *ref, struct mem_ref *by, tree ref_type; int align_unit; + /* If the step is non constant we cannot calculate prefetch_before. */ + if (!cst_and_fits_in_hwi (ref->group->step)) { + return; + } + + step = int_cst_value (ref->group->step); + + backward = step < 0; + + if (delta == 0) { /* If the references has the same address, only prefetch the @@ -986,7 +1014,7 @@ static void issue_prefetch_ref (struct mem_ref *ref, unsigned unroll_factor, unsigned ahead) { HOST_WIDE_INT delta; - tree addr, addr_base, write_p, local; + tree addr, addr_base, write_p, local, forward; gimple prefetch; gimple_stmt_iterator bsi; unsigned n_prefetches, ap; @@ -1009,13 +1037,28 @@ issue_prefetch_ref (struct mem_ref *ref, unsigned unroll_factor, unsigned ahead) for (ap = 0; ap < n_prefetches; ap++) { - /* Determine the address to prefetch. */ - delta = (ahead + ap * ref->prefetch_mod) * ref->group->step; - addr = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, - addr_base, size_int (delta)); - addr = force_gimple_operand_gsi (&bsi, unshare_expr (addr), true, NULL, - true, GSI_SAME_STMT); - + if (cst_and_fits_in_hwi (ref->group->step)) + { + /* Determine the address to prefetch. 
*/ + delta = (ahead + ap * ref->prefetch_mod) * + int_cst_value (ref->group->step); + addr = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, + addr_base, size_int (delta)); + addr = force_gimple_operand_gsi (&bsi, unshare_expr (addr), true, NULL, + true, GSI_SAME_STMT); + } + else + { + /* The step size is non-constant but loop-invariant. We use the + heuristic to simply prefetch ahead iterations ahead. */ + forward = fold_build2 (MULT_EXPR, sizetype, + fold_convert (sizetype, ref->group->step), + fold_convert (sizetype, size_int (ahead))); + addr = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, addr_base, + forward); + addr = force_gimple_operand_gsi (&bsi, unshare_expr (addr), true, + NULL, true, GSI_SAME_STMT); + } /* Create the prefetch instruction. */ prefetch = gimple_build_call (built_in_decls[BUILT_IN_PREFETCH], 3, addr, write_p, local); -- cgit v1.2.1 From a7a4626828090600459358ca745c4482cf9551a1 Mon Sep 17 00:00:00 2001 From: steven Date: Fri, 21 May 2010 13:53:22 +0000 Subject: gcc/ChangeLog: * tree.h: Include real.h and fixed-value.h as basic datatypes. 
* dfp.c, convert.c, reload1.c, reginfo.c, tree-flow.h, tree-ssa-threadedge.c, tree-ssanames.c, tree-loop-linear.c, tree-into-ssa.c, tree-vect-generic.c, tree-ssa-structalias.c, tree-ssa-loop-im.c, tree-dump.c, tree-complex.c, tree-ssa-uninit.c, genrecog.c, tree-ssa-threadupdate.c, tree-ssa-loop-niter.c, tree-pretty-print.c, tree-loop-distribution.c, tree-ssa-loop-unswitch.c, c-lex.c, optabs.c, postreload-gcse.c, tree-ssa-loop-manip.c, postreload.c, tree-ssa-loop-ch.c, tree-tailcall.c, tree.c, reload.c, tree-scalar-evolution.c, rtlanal.c, tree-phinodes.c, builtins.c, final.c, genoutput.c, fold-const.c, tree-ssa-dse.c, genautomata.c, tree-ssa-uncprop.c, toplev.c, tree-chrec.c, genemit.c, c-cppbuiltin.c, tree-ssa-sccvn.c, tree-ssa-ccp.c, tree-ssa-loop-ivopts.c, mode-switching.c, tree-call-cdce.c, cse.c, genpeep.c, tree-ssa-math-opts.c, tree-ssa-dom.c, tree-nrv.c, tree-ssa-propagate.c, tree-ssa-alias.c, tree-ssa-sink.c, jump.c, ifcvt.c, dwarf2out.c, expr.c, genattrtab.c, genconditions.c, tree-ssa-loop-ivcanon.c, tree-ssa-loop.c, tree-parloops.c, recog.c, tree-ssa-address.c, lcm.c, tree-eh.c, gimple-pretty-print.c, c-pretty-print.c, print-rtl.c, gcse.c, tree-if-conv.c, tree-data-ref.c, tree-affine.c, gimplify.c, tree-ssa-phiopt.c, implicit-zee.c, expmed.c, tree-dfa.c, emit-rtl.c, store-motion.c, cselib.c, tree-cfgcleanup.c, simplify-rtx.c, tree-ssa-pre.c, genpreds.c, tree-mudflap.c, print-tree.c, tree-ssa-copy.c, tree-ssa-forwprop.c, tree-ssa-dce.c, varasm.c, tree-nested.c, tree-ssa.c, tree-ssa-loop-prefetch.c, rtl.c, tree-inline.c, integrate.c, tree-optimize.c, tree-ssa-phiprop.c, fixed-value.c, combine.c, tree-profile.c, c-common.c, sched-vis.c, tree-cfg.c, passes.c, tree-ssa-reassoc.c, config/alpha/alpha.c, config/frv/frv.c, config/s390/s390.c, config/m32c/m32c.c, config/spu/spu.c, config/sparc/sparc.c, config/mep/mep.c, config/m32r/m32r.c, config/rx/rx.c, config/i386/i386.c, config/sh/sh.c, config/pdp11/pdp11.c, config/avr/avr.c, config/crx/crx.c, 
config/xtensa/xtensa.c, config/stormy16/stormy16.c, config/fr30/fr30.c, config/lm32/lm32.c, config/moxie/moxie.c, config/m68hc11/m68hc11.c, config/cris/cris.c, config/iq2000/iq2000.c, config/mn10300/mn10300.c, config/ia64/ia64.c, config/m68k/m68k.c, config/rs6000/rs6000.c, config/picochip/picochip.c, config/darwin.c, config/arc/arc.c, config/mcore/mcore.c, config/score/score3.c, config/score/score7.c, config/score/score.c, config/arm/arm.c, config/pa/pa.c, config/mips/mips.c, config/vax/vax.c, config/h8300/h8300.c, config/v850/v850.c, config/mmix/mmix.c, config/bfin/bfin.c: Clean up redundant includes. * Makefile.in: Update accordingly. java/ChangeLog: * typeck.c, decl.c, jcf-parse.c, except.c, expr.c: cp/Changelog: * error.c, tree.c, typeck2.c, cxx-pretty-print.c, mangle.c: Clean up redundant includes. fortran/ChangeLog: * trans-const.c, trans-types.c, trans-intrinsic.c: Clean up redundant includes. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@159663 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/tree-ssa-loop-prefetch.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'gcc/tree-ssa-loop-prefetch.c') diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index becde8914e1..1f8225e0468 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -22,9 +22,7 @@ along with GCC; see the file COPYING3. If not see #include "coretypes.h" #include "tm.h" #include "tree.h" -#include "rtl.h" #include "tm_p.h" -#include "hard-reg-set.h" #include "basic-block.h" #include "output.h" #include "diagnostic.h" @@ -34,7 +32,6 @@ along with GCC; see the file COPYING3. 
If not see #include "cfgloop.h" #include "expr.h" #include "tree-pass.h" -#include "ggc.h" #include "insn-config.h" #include "recog.h" #include "hashtab.h" -- cgit v1.2.1 From ce084dfc1cd60d867d38dbed86a914d82fa908d1 Mon Sep 17 00:00:00 2001 From: jsm28 Date: Fri, 21 May 2010 22:34:26 +0000 Subject: * diagnostic.c: Don't include tm.h, tree.h, tm_p.h, langhooks.h or langhooks-def.h. (diagnostic_initialize): Initialize x_data not last_function. (diagnostic_report_current_function): Move to tree-diagnostic.c. (default_diagnostic_starter): Call diagnostic_report_current_module not diagnostic_report_current_function. (diagnostic_report_diagnostic): Initialize x_data not abstract_origin. (verbatim): Likewise. * diagnostic.h (struct diagnostic_info): Change abstract_origin to x_data. (struct diagnostic_context): Change last_function to x_data. (diagnostic_auxiliary_data): Replace with diagnostic_context_auxiliary_data and diagnostic_info_auxiliary_data. (diagnostic_last_function_changed, diagnostic_set_last_function, diagnostic_report_current_function): Move to tree-diagnostic.h. (print_declaration, dump_generic_node, print_generic_stmt, print_generic_stmt_indented, print_generic_expr, print_generic_decl, debug_c_tree, dump_omp_clauses, print_call_name, debug_generic_expr, debug_generic_stmt, debug_tree_chain, default_tree_printer): Move to tree-pretty-print.h. (debug_gimple_stmt, debug_gimple_seq, print_gimple_seq, print_gimple_stmt, print_gimple_expr, dump_gimple_stmt): Move to gimple-pretty-print.h. * pretty-print.c: Don't include tree.h (pp_base_format): Don't handle %K here. (pp_base_tree_identifier): Move to tree-pretty-print.c. * pretty-print.h (text_info): Change abstract_origin to x_data. (pp_tree_identifier, pp_unsupported_tree, pp_base_tree_identifier): Move to tree-pretty-print.h. * gimple-pretty-print.h, tree-diagnostic.c, tree-diagnostic.h, tree-pretty-print.h: New files. * tree-pretty-print.c: Include tree-pretty-print.h. (percent_K_format): New. 
Moved from pretty-print.c. (pp_base_tree_identifier): Move from pretty-print.c. * c-objc-common.c: Include tree-pretty-print.h. (c_tree_printer): Handle %K here. * langhooks.c: Include tree-diagnostic.h. (lhd_print_error_function): Use diagnostic_abstract_origin macro. * toplev.c: Include tree-diagnostic.h and tree-pretty-print.h. (default_tree_printer): Handle %K using percent_K_format. (general_init): Use default_tree_diagnostic_starter. * tree.c: Include tree-diagnostic.h and tree-pretty-print.h. (free_lang_data): Use default_tree_diagnostic_starter. * c-pretty-print.c: Include tree-pretty-print.h. * cfgexpand.c: Include tree-pretty-print.h and gimple-pretty-print.h. * cgraphunit.c: Include tree-pretty-print.h and gimple-pretty-print.h. * dwarf2out.c: Include tree-pretty-print.h. * except.c: Include tree-pretty-print.h. * gimple-pretty-print.c: Include tree-pretty-print.h and gimple-pretty-print.h. * gimplify.c: Include tree-pretty-print.h. * graphite-poly.c: Include tree-pretty-print.h and gimple-pretty-print.h. * ipa-cp.c: Include tree-pretty-print.h. * ipa-inline.c: Include gimple-pretty-print.h. * ipa-prop.c: Include tree-pretty-print.h and gimple-pretty-print.h. * ipa-pure-const.c: Include gimple-pretty-print.h. * ipa-struct-reorg.c: Include tree-pretty-print.h and gimple-pretty-print.h. * ipa-type-escape.c: Include tree-pretty-print.h. * print-rtl.c: Include tree-pretty-print.h. * print-tree.c: Include gimple-pretty-print.h. * sese.c: Include tree-pretty-print.h. * tree-affine.c: Include tree-pretty-print.h. * tree-browser.c: Include tree-pretty-print.h. * tree-call-cdce.c: Include gimple-pretty-print.h. * tree-cfg.c: Include tree-pretty-print.h and gimple-pretty-print.h. * tree-chrec.c: Include tree-pretty-print.h. * tree-data-ref.c: Include tree-pretty-print.h and gimple-pretty-print.h. * tree-dfa.c: Include tree-pretty-print.h. * tree-if-conv.c: Include tree-pretty-print.h and gimple-pretty-print.h. * tree-inline.c: Include tree-pretty-print.h. 
* tree-into-ssa.c: Include tree-pretty-print.h and gimple-pretty-print.h. * tree-nrv.c: Include tree-pretty-print.h. * tree-object-size.c: Include tree-pretty-print.h and gimple-pretty-print.h. * tree-outof-ssa.c: Include tree-pretty-print.h and gimple-pretty-print.h. * tree-parloops.c: Include tree-pretty-print.h and gimple-pretty-print.h. * tree-predcom.c: Include tree-pretty-print.h and gimple-pretty-print.h. * tree-scalar-evolution.c: Include tree-pretty-print.h and gimple-pretty-print.h. * tree-sra.c: Include tree-pretty-print.h. * tree-ssa-address.c: Include tree-pretty-print.h. * tree-ssa-alias.c: Include tree-pretty-print.h. * tree-ssa-ccp.c: Include tree-pretty-print.h and gimple-pretty-print.h. * tree-ssa-coalesce.c: Include tree-pretty-print.h. * tree-ssa-copy.c: Include tree-pretty-print.h and gimple-pretty-print.h. * tree-ssa-copyrename.c: Include tree-pretty-print.h. * tree-ssa-dce.c: Include tree-pretty-print.h and gimple-pretty-print.h. * tree-ssa-dom.c: Include tree-pretty-print.h and gimple-pretty-print.h. * tree-ssa-dse.c: Include gimple-pretty-print.h. * tree-ssa-forwprop.c: Include tree-pretty-print.h. * tree-ssa-ifcombine.c: Include tree-pretty-print.h. * tree-ssa-live.c: Include tree-pretty-print.h and gimple-pretty-print.h. * tree-ssa-loop-im.c: Include tree-pretty-print.h and gimple-pretty-print.h. * tree-ssa-loop-ivcanon.c: Include tree-pretty-print.h and gimple-pretty-print.h. * tree-ssa-loop-ivopts.c: Include tree-pretty-print.h and gimple-pretty-print.h. * tree-ssa-loop-niter.c: Include tree-pretty-print.h and gimple-pretty-print.h. * tree-ssa-loop-prefetch.c: Include tree-pretty-print.h. * tree-ssa-math-opts.c: Include gimple-pretty-print.h. * tree-ssa-operands.c: Include tree-pretty-print.h and gimple-pretty-print.h. * tree-ssa-phiprop.c: Include tree-pretty-print.h and gimple-pretty-print.h. * tree-ssa-pre.c: Include tree-pretty-print.h and gimple-pretty-print.h. * tree-ssa-propagate.c: Include gimple-pretty-print.h. 
* tree-ssa-reassoc.c: Include tree-pretty-print.h and gimple-pretty-print.h. * tree-ssa-sccvn.c: Include tree-pretty-print.h and gimple-pretty-print.h. * tree-ssa-sink.c: Include gimple-pretty-print.h. * tree-ssa-ter.c: Include tree-pretty-print.h and gimple-pretty-print.h. * tree-ssa-uninit.c: Include gimple-pretty-print.h. * tree-ssa.c: Include tree-pretty-print.h and gimple-pretty-print.h. * tree-stdarg.c: Include gimple-pretty-print.h. * tree-switch-conversion.c: Include gimple-pretty-print.h. * tree-tailcall.c: Include tree-pretty-print.h and gimple-pretty-print.h. * tree-vect-data-refs.c: Include tree-pretty-print.h and gimple-pretty-print.h. * tree-vect-loop-manip.c: Include tree-pretty-print.h and gimple-pretty-print.h. * tree-vect-loop.c: Include tree-pretty-print.h and gimple-pretty-print.h. * tree-vect-patterns.c: Include gimple-pretty-print.h. * tree-vect-slp.c: Include tree-pretty-print.h and gimple-pretty-print.h. * tree-vect-stmts.c: Include tree-pretty-print.h and gimple-pretty-print.h. * tree-vectorizer.c: Include tree-pretty-print.h. * tree-vrp.c: Include tree-pretty-print.h and gimple-pretty-print.h. * value-prof.c: Include tree-pretty-print.h and gimple-pretty-print.h. * var-tracking.c: Include tree-pretty-print.h. * Makefile.in (OBJS-common): Add tree-diagnostic.o. (tree-diagnostic.o): New dependencies. 
(c-objc-common.o, c-pretty-print.o, langhooks.o, tree.o, tree-inline.o, print-tree.o, stor-layout.o, tree-ssa-uninit.o, tree-ssa.o, tree-into-ssa.o, tree-ssa-ter.o, tree-ssa-coalesce.o, tree-outof-ssa.o, tree-ssa-forwprop.o, tree-ssa-phiprop.o, tree-ssa-ifcombine.o, tree-nrv.o, tree-ssa-copy.o, tree-ssa-propagate.o, tree-ssa-dom.o, tree-ssa-uncprop.o, tree-ssa-live.o, tree-ssa-copyrename.o, tree-ssa-pre.o, tree-ssa-sccvn.o, tree-vrp.o, tree-cfg.o, tree-tailcall.o, tree-ssa-sink.o, tree-if-conv.o, tree-dfa.o, tree-ssa-operands.o, tree-ssa-address.o, tree-ssa-loop-niter.o, tree-ssa-loop-ivcanon.o, tree-ssa-loop-prefetch.o, tree-predcom.o, tree-ssa-loop-ivopts.o, tree-affine.o, tree-ssa-loop-im.o, tree-ssa-math-opts.o, tree-ssa-alias.o, tree-ssa-reassoc.o, gimplify.o, tree-browser.o, tree-chrec.o, tree-scalar-evolution.o, tree-data-ref.o, sese.o, graphite-poly.o, tree-vect-loop.o, tree-vect-loop-manip.o, tree-vect-patterns.o, tree-vect-slp.o, tree-vect-stmts.o, tree-vect-data-refs.o, tree-vectorizer.o, tree-parloops.o, tree-stdarg.o, tree-object-size.o, gimple-pretty-print.o, tree-pretty-print.o, diagnostic.o, toplev.o, print-rtl.o, except.o, dwarf2out.o, cgraphunit.o, ipa-prop.o, ipa-cp.o, ipa-inline.o, ipa-pure-const.o, ipa-type-escape.o, ipa-struct-reorg.o, tree-ssa-dce.o, tree-call-cdce.o, tree-ssa-ccp.o, tree-sra.o, tree-switch-conversion.o, var-tracking.o, value-prof.o, cfgexpand.o, pretty-print.o): Update dependencies. cp: * error.c: Include tree-diagnostic.h and tree-pretty-print.h. (cp_print_error_function): Use diagnostic_abstract_origin macro. (cp_printer): Handle %K here using percent_K_format. * cxx-pretty-print.c: Include tree-pretty-print.h. * Make-lang.in (cp/error.o, cp/cxx-pretty-print.o): Update dependencies. 
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@159685 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/tree-ssa-loop-prefetch.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'gcc/tree-ssa-loop-prefetch.c') diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index 1f8225e0468..633dd337c74 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -1,5 +1,5 @@ /* Array prefetching. - Copyright (C) 2005, 2007, 2008 Free Software Foundation, Inc. + Copyright (C) 2005, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. This file is part of GCC. @@ -26,6 +26,7 @@ along with GCC; see the file COPYING3. If not see #include "basic-block.h" #include "output.h" #include "diagnostic.h" +#include "tree-pretty-print.h" #include "tree-flow.h" #include "tree-dump.h" #include "timevar.h" -- cgit v1.2.1 From f547ca129ab797b6b3a5c98d6ebc3b93254104a6 Mon Sep 17 00:00:00 2001 From: krebbel Date: Tue, 25 May 2010 11:18:07 +0000 Subject: 2010-05-25 Christian Borntraeger PR 44203 * tree-ssa-loop-prefetch.c: Fix logic for step calculation to match the original (and intended) behaviour before r159557. This changeset changed a=a+b*c to a=(a+b)*b which was obviously wrong in two ways. 
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@159816 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/tree-ssa-loop-prefetch.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'gcc/tree-ssa-loop-prefetch.c') diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index 633dd337c74..d63ede1bbe9 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -423,25 +423,24 @@ idx_analyze_ref (tree base, tree *index, void *data) ibase = build_int_cst (TREE_TYPE (ibase), 0); } - if (*ar_data->step == NULL_TREE) - *ar_data->step = step; - else - *ar_data->step = fold_build2 (PLUS_EXPR, sizetype, - fold_convert (sizetype, *ar_data->step), - fold_convert (sizetype, step)); if (TREE_CODE (base) == ARRAY_REF) { stepsize = array_ref_element_size (base); if (!cst_and_fits_in_hwi (stepsize)) return false; imult = int_cst_value (stepsize); - - *ar_data->step = fold_build2 (MULT_EXPR, sizetype, - fold_convert (sizetype, *ar_data->step), - fold_convert (sizetype, step)); + step = fold_build2 (MULT_EXPR, sizetype, + fold_convert (sizetype, step), + fold_convert (sizetype, stepsize)); idelta *= imult; } + if (*ar_data->step == NULL_TREE) + *ar_data->step = step; + else + *ar_data->step = fold_build2 (PLUS_EXPR, sizetype, + fold_convert (sizetype, *ar_data->step), + fold_convert (sizetype, step)); *ar_data->delta += idelta; *index = ibase; -- cgit v1.2.1 From 8e3cb73bc66100e137b20bcd98316bc415b6e53c Mon Sep 17 00:00:00 2001 From: steven Date: Tue, 1 Jun 2010 22:00:56 +0000 Subject: * gimplify.c: Do not include except.h and optabs.h. (gimplify_body): Do not initialize RTL profiling. * gimple-low.c: Do not include rtl.h, diagnostic.h, langhooks.h, langhooks-def.h, timevar.h, except.h, hashtab.h, and expr.h. * gimple-fold.c: Do not include rtl.h, tm_p.h, ggc.h, basic-block.h, output.h, expr.h, diagnostic.h, timevar.h, value-prof.h, and langhooks.h. * tree-pretty-print.h: Include pretty-print.h. 
* gimple-pretty-print.h: Include pretty-print.h. * tree-pretty-print.c: Do not include diagnostic.h. * tree-vrp.c: Likewise. * tree-tailcall.c: Likewise * tree-scalar-evolution.c: Likewise * tree-ssa-dse.c: Likewise * tree-chrec.c: Likewise * tree-ssa-sccvn.c: Likewise * tree-ssa-copyrename.c: Likewise * tree-nomudflap.c: Likewise * tree-call-cdce.c: Likewise * tree-stdarg.c: Likewise * tree-ssa-math-opts.c: Likewise * tree-nrv.c: Likewise * tree-ssa-sink.c: Likewise * tree-browser.c: Likewise * tree-ssa-loop-ivcanon.c: Likewise * tree-ssa-loop.c: Likewise * tree-parloops.c: Likewise * tree-ssa-address.c: Likewise * tree-ssa-ifcombine.c: Likewise * tree-if-conv.c: Likewise * tree-data-ref.c: Likewise * tree-affine.c: Likewise * tree-ssa-phiopt.c: Likewise * tree-ssa-coalesce.c: Likewise * tree-ssa-pre.c: Likewise * tree-ssa-live.c: Likewise * tree-predcom.c: Likewise * tree-ssa-forwprop.c: Likewise * tree-ssa-dce.c: Likewise * tree-ssa-ter.c: Likewise * tree-ssa-loop-prefetch.c: Likewise * tree-optimize.c: Likewise * tree-ssa-phiprop.c: Likewise * tree-object-size.c: Likewise * tree-outof-ssa.c: Likewise * tree-ssa-structalias.c: Likewise * tree-switch-conversion.c: Likewise * tree-ssa-reassoc.c: Likewise * tree-ssa-operands.c: Likewise * tree-vectorizer.c: Likewise * tree-vect-data-refs.c: Likewise * tree-vect-generic.c: Likewise * tree-vect-stmts.c: Likewise * tree-vect-patterns.c: Likewise * tree-vect-slp.c: Likewise * tree-vect-loop.c: Likewise * tree-ssa-loop-ivopts.c: Likewise * tree-ssa-loop-im.c: Likewise * tree-ssa-loop-niter.c: Likewise * tree-ssa-loop-unswitch.c: Likewise * tree-ssa-loop-manip.c: Likewise * tree-ssa-loop-ch.c: Likewise * tree-dump.c: Likewise * tree-complex.c: Likewise * tree-into-ssa.c: Do not include diagnostic.h and expr.h. 
* tree-ssa-uninit.c: Likewise * tree-ssa-threadupdate.c: Likewise * tree-ssa-uncprop.c: Likewise * tree-ssa-ccp.c: Likewise * tree-ssa-dom.c: Likewise * tree-ssa-propagate.c: Likewise * tree-ssa-alias.c: Likewise * tree-dfa.c: Likewise * tree-cfgcleanup.c: Likewise * tree-sra.c: Likewise * tree-ssa-copy.c: Likewise * tree-ssa.c: Likewise * tree-profile.c: Likewise * tree-cfg.c: Likewise * tree-ssa-threadedge.c: Likewise * tree-vect-loop-manip.c: Likewise * tree-inline.c: Do not include diagnostic.h and expr.h. Include rtl.h. (copy_decl_for_dup_finish): Do not use NULL_RTX. * tree-loop-linear.c: Do not include diagnostic.h, expr.h, and optabs.h. * tree-loop-distribution.c: Likewise. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@160125 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/tree-ssa-loop-prefetch.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'gcc/tree-ssa-loop-prefetch.c') diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index d63ede1bbe9..9ccf72e18b2 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -25,13 +25,11 @@ along with GCC; see the file COPYING3. If not see #include "tm_p.h" #include "basic-block.h" #include "output.h" -#include "diagnostic.h" #include "tree-pretty-print.h" #include "tree-flow.h" #include "tree-dump.h" #include "timevar.h" #include "cfgloop.h" -#include "expr.h" #include "tree-pass.h" #include "insn-config.h" #include "recog.h" @@ -43,6 +41,11 @@ along with GCC; see the file COPYING3. If not see #include "langhooks.h" #include "tree-inline.h" #include "tree-data-ref.h" + + +/* FIXME: Needed for optabs, but this should all be moved to a TBD interface + between the GIMPLE and RTL worlds. 
*/ +#include "expr.h" #include "optabs.h" /* This pass inserts prefetch instructions to optimize cache usage during -- cgit v1.2.1 From 1aabe697f03d7bb006744c168b1a51494ed2644d Mon Sep 17 00:00:00 2001 From: spop Date: Wed, 9 Jun 2010 22:51:46 +0000 Subject: Adjust the threshold value of PREFETCH_MOD_TO_UNROLL_FACTOR_RATIO 2010-06-09 Changpeng Fang * tree-ssa-loop-prefetch.c (PREFETCH_MOD_TO_UNROLL_FACTOR_RATIO) : Change the PREFETCH_MOD_TO_UNROLL_FACTOR_RATIO threshold value from 8 to 4. Minor change of the related comments. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@160514 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/tree-ssa-loop-prefetch.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'gcc/tree-ssa-loop-prefetch.c') diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index 9ccf72e18b2..705ee81b9c8 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -228,13 +228,13 @@ struct mem_ref_group /* Do not generate a prefetch if the unroll factor is significantly less than what is required by the prefetch. This is to avoid redundant - prefetches. For example, if prefetch_mod is 16 and unroll_factor is - 1, this means prefetching requires unrolling the loop 16 times, but - the loop is not going to be unrolled. In this case (ratio = 16), + prefetches. For example, when prefetch_mod is 16 and unroll_factor is + 2, prefetching requires unrolling the loop 16 times, but + the loop is actually unrolled twice. In this case (ratio = 8), prefetching is not likely to be beneficial. */ #ifndef PREFETCH_MOD_TO_UNROLL_FACTOR_RATIO -#define PREFETCH_MOD_TO_UNROLL_FACTOR_RATIO 8 +#define PREFETCH_MOD_TO_UNROLL_FACTOR_RATIO 4 #endif /* The memory reference. */ -- cgit v1.2.1 From 94ce9ff0a1203321644b4023a6f7960396484fbf Mon Sep 17 00:00:00 2001 From: spop Date: Wed, 9 Jun 2010 22:56:08 +0000 Subject: Limit non-constant step prefetching only to the innermost loops. 
2010-06-09 Changpeng Fang * tree-ssa-loop-prefetch.c (gather_memory_references_ref): Do not gather the memory reference in the outer loop if the step is not a constant. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@160515 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/tree-ssa-loop-prefetch.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'gcc/tree-ssa-loop-prefetch.c') diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index 705ee81b9c8..8097124bc7f 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -513,6 +513,10 @@ gather_memory_references_ref (struct loop *loop, struct mem_ref_group **refs, if (step == NULL_TREE) return false; + /* Limit non-constant step prefetching only to the innermost loops. */ + if (!cst_and_fits_in_hwi (step) && loop->inner != NULL) + return false; + /* Now we know that REF = &BASE + STEP * iter + DELTA, where DELTA and STEP are integer constants. */ agrp = find_or_create_group (refs, base, step); -- cgit v1.2.1 From 5da8318cccacdc351d419e79b32abe81a4252a08 Mon Sep 17 00:00:00 2001 From: spop Date: Mon, 14 Jun 2010 20:51:26 +0000 Subject: Account prefetch_mod and unroll_factor for the computation of the prefetch count. 2010-06-14 Changpeng Fang * tree-ssa-loop-prefetch.c (nothing_to_prefetch_p): New. Return true if no prefetch is going to be generated for a given group. (estimate_prefetch_count): Use prefetch_mod and unroll_factor to estimate the prefetch_count. (loop_prefetch_arrays): Call nothing_to_prefetch_p; estimate the prefetch count by considering the unroll_factor and prefetch_mod for is_loop_prefetching_profitable. 
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@160766 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/tree-ssa-loop-prefetch.c | 37 ++++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) (limited to 'gcc/tree-ssa-loop-prefetch.c') diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index 8097124bc7f..65474898ad9 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -994,18 +994,40 @@ schedule_prefetches (struct mem_ref_group *groups, unsigned unroll_factor, return any; } -/* Estimate the number of prefetches in the given GROUPS. */ +/* Return TRUE if no prefetch is going to be generated in the given + GROUPS. */ + +static bool +nothing_to_prefetch_p (struct mem_ref_group *groups) +{ + struct mem_ref *ref; + + for (; groups; groups = groups->next) + for (ref = groups->refs; ref; ref = ref->next) + if (should_issue_prefetch_p (ref)) + return false; + + return true; +} + +/* Estimate the number of prefetches in the given GROUPS. + UNROLL_FACTOR is the factor by which LOOP was unrolled. */ static int -estimate_prefetch_count (struct mem_ref_group *groups) +estimate_prefetch_count (struct mem_ref_group *groups, unsigned unroll_factor) { struct mem_ref *ref; + unsigned n_prefetches; int prefetch_count = 0; for (; groups; groups = groups->next) for (ref = groups->refs; ref; ref = ref->next) if (should_issue_prefetch_p (ref)) - prefetch_count++; + { + n_prefetches = ((unroll_factor + ref->prefetch_mod - 1) + / ref->prefetch_mod); + prefetch_count += n_prefetches; + } return prefetch_count; } @@ -1716,8 +1738,7 @@ loop_prefetch_arrays (struct loop *loop) /* Step 2: estimate the reuse effects. 
*/ prune_by_reuse (refs); - prefetch_count = estimate_prefetch_count (refs); - if (prefetch_count == 0) + if (nothing_to_prefetch_p (refs)) goto fail; determine_loop_nest_reuse (loop, refs, no_other_refs); @@ -1733,6 +1754,12 @@ loop_prefetch_arrays (struct loop *loop) ninsns = tree_num_loop_insns (loop, &eni_size_weights); unroll_factor = determine_unroll_factor (loop, refs, ninsns, &desc, est_niter); + + /* Estimate prefetch count for the unrolled loop. */ + prefetch_count = estimate_prefetch_count (refs, unroll_factor); + if (prefetch_count == 0) + goto fail; + if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Ahead %d, unroll factor %d, trip count " HOST_WIDE_INT_PRINT_DEC "\n" -- cgit v1.2.1 From 5a91155f3ea2d63559d86797775c9c04b2ef7c05 Mon Sep 17 00:00:00 2001 From: spop Date: Fri, 2 Jul 2010 16:34:29 +0000 Subject: PR 44576: miss rate computation improvement for prefetching loop arrays. 2010-07-02 Changpeng Fang PR middle-end/44576 * tree-ssa-loop-prefetch.c (compute_miss_rate): Return 1000 (out of 1000) for miss rate if the address difference is greater than or equal to the cache line size (the two references will never hit the same cache line). git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@161727 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/tree-ssa-loop-prefetch.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'gcc/tree-ssa-loop-prefetch.c') diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index 65474898ad9..934b49c0406 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -654,6 +654,11 @@ compute_miss_rate (unsigned HOST_WIDE_INT cache_line_size, int total_positions, miss_positions, miss_rate; int address1, address2, cache_line1, cache_line2; + /* It always misses if delta is greater than or equal to the cache + line size. 
*/ + if (delta >= cache_line_size) + return 1000; + total_positions = 0; miss_positions = 0; -- cgit v1.2.1 From 3a2f43cf063565a48d77f8ca57ce797642da497a Mon Sep 17 00:00:00 2001 From: spop Date: Fri, 2 Jul 2010 16:34:38 +0000 Subject: Reduce the cost in miss rate computation. 2010-07-02 Changpeng Fang * tree-ssa-loop-prefetch.c (compute_miss_rate): Rename to is_miss_rate_acceptable. Pull total_positions computation out of the loops. Early return if miss_positions exceeds the acceptable threshold. * tree-ssa-loop-prefetch.c (prune_ref_by_group_reuse): Call is_miss_rate_acceptable after renaming of compute_miss_rate. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@161728 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/tree-ssa-loop-prefetch.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) (limited to 'gcc/tree-ssa-loop-prefetch.c') diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index 934b49c0406..c3e90d235ba 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -640,27 +640,29 @@ ddown (HOST_WIDE_INT x, unsigned HOST_WIDE_INT by) /* Given a CACHE_LINE_SIZE and two inductive memory references with a common STEP greater than CACHE_LINE_SIZE and an address difference DELTA, compute the probability that they will fall - in different cache lines. DISTINCT_ITERS is the number of - distinct iterations after which the pattern repeats itself. + in different cache lines. Return true if the computed miss rate + is not greater than the ACCEPTABLE_MISS_RATE. DISTINCT_ITERS is the + number of distinct iterations after which the pattern repeats itself. ALIGN_UNIT is the unit of alignment in bytes. 
*/ -static int -compute_miss_rate (unsigned HOST_WIDE_INT cache_line_size, +static bool +is_miss_rate_acceptable (unsigned HOST_WIDE_INT cache_line_size, HOST_WIDE_INT step, HOST_WIDE_INT delta, unsigned HOST_WIDE_INT distinct_iters, int align_unit) { unsigned align, iter; - int total_positions, miss_positions, miss_rate; + int total_positions, miss_positions, max_allowed_miss_positions; int address1, address2, cache_line1, cache_line2; /* It always misses if delta is greater than or equal to the cache line size. */ - if (delta >= cache_line_size) - return 1000; + if (delta >= (HOST_WIDE_INT) cache_line_size) + return false; - total_positions = 0; miss_positions = 0; + total_positions = (cache_line_size / align_unit) * distinct_iters; + max_allowed_miss_positions = (ACCEPTABLE_MISS_RATE * total_positions) / 1000; /* Iterate through all possible alignments of the first memory reference within its cache line. */ @@ -673,12 +675,14 @@ compute_miss_rate (unsigned HOST_WIDE_INT cache_line_size, address2 = address1 + delta; cache_line1 = address1 / cache_line_size; cache_line2 = address2 / cache_line_size; - total_positions += 1; if (cache_line1 != cache_line2) - miss_positions += 1; + { + miss_positions += 1; + if (miss_positions > max_allowed_miss_positions) + return false; + } } - miss_rate = 1000 * miss_positions / total_positions; - return miss_rate; + return true; } /* Prune the prefetch candidate REF using the reuse with BY. 
@@ -694,7 +698,6 @@ prune_ref_by_group_reuse (struct mem_ref *ref, struct mem_ref *by, HOST_WIDE_INT delta = delta_b - delta_r; HOST_WIDE_INT hit_from; unsigned HOST_WIDE_INT prefetch_before, prefetch_block; - int miss_rate; HOST_WIDE_INT reduced_step; unsigned HOST_WIDE_INT reduced_prefetch_block; tree ref_type; @@ -793,9 +796,8 @@ prune_ref_by_group_reuse (struct mem_ref *ref, struct mem_ref *by, delta %= step; ref_type = TREE_TYPE (ref->mem); align_unit = TYPE_ALIGN (ref_type) / 8; - miss_rate = compute_miss_rate(prefetch_block, step, delta, - reduced_prefetch_block, align_unit); - if (miss_rate <= ACCEPTABLE_MISS_RATE) + if (is_miss_rate_acceptable (prefetch_block, step, delta, + reduced_prefetch_block, align_unit)) { /* Do not reduce prefetch_before if we meet beyond cache size. */ if (prefetch_before > L2_CACHE_SIZE_BYTES / PREFETCH_BLOCK) @@ -809,9 +811,8 @@ prune_ref_by_group_reuse (struct mem_ref *ref, struct mem_ref *by, /* Try also the following iteration. */ prefetch_before++; delta = step - delta; - miss_rate = compute_miss_rate(prefetch_block, step, delta, - reduced_prefetch_block, align_unit); - if (miss_rate <= ACCEPTABLE_MISS_RATE) + if (is_miss_rate_acceptable (prefetch_block, step, delta, + reduced_prefetch_block, align_unit)) { if (prefetch_before < ref->prefetch_before) ref->prefetch_before = prefetch_before; -- cgit v1.2.1 From d6bf3b142da7fb201b42a7ca49a1a27c04df4bb0 Mon Sep 17 00:00:00 2001 From: rsandifo Date: Sun, 4 Jul 2010 22:13:09 +0000 Subject: gcc/ * optabs.h (optab_handler, convert_optab_handler): Turn into inline functions that return an insn code. (set_optab_handler, set_convert_optab_handler): New functions. * builtins.c: Replace optab_handler(X)->insn_code with optab_handler or set_optab_handler thoughout. Likewise convert_optab_handler(X)->insn_code with convert_optab_handler and set_convert_optab_handler. 
* expmed.c, expr.c, genopinit.c, ifcvt.c, optabs.c, reload.c, reload1.c, stmt.c, targhooks.c, tree-ssa-loop-prefetch.c, tree-ssa-math-opts.c, tree-vect-data-refs.c, tree-vect-generic.c, tree-vect-loop.c, tree-vect-patterns.c, tree-vect-slp.c, tree-vect-stmts.c, config/m32c/m32c.c, config/rs6000/rs6000.c, config/spu/spu.c: Likewise. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@161808 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/tree-ssa-loop-prefetch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc/tree-ssa-loop-prefetch.c') diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index c3e90d235ba..96a8e920acc 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -1136,7 +1136,7 @@ nontemporal_store_p (struct mem_ref *ref) if (mode == BLKmode) return false; - code = optab_handler (storent_optab, mode)->insn_code; + code = optab_handler (storent_optab, mode); return code != CODE_FOR_nothing; } -- cgit v1.2.1 From 86638c2ef3b5ed40e2c8f19e5ce0cdbf86593413 Mon Sep 17 00:00:00 2001 From: rguenth Date: Mon, 5 Jul 2010 12:25:20 +0000 Subject: 2010-07-05 Richard Guenther * tree-ssa-loop-im.c (for_each_index): Do not handle ALIGN_INDIRECT_REF. (gen_lsm_tmp_name): Likewise. * tree-dump.c (dequeue_and_dump): Likewise. * tree-pretty-print.c (dump_generic_node): Likewise. (op_code_prio): Likewise. (op_symbol_code): Likewise. * tree.c (staticp): Likewise. (build1_stat): Likewise. * tree.h (INDIRECT_REF_P): Likewise. * fold-const.c (maybe_lvalue_p): Likewise. (operand_equal_p): Likewise. * tree-ssa-sccvn.c (copy_reference_ops_from_ref): Likewise. (ao_ref_init_from_vn_reference): Likewise. * tree-ssa-loop-ivopts.c (idx_find_step): Likewise. (find_interesting_uses_address): Likewise. * dwarf2out.c (loc_list_from_tree): Likewise. * gimplify.c (gimplify_expr): Likewise. * tree-eh.c (tree_could_trap_p): Likewise. * emit-rtl.c (set_mem_attributes_minus_bitpos): Likewise. * cfgexpand.c (expand_debug_expr): Likewise. 
* tree-ssa-pre.c (create_component_ref_by_pieces_1): Likewise. * tree-ssa-loop-prefetch.c (idx_analyze_ref): Likewise. * tree-cfg.c (verify_types_in_gimple_min_lval): Likewise. * config/rs6000/rs6000 (rs6000_check_sdmode): Likewise. * tree-ssa-operands.c (get_expr_operands): Likewise. * expr.c (safe_from_p): Likewise. (expand_expr_real_1): Likewise. TER BIT_AND_EXPRs into MEM_REFs. * tree-vect-data-refs.c (vect_setup_realignment): Build BIT_AND_EXPR and MEM_REF instead of ALIGN_INDIRECT_REF. * tree-vect-stmts.c (vectorizable_load): Likewise. * tree.def (ALIGN_INDIRECT_REF): Remove. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@161830 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/tree-ssa-loop-prefetch.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'gcc/tree-ssa-loop-prefetch.c') diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index 96a8e920acc..0282cbcc08a 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -404,8 +404,7 @@ idx_analyze_ref (tree base, tree *index, void *data) HOST_WIDE_INT idelta = 0, imult = 1; affine_iv iv; - if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF - || TREE_CODE (base) == ALIGN_INDIRECT_REF) + if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF) return false; if (!simple_iv (ar_data->loop, loop_containing_stmt (ar_data->stmt), -- cgit v1.2.1 From 0b205f4ca112a643f4f1b9c9886648b569e0b380 Mon Sep 17 00:00:00 2001 From: manu Date: Thu, 8 Jul 2010 04:22:54 +0000 Subject: =?UTF-8?q?2010-07-08=20=20Manuel=20L=C3=B3pez-Ib=C3=A1=C3=B1ez=20?= =?UTF-8?q?=20?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * toplev.h: Do not include diagnostic-core.h. Include diagnostic-core.h in every file that includes toplev.h. * c-tree.h: Do not include toplev.h. * pretty-print.h: Update comment. * Makefile.in: Update dependencies. * alias.c: Include diagnostic-core.h in every file that includes toplev.h. * attribs.c: Likewise. 
* auto-inc-dec.c: Likewise. * bb-reorder.c: Likewise. * bt-load.c: Likewise. * caller-save.c: Likewise. * calls.c: Likewise. * cfg.c: Likewise. * cfganal.c: Likewise. * cfgbuild.c: Likewise. * cfgcleanup.c: Likewise. * cfghooks.c: Likewise. * cfgloop.c: Likewise. * combine.c: Likewise. * config/alpha/alpha.c: Likewise. * config/arc/arc.c: Likewise. * config/arm/arm.c: Likewise. * config/arm/pe.c: Likewise. * config/avr/avr.c: Likewise. * config/bfin/bfin.c: Likewise. * config/cris/cris.c: Likewise. * config/crx/crx.c: Likewise. * config/darwin-c.c: Likewise. * config/darwin.c: Likewise. * config/fr30/fr30.c: Likewise. * config/frv/frv.c: Likewise. * config/h8300/h8300.c: Likewise. * config/host-darwin.c: Likewise. * config/i386/i386.c: Likewise. * config/i386/netware.c: Likewise. * config/i386/nwld.c: Likewise. * config/i386/winnt-cxx.c: Likewise. * config/i386/winnt-stubs.c: Likewise. * config/i386/winnt.c: Likewise. * config/ia64/ia64-c.c: Likewise. * config/ia64/ia64.c: Likewise. * config/iq2000/iq2000.c: Likewise. * config/lm32/lm32.c: Likewise. * config/m32c/m32c-pragma.c: Likewise. * config/m32c/m32c.c: Likewise. * config/m32r/m32r.c: Likewise. * config/m68hc11/m68hc11.c: Likewise. * config/m68k/m68k.c: Likewise. * config/mcore/mcore.c: Likewise. * config/mep/mep-pragma.c: Likewise. * config/mep/mep.c: Likewise. * config/mmix/mmix.c: Likewise. * config/mn10300/mn10300.c: Likewise. * config/moxie/moxie.c: Likewise. * config/pa/pa.c: Likewise. * config/pdp11/pdp11.c: Likewise. * config/picochip/picochip.c: Likewise. * config/rs6000/rs6000-c.c: Likewise. * config/rs6000/rs6000.c: Likewise. * config/rx/rx.c: Likewise. * config/s390/s390.c: Likewise. * config/score/score.c: Likewise. * config/score/score3.c: Likewise. * config/score/score7.c: Likewise. * config/sh/sh.c: Likewise. * config/sh/symbian-base.c: Likewise. * config/sh/symbian-c.c: Likewise. * config/sh/symbian-cxx.c: Likewise. * config/sol2-c.c: Likewise. * config/sol2.c: Likewise. 
* config/sparc/sparc.c: Likewise. * config/spu/spu.c: Likewise. * config/stormy16/stormy16.c: Likewise. * config/v850/v850-c.c: Likewise. * config/v850/v850.c: Likewise. * config/vax/vax.c: Likewise. * config/vxworks.c: Likewise. * config/xtensa/xtensa.c: Likewise. * convert.c: Likewise. * cse.c: Likewise. * cselib.c: Likewise. * dbgcnt.c: Likewise. * dbxout.c: Likewise. * ddg.c: Likewise. * dominance.c: Likewise. * emit-rtl.c: Likewise. * explow.c: Likewise. * expmed.c: Likewise. * fixed-value.c: Likewise. * fold-const.c: Likewise. * fwprop.c: Likewise. * gcse.c: Likewise. * ggc-common.c: Likewise. * ggc-page.c: Likewise. * ggc-zone.c: Likewise. * gimple-low.c: Likewise. * gimplify.c: Likewise. * graph.c: Likewise. * haifa-sched.c: Likewise. * ifcvt.c: Likewise. * implicit-zee.c: Likewise. * integrate.c: Likewise. * ira-build.c: Likewise. * ira-color.c: Likewise. * ira-conflicts.c: Likewise. * ira-costs.c: Likewise. * ira-lives.c: Likewise. * ira.c: Likewise. * lists.c: Likewise. * loop-doloop.c: Likewise. * loop-iv.c: Likewise. * lto-opts.c: Likewise. * lto-symtab.c: Likewise. * main.c: Likewise. * modulo-sched.c: Likewise. * optabs.c: Likewise. * params.c: Likewise. * plugin.c: Likewise. * postreload-gcse.c: Likewise. * postreload.c: Likewise. * predict.c: Likewise. * profile.c: Likewise. * real.c: Likewise. * regcprop.c: Likewise. * reginfo.c: Likewise. * regmove.c: Likewise. * reorg.c: Likewise. * resource.c: Likewise. * rtl.c: Likewise. * rtlanal.c: Likewise. * sched-deps.c: Likewise. * sched-ebb.c: Likewise. * sched-rgn.c: Likewise. * sdbout.c: Likewise. * sel-sched-dump.c: Likewise. * sel-sched-ir.c: Likewise. * simplify-rtx.c: Likewise. * stmt.c: Likewise. * stor-layout.c: Likewise. * store-motion.c: Likewise. * targhooks.c: Likewise. * tree-cfg.c: Likewise. * tree-cfgcleanup.c: Likewise. * tree-dump.c: Likewise. * tree-eh.c: Likewise. * tree-inline.c: Likewise. * tree-nomudflap.c: Likewise. * tree-object-size.c: Likewise. * tree-optimize.c: Likewise. 
* tree-outof-ssa.c: Likewise. * tree-phinodes.c: Likewise. * tree-profile.c: Likewise. * tree-ssa-ccp.c: Likewise. * tree-ssa-coalesce.c: Likewise. * tree-ssa-live.c: Likewise. * tree-ssa-loop-niter.c: Likewise. * tree-ssa-loop-prefetch.c: Likewise. * tree-ssa-loop.c: Likewise. * tree-ssa-structalias.c: Likewise. * tree-ssa-uninit.c: Likewise. * tree-ssa.c: Likewise. * tree-vect-data-refs.c: Likewise. * tree-vect-loop-manip.c: Likewise. * tree-vect-loop.c: Likewise. * tree-vect-patterns.c: Likewise. * tree-vect-stmts.c: Likewise. * tree-vrp.c: Likewise. * varasm.c: Likewise. * vec.c: Likewise. * web.c: Likewise. * xcoffout.c: Likewise. c-family/ * c-common.h: Include diagnostic-core.h. Error if already included. * c-semantics.c: Do not define GCC_DIAG_STYLE here. cp/ * cp-tree.h: Do not include toplev.h. java/ * boehm.c: Include diagnostic-core.h in every file that includes toplev.h. * class.c: Likewise. * constants.c: Likewise. * decl.c: Likewise. * except.c: Likewise. * expr.c: Likewise. * jcf-parse.c: Likewise. * mangle.c: Likewise. * mangle_name.c: Likewise. * resource.c: Likewise. * typeck.c: Likewise. * verify-glue.c: Likewise. ada/ * gcc-interface/utils.c: Include diagnostic-core.h in every file that includes toplev.h. lto/ * lto-coff.c: Include diagnostic-core.h in every file that includes toplev.h. * lto-elf.c: Likewise. * lto-lang.c: Likewise. * lto-macho.c: Likewise. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@161943 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/tree-ssa-loop-prefetch.c | 1 + 1 file changed, 1 insertion(+) (limited to 'gcc/tree-ssa-loop-prefetch.c') diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index 0282cbcc08a..58abfd34e90 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -36,6 +36,7 @@ along with GCC; see the file COPYING3. 
If not see #include "hashtab.h" #include "tree-chrec.h" #include "tree-scalar-evolution.h" +#include "diagnostic-core.h" #include "toplev.h" #include "params.h" #include "langhooks.h" -- cgit v1.2.1 From 76595608131ec929f1e9cd5e5935d99fb4dac342 Mon Sep 17 00:00:00 2001 From: spop Date: Fri, 9 Jul 2010 23:08:55 +0000 Subject: pr44576 Avoid unnecessary prefetch analysis by distributing the cost models 2010-07-09 Changpeng Fang PR tree-optimization/44576 * tree-ssa-loop-prefetch.c (trip_count_to_ahead_ratio_too_small_p): New. Pull out from is_loop_prefetching_profitable to implement the trip count to ahead ratio heuristic. (mem_ref_count_reasonable_p): New. Pull out from is_loop_prefetching_profitable to implement the instruction to memory reference ratio heuristic. Also consider not reasonable if the memory reference count is above a threshold (to avoid explosive compilation time). (insn_to_prefetch_ratio_too_small_p): New. Pull out from is_loop_prefetching_profitable to implement the instruction to prefetch ratio heuristic. (is_loop_prefetching_profitable): Removed. (loop_prefetch_arrays): Distribute the cost analysis across the function to allow early exit of the prefetch analysis. is_loop_prefetching_profitable is split into three functions, with each one called as early as possible. (PREFETCH_MAX_MEM_REFS_PER_LOOP): New. Threshold above which the number of memory references in a loop is considered too many. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@162023 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/tree-ssa-loop-prefetch.c | 157 ++++++++++++++++++++++++++++++------------- 1 file changed, 110 insertions(+), 47 deletions(-) (limited to 'gcc/tree-ssa-loop-prefetch.c') diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index 58abfd34e90..008f2ce4b7b 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -110,19 +110,29 @@ along with GCC; see the file COPYING3. 
If not see prefetch instructions with guards in cases where 5) was not sufficient to satisfy the constraints? - The function is_loop_prefetching_profitable() implements a cost model - to determine if prefetching is profitable for a given loop. The cost - model has two heuristcs: - 1. A heuristic that determines whether the given loop has enough CPU - ops that can be overlapped with cache missing memory ops. - If not, the loop won't benefit from prefetching. This is implemented - by requirung the ratio between the instruction count and the mem ref - count to be above a certain minimum. - 2. A heuristic that disables prefetching in a loop with an unknown trip - count if the prefetching cost is above a certain limit. The relative - prefetching cost is estimated by taking the ratio between the - prefetch count and the total intruction count (this models the I-cache - cost). + A cost model is implemented to determine whether or not prefetching is + profitable for a given loop. The cost model has three heuristics: + + 1. Function trip_count_to_ahead_ratio_too_small_p implements a + heuristic that determines whether or not the loop has too few + iterations (compared to ahead). Prefetching is not likely to be + beneficial if the trip count to ahead ratio is below a certain + minimum. + + 2. Function mem_ref_count_reasonable_p implements a heuristic that + determines whether the given loop has enough CPU ops that can be + overlapped with cache missing memory ops. If not, the loop + won't benefit from prefetching. In the implementation, + prefetching is not considered beneficial if the ratio between + the instruction count and the mem ref count is below a certain + minimum. + + 3. Function insn_to_prefetch_ratio_too_small_p implements a + heuristic that disables prefetching in a loop if the prefetching + cost is above a certain limit. 
The relative prefetching cost is + estimated by taking the ratio between the prefetch count and the + total intruction count (this models the I-cache cost). + The limits used in these heuristics are defined as parameters with reasonable default values. Machine-specific default values will be added later. @@ -238,6 +248,14 @@ struct mem_ref_group #define PREFETCH_MOD_TO_UNROLL_FACTOR_RATIO 4 #endif +/* Some of the prefetch computations have quadratic complexity. We want to + avoid huge compile times and, therefore, want to limit the amount of + memory references per loop where we consider prefetching. */ + +#ifndef PREFETCH_MAX_MEM_REFS_PER_LOOP +#define PREFETCH_MAX_MEM_REFS_PER_LOOP 200 +#endif + /* The memory reference. */ struct mem_ref @@ -1640,24 +1658,51 @@ determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs, } } -/* Do a cost-benefit analysis to determine if prefetching is profitable - for the current loop given the following parameters: +/* Determine whether or not the trip count to ahead ratio is too small based + on prefitablility consideration. AHEAD: the iteration ahead distance, - EST_NITER: the estimated trip count, + EST_NITER: the estimated trip count. */ + +static bool +trip_count_to_ahead_ratio_too_small_p (unsigned ahead, HOST_WIDE_INT est_niter) +{ + /* Assume trip count to ahead ratio is big enough if the trip count could not + be estimated at compile time. */ + if (est_niter < 0) + return false; + + if (est_niter < (HOST_WIDE_INT) (TRIP_COUNT_TO_AHEAD_RATIO * ahead)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, + "Not prefetching -- loop estimated to roll only %d times\n", + (int) est_niter); + return true; + } + + return false; +} + +/* Determine whether or not the number of memory references in the loop is + reasonable based on the profitablity and compilation time considerations. 
NINSNS: estimated number of instructions in the loop, - PREFETCH_COUNT: an estimate of the number of prefetches MEM_REF_COUNT: total number of memory references in the loop. */ static bool -is_loop_prefetching_profitable (unsigned ahead, HOST_WIDE_INT est_niter, - unsigned ninsns, unsigned prefetch_count, - unsigned mem_ref_count, unsigned unroll_factor) +mem_ref_count_reasonable_p (unsigned ninsns, unsigned mem_ref_count) { - int insn_to_mem_ratio, insn_to_prefetch_ratio; + int insn_to_mem_ratio; if (mem_ref_count == 0) return false; + /* Miss rate computation (is_miss_rate_acceptable) and dependence analysis + (compute_all_dependences) have high costs based on quadratic complexity. + To avoid huge compilation time, we give up prefetching if mem_ref_count + is too large. */ + if (mem_ref_count > PREFETCH_MAX_MEM_REFS_PER_LOOP) + return false; + /* Prefetching improves performance by overlapping cache missing memory accesses with CPU operations. If the loop does not have enough CPU operations to overlap with memory operations, prefetching @@ -1678,6 +1723,21 @@ is_loop_prefetching_profitable (unsigned ahead, HOST_WIDE_INT est_niter, return false; } + return true; +} + +/* Determine whether or not the instruction to prefetch ratio in the loop is + too small based on the profitablity consideration. + NINSNS: estimated number of instructions in the loop, + PREFETCH_COUNT: an estimate of the number of prefetches, + UNROLL_FACTOR: the factor to unroll the loop if prefetching. */ + +static bool +insn_to_prefetch_ratio_too_small_p (unsigned ninsns, unsigned prefetch_count, + unsigned unroll_factor) +{ + int insn_to_prefetch_ratio; + /* Prefetching most likely causes performance degradation when the instruction to prefetch ratio is too small. Too many prefetch instructions in a loop may reduce the I-cache performance. 
@@ -1697,23 +1757,10 @@ is_loop_prefetching_profitable (unsigned ahead, HOST_WIDE_INT est_niter, fprintf (dump_file, "Not prefetching -- instruction to prefetch ratio (%d) too small\n", insn_to_prefetch_ratio); - return false; + return true; } - /* Could not do further estimation if the trip count is unknown. Just assume - prefetching is profitable. Too aggressive??? */ - if (est_niter < 0) - return true; - - if (est_niter < (HOST_WIDE_INT) (TRIP_COUNT_TO_AHEAD_RATIO * ahead)) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, - "Not prefetching -- loop estimated to roll only %d times\n", - (int) est_niter); - return false; - } - return true; + return false; } @@ -1738,9 +1785,31 @@ loop_prefetch_arrays (struct loop *loop) return false; } + /* FIXME: the time should be weighted by the probabilities of the blocks in + the loop body. */ + time = tree_num_loop_insns (loop, &eni_time_weights); + if (time == 0) + return false; + + ahead = (PREFETCH_LATENCY + time - 1) / time; + est_niter = estimated_loop_iterations_int (loop, false); + + /* Prefetching is not likely to be profitable if the trip count to ahead + ratio is too small. */ + if (trip_count_to_ahead_ratio_too_small_p (ahead, est_niter)) + return false; + + ninsns = tree_num_loop_insns (loop, &eni_size_weights); + /* Step 1: gather the memory references. */ refs = gather_memory_references (loop, &no_other_refs, &mem_ref_count); + /* Give up prefetching if the number of memory references in the + loop is not reasonable based on profitablity and compilation time + considerations. */ + if (!mem_ref_count_reasonable_p (ninsns, mem_ref_count)) + goto fail; + /* Step 2: estimate the reuse effects. */ prune_by_reuse (refs); @@ -1749,15 +1818,7 @@ loop_prefetch_arrays (struct loop *loop) determine_loop_nest_reuse (loop, refs, no_other_refs); - /* Step 3: determine the ahead and unroll factor. */ - - /* FIXME: the time should be weighted by the probabilities of the blocks in - the loop body. 
*/ - time = tree_num_loop_insns (loop, &eni_time_weights); - ahead = (PREFETCH_LATENCY + time - 1) / time; - est_niter = estimated_loop_iterations_int (loop, false); - - ninsns = tree_num_loop_insns (loop, &eni_size_weights); + /* Step 3: determine unroll factor. */ unroll_factor = determine_unroll_factor (loop, refs, ninsns, &desc, est_niter); @@ -1773,8 +1834,10 @@ loop_prefetch_arrays (struct loop *loop) ahead, unroll_factor, est_niter, ninsns, mem_ref_count, prefetch_count); - if (!is_loop_prefetching_profitable (ahead, est_niter, ninsns, prefetch_count, - mem_ref_count, unroll_factor)) + /* Prefetching is not likely to be profitable if the instruction to prefetch + ratio is too small. */ + if (insn_to_prefetch_ratio_too_small_p (ninsns, prefetch_count, + unroll_factor)) goto fail; mark_nontemporal_stores (loop, refs); -- cgit v1.2.1 From 2512209b236017dc6514906a6f6cf624a490d680 Mon Sep 17 00:00:00 2001 From: aesok Date: Thu, 15 Jul 2010 18:47:23 +0000 Subject: * tree.h (enum tree_index): Add TI_INTEGER_THREE. (integer_three_node): Add. * tree.c (build_common_tree_nodes_2): Use integer_type_node instead of NULL_TREE in build_int_cst calls. Initialize the integer_three_node. * builtins.c (expand_builtin_prefetch): Use common tree nodes instead of calling build_int_cst. * tree-ssa-sccvn.c (copy_reference_ops_from_ref): Ditto. * tree-ssa-loop-ivopts.c (idx_find_step): Ditto. (find_interesting_uses_address): Ditto. * tree-ssa-alias.c (ao_ref_init_from_ptr_and_size): Ditto. * tree-eh.c (lower_eh_constructs_2): Ditto. * tree-vect-loop.c (get_initial_def_for_induction): Ditto. * c-typeck.c (really_start_incremental_init, push_init_level): Ditto. * expmed.c (expand_divmod): Ditto. * tree-mudflap.c (mx_register_decls): Ditto. * varasm.c (array_size_for_constructor): Ditto. * tree-ssa-loop-prefetch.c (issue_prefetch_ref): Ditto. * c-parser.c (c_parser_postfix_expression): Ditto. /cp * decl.c (integer_three_node): Remove.
(cxx_init_decl_processing): Do not initialize the integer_three_node. * cp-tree.h (integer_three_node): Remove. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@162230 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/tree-ssa-loop-prefetch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc/tree-ssa-loop-prefetch.c') diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index 008f2ce4b7b..a5b511262f9 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -1083,7 +1083,7 @@ issue_prefetch_ref (struct mem_ref *ref, unsigned unroll_factor, unsigned ahead) addr_base = force_gimple_operand_gsi (&bsi, unshare_expr (addr_base), true, NULL, true, GSI_SAME_STMT); write_p = ref->write_p ? integer_one_node : integer_zero_node; - local = build_int_cst (integer_type_node, nontemporal ? 0 : 3); + local = nontemporal ? integer_zero_node : integer_three_node; for (ap = 0; ap < n_prefetches; ap++) { -- cgit v1.2.1 From dbdf4b3183c88b679cdb0b2741b5cb7891b67f54 Mon Sep 17 00:00:00 2001 From: froydnj Date: Fri, 16 Jul 2010 17:51:56 +0000 Subject: * tree.c (build_common_builtin_nodes): Use build_function_type_list instead of build_function_type. * tree-ssa-loop-prefetch.c (tree_ssa_prefetch_arrays): Likewise. * cgraphunit.c (cgraph_build_static_cdtor): Likewise. 
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@162264 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/tree-ssa-loop-prefetch.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'gcc/tree-ssa-loop-prefetch.c') diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index a5b511262f9..1932d05c366 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -1903,10 +1903,8 @@ tree_ssa_prefetch_arrays (void) if (!built_in_decls[BUILT_IN_PREFETCH]) { - tree type = build_function_type (void_type_node, - tree_cons (NULL_TREE, - const_ptr_type_node, - NULL_TREE)); + tree type = build_function_type_list (void_type_node, + const_ptr_type_node, NULL_TREE); tree decl = add_builtin_function ("__builtin_prefetch", type, BUILT_IN_PREFETCH, BUILT_IN_NORMAL, NULL, NULL_TREE); -- cgit v1.2.1 From 0e948838fb53991de9810171eaa1e694d34541b3 Mon Sep 17 00:00:00 2001 From: spop Date: Wed, 21 Jul 2010 15:44:24 +0000 Subject: Fix PR 44955: Strip off the real and complex parts. 2010-07-21 Changpeng Fang PR tree-optimization/44955 * tree-ssa-loop-prefetch.c (analyze_ref): Strip off the real and imaginary parts of a complex, so that they can have the same base and fall into the same group. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@162381 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/tree-ssa-loop-prefetch.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'gcc/tree-ssa-loop-prefetch.c') diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index 1932d05c366..4840704be19 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -486,10 +486,18 @@ analyze_ref (struct loop *loop, tree *ref_p, tree *base, *step = NULL_TREE; *delta = 0; - /* First strip off the component references. Ignore bitfields.
*/ - if (TREE_CODE (ref) == COMPONENT_REF - && DECL_NONADDRESSABLE_P (TREE_OPERAND (ref, 1))) - ref = TREE_OPERAND (ref, 0); + /* First strip off the component references. Ignore bitfields. + Also strip off the real and imagine parts of a complex, so that + they can have the same base. */ + if (TREE_CODE (ref) == REALPART_EXPR + || TREE_CODE (ref) == IMAGPART_EXPR + || (TREE_CODE (ref) == COMPONENT_REF + && DECL_NONADDRESSABLE_P (TREE_OPERAND (ref, 1)))) + { + if (TREE_CODE (ref) == IMAGPART_EXPR) + *delta += int_size_in_bytes (TREE_TYPE (ref)); + ref = TREE_OPERAND (ref, 0); + } *ref_p = ref; -- cgit v1.2.1