-rw-r--r--  gcc/ChangeLog                          30
-rw-r--r--  gcc/Makefile.in                         4
-rw-r--r--  gcc/cgraph.c                           27
-rw-r--r--  gcc/cgraph.h                           13
-rw-r--r--  gcc/cgraphbuild.c                      25
-rw-r--r--  gcc/cgraphunit.c                       16
-rw-r--r--  gcc/ipa-inline.c                       93
-rw-r--r--  gcc/passes.c                           21
-rw-r--r--  gcc/predict.c                           3
-rw-r--r--  gcc/predict.h                           1
-rw-r--r--  gcc/testsuite/g++.dg/gomp/pr30696.C    12
-rw-r--r--  gcc/tree-inline.c                      24
-rw-r--r--  gcc/tree-pass.h                        15
13 files changed, 218 insertions, 66 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 1467f09f43f..f9238459a01 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,33 @@
+2007-02-09 Jan Hubicka <jh@suse.cz>
+
+ * Makefile.in (passes.o, ipa-inline.o): Add dependencies.
+ * cgraphbuild.c (build_cgraph_edges): Compute frequencies.
+ (rebuild_cgraph_edges): Likewise.
+ * cgraph.c (cgraph_create_edge): Add new argument frequency.
+ (dump_cgraph_node): Dump frequencies.
+ (cgraph_clone_edge): Add frequency scales.
+ (cgraph_clone_node): Add frequency.
+ * cgraph.h (cgraph_edge): Add frequency field.
+ (CGRAPH_FREQ_BASE, CGRAPH_FREQ_MAX): New constants.
+ (cgraph_create_edge, cgraph_clone_edge, cgraph_clone_node): Update.
+ * tree-pass.h (TODO_rebuild_frequencies): New constant.
+ * cgraphunit.c (verify_cgraph_node): Verify frequencies.
+ (cgraph_copy_node_for_versioning): Update call of cgraph_clone_edge.
+ (save_inline_function_body): Update call of cgraph_clone_node.
+ * ipa-inline.c: Include rtl.h.
+ (cgraph_clone_inlined_nodes): Update call of cgraph_clone_node.
+ (cgraph_edge_badness): Use frequencies.
+ (cgraph_decide_recursive_inlining): Update cloning.
+ (cgraph_decide_inlining_of_small_functions): Dump frequency.
+ * predict.c (estimate_bb_frequencies): Export.
+ * predict.h (estimate_bb_frequencies): Declare.
+ * tree-inline.c (copy_bb): Watch overflows.
+ (expand_call_inline): Update call of cgraph_create_edge.
+ (optimize_inline_calls): Use TODO flags to update frequencies.
+ * passes.c: Include predict.h.
+ (init_optimization_passes): Move profile ahead.
+ (execute_function_todo): Handle TODO_rebuild_frequencies.
+
2007-02-09 Roger Sayle <roger@eyesopen.com>
* config/alpha/alpha.c (emit_insxl): Force the first operand of
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 22111d5ea76..6b8882608d3 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -2293,7 +2293,7 @@ passes.o : passes.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) \
langhooks.h insn-flags.h $(CFGLAYOUT_H) $(REAL_H) $(CFGLOOP_H) \
hosthooks.h $(CGRAPH_H) $(COVERAGE_H) tree-pass.h $(TREE_DUMP_H) \
$(GGC_H) $(INTEGRATE_H) $(CPPLIB_H) opts.h $(TREE_FLOW_H) $(TREE_INLINE_H) \
- gt-passes.h
+ gt-passes.h $(PREDICT_H)
main.o : main.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) toplev.h
@@ -2441,7 +2441,7 @@ ipa-cp.o : ipa-cp.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
ipa-inline.o : ipa-inline.c gt-ipa-inline.h $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
$(TREE_H) langhooks.h $(TREE_INLINE_H) $(FLAGS_H) $(CGRAPH_H) intl.h \
$(DIAGNOSTIC_H) $(FIBHEAP_H) $(PARAMS_H) $(TIMEVAR_H) tree-pass.h \
- $(COVERAGE_H) $(HASHTAB_H)
+ $(COVERAGE_H) $(HASHTAB_H) $(RTL_H)
ipa-utils.o : ipa-utils.c $(IPA_UTILS_H) $(CONFIG_H) $(SYSTEM_H) \
coretypes.h $(TM_H) $(TREE_H) $(TREE_FLOW_H) $(TREE_INLINE_H) langhooks.h \
pointer-set.h $(GGC_H) $(C_COMMON_H) $(TREE_GIMPLE_H) \
diff --git a/gcc/cgraph.c b/gcc/cgraph.c
index 13b7fcd2938..87114e2ff13 100644
--- a/gcc/cgraph.c
+++ b/gcc/cgraph.c
@@ -324,7 +324,7 @@ cgraph_set_call_stmt (struct cgraph_edge *e, tree new_stmt)
struct cgraph_edge *
cgraph_create_edge (struct cgraph_node *caller, struct cgraph_node *callee,
- tree call_stmt, gcov_type count, int nest)
+ tree call_stmt, gcov_type count, int freq, int nest)
{
struct cgraph_edge *edge = GGC_NEW (struct cgraph_edge);
#ifdef ENABLE_CHECKING
@@ -362,6 +362,10 @@ cgraph_create_edge (struct cgraph_node *caller, struct cgraph_node *callee,
caller->callees = edge;
callee->callers = edge;
edge->count = count;
+ gcc_assert (count >= 0);
+ edge->frequency = freq;
+ gcc_assert (freq >= 0);
+ gcc_assert (freq <= CGRAPH_FREQ_MAX);
edge->loop_nest = nest;
if (caller->call_site_hash)
{
@@ -713,6 +717,9 @@ dump_cgraph_node (FILE *f, struct cgraph_node *node)
if (edge->count)
fprintf (f, "("HOST_WIDEST_INT_PRINT_DEC"x) ",
(HOST_WIDEST_INT)edge->count);
+ if (edge->frequency)
+ fprintf (f, "(%.2f per call) ",
+ edge->frequency / (double)CGRAPH_FREQ_BASE);
if (!edge->inline_failed)
fprintf(f, "(inlined) ");
}
@@ -727,6 +734,9 @@ dump_cgraph_node (FILE *f, struct cgraph_node *node)
if (edge->count)
fprintf (f, "("HOST_WIDEST_INT_PRINT_DEC"x) ",
(HOST_WIDEST_INT)edge->count);
+ if (edge->frequency)
+ fprintf (f, "(%.2f per call) ",
+ edge->frequency / (double)CGRAPH_FREQ_BASE);
if (edge->loop_nest)
fprintf (f, "(nested in %i loops) ", edge->loop_nest);
}
@@ -795,13 +805,16 @@ cgraph_function_possibly_inlined_p (tree decl)
/* Create a clone of edge E attached to node N in the callgraph; CALL_STMT is the new edge's call statement. */
struct cgraph_edge *
cgraph_clone_edge (struct cgraph_edge *e, struct cgraph_node *n,
- tree call_stmt, gcov_type count_scale, int loop_nest,
- bool update_original)
+ tree call_stmt, gcov_type count_scale, int freq_scale,
+ int loop_nest, bool update_original)
{
struct cgraph_edge *new;
+ gcov_type count = e->count * count_scale / REG_BR_PROB_BASE;
+ gcov_type freq = e->frequency * (gcov_type) freq_scale / CGRAPH_FREQ_BASE;
- new = cgraph_create_edge (n, e->callee, call_stmt,
- e->count * count_scale / REG_BR_PROB_BASE,
+ if (freq > CGRAPH_FREQ_MAX)
+ freq = CGRAPH_FREQ_MAX;
+ new = cgraph_create_edge (n, e->callee, call_stmt, count, freq,
e->loop_nest + loop_nest);
new->inline_failed = e->inline_failed;
@@ -821,7 +834,7 @@ cgraph_clone_edge (struct cgraph_edge *e, struct cgraph_node *n,
function's profile to reflect the fact that part of execution is handled
by node. */
struct cgraph_node *
-cgraph_clone_node (struct cgraph_node *n, gcov_type count, int loop_nest,
+cgraph_clone_node (struct cgraph_node *n, gcov_type count, int freq, int loop_nest,
bool update_original)
{
struct cgraph_node *new = cgraph_create_node ();
@@ -853,7 +866,7 @@ cgraph_clone_node (struct cgraph_node *n, gcov_type count, int loop_nest,
}
for (e = n->callees;e; e=e->next_callee)
- cgraph_clone_edge (e, new, e->call_stmt, count_scale, loop_nest,
+ cgraph_clone_edge (e, new, e->call_stmt, count_scale, freq, loop_nest,
update_original);
new->next_clone = n->next_clone;
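
For reference, the cloning arithmetic combines two fixed-point scales: counts scale by count_scale over REG_BR_PROB_BASE, frequencies by freq_scale over CGRAPH_FREQ_BASE, saturating at CGRAPH_FREQ_MAX. A minimal standalone model of that arithmetic (not the GCC API; REG_BR_PROB_BASE's value of 10000 is assumed from GCC of this era, the CGRAPH_FREQ_* constants come from this patch):

#include <assert.h>

#define REG_BR_PROB_BASE 10000   /* assumed value, from GCC of this era */
#define CGRAPH_FREQ_BASE 1000
#define CGRAPH_FREQ_MAX  100000

/* Counts scale by count_scale / REG_BR_PROB_BASE; frequencies scale by
   freq_scale / CGRAPH_FREQ_BASE and saturate at CGRAPH_FREQ_MAX, as in
   cgraph_clone_edge above.  */
static long long
scale_count (long long count, long long count_scale)
{
  return count * count_scale / REG_BR_PROB_BASE;
}

static int
scale_freq (int frequency, int freq_scale)
{
  long long freq = (long long) frequency * freq_scale / CGRAPH_FREQ_BASE;
  return freq > CGRAPH_FREQ_MAX ? CGRAPH_FREQ_MAX : (int) freq;
}

int
main (void)
{
  /* Identity scales: the cloned edge keeps its profile.  */
  assert (scale_count (800, REG_BR_PROB_BASE) == 800);
  assert (scale_freq (2500, CGRAPH_FREQ_BASE) == 2500);
  /* Cloning into a path taken half the time halves the frequency.  */
  assert (scale_freq (2500, CGRAPH_FREQ_BASE / 2) == 1250);
  /* Saturation keeps the result within the verifier's bounds.  */
  assert (scale_freq (CGRAPH_FREQ_MAX, 2 * CGRAPH_FREQ_BASE) == CGRAPH_FREQ_MAX);
  return 0;
}
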
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 179a5f1714e..b905bfdf6c3 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -201,10 +201,17 @@ struct cgraph_edge GTY((chain_next ("%h.next_caller"), chain_prev ("%h.prev_call
const char *inline_failed;
/* Expected number of executions: calculated in profile.c. */
gcov_type count;
+ /* Expected frequency of executions within the function.
+ When set to CGRAPH_FREQ_BASE, the edge is expected to be called once
+ per function call. The range is 0 to CGRAPH_FREQ_MAX. */
+ int frequency;
/* Depth of loop nest, 1 means no loop nest. */
int loop_nest;
};
+#define CGRAPH_FREQ_BASE 1000
+#define CGRAPH_FREQ_MAX 100000
+
typedef struct cgraph_edge *cgraph_edge_p;
DEF_VEC_P(cgraph_edge_p);
@@ -290,7 +297,7 @@ void cgraph_release_function_body (struct cgraph_node *);
void cgraph_node_remove_callees (struct cgraph_node *node);
struct cgraph_edge *cgraph_create_edge (struct cgraph_node *,
struct cgraph_node *,
- tree, gcov_type, int);
+ tree, gcov_type, int, int);
struct cgraph_node *cgraph_node (tree);
struct cgraph_node *cgraph_node_for_asm (tree asmname);
struct cgraph_edge *cgraph_edge (struct cgraph_node *, tree);
@@ -301,8 +308,8 @@ struct cgraph_rtl_info *cgraph_rtl_info (tree);
const char * cgraph_node_name (struct cgraph_node *);
struct cgraph_edge * cgraph_clone_edge (struct cgraph_edge *,
struct cgraph_node *,
- tree, gcov_type, int, bool);
-struct cgraph_node * cgraph_clone_node (struct cgraph_node *, gcov_type,
+ tree, gcov_type, int, int, bool);
+struct cgraph_node * cgraph_clone_node (struct cgraph_node *, gcov_type, int,
int, bool);
void cgraph_redirect_edge_callee (struct cgraph_edge *, struct cgraph_node *);
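
To make the new field concrete: it is a fixed-point expected execution count per invocation of the caller, which is what the new "%.2f per call" output in dump_cgraph_node prints. A tiny standalone illustration (calls_per_invocation is a hypothetical helper, not part of the patch):

#include <stdio.h>

#define CGRAPH_FREQ_BASE 1000
#define CGRAPH_FREQ_MAX  100000

/* frequency is fixed point: CGRAPH_FREQ_BASE means the call executes
   exactly once per invocation of the caller.  */
static double
calls_per_invocation (int frequency)
{
  return frequency / (double) CGRAPH_FREQ_BASE;
}

int
main (void)
{
  printf ("%.2f per call\n", calls_per_invocation (CGRAPH_FREQ_BASE));      /* 1.00 */
  printf ("%.2f per call\n", calls_per_invocation (3 * CGRAPH_FREQ_BASE));  /* a call in a loop run 3x */
  printf ("%.2f per call\n", calls_per_invocation (CGRAPH_FREQ_MAX));       /* 100.00, the cap */
  return 0;
}
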
diff --git a/gcc/cgraphbuild.c b/gcc/cgraphbuild.c
index a2df564aa65..9d89aeb8d01 100644
--- a/gcc/cgraphbuild.c
+++ b/gcc/cgraphbuild.c
@@ -115,6 +115,10 @@ build_cgraph_edges (void)
struct pointer_set_t *visited_nodes = pointer_set_create ();
block_stmt_iterator bsi;
tree step;
+ int entry_freq = ENTRY_BLOCK_PTR->frequency;
+
+ if (!entry_freq)
+ entry_freq = 1;
/* Create the callgraph edges and record the nodes referenced by the
function body. */
@@ -127,8 +131,12 @@ build_cgraph_edges (void)
if (call && (decl = get_callee_fndecl (call)))
{
+ int freq = (!bb->frequency && !entry_freq ? CGRAPH_FREQ_BASE
+ : bb->frequency * CGRAPH_FREQ_BASE / entry_freq);
+ if (freq > CGRAPH_FREQ_MAX)
+ freq = CGRAPH_FREQ_MAX;
cgraph_create_edge (node, cgraph_node (decl), stmt,
- bb->count,
+ bb->count, freq,
bb->loop_depth);
walk_tree (&TREE_OPERAND (call, 1),
record_reference, node, visited_nodes);
@@ -196,6 +204,10 @@ rebuild_cgraph_edges (void)
basic_block bb;
struct cgraph_node *node = cgraph_node (current_function_decl);
block_stmt_iterator bsi;
+ int entry_freq = ENTRY_BLOCK_PTR->frequency;
+
+ if (!entry_freq)
+ entry_freq = 1;
cgraph_node_remove_callees (node);
@@ -209,9 +221,14 @@ rebuild_cgraph_edges (void)
tree decl;
if (call && (decl = get_callee_fndecl (call)))
- cgraph_create_edge (node, cgraph_node (decl), stmt,
- bb->count,
- bb->loop_depth);
+ {
+ int freq = (!bb->frequency && !entry_freq ? CGRAPH_FREQ_BASE
+ : bb->frequency * CGRAPH_FREQ_BASE / entry_freq);
+ if (freq > CGRAPH_FREQ_MAX)
+ freq = CGRAPH_FREQ_MAX;
+ cgraph_create_edge (node, cgraph_node (decl), stmt,
+ bb->count, freq, bb->loop_depth);
+ }
}
initialize_inline_failed (node);
gcc_assert (!node->global.inlined_to);
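
Both builders above derive the edge frequency identically: express the call's basic-block frequency relative to the entry block in CGRAPH_FREQ_BASE fixed point, then clamp to CGRAPH_FREQ_MAX. A slightly simplified standalone sketch (compute_call_frequency is an illustrative name; the !bb->frequency special case is dropped because entry_freq has already been forced to at least 1):

#include <assert.h>

#define CGRAPH_FREQ_BASE 1000
#define CGRAPH_FREQ_MAX  100000

/* Shared frequency computation of build_cgraph_edges and
   rebuild_cgraph_edges: the call's block frequency relative to the
   entry block, in fixed point, clamped to CGRAPH_FREQ_MAX.  */
static int
compute_call_frequency (int bb_freq, int entry_freq)
{
  int freq;

  /* The builders force a zero entry frequency to 1 before dividing.  */
  if (!entry_freq)
    entry_freq = 1;
  freq = bb_freq * CGRAPH_FREQ_BASE / entry_freq;
  if (freq > CGRAPH_FREQ_MAX)
    freq = CGRAPH_FREQ_MAX;
  return freq;
}

int
main (void)
{
  /* Block as frequent as the entry: exactly one call per invocation.  */
  assert (compute_call_frequency (500, 500) == CGRAPH_FREQ_BASE);
  /* Block inside a loop ten times hotter than the entry.  */
  assert (compute_call_frequency (5000, 500) == 10 * CGRAPH_FREQ_BASE);
  /* Extreme ratios saturate.  */
  assert (compute_call_frequency (10000, 1) == CGRAPH_FREQ_MAX);
  return 0;
}
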
diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
index 892e952e238..2d1ecda64ac 100644
--- a/gcc/cgraphunit.c
+++ b/gcc/cgraphunit.c
@@ -519,6 +519,16 @@ verify_cgraph_node (struct cgraph_node *node)
error ("caller edge count is negative");
error_found = true;
}
+ if (e->frequency < 0)
+ {
+ error ("caller edge frequency is negative");
+ error_found = true;
+ }
+ if (e->frequency > CGRAPH_FREQ_MAX)
+ {
+ error ("caller edge frequency is too large");
+ error_found = true;
+ }
if (!e->inline_failed)
{
if (node->global.inlined_to
@@ -1412,7 +1422,8 @@ cgraph_copy_node_for_versioning (struct cgraph_node *old_version,
also cloned. */
for (e = old_version->callees;e; e=e->next_callee)
{
- new_e = cgraph_clone_edge (e, new_version, e->call_stmt, 0, e->loop_nest, true);
+ new_e = cgraph_clone_edge (e, new_version, e->call_stmt, 0, CGRAPH_FREQ_BASE,
+ e->loop_nest, true);
new_e->count = e->count;
}
/* Fix recursive calls.
@@ -1511,7 +1522,8 @@ save_inline_function_body (struct cgraph_node *node)
{
struct cgraph_edge *e;
- first_clone = cgraph_clone_node (node, node->count, 0, false);
+ first_clone = cgraph_clone_node (node, node->count, CGRAPH_FREQ_BASE, 0,
+ false);
first_clone->needed = 0;
first_clone->reachable = 1;
/* Recursively clone all bodies. */
diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c
index 736a3ae7a93..50b02fbf6f5 100644
--- a/gcc/ipa-inline.c
+++ b/gcc/ipa-inline.c
@@ -139,6 +139,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
#include "coverage.h"
#include "ggc.h"
#include "tree-flow.h"
+#include "rtl.h"
/* Mode incremental inliner operate on:
@@ -215,7 +216,7 @@ cgraph_clone_inlined_nodes (struct cgraph_edge *e, bool duplicate, bool update_o
else
{
struct cgraph_node *n;
- n = cgraph_clone_node (e->callee, e->count, e->loop_nest,
+ n = cgraph_clone_node (e->callee, e->count, e->frequency, e->loop_nest,
update_original);
cgraph_redirect_edge_callee (e, n);
}
@@ -478,44 +479,75 @@ cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
smallest badness are inlined first. After each inlining is performed
the costs of all caller edges of nodes affected are recomputed so the
metrics may accurately depend on values such as number of inlinable callers
- of the function or function body size.
-
- With profiling we use number of executions of each edge to drive the cost.
- We also should distinguish hot and cold calls where the cold calls are
- inlined into only when code size is overall improved.
- */
+ of the function or function body size. */
static int
cgraph_edge_badness (struct cgraph_edge *edge)
{
+ int badness;
+ int growth =
+ cgraph_estimate_size_after_inlining (1, edge->caller, edge->callee);
+
+ growth -= edge->caller->global.insns;
+
+ /* Always prefer inlining saving code size. */
+ if (growth <= 0)
+ badness = INT_MIN - growth;
+
+ /* When profiling is available, base priorities on -(#calls / growth).
+ So we optimize for overall number of "executed" inlined calls. */
if (max_count)
+ badness = ((int)((double)edge->count * INT_MIN / max_count)) / growth;
+
+ /* When a function-local profile is available, base priorities on
+ growth / frequency, so we optimize for overall frequency of inlined
+ calls. This is not too accurate, since while the call might be frequent
+ within the function, the function itself may be called infrequently.
+
+ Another objective to optimize for is the number of different calls
+ inlined. We add the estimated growth after inlining all functions to
+ bias the priorities slightly in this direction (so functions of the
+ same size that are called fewer times get priority). */
+ else if (flag_guess_branch_prob)
{
+ int div = edge->frequency * 100 / CGRAPH_FREQ_BASE;
int growth =
cgraph_estimate_size_after_inlining (1, edge->caller, edge->callee);
growth -= edge->caller->global.insns;
+ badness = growth * 256;
+
+ /* Compress the range so we don't overflow. */
+ if (div > 256)
+ div = 256 + ceil_log2 (div) - 8;
+ if (div < 1)
+ div = 1;
+ if (badness > 0)
+ badness /= div;
+ badness += cgraph_estimate_growth (edge->callee);
+ }
+ /* When a function-local profile is not available, or it does not give
+ useful information (i.e. the frequency is zero), base the cost on
+ loop nest and overall size growth, so we optimize for the overall
+ number of functions fully inlined into the program. */
+ else
+ {
+ int nest = MIN (edge->loop_nest, 8);
+ badness = cgraph_estimate_growth (edge->callee) * 256;
- /* Always prefer inlining saving code size. */
- if (growth <= 0)
- return INT_MIN - growth;
- return ((int)((double)edge->count * INT_MIN / max_count)) / growth;
+ /* Decrease badness if call is nested. */
+ if (badness > 0)
+ badness >>= nest;
+ else
+ badness <<= nest;
}
+ /* Make recursive inlining always happen after other inlining is done. */
+ if (cgraph_recursive_inlining_p (edge->caller, edge->callee, NULL))
+ return badness + 1;
else
- {
- int nest = MIN (edge->loop_nest, 8);
- int badness = cgraph_estimate_growth (edge->callee) * 256;
-
- /* Decrease badness if call is nested. */
- if (badness > 0)
- badness >>= nest;
- else
- badness <<= nest;
-
- /* Make recursive inlining happen always after other inlining is done. */
- if (cgraph_recursive_inlining_p (edge->caller, edge->callee, NULL))
- return badness + 1;
- else
- return badness;
- }
+ return badness;
}
/* Recompute heap nodes for each of caller edge. */
@@ -651,7 +683,7 @@ cgraph_decide_recursive_inlining (struct cgraph_node *node)
cgraph_node_name (node));
/* We need original clone to copy around. */
- master_clone = cgraph_clone_node (node, node->count, 1, false);
+ master_clone = cgraph_clone_node (node, node->count, CGRAPH_FREQ_BASE, 1, false);
master_clone->needed = true;
for (e = master_clone->callees; e; e = e->next_callee)
if (!e->inline_failed)
@@ -831,10 +863,11 @@ cgraph_decide_inlining_of_small_functions (void)
fprintf (dump_file,
" to be inlined into %s\n"
" Estimated growth after inlined into all callees is %+i insns.\n"
- " Estimated badness is %i.\n",
+ " Estimated badness is %i, frequency %.2f.\n",
cgraph_node_name (edge->caller),
cgraph_estimate_growth (edge->callee),
- cgraph_edge_badness (edge));
+ cgraph_edge_badness (edge),
+ edge->frequency / (double)CGRAPH_FREQ_BASE);
if (edge->count)
fprintf (dump_file," Called "HOST_WIDEST_INT_PRINT_DEC"x\n", edge->count);
}
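
The guessed-profile branch above divides the size growth by a compressed frequency ratio, so hot call sites get smaller (better) badness than cold ones of equal growth. Worked numbers in a standalone model follow (badness_guessed_profile and ceil_log2_model are illustrative stand-ins; the two growth parameters replace the cgraph_estimate_* calls):

#include <assert.h>

#define CGRAPH_FREQ_BASE 1000

/* Stand-in for GCC's ceil_log2 in this sketch.  */
static int
ceil_log2_model (unsigned int x)
{
  int l = 0;
  while ((1u << l) < x)
    l++;
  return l;
}

/* Model of the flag_guess_branch_prob branch of cgraph_edge_badness:
   growth_after_inlining and overall_growth replace the cgraph_estimate_*
   calls; frequency is the edge's fixed-point frequency.  Lower badness
   is inlined first.  */
static int
badness_guessed_profile (int growth_after_inlining, int overall_growth,
                         int frequency)
{
  int div = frequency * 100 / CGRAPH_FREQ_BASE;
  int badness = growth_after_inlining * 256;

  /* Compress the divisor's range so the division stays meaningful.  */
  if (div > 256)
    div = 256 + ceil_log2_model (div) - 8;
  if (div < 1)
    div = 1;
  if (badness > 0)
    badness /= div;
  badness += overall_growth;
  return badness;
}

int
main (void)
{
  /* Same growth, hotter call site => smaller (better) badness.  */
  int cold = badness_guessed_profile (20, 40, CGRAPH_FREQ_BASE / 10);
  int hot = badness_guessed_profile (20, 40, 4 * CGRAPH_FREQ_BASE);
  assert (hot < cold);
  return 0;
}
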
diff --git a/gcc/passes.c b/gcc/passes.c
index 44ea3f9ddc3..35e4164f4d3 100644
--- a/gcc/passes.c
+++ b/gcc/passes.c
@@ -83,6 +83,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
#include "tree-flow.h"
#include "tree-pass.h"
#include "tree-dump.h"
+#include "predict.h"
#if defined (DWARF2_UNWIND_INFO) || defined (DWARF2_DEBUGGING_INFO)
#include "dwarf2out.h"
@@ -493,6 +494,7 @@ init_optimization_passes (void)
NEXT_PASS (pass_merge_phi);
NEXT_PASS (pass_dce);
NEXT_PASS (pass_tail_recursion);
+ NEXT_PASS (pass_profile);
NEXT_PASS (pass_release_ssa_names);
}
NEXT_PASS (pass_rebuild_cgraph_edges);
@@ -540,7 +542,6 @@ init_optimization_passes (void)
NEXT_PASS (pass_phiopt);
NEXT_PASS (pass_may_alias);
NEXT_PASS (pass_tail_recursion);
- NEXT_PASS (pass_profile);
NEXT_PASS (pass_ch);
NEXT_PASS (pass_stdarg);
NEXT_PASS (pass_lower_complex);
@@ -886,6 +887,24 @@ execute_function_todo (void *data)
fflush (dump_file);
}
+ if (flags & TODO_rebuild_frequencies)
+ {
+ if (profile_status == PROFILE_GUESSED)
+ {
+ loop_optimizer_init (0);
+ add_noreturn_fake_exit_edges ();
+ mark_irreducible_loops ();
+ connect_infinite_loops_to_exit ();
+ estimate_bb_frequencies ();
+ remove_fake_exit_edges ();
+ loop_optimizer_finalize ();
+ }
+ else if (profile_status == PROFILE_READ)
+ counts_to_freqs ();
+ else
+ gcc_unreachable ();
+ }
+
#if defined ENABLE_CHECKING
if (flags & TODO_verify_ssa)
verify_ssa (true);
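
Passes request this work by including TODO_rebuild_frequencies in their returned flags; optimize_inline_calls does exactly that in tree-inline.c below. A standalone model of the dispatch logic above (handle_todo and the enum are illustrative; the bit values are the ones this patch assigns in tree-pass.h):

#include <stdio.h>

/* Bit values as assigned by this patch in tree-pass.h.  */
#define TODO_rebuild_frequencies (1 << 9)
#define TODO_update_ssa          (1 << 10)

/* Illustrative stand-in for GCC's profile_status values.  */
enum profile_state { PROFILE_ABSENT, PROFILE_READ, PROFILE_GUESSED };

/* Model of the new execute_function_todo logic: guessed profiles are
   re-estimated from branch probabilities, read profiles are rederived
   from execution counts, and the flag is invalid without a profile.  */
static void
handle_todo (unsigned int flags, enum profile_state status)
{
  if (flags & TODO_rebuild_frequencies)
    {
      if (status == PROFILE_GUESSED)
        puts ("rebuild: estimate_bb_frequencies ()");
      else if (status == PROFILE_READ)
        puts ("rebuild: counts_to_freqs ()");
      else
        puts ("error: nothing to rebuild frequencies from");
    }
}

int
main (void)
{
  /* The flag set a pass like the inliner might hand back.  */
  handle_todo (TODO_update_ssa | TODO_rebuild_frequencies, PROFILE_GUESSED);
  return 0;
}
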
diff --git a/gcc/predict.c b/gcc/predict.c
index b29d04a56cf..39de19b21aa 100644
--- a/gcc/predict.c
+++ b/gcc/predict.c
@@ -79,7 +79,6 @@ static bool last_basic_block_p (basic_block);
static void compute_function_frequency (void);
static void choose_function_section (void);
static bool can_predict_insn_p (rtx);
-static void estimate_bb_frequencies (void);
/* Information we hold about each branch predictor.
Filled using information from predict.def. */
@@ -1685,7 +1684,7 @@ expensive_function_p (int threshold)
/* Estimate basic blocks frequency by given branch probabilities. */
-static void
+void
estimate_bb_frequencies (void)
{
basic_block bb;
diff --git a/gcc/predict.h b/gcc/predict.h
index 8972525092a..3862a63939b 100644
--- a/gcc/predict.h
+++ b/gcc/predict.h
@@ -38,5 +38,6 @@ enum prediction
extern void predict_insn_def (rtx, enum br_predictor, enum prediction);
extern int counts_to_freqs (void);
+extern void estimate_bb_frequencies (void);
#endif /* GCC_PREDICT_H */
diff --git a/gcc/testsuite/g++.dg/gomp/pr30696.C b/gcc/testsuite/g++.dg/gomp/pr30696.C
new file mode 100644
index 00000000000..8f4f2d9686a
--- /dev/null
+++ b/gcc/testsuite/g++.dg/gomp/pr30696.C
@@ -0,0 +1,12 @@
+inline void foo() {}
+
+int main()
+{
+ foo();
+
+#pragma omp parallel for
+ for ( int i=0; i<1; ++i )
+ foo();
+
+ return 0;
+}
diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index 75a0553f72c..d2d9487d367 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -777,8 +777,13 @@ copy_bb (copy_body_data *id, basic_block bb, int frequency_scale, int count_scal
copy_basic_block = create_basic_block (NULL, (void *) 0,
(basic_block) bb->prev_bb->aux);
copy_basic_block->count = bb->count * count_scale / REG_BR_PROB_BASE;
- copy_basic_block->frequency = (bb->frequency
+
+ /* We are going to rebuild frequencies from scratch. These values have
+ only minor importance for driving canonicalize_loop_headers. */
+ copy_basic_block->frequency = ((gcov_type)bb->frequency
* frequency_scale / REG_BR_PROB_BASE);
+ if (copy_basic_block->frequency > BB_FREQ_MAX)
+ copy_basic_block->frequency = BB_FREQ_MAX;
copy_bsi = bsi_start (copy_basic_block);
for (bsi = bsi_start (bb);
@@ -839,7 +844,7 @@ copy_bb (copy_body_data *id, basic_block bb, int frequency_scale, int count_scal
edge = cgraph_edge (id->src_node, orig_stmt);
if (edge)
cgraph_clone_edge (edge, id->dst_node, stmt,
- REG_BR_PROB_BASE, 1, true);
+ REG_BR_PROB_BASE, CGRAPH_FREQ_BASE, 1, true);
break;
case CB_CGE_MOVE_CLONES:
@@ -2400,8 +2405,14 @@ expand_call_inline (basic_block bb, tree stmt, tree *tp, void *data)
(incorrect node sharing is the most common reason for missing edges). */
gcc_assert (dest->needed || !flag_unit_at_a_time);
cgraph_create_edge (id->dst_node, dest, stmt,
- bb->count, bb->loop_depth)->inline_failed
+ bb->count, CGRAPH_FREQ_BASE,
+ bb->loop_depth)->inline_failed
= N_("originally indirect function call not considered for inlining");
+ if (dump_file)
+ {
+ fprintf (dump_file, "Created new direct edge to %s",
+ cgraph_node_name (dest));
+ }
goto egress;
}
@@ -2808,10 +2819,6 @@ optimize_inline_calls (tree fn)
gcc_assert (e->inline_failed);
}
#endif
- /* We need to rescale frequencies again to peak at REG_BR_PROB_BASE
- as inlining loops might increase the maximum. */
- if (ENTRY_BLOCK_PTR->count)
- counts_to_freqs ();
/* We are not going to maintain the cgraph edges up to date.
Kill it so it won't confuse us. */
@@ -2830,7 +2837,8 @@ optimize_inline_calls (tree fn)
throw and they don't care to proactively update local EH info. This is
done later in fixup_cfg pass that also execute the verification. */
return (TODO_update_ssa | TODO_cleanup_cfg
- | (gimple_in_ssa_p (cfun) ? TODO_remove_unused_locals : 0));
+ | (gimple_in_ssa_p (cfun) ? TODO_remove_unused_locals : 0)
+ | (profile_status != PROFILE_ABSENT ? TODO_rebuild_frequencies : 0));
}
/* FN is a function that has a complete body, and CLONE is a function whose
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index bb6261371c1..56679ca96d0 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -167,6 +167,7 @@ struct dump_file_info
#define TODO_verify_loops (1 << 6)
#define TODO_dump_cgraph (1 << 7)
#define TODO_remove_functions (1 << 8)
+#define TODO_rebuild_frequencies (1 << 9)
/* To-do flags for calls to update_ssa. */
@@ -178,13 +179,13 @@ struct dump_file_info
in blocks that have one or more edges with no incoming definition
for O_j. This would lead to uninitialized warnings for O_j's
symbol. */
-#define TODO_update_ssa (1 << 9)
+#define TODO_update_ssa (1 << 10)
/* Update the SSA form without inserting any new PHI nodes at all.
This is used by passes that have either inserted all the PHI nodes
themselves or passes that need only to patch use-def and def-def
chains for virtuals (e.g., DCE). */
-#define TODO_update_ssa_no_phi (1 << 10)
+#define TODO_update_ssa_no_phi (1 << 11)
/* Insert PHI nodes everywhere they are needed. No pruning of the
IDF is done. This is used by passes that need the PHI nodes for
@@ -195,7 +196,7 @@ struct dump_file_info
may be doing something wrong. Inserting PHI nodes for an old name
where not all edges carry a new replacement may lead to silent
codegen errors or spurious uninitialized warnings. */
-#define TODO_update_ssa_full_phi (1 << 11)
+#define TODO_update_ssa_full_phi (1 << 12)
/* Passes that update the SSA form on their own may want to delegate
the updating of virtual names to the generic updater. Since FUD
@@ -203,20 +204,20 @@ struct dump_file_info
to do. NOTE: If this flag is used, any OLD->NEW mappings for real
names are explicitly destroyed and only the symbols marked for
renaming are processed. */
-#define TODO_update_ssa_only_virtuals (1 << 12)
+#define TODO_update_ssa_only_virtuals (1 << 13)
/* Some passes leave unused local variables that can be removed from
cfun->unexpanded_var_list. This reduces the size of dump files and
the memory footprint for VAR_DECLs. */
-#define TODO_remove_unused_locals (1 << 13)
+#define TODO_remove_unused_locals (1 << 14)
/* Internally used for the first in a sequence of passes. It is set
for the passes that are handed to register_dump_files. */
-#define TODO_set_props (1 << 14)
+#define TODO_set_props (1 << 15)
/* Set by passes that may make SMT's that were previously never used
in statements, used. */
-#define TODO_update_smt_usage (1 << 15)
+#define TODO_update_smt_usage (1 << 16)
#define TODO_update_ssa_any \
(TODO_update_ssa \