summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/ChangeLog33
-rw-r--r--gcc/cgraph.c3
-rw-r--r--gcc/cgraph.h2
-rw-r--r--gcc/cgraphunit.c81
-rw-r--r--gcc/common.opt4
-rw-r--r--gcc/doc/invoke.texi12
-rw-r--r--gcc/ipa-inline.c103
-rw-r--r--gcc/ipa.c2
-rw-r--r--gcc/tree-inline.c4
-rw-r--r--gcc/tree-optimize.c60
-rw-r--r--gcc/tree-pass.h3
-rw-r--r--gcc/tree-profile.c27
12 files changed, 308 insertions, 26 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d7afa04b487..cd1f15cb4f9 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,36 @@
+2005-06-28 Jan Hubicka <jh@suse.cz>
+
+ * cgraph.c (cgraph_remove_node): Do not release function bodies until
+ full cgraph is built.
+ * cgraph.h (cgraph_decide_inlining_incrementally): Add early argument.
+ * cgraphunit.c (cgraph_finalize_function): Update call of
+ cgraph_decide_inlining_incrementally.
+ (initialize_inline_failed): Break out of ...
+ (cgraph_analyze_function): ... here.
+ (rebuild_cgraph_edges): New function.
+ (pass_rebuild_cgraph_edges): New pass.
+ * common.opt (fearly-inlining): New flag.
+ * ipa-inline.c: Include ggc.h.
+ (cgraph_clone_inlined_nodes): Avoid re-using the original copy
+ when cgraph is not fully built.
+ (cgraph_decide_inlining_incrementally): Add early mode.
+ (cgraph_early_inlining): New function.
+ (cgraph_gate_early_inlining): Likewise.
+ (pass_early_ipa_inline): New pass.
+ * ipa.c (cgraph_postorder): NULLify aux pointer.
+ * tree-inline.c (expand_call_inline): Avoid warning early.
+ * tree-optimize.c (pass_early_local_passes): New.
+ (execute_cleanup_cfg_pre_ipa): New.
+ (pass_cleanup_cfg): New.
+ (register_dump_files): Fix handling subpasses of IPA pass.
+ (init_tree_optimization_passes): Add early passes.
+ (execute_ipa_pass_list): Fix handling of subpasses of IPA pass.
+ * tree-pass.h (pass_early_tree_profile, pass_rebuild_cgraph_edges,
+ pass_early_ipa_inline): New passes.
+ * tree-profile.c (do_early_tree_profiling, pass_early_tree_profile): New.
+
+ * doc/invoke.texi: Document -fearly-inlining.
+
2005-06-28 Kelley Cook <kcook@gcc.gnu.org>
* doc/include/fdl.texi: Merge in changes from upstream.
diff --git a/gcc/cgraph.c b/gcc/cgraph.c
index 68e3ea6c2b6..93648327cf9 100644
--- a/gcc/cgraph.c
+++ b/gcc/cgraph.c
@@ -473,7 +473,8 @@ cgraph_remove_node (struct cgraph_node *node)
{
struct cgraph_node *n = *slot;
if (!n->next_clone && !n->global.inlined_to
- && (TREE_ASM_WRITTEN (n->decl) || DECL_EXTERNAL (n->decl)))
+ && (cgraph_global_info_ready
+ && (TREE_ASM_WRITTEN (n->decl) || DECL_EXTERNAL (n->decl))))
kill_body = true;
}
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 0cbe947266e..40a2648b36e 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -286,7 +286,7 @@ bool cgraph_remove_unreachable_nodes (bool, FILE *);
int cgraph_postorder (struct cgraph_node **);
/* In ipa-inline.c */
-void cgraph_decide_inlining_incrementally (struct cgraph_node *);
+bool cgraph_decide_inlining_incrementally (struct cgraph_node *, bool);
void cgraph_clone_inlined_nodes (struct cgraph_edge *, bool);
void cgraph_mark_inline_edge (struct cgraph_edge *);
bool cgraph_default_inline_p (struct cgraph_node *);
diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
index ab8924d2bcd..a29dace94dc 100644
--- a/gcc/cgraphunit.c
+++ b/gcc/cgraphunit.c
@@ -427,7 +427,7 @@ cgraph_finalize_function (tree decl, bool nested)
if (!flag_unit_at_a_time)
{
cgraph_analyze_function (node);
- cgraph_decide_inlining_incrementally (node);
+ cgraph_decide_inlining_incrementally (node, false);
}
if (decide_is_function_needed (node, decl))
@@ -569,6 +569,73 @@ cgraph_create_edges (struct cgraph_node *node, tree body)
visited_nodes = NULL;
}
+/* Give initial reasons why inlining would fail. Those get either
+ NULLified or, more usually, overwritten by a more precise reason
+ later. */
+static void
+initialize_inline_failed (struct cgraph_node *node)
+{
+ struct cgraph_edge *e;
+
+ for (e = node->callers; e; e = e->next_caller)
+ {
+ gcc_assert (!e->callee->global.inlined_to);
+ gcc_assert (e->inline_failed);
+ if (node->local.redefined_extern_inline)
+ e->inline_failed = N_("redefined extern inline functions are not "
+ "considered for inlining");
+ else if (!node->local.inlinable)
+ e->inline_failed = N_("function not inlinable");
+ else
+ e->inline_failed = N_("function not considered for inlining");
+ }
+}
+
+/* Rebuild the call edges of the current function after running passes
+ that are not aware of cgraph updating. */
+static void
+rebuild_cgraph_edges (void)
+{
+ basic_block bb;
+ struct cgraph_node *node = cgraph_node (current_function_decl);
+ block_stmt_iterator bsi;
+
+ cgraph_node_remove_callees (node);
+
+ node->count = ENTRY_BLOCK_PTR->count;
+
+ FOR_EACH_BB (bb)
+ for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
+ {
+ tree stmt = bsi_stmt (bsi);
+ tree call = get_call_expr_in (stmt);
+ tree decl;
+
+ if (call && (decl = get_callee_fndecl (call)))
+ cgraph_create_edge (node, cgraph_node (decl), stmt,
+ bb->count,
+ bb->loop_depth);
+ }
+ initialize_inline_failed (node);
+ gcc_assert (!node->global.inlined_to);
+}
+
+struct tree_opt_pass pass_rebuild_cgraph_edges =
+{
+ NULL, /* name */
+ NULL, /* gate */
+ rebuild_cgraph_edges, /* execute */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ 0, /* tv_id */
+ PROP_cfg, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+ 0 /* letter */
+};
/* Verify cgraph nodes of given cgraph node. */
void
@@ -764,7 +831,6 @@ static void
cgraph_analyze_function (struct cgraph_node *node)
{
tree decl = node->decl;
- struct cgraph_edge *e;
current_function_decl = decl;
push_cfun (DECL_STRUCT_FUNCTION (decl));
@@ -778,16 +844,7 @@ cgraph_analyze_function (struct cgraph_node *node)
if (node->local.inlinable)
node->local.disregard_inline_limits
= lang_hooks.tree_inlining.disregard_inline_limits (decl);
- for (e = node->callers; e; e = e->next_caller)
- {
- if (node->local.redefined_extern_inline)
- e->inline_failed = N_("redefined extern inline functions are not "
- "considered for inlining");
- else if (!node->local.inlinable)
- e->inline_failed = N_("function not inlinable");
- else
- e->inline_failed = N_("function not considered for inlining");
- }
+ initialize_inline_failed (node);
if (flag_really_no_inline && !node->local.disregard_inline_limits)
node->local.inlinable = 0;
/* Inlining characteristics are maintained by the cgraph_mark_inline. */
diff --git a/gcc/common.opt b/gcc/common.opt
index 4d097c78fc3..66782ce32ae 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -476,6 +476,10 @@ finline-functions
Common Report Var(flag_inline_functions)
Integrate simple functions into their callers
+fearly-inlining
+Common Report Var(flag_early_inlining) Init(1)
+Perform early inlining
+
finline-limit-
Common RejectNegative Joined UInteger
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 41345ee4ae6..140983bc208 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -300,7 +300,7 @@ Objective-C and Objective-C++ Dialects}.
-fbranch-target-load-optimize2 -fbtr-bb-exclusive @gol
-fcaller-saves -fcprop-registers -fcse-follow-jumps @gol
-fcse-skip-blocks -fcx-limited-range -fdata-sections @gol
--fdelayed-branch -fdelete-null-pointer-checks @gol
+-fdelayed-branch -fdelete-null-pointer-checks -fearly-inlining @gol
-fexpensive-optimizations -ffast-math -ffloat-store @gol
-fforce-addr -fforce-mem -ffunction-sections @gol
-fgcse -fgcse-lm -fgcse-sm -fgcse-las -fgcse-after-reload @gol
@@ -4450,6 +4450,16 @@ assembler code in its own right.
Enabled at level @option{-O3}.
+@item -fearly-inlining
+@opindex fearly-inlining
+Inline functions marked by @code{always_inline} and functions whose body seems
+smaller than the function call overhead early, before doing
+@option{-fprofile-generate} instrumentation and the real inlining pass. Doing so
+makes profiling significantly cheaper and usually makes inlining faster on
+programs having large chains of nested wrapper functions.
+
+Enabled by default.
+
@item -finline-limit=@var{n}
@opindex finline-limit
By default, GCC limits the size of functions that can be inlined. This flag
diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c
index c176eb719da..26ea8f5e5c3 100644
--- a/gcc/ipa-inline.c
+++ b/gcc/ipa-inline.c
@@ -79,6 +79,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
#include "intl.h"
#include "tree-pass.h"
#include "coverage.h"
+#include "ggc.h"
/* Statistics we collect about inlining algorithm. */
static int ncalls_inlined;
@@ -120,7 +121,7 @@ cgraph_clone_inlined_nodes (struct cgraph_edge *e, bool duplicate)
if (!e->callee->callers->next_caller
&& (!e->callee->needed || DECL_EXTERNAL (e->callee->decl))
&& duplicate
- && flag_unit_at_a_time)
+ && (flag_unit_at_a_time && cgraph_global_info_ready))
{
gcc_assert (!e->callee->global.inlined_to);
if (!DECL_EXTERNAL (e->callee->decl))
@@ -870,10 +871,11 @@ cgraph_decide_inlining (void)
/* Decide on the inlining. We do so in the topological order to avoid
expenses on updating data structures. */
-void
-cgraph_decide_inlining_incrementally (struct cgraph_node *node)
+bool
+cgraph_decide_inlining_incrementally (struct cgraph_node *node, bool early)
{
struct cgraph_edge *e;
+ bool inlined = false;
/* First of all look for always inline functions. */
for (e = node->callees; e; e = e->next_callee)
@@ -883,7 +885,13 @@ cgraph_decide_inlining_incrementally (struct cgraph_node *node)
/* ??? It is possible that renaming variable removed the function body
in duplicate_decls. See gcc.c-torture/compile/20011119-2.c */
&& DECL_SAVED_TREE (e->callee->decl))
- cgraph_mark_inline (e);
+ {
+ if (dump_file && early)
+ fprintf (dump_file, " Early inlining %s into %s\n",
+ cgraph_node_name (e->callee), cgraph_node_name (node));
+ cgraph_mark_inline (e);
+ inlined = true;
+ }
/* Now do the automatic inlining. */
if (!flag_really_no_inline)
@@ -892,15 +900,36 @@ cgraph_decide_inlining_incrementally (struct cgraph_node *node)
&& e->inline_failed
&& !e->callee->local.disregard_inline_limits
&& !cgraph_recursive_inlining_p (node, e->callee, &e->inline_failed)
+ && (!early
+ || (cgraph_estimate_size_after_inlining (1, e->caller, node)
+ <= e->caller->global.insns))
&& cgraph_check_inline_limits (node, e->callee, &e->inline_failed)
&& DECL_SAVED_TREE (e->callee->decl))
{
if (cgraph_default_inline_p (e->callee))
- cgraph_mark_inline (e);
- else
+ {
+ if (dump_file && early)
+ fprintf (dump_file, " Early inlining %s into %s\n",
+ cgraph_node_name (e->callee), cgraph_node_name (node));
+ cgraph_mark_inline (e);
+ inlined = true;
+ }
+ else if (!early)
e->inline_failed
= N_("--param max-inline-insns-single limit reached");
}
+ if (early && inlined)
+ {
+ push_cfun (DECL_STRUCT_FUNCTION (node->decl));
+ tree_register_cfg_hooks ();
+ current_function_decl = node->decl;
+ optimize_inline_calls (current_function_decl);
+ node->local.self_insns = node->global.insns;
+ current_function_decl = NULL;
+ pop_cfun ();
+ ggc_collect ();
+ }
+ return inlined;
}
/* When inlining shall be performed. */
@@ -920,7 +949,67 @@ struct tree_opt_pass pass_ipa_inline =
0, /* static_pass_number */
TV_INTEGRATION, /* tv_id */
0, /* properties_required */
- PROP_trees, /* properties_provided */
+ PROP_cfg, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_dump_cgraph | TODO_dump_func, /* todo_flags_finish */
+ 0 /* letter */
+};
+
+/* Do inlining of small functions.  Doing so early helps profiling and other
+   passes to be somewhat more effective and avoids some code duplication in
+   the later real inlining pass for testcases with very many function calls.
+
+   The nodes collected by cgraph_postorder are walked from the last entry to
+   the first, and cgraph_decide_inlining_incrementally is run in early mode
+   on every analyzed, inlinable node that is needed or reachable and has
+   callers.  Unreachable nodes are removed afterwards.  Nothing is done when
+   errors or sorries have already been reported.  */
+static void
+cgraph_early_inlining (void)
+{
+  struct cgraph_node *node;
+  struct cgraph_node **order;
+  int nnodes;
+  int i;
+
+  /* Bail out before allocating anything, so that this early return does
+     not leak the ORDER array.  */
+  if (sorrycount || errorcount)
+    return;
+#ifdef ENABLE_CHECKING
+  /* The aux pointers are expected to be clear on entry.  */
+  for (node = cgraph_nodes; node; node = node->next)
+    gcc_assert (!node->aux);
+#endif
+
+  order = xcalloc (cgraph_n_nodes, sizeof (struct cgraph_node *));
+  nnodes = cgraph_postorder (order);
+  for (i = nnodes - 1; i >= 0; i--)
+    {
+      node = order[i];
+      if (node->analyzed && node->local.inlinable
+          && (node->needed || node->reachable)
+          && node->callers)
+        cgraph_decide_inlining_incrementally (node, true);
+    }
+  cgraph_remove_unreachable_nodes (true, dump_file);
+#ifdef ENABLE_CHECKING
+  /* Once the pass is done, no node may still be recorded as inlined into
+     another one.  */
+  for (node = cgraph_nodes; node; node = node->next)
+    gcc_assert (!node->global.inlined_to);
+#endif
+  free (order);
+}
+
+/* Gate for the early inlining pass: run it only when both flag_inline_trees
+ and flag_early_inlining are set. */
+static bool
+cgraph_gate_early_inlining (void)
+{
+ return flag_inline_trees && flag_early_inlining;
+}
+
+struct tree_opt_pass pass_early_ipa_inline =
+{
+ "einline", /* name */
+ cgraph_gate_early_inlining, /* gate */
+ cgraph_early_inlining, /* execute */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ TV_INTEGRATION, /* tv_id */
+ 0, /* properties_required */
+ PROP_cfg, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
TODO_dump_cgraph | TODO_dump_func, /* todo_flags_finish */
diff --git a/gcc/ipa.c b/gcc/ipa.c
index 64af3ebf4e5..20c90415e9d 100644
--- a/gcc/ipa.c
+++ b/gcc/ipa.c
@@ -83,6 +83,8 @@ cgraph_postorder (struct cgraph_node **order)
}
}
free (stack);
+ for (node = cgraph_nodes; node; node = node->next)
+ node->aux = NULL;
return order_pos;
}
diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index b625af3f90b..665707056f6 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -1969,7 +1969,9 @@ expand_call_inline (basic_block bb, tree stmt, tree *tp, void *data)
else if (warn_inline && DECL_DECLARED_INLINE_P (fn)
&& !DECL_IN_SYSTEM_HEADER (fn)
&& strlen (reason)
- && !lookup_attribute ("noinline", DECL_ATTRIBUTES (fn)))
+ && !lookup_attribute ("noinline", DECL_ATTRIBUTES (fn))
+ /* Avoid warnings during early inline pass. */
+ && (!flag_unit_at_a_time || cgraph_global_info_ready))
{
warning (0, "%Jinlining failed in call to %qF: %s", fn, fn, reason);
warning (0, "called from here");
diff --git a/gcc/tree-optimize.c b/gcc/tree-optimize.c
index 24db18684f3..5ebd204c1a9 100644
--- a/gcc/tree-optimize.c
+++ b/gcc/tree-optimize.c
@@ -55,7 +55,7 @@ int dump_flags;
bool in_gimple_form;
/* The root of the compilation pass tree, once constructed. */
-static struct tree_opt_pass *all_passes, *all_ipa_passes, * all_lowering_passes;
+static struct tree_opt_pass *all_passes, *all_ipa_passes, *all_lowering_passes;
/* Gate: execute, or not, all of the non-trivial optimizations. */
@@ -84,6 +84,52 @@ static struct tree_opt_pass pass_all_optimizations =
0 /* letter */
};
+static struct tree_opt_pass pass_early_local_passes =
+{
+ NULL, /* name */
+ gate_all_optimizations, /* gate */
+ NULL, /* execute */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ 0, /* tv_id */
+ 0, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+ 0 /* letter */
+};
+
+/* Pass: cleanup the CFG before the IPA passes run. This simply calls
+ cleanup_tree_cfg on the current function; it is scheduled as one of
+ the early local passes, right after tree profiling and before the
+ cgraph edges are rebuilt. */
+
+static void
+execute_cleanup_cfg_pre_ipa (void)
+{
+ cleanup_tree_cfg ();
+}
+
+static struct tree_opt_pass pass_cleanup_cfg =
+{
+ "cleanup_cfg", /* name */
+ NULL, /* gate */
+ execute_cleanup_cfg_pre_ipa, /* execute */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ 0, /* tv_id */
+ PROP_cfg, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_dump_func, /* todo_flags_finish */
+ 0 /* letter */
+};
+
+
/* Pass: cleanup the CFG just before expanding trees to RTL.
This is just a round of label cleanups and case node grouping
because after the tree optimizers have run such cleanups may
@@ -322,7 +368,7 @@ register_dump_files (struct tree_opt_pass *pass, bool ipa, int properties)
n++;
if (pass->sub)
- new_properties = register_dump_files (pass->sub, ipa, new_properties);
+ new_properties = register_dump_files (pass->sub, false, new_properties);
/* If we have a gate, combine the properties that we could have with
and without the pass being examined. */
@@ -390,6 +436,8 @@ init_tree_optimization_passes (void)
#define NEXT_PASS(PASS) (p = next_pass_1 (p, &PASS))
/* Intraprocedural optimization passes. */
p = &all_ipa_passes;
+ NEXT_PASS (pass_early_ipa_inline);
+ NEXT_PASS (pass_early_local_passes);
NEXT_PASS (pass_ipa_inline);
*p = NULL;
@@ -405,7 +453,13 @@ init_tree_optimization_passes (void)
NEXT_PASS (pass_lower_complex_O0);
NEXT_PASS (pass_lower_vector);
NEXT_PASS (pass_warn_function_return);
+ NEXT_PASS (pass_early_tree_profile);
+ *p = NULL;
+
+ p = &pass_early_local_passes.sub;
NEXT_PASS (pass_tree_profile);
+ NEXT_PASS (pass_cleanup_cfg);
+ NEXT_PASS (pass_rebuild_cgraph_edges);
*p = NULL;
p = &all_passes;
@@ -716,7 +770,7 @@ execute_ipa_pass_list (struct tree_opt_pass *pass)
{
push_cfun (DECL_STRUCT_FUNCTION (node->decl));
current_function_decl = node->decl;
- execute_pass_list (pass);
+ execute_pass_list (pass->sub);
free_dominance_info (CDI_DOMINATORS);
free_dominance_info (CDI_POST_DOMINATORS);
current_function_decl = NULL;
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 471747bf73c..e6acea4fdf1 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -164,6 +164,7 @@ extern struct tree_opt_pass pass_lower_cf;
extern struct tree_opt_pass pass_lower_eh;
extern struct tree_opt_pass pass_build_cfg;
extern struct tree_opt_pass pass_tree_profile;
+extern struct tree_opt_pass pass_early_tree_profile;
extern struct tree_opt_pass pass_referenced_vars;
extern struct tree_opt_pass pass_sra;
extern struct tree_opt_pass pass_tail_recursion;
@@ -227,8 +228,10 @@ extern struct tree_opt_pass pass_del_pta;
extern struct tree_opt_pass pass_uncprop;
extern struct tree_opt_pass pass_return_slot;
extern struct tree_opt_pass pass_reassoc;
+extern struct tree_opt_pass pass_rebuild_cgraph_edges;
/* IPA Passes */
extern struct tree_opt_pass pass_ipa_inline;
+extern struct tree_opt_pass pass_early_ipa_inline;
#endif /* GCC_TREE_PASS_H */
diff --git a/gcc/tree-profile.c b/gcc/tree-profile.c
index 0f73e3cf4cb..a19e3a42ad1 100644
--- a/gcc/tree-profile.c
+++ b/gcc/tree-profile.c
@@ -273,6 +273,33 @@ struct tree_opt_pass pass_tree_profile =
0 /* letter */
};
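+/* Return true if tree-based profiling is in effect and we are either not
+ compiling in unit-at-a-time mode or not optimizing, else false.
+ do_tree_profiling is always called first, so any setup it performs
+ still takes place. Gate for pass_early_tree_profile. */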
+
+static bool
+do_early_tree_profiling (void)
+{
+ return (do_tree_profiling () && (!flag_unit_at_a_time || !optimize));
+}
+
+struct tree_opt_pass pass_early_tree_profile =
+{
+ "early_tree_profile", /* name */
+ do_early_tree_profiling, /* gate */
+ tree_profiling, /* execute */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ TV_BRANCH_PROB, /* tv_id */
+ PROP_gimple_leh | PROP_cfg, /* properties_required */
+ PROP_gimple_leh | PROP_cfg, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_verify_stmts, /* todo_flags_finish */
+ 0 /* letter */
+};
+
struct profile_hooks tree_profile_hooks =
{
tree_init_edge_profiler, /* init_edge_profiler */