diff options
-rw-r--r-- | gcc/ChangeLog | 33 | ||||
-rw-r--r-- | gcc/cgraph.c | 3 | ||||
-rw-r--r-- | gcc/cgraph.h | 2 | ||||
-rw-r--r-- | gcc/cgraphunit.c | 81 | ||||
-rw-r--r-- | gcc/common.opt | 4 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 12 | ||||
-rw-r--r-- | gcc/ipa-inline.c | 103 | ||||
-rw-r--r-- | gcc/ipa.c | 2 | ||||
-rw-r--r-- | gcc/tree-inline.c | 4 | ||||
-rw-r--r-- | gcc/tree-optimize.c | 60 | ||||
-rw-r--r-- | gcc/tree-pass.h | 3 | ||||
-rw-r--r-- | gcc/tree-profile.c | 27 |
12 files changed, 308 insertions, 26 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d7afa04b487..cd1f15cb4f9 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,36 @@ +2005-06-28 Jan Hubicka <jh@suse.cz> + + * cgraph.c (cgraph_remove_node): Do not release function bodies until + full cgraph is built. + * cgraph.h (cgraph_decide_inlining_incrementally): Add early argument. + * cgraphunit.c (cgraph_finalize_function): Update call of + cgraph_decide_inlining_incrementally. + (initialize_inline_failed): Break out of ... + (cgraph_analyze_function): ... here. + (rebuild_cgraph_edges): New function. + (pass_rebuild_cgraph_edges): New pass. + * common.opt (fearly-inlining): New flag. + * ipa-inline.c: Include ggc.h. + (cgraph_clone_inlined_nodes): Avoid re-using the original copy + when cgraph is not fully built. + (cgraph_decide_inlining_incrementally): Add early mode. + (cgraph_early_inlining): New function. + (cgraph_gate_early_inlining): Likewise. + (pass_early_ipa_inline): New pass. + * ipa.c (cgraph_postorder): NULLify aux pointer. + * tree-inline.c (expand_call_inline): Avoid warning early. + * tree-optimize.c (pass_early_local_passes): New. + (execute_cleanup_cfg_pre_ipa): New. + (pass_cleanup_cfg): New. + (register_dump_files): Fix handling of subpasses of IPA pass. + (init_tree_optimization_passes): Add early passes. + (execute_ipa_pass_list): Fix handling of subpasses of IPA pass. + * tree-pass.h (pass_early_tree_profile, pass_rebuild_cgraph_edges, + pass_early_ipa_inline): New passes. + * tree-profile.c (do_early_tree_profiling, pass_early_tree_profile): New. + + * doc/invoke.texi: Document -fearly-inlining. + 2005-06-28 Kelley Cook <kcook@gcc.gnu.org> * doc/include/fdl.texi: Merge in changes from upstream. 
diff --git a/gcc/cgraph.c b/gcc/cgraph.c index 68e3ea6c2b6..93648327cf9 100644 --- a/gcc/cgraph.c +++ b/gcc/cgraph.c @@ -473,7 +473,8 @@ cgraph_remove_node (struct cgraph_node *node) { struct cgraph_node *n = *slot; if (!n->next_clone && !n->global.inlined_to - && (TREE_ASM_WRITTEN (n->decl) || DECL_EXTERNAL (n->decl))) + && (cgraph_global_info_ready + && (TREE_ASM_WRITTEN (n->decl) || DECL_EXTERNAL (n->decl)))) kill_body = true; } diff --git a/gcc/cgraph.h b/gcc/cgraph.h index 0cbe947266e..40a2648b36e 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -286,7 +286,7 @@ bool cgraph_remove_unreachable_nodes (bool, FILE *); int cgraph_postorder (struct cgraph_node **); /* In ipa-inline.c */ -void cgraph_decide_inlining_incrementally (struct cgraph_node *); +bool cgraph_decide_inlining_incrementally (struct cgraph_node *, bool); void cgraph_clone_inlined_nodes (struct cgraph_edge *, bool); void cgraph_mark_inline_edge (struct cgraph_edge *); bool cgraph_default_inline_p (struct cgraph_node *); diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c index ab8924d2bcd..a29dace94dc 100644 --- a/gcc/cgraphunit.c +++ b/gcc/cgraphunit.c @@ -427,7 +427,7 @@ cgraph_finalize_function (tree decl, bool nested) if (!flag_unit_at_a_time) { cgraph_analyze_function (node); - cgraph_decide_inlining_incrementally (node); + cgraph_decide_inlining_incrementally (node, false); } if (decide_is_function_needed (node, decl)) @@ -569,6 +569,73 @@ cgraph_create_edges (struct cgraph_node *node, tree body) visited_nodes = NULL; } +/* Give initial reasons why inlining would fail. Those gets + either NULLified or usually overwritten by more precise reason + later. 
*/ +static void +initialize_inline_failed (struct cgraph_node *node) +{ + struct cgraph_edge *e; + + for (e = node->callers; e; e = e->next_caller) + { + gcc_assert (!e->callee->global.inlined_to); + gcc_assert (e->inline_failed); + if (node->local.redefined_extern_inline) + e->inline_failed = N_("redefined extern inline functions are not " + "considered for inlining"); + else if (!node->local.inlinable) + e->inline_failed = N_("function not inlinable"); + else + e->inline_failed = N_("function not considered for inlining"); + } +} + +/* Rebuild the call edges of the current function after passes that are + not aware of cgraph updating have run. */ +static void +rebuild_cgraph_edges (void) +{ + basic_block bb; + struct cgraph_node *node = cgraph_node (current_function_decl); + block_stmt_iterator bsi; + + cgraph_node_remove_callees (node); + + node->count = ENTRY_BLOCK_PTR->count; + + FOR_EACH_BB (bb) + for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi)) + { + tree stmt = bsi_stmt (bsi); + tree call = get_call_expr_in (stmt); + tree decl; + + if (call && (decl = get_callee_fndecl (call))) + cgraph_create_edge (node, cgraph_node (decl), stmt, + bb->count, + bb->loop_depth); + } + initialize_inline_failed (node); + gcc_assert (!node->global.inlined_to); +} + +struct tree_opt_pass pass_rebuild_cgraph_edges = +{ + NULL, /* name */ + NULL, /* gate */ + rebuild_cgraph_edges, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + 0, /* tv_id */ + PROP_cfg, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ + 0 /* letter */ +}; /* Verify cgraph nodes of given cgraph node. 
*/ void @@ -764,7 +831,6 @@ static void cgraph_analyze_function (struct cgraph_node *node) { tree decl = node->decl; - struct cgraph_edge *e; current_function_decl = decl; push_cfun (DECL_STRUCT_FUNCTION (decl)); @@ -778,16 +844,7 @@ cgraph_analyze_function (struct cgraph_node *node) if (node->local.inlinable) node->local.disregard_inline_limits = lang_hooks.tree_inlining.disregard_inline_limits (decl); - for (e = node->callers; e; e = e->next_caller) - { - if (node->local.redefined_extern_inline) - e->inline_failed = N_("redefined extern inline functions are not " - "considered for inlining"); - else if (!node->local.inlinable) - e->inline_failed = N_("function not inlinable"); - else - e->inline_failed = N_("function not considered for inlining"); - } + initialize_inline_failed (node); if (flag_really_no_inline && !node->local.disregard_inline_limits) node->local.inlinable = 0; /* Inlining characteristics are maintained by the cgraph_mark_inline. */ diff --git a/gcc/common.opt b/gcc/common.opt index 4d097c78fc3..66782ce32ae 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -476,6 +476,10 @@ finline-functions Common Report Var(flag_inline_functions) Integrate simple functions into their callers +fearly-inlining +Common Report Var(flag_early_inlining) Init(1) +Perform early inlining + finline-limit- Common RejectNegative Joined UInteger diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 41345ee4ae6..140983bc208 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -300,7 +300,7 @@ Objective-C and Objective-C++ Dialects}. 
-fbranch-target-load-optimize2 -fbtr-bb-exclusive @gol -fcaller-saves -fcprop-registers -fcse-follow-jumps @gol -fcse-skip-blocks -fcx-limited-range -fdata-sections @gol --fdelayed-branch -fdelete-null-pointer-checks @gol +-fdelayed-branch -fdelete-null-pointer-checks -fearly-inlining @gol -fexpensive-optimizations -ffast-math -ffloat-store @gol -fforce-addr -fforce-mem -ffunction-sections @gol -fgcse -fgcse-lm -fgcse-sm -fgcse-las -fgcse-after-reload @gol @@ -4450,6 +4450,16 @@ assembler code in its own right. Enabled at level @option{-O3}. +@item -fearly-inlining +@opindex fearly-inlining +Inline functions marked by @code{always_inline} and functions whose body seems +smaller than the function call overhead early before doing +@option{-fprofile-generate} instrumentation and real inlining pass. Doing so +makes profiling significantly cheaper and usually inlining faster on programs +having large chains of nested wrapper functions. + +Enabled by default. + @item -finline-limit=@var{n} @opindex finline-limit By default, GCC limits the size of functions that can be inlined. This flag diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c index c176eb719da..26ea8f5e5c3 100644 --- a/gcc/ipa-inline.c +++ b/gcc/ipa-inline.c @@ -79,6 +79,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA #include "intl.h" #include "tree-pass.h" #include "coverage.h" +#include "ggc.h" /* Statistics we collect about inlining algorithm. */ static int ncalls_inlined; @@ -120,7 +121,7 @@ cgraph_clone_inlined_nodes (struct cgraph_edge *e, bool duplicate) if (!e->callee->callers->next_caller && (!e->callee->needed || DECL_EXTERNAL (e->callee->decl)) && duplicate - && flag_unit_at_a_time) + && (flag_unit_at_a_time && cgraph_global_info_ready)) { gcc_assert (!e->callee->global.inlined_to); if (!DECL_EXTERNAL (e->callee->decl)) @@ -870,10 +871,11 @@ cgraph_decide_inlining (void) /* Decide on the inlining. We do so in the topological order to avoid expenses on updating data structures. 
*/ -void -cgraph_decide_inlining_incrementally (struct cgraph_node *node) +bool +cgraph_decide_inlining_incrementally (struct cgraph_node *node, bool early) { struct cgraph_edge *e; + bool inlined = false; /* First of all look for always inline functions. */ for (e = node->callees; e; e = e->next_callee) @@ -883,7 +885,13 @@ cgraph_decide_inlining_incrementally (struct cgraph_node *node) /* ??? It is possible that renaming variable removed the function body in duplicate_decls. See gcc.c-torture/compile/20011119-2.c */ && DECL_SAVED_TREE (e->callee->decl)) - cgraph_mark_inline (e); + { + if (dump_file && early) + fprintf (dump_file, " Early inlining %s into %s\n", + cgraph_node_name (e->callee), cgraph_node_name (node)); + cgraph_mark_inline (e); + inlined = true; + } /* Now do the automatic inlining. */ if (!flag_really_no_inline) @@ -892,15 +900,36 @@ cgraph_decide_inlining_incrementally (struct cgraph_node *node) && e->inline_failed && !e->callee->local.disregard_inline_limits && !cgraph_recursive_inlining_p (node, e->callee, &e->inline_failed) + && (!early + || (cgraph_estimate_size_after_inlining (1, e->caller, node) + <= e->caller->global.insns)) && cgraph_check_inline_limits (node, e->callee, &e->inline_failed) && DECL_SAVED_TREE (e->callee->decl)) { if (cgraph_default_inline_p (e->callee)) - cgraph_mark_inline (e); - else + { + if (dump_file && early) + fprintf (dump_file, " Early inlining %s into %s\n", + cgraph_node_name (e->callee), cgraph_node_name (node)); + cgraph_mark_inline (e); + inlined = true; + } + else if (!early) e->inline_failed = N_("--param max-inline-insns-single limit reached"); } + if (early && inlined) + { + push_cfun (DECL_STRUCT_FUNCTION (node->decl)); + tree_register_cfg_hooks (); + current_function_decl = node->decl; + optimize_inline_calls (current_function_decl); + node->local.self_insns = node->global.insns; + current_function_decl = NULL; + pop_cfun (); + ggc_collect (); + } + return inlined; } /* When inlining shall be 
performed. */ @@ -920,7 +949,67 @@ struct tree_opt_pass pass_ipa_inline = 0, /* static_pass_number */ TV_INTEGRATION, /* tv_id */ 0, /* properties_required */ - PROP_trees, /* properties_provided */ + PROP_cfg, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_dump_cgraph | TODO_dump_func, /* todo_flags_finish */ + 0 /* letter */ +}; + +/* Do inlining of small functions. Doing so early helps profiling and other + passes to be somewhat more effective and avoids some code duplication in + later real inlining pass for testcases with very many function calls. */ +static void +cgraph_early_inlining (void) +{ + struct cgraph_node *node; + int nnodes; + struct cgraph_node **order = + xcalloc (cgraph_n_nodes, sizeof (struct cgraph_node *)); + int i; + + if (sorrycount || errorcount) + return; +#ifdef ENABLE_CHECKING + for (node = cgraph_nodes; node; node = node->next) + gcc_assert (!node->aux); +#endif + + nnodes = cgraph_postorder (order); + for (i = nnodes - 1; i >= 0; i--) + { + node = order[i]; + if (node->analyzed && node->local.inlinable + && (node->needed || node->reachable) + && node->callers) + cgraph_decide_inlining_incrementally (node, true); + } + cgraph_remove_unreachable_nodes (true, dump_file); +#ifdef ENABLE_CHECKING + for (node = cgraph_nodes; node; node = node->next) + gcc_assert (!node->global.inlined_to); +#endif + free (order); +} + +/* When inlining shall be performed. 
*/ +static bool +cgraph_gate_early_inlining (void) +{ + return flag_inline_trees && flag_early_inlining; +} + +struct tree_opt_pass pass_early_ipa_inline = +{ + "einline", /* name */ + cgraph_gate_early_inlining, /* gate */ + cgraph_early_inlining, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + TV_INTEGRATION, /* tv_id */ + 0, /* properties_required */ + PROP_cfg, /* properties_provided */ 0, /* properties_destroyed */ 0, /* todo_flags_start */ TODO_dump_cgraph | TODO_dump_func, /* todo_flags_finish */ diff --git a/gcc/ipa.c b/gcc/ipa.c index 64af3ebf4e5..20c90415e9d 100644 --- a/gcc/ipa.c +++ b/gcc/ipa.c @@ -83,6 +83,8 @@ cgraph_postorder (struct cgraph_node **order) } } free (stack); + for (node = cgraph_nodes; node; node = node->next) + node->aux = NULL; return order_pos; } diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c index b625af3f90b..665707056f6 100644 --- a/gcc/tree-inline.c +++ b/gcc/tree-inline.c @@ -1969,7 +1969,9 @@ expand_call_inline (basic_block bb, tree stmt, tree *tp, void *data) else if (warn_inline && DECL_DECLARED_INLINE_P (fn) && !DECL_IN_SYSTEM_HEADER (fn) && strlen (reason) - && !lookup_attribute ("noinline", DECL_ATTRIBUTES (fn))) + && !lookup_attribute ("noinline", DECL_ATTRIBUTES (fn)) + /* Avoid warnings during early inline pass. */ + && (!flag_unit_at_a_time || cgraph_global_info_ready)) { warning (0, "%Jinlining failed in call to %qF: %s", fn, fn, reason); warning (0, "called from here"); diff --git a/gcc/tree-optimize.c b/gcc/tree-optimize.c index 24db18684f3..5ebd204c1a9 100644 --- a/gcc/tree-optimize.c +++ b/gcc/tree-optimize.c @@ -55,7 +55,7 @@ int dump_flags; bool in_gimple_form; /* The root of the compilation pass tree, once constructed. */ -static struct tree_opt_pass *all_passes, *all_ipa_passes, * all_lowering_passes; +static struct tree_opt_pass *all_passes, *all_ipa_passes, *all_lowering_passes; /* Gate: execute, or not, all of the non-trivial optimizations. 
*/ @@ -84,6 +84,52 @@ static struct tree_opt_pass pass_all_optimizations = 0 /* letter */ }; +static struct tree_opt_pass pass_early_local_passes = +{ + NULL, /* name */ + gate_all_optimizations, /* gate */ + NULL, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + 0, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ + 0 /* letter */ +}; + +/* Pass: cleanup the CFG before pass_ipa_inline is run. + This is a plain call to cleanup_tree_cfg, scheduled as one of the + early local passes, after pass_tree_profile and before + pass_rebuild_cgraph_edges. */ + +static void +execute_cleanup_cfg_pre_ipa (void) +{ + cleanup_tree_cfg (); +} + +static struct tree_opt_pass pass_cleanup_cfg = +{ + "cleanup_cfg", /* name */ + NULL, /* gate */ + execute_cleanup_cfg_pre_ipa, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + 0, /* tv_id */ + PROP_cfg, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_dump_func, /* todo_flags_finish */ + 0 /* letter */ +}; + + /* Pass: cleanup the CFG just before expanding trees to RTL. This is just a round of label cleanups and case node grouping because after the tree optimizers have run such cleanups may @@ -322,7 +368,7 @@ register_dump_files (struct tree_opt_pass *pass, bool ipa, int properties) n++; if (pass->sub) - new_properties = register_dump_files (pass->sub, ipa, new_properties); + new_properties = register_dump_files (pass->sub, false, new_properties); /* If we have a gate, combine the properties that we could have with and without the pass being examined. */ @@ -390,6 +436,8 @@ init_tree_optimization_passes (void) #define NEXT_PASS(PASS) (p = next_pass_1 (p, &PASS)) /* Intraprocedural optimization passes. 
*/ p = &all_ipa_passes; + NEXT_PASS (pass_early_ipa_inline); + NEXT_PASS (pass_early_local_passes); NEXT_PASS (pass_ipa_inline); *p = NULL; @@ -405,7 +453,13 @@ init_tree_optimization_passes (void) NEXT_PASS (pass_lower_complex_O0); NEXT_PASS (pass_lower_vector); NEXT_PASS (pass_warn_function_return); + NEXT_PASS (pass_early_tree_profile); + *p = NULL; + + p = &pass_early_local_passes.sub; NEXT_PASS (pass_tree_profile); + NEXT_PASS (pass_cleanup_cfg); + NEXT_PASS (pass_rebuild_cgraph_edges); *p = NULL; p = &all_passes; @@ -716,7 +770,7 @@ execute_ipa_pass_list (struct tree_opt_pass *pass) { push_cfun (DECL_STRUCT_FUNCTION (node->decl)); current_function_decl = node->decl; - execute_pass_list (pass); + execute_pass_list (pass->sub); free_dominance_info (CDI_DOMINATORS); free_dominance_info (CDI_POST_DOMINATORS); current_function_decl = NULL; diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 471747bf73c..e6acea4fdf1 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -164,6 +164,7 @@ extern struct tree_opt_pass pass_lower_cf; extern struct tree_opt_pass pass_lower_eh; extern struct tree_opt_pass pass_build_cfg; extern struct tree_opt_pass pass_tree_profile; +extern struct tree_opt_pass pass_early_tree_profile; extern struct tree_opt_pass pass_referenced_vars; extern struct tree_opt_pass pass_sra; extern struct tree_opt_pass pass_tail_recursion; @@ -227,8 +228,10 @@ extern struct tree_opt_pass pass_del_pta; extern struct tree_opt_pass pass_uncprop; extern struct tree_opt_pass pass_return_slot; extern struct tree_opt_pass pass_reassoc; +extern struct tree_opt_pass pass_rebuild_cgraph_edges; /* IPA Passes */ extern struct tree_opt_pass pass_ipa_inline; +extern struct tree_opt_pass pass_early_ipa_inline; #endif /* GCC_TREE_PASS_H */ diff --git a/gcc/tree-profile.c b/gcc/tree-profile.c index 0f73e3cf4cb..a19e3a42ad1 100644 --- a/gcc/tree-profile.c +++ b/gcc/tree-profile.c @@ -273,6 +273,33 @@ struct tree_opt_pass pass_tree_profile = 0 /* letter */ }; +/* Return 1 if 
tree-based profiling is in effect, else 0. + If it is, set up hooks for tree-based profiling. + Gate for pass_early_tree_profile. */ + +static bool +do_early_tree_profiling (void) +{ + return (do_tree_profiling () && (!flag_unit_at_a_time || !optimize)); +} + +struct tree_opt_pass pass_early_tree_profile = +{ + "early_tree_profile", /* name */ + do_early_tree_profiling, /* gate */ + tree_profiling, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + TV_BRANCH_PROB, /* tv_id */ + PROP_gimple_leh | PROP_cfg, /* properties_required */ + PROP_gimple_leh | PROP_cfg, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_verify_stmts, /* todo_flags_finish */ + 0 /* letter */ +}; + struct profile_hooks tree_profile_hooks = { tree_init_edge_profiler, /* init_edge_profiler */ |