diff options
author | Jan Hubicka <jh@suse.cz> | 2002-05-15 11:00:30 +0200 |
---|---|---|
committer | Jan Hubicka <hubicka@gcc.gnu.org> | 2002-05-15 09:00:30 +0000 |
commit | 194734e9e5501f9a295212b91978eee396879cda (patch) | |
tree | 6dab9b09a4ebcfb309dc527384a3d4f4607f240d | |
parent | 61ad9a3472b5cafde230efd925bf9c8f61d6b65e (diff) | |
download | gcc-194734e9e5501f9a295212b91978eee396879cda.tar.gz |
invoke.texi (-malign-double): Re-add lost warning.
* invoke.texi (-malign-double): Re-add lost warning.
* i386-protos.h (x86_output_mi_thunk): Declare.
* unix.h (ASM_OUTPUT_MI_THUNK): Move offline to ...
* i386.c (x86_output_mi_thunk): ... here; handle 64bits.
* dwarf2out.c (output_call_frame_info): Do not skip unwind info
when flag_asynchronous_unwind_tables is set.
* flags.h (flag_reorder_functions): Declare.
* function.c (prepare_function_start): Initialize frequency.
* params.def (HOT_BB_COUNT_FRACTION, HOT_BB_FREQUENCY_FRACTION): New parameters.
* Makefile.in (predict.o): Add dependency on target.h and params.h
* defaults.h (HOT_TEXT_SECTION_NAME,
UNLIKELY_EXECUTED_TEXT_SECTION_NAME): New macros.
* predict.c (choose_function_section): New function.
(estimate_bb_frequencies): Use it.
* toplev.c (flag_reorder_functions): New global variable.
(lang_independent_options): New.
(parse_options_and_default_flags): Set.
* varasm.c (assemble_start_function): Bypass function alignment
for never executed functions.
* invoke.texi (-freorder-blocks, -freorder-functions): Document.
(param hot-bb-count-fraction, hot-bb-frequency-fraction): New.
* tm.texi (HOT_TEXT_SECTION_NAME, UNLIKELY_EXECUTED_TEXT_SECTION_NAME):
Document.
Thu Jan 3 21:52:09 CET 2002 Jan Hubicka <jh@suse.cz>
* predict.c: Include profile.h
(MIN_COUNT): Rename to MIN_COUNT_FRACTION
(maybe_hot_bb_p, probably_cold_bb_p, probably_never_executed_bb_p):
Use the information about maximal counter in the program.
Thu Dec 20 22:14:00 CET 2001 Jan Hubicka <jh@suse.cz>
* basic-block.h (maybe_hot_bb_p, probably_cold_bb_p,
probably_never_executed_bb_p): New functions.
* cfgcleanup.c (outgoing_edges_match): Use them.
* predict.c (MIN_COUNT, MIN_FREQUENCY): New macros.
(maybe_hot_bb_p, probably_cold_bb_p,
probably_never_executed_bb_p): New functions.
* function.h (function): Add new field function_frequency.
* predict.c (compute_function_frequency): New function.
(estimate_probability): Call it.
From-SVN: r53478
-rw-r--r-- | gcc/ChangeLog | 49 | ||||
-rw-r--r-- | gcc/Makefile.in | 3 | ||||
-rw-r--r-- | gcc/basic-block.h | 4 | ||||
-rw-r--r-- | gcc/cfgcleanup.c | 4 | ||||
-rw-r--r-- | gcc/config/i386/i386-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 75 | ||||
-rw-r--r-- | gcc/config/i386/unix.h | 56 | ||||
-rw-r--r-- | gcc/defaults.h | 8 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 29 | ||||
-rw-r--r-- | gcc/doc/tm.texi | 11 | ||||
-rw-r--r-- | gcc/dwarf2out.c | 3 | ||||
-rw-r--r-- | gcc/flags.h | 4 | ||||
-rw-r--r-- | gcc/function.c | 4 | ||||
-rw-r--r-- | gcc/function.h | 13 | ||||
-rw-r--r-- | gcc/params.def | 9 | ||||
-rw-r--r-- | gcc/predict.c | 274 | ||||
-rw-r--r-- | gcc/toplev.c | 7 | ||||
-rw-r--r-- | gcc/varasm.c | 3 |
18 files changed, 408 insertions, 149 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6a37fb5398a..f543a744a91 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,52 @@ +Wed May 15 10:38:27 CEST 2002 Jan Hubicka <jh@suse.cz> + + * invoke.texi (-malign-double): Re-add lost warning. + + * i386-protos.h (x86_output_mi_thunk): Declare. + * unix.h (ASM_OUTPUT_MI_THUNK): Move offline to ... + * i386.c (x86_output_mi_thunk): ... here; handle 64bits. + + * dwarf2out.c (output_call_frame_info): Do not skip unwind info + when flag_asynchronous_unwind_tables is set. + + * flags.h (flag_reorder_functions): Declare. + * function.c (prepare_function_start): Initialize frequnecy. + * params.def (HOT_BB_COUNT_FRACTION, HOT_BB_FREQUENCY_FRACTION): New paramters. + * Makefile.in (predict.o): Add dependency on target.h and params.h + * defaults.h (HOT_TEXT_SECTION_NAME, + UNLIKELY_EXECUTED_TEXT_SECTION_NAME): New macros. + * predict.c (choose_function_section): New function. + (estimate_bb_frequencies): Use it. + * toplev.c (flag_reorder_functions): New global variable. + (lang_independent_options): New. + (parse_options_and_default_flags): Set. + * varasm.c (assemble_start_function): Bypass functdion alignment + for never executed functions. + * invoke.texi (-freorder-blocks, -freorder-functions): Document. + (param hot-bb-count-fraction, hot-bb-frequency-fraction): New. + * tm.texi (HOT_TEXT_SECTION_NAME, UNLIKELY_EXECUTED_TEXT_SECTION_NAME): + Document. + + Thu Jan 3 21:52:09 CET 2002 Jan Hubicka <jh@suse.cz> + + * predict.c: Inlude profile.h + (MIN_COUNT): Rename to MIN_COUNT_FRACTION + (maybe_hot_bb_p, probably_cold_bb_p, probably_never_executed_bb_p): + Use the information about maximal counter in the program. + + Thu Dec 20 22:14:00 CET 2001 Jan Hubicka <jh@suse.cz> + + * basic-block.h (maybe_hot_bb_p, probably_cold_bb_p, + probably_never_executed_bb_p): New functions. + * cfgcleanup.c (outgoing_edges_match): Use them. + * predict.c (MIN_COUNT, MIN_FREQUENCY): New macros. 
+ (maybe_hot_bb_p, probably_cold_bb_p, + probably_never_executed_bb_p): New functions. + + * function.h (function): Add new field function_frequency. + * predict.c (compute_function_frequency): New function. + (estimate_probability): Call it. + 2002-03-09 Jakub Jelinek <jakub@redhat.com> PR optimization/5172, optimization/5200 diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 937b6e50d2f..5791e8e238b 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1579,7 +1579,8 @@ reg-stack.o : reg-stack.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TREE_H) $(RECOG_H) varray.h function.h $(TM_P_H) predict.o: predict.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TREE_H) flags.h \ insn-config.h $(BASIC_BLOCK_H) $(REGS_H) hard-reg-set.h output.h toplev.h \ - $(RECOG_H) function.h except.h $(EXPR_H) $(TM_P_H) $(PREDICT_H) real.h + $(RECOG_H) function.h except.h $(EXPR_H) $(TM_P_H) $(PREDICT_H) real.h \ + $(PARAMS_H) $(TARGET_H) lists.o: lists.c $(CONFIG_H) $(SYSTEM_H) toplev.h $(RTL_H) $(GGC_H) bb-reorder.o : bb-reorder.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TREE_H) \ flags.h $(BASIC_BLOCK_H) hard-reg-set.h output.h cfglayout.h $(TARGET_H) diff --git a/gcc/basic-block.h b/gcc/basic-block.h index 05b4b7c9002..5615b145f02 100644 --- a/gcc/basic-block.h +++ b/gcc/basic-block.h @@ -628,6 +628,10 @@ extern rtx emit_block_insn_before PARAMS ((rtx, rtx, basic_block)); extern void estimate_probability PARAMS ((struct loops *)); extern void note_prediction_to_br_prob PARAMS ((void)); extern void expected_value_to_br_prob PARAMS ((void)); +extern void note_prediction_to_br_prob PARAMS ((void)); +extern bool maybe_hot_bb_p PARAMS ((basic_block)); +extern bool probably_cold_bb_p PARAMS ((basic_block)); +extern bool probably_never_executed_bb_p PARAMS ((basic_block)); /* In flow.c */ extern void init_flow PARAMS ((void)); diff --git a/gcc/cfgcleanup.c b/gcc/cfgcleanup.c index 826569ad723..fcf6944d4bb 100644 --- a/gcc/cfgcleanup.c +++ b/gcc/cfgcleanup.c @@ -1211,8 +1211,8 @@ outgoing_edges_match (mode, bb1, 
bb2) roughly similar. */ if (match && !optimize_size - && bb1->frequency > BB_FREQ_MAX / 1000 - && bb2->frequency > BB_FREQ_MAX / 1000) + && maybe_hot_bb_p (bb1) + && maybe_hot_bb_p (bb2)) { int prob2; diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 8321d478ec0..b3b16884579 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -197,4 +197,5 @@ extern tree ix86_handle_shared_attribute PARAMS ((tree *, tree, tree, int, bool extern unsigned int i386_pe_section_type_flags PARAMS ((tree, const char *, int)); extern void i386_pe_asm_named_section PARAMS ((const char *, unsigned int)); +extern void x86_output_mi_thunk PARAMS ((FILE *, int, tree)); #endif diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 9c328b72fb2..8d939f2e098 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -13049,3 +13049,78 @@ x86_order_regs_for_local_alloc () while (pos < FIRST_PSEUDO_REGISTER) reg_alloc_order [pos++] = 0; } + +void +x86_output_mi_thunk (file, delta, function) + FILE *file; + int delta; + tree function; +{ + tree parm; + rtx xops[3]; + + if (ix86_regparm > 0) + parm = TYPE_ARG_TYPES (TREE_TYPE (function)); + else + parm = NULL_TREE; + for (; parm; parm = TREE_CHAIN (parm)) + if (TREE_VALUE (parm) == void_type_node) + break; + + xops[0] = GEN_INT (delta); + if (TARGET_64BIT) + { + int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0; + xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]); + output_asm_insn ("add{q} {%0, %1|%1, %0}", xops); + if (flag_pic) + { + fprintf (file, "\tjmp *"); + assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0)); + fprintf (file, "@GOTPCREL(%%rip)\n"); + } + else + { + fprintf (file, "\tjmp "); + assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0)); + fprintf (file, "\n"); + } + } + else + { + if (parm) + xops[1] = gen_rtx_REG (SImode, 0); + else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)))) + xops[1] = 
gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8)); + else + xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4)); + output_asm_insn ("add{l} {%0, %1|%1, %0}", xops); + + if (flag_pic) + { + xops[0] = pic_offset_table_rtx; + xops[1] = gen_label_rtx (); + xops[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); + + if (ix86_regparm > 2) + abort (); + output_asm_insn ("push{l}\t%0", xops); + output_asm_insn ("call\t%P1", xops); + ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1])); + output_asm_insn ("pop{l}\t%0", xops); + output_asm_insn + ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops); + xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0)); + output_asm_insn + ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops); + asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n"); + asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n"); + } + else + { + fprintf (file, "\tjmp "); + assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0)); + fprintf (file, "\n"); + } + } +} diff --git a/gcc/config/i386/unix.h b/gcc/config/i386/unix.h index 15a07018cf3..f7e38b48e9a 100644 --- a/gcc/config/i386/unix.h +++ b/gcc/config/i386/unix.h @@ -79,57 +79,5 @@ Boston, MA 02111-1307, USA. */ /* Output code to add DELTA to the first argument, and then jump to FUNCTION. Used for C++ multiple inheritance. 
*/ -#define ASM_OUTPUT_MI_THUNK(FILE, THUNK_FNDECL, DELTA, FUNCTION) \ -do { \ - tree parm; \ - rtx xops[3]; \ - \ - if (ix86_regparm > 0) \ - parm = TYPE_ARG_TYPES (TREE_TYPE (function)); \ - else \ - parm = NULL_TREE; \ - for (; parm; parm = TREE_CHAIN (parm)) \ - if (TREE_VALUE (parm) == void_type_node) \ - break; \ - \ - xops[0] = GEN_INT (DELTA); \ - if (parm) \ - xops[1] = gen_rtx_REG (SImode, 0); \ - else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (FUNCTION)))) \ - xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8)); \ - else \ - xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4)); \ - output_asm_insn ("add{l} {%0, %1|%1, %0}", xops); \ - \ - if (flag_pic && !TARGET_64BIT) \ - { \ - xops[0] = pic_offset_table_rtx; \ - xops[1] = gen_label_rtx (); \ - xops[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); \ - \ - if (ix86_regparm > 2) \ - abort (); \ - output_asm_insn ("push{l}\t%0", xops); \ - output_asm_insn ("call\t%P1", xops); \ - ASM_OUTPUT_INTERNAL_LABEL (FILE, "L", CODE_LABEL_NUMBER (xops[1])); \ - output_asm_insn ("pop{l}\t%0", xops); \ - output_asm_insn ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops); \ - xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (FUNCTION), 0)); \ - output_asm_insn ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}",\ - xops); \ - asm_fprintf (FILE, "\tpop{l\t%%ebx|\t%%ebx}\n"); \ - asm_fprintf (FILE, "\tjmp\t{*%%ecx|%%ecx}\n"); \ - } \ - else if (flag_pic && TARGET_64BIT) \ - { \ - fprintf (FILE, "\tjmp *"); \ - assemble_name (FILE, XSTR (XEXP (DECL_RTL (FUNCTION), 0), 0)); \ - fprintf (FILE, "@GOTPCREL(%%rip)\n"); \ - } \ - else \ - { \ - fprintf (FILE, "\tjmp "); \ - assemble_name (FILE, XSTR (XEXP (DECL_RTL (FUNCTION), 0), 0)); \ - fprintf (FILE, "\n"); \ - } \ -} while (0) +#define ASM_OUTPUT_MI_THUNK(FILE, THUNK_FNDECL, DELTA, FUNCTION) \ + x86_output_mi_thunk (FILE, DELTA, FUNCTION); diff --git a/gcc/defaults.h b/gcc/defaults.h index 7a45877f329..12f363a33d8 
100644 --- a/gcc/defaults.h +++ b/gcc/defaults.h @@ -517,4 +517,12 @@ You Lose! You must define PREFERRED_DEBUGGING_TYPE! && !ROUND_TOWARDS_ZERO) #endif +#ifndef HOT_TEXT_SECTION_NAME +#define HOT_TEXT_SECTION_NAME "text.hot" +#endif + +#ifndef UNLIKELY_EXECUTED_TEXT_SECTION_NAME +#define UNLIKELY_EXECUTED_TEXT_SECTION_NAME "text.unlikely" +#endif + #endif /* ! GCC_DEFAULTS_H */ diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 001ab25b5f8..1190c97965e 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -278,6 +278,7 @@ in the following sections. -fomit-frame-pointer -foptimize-register-move @gol -foptimize-sibling-calls -fprefetch-loop-arrays @gol -freduce-all-givs -fregmove -frename-registers @gol +-freorder-blocks -freorder-functions @gol -frerun-cse-after-loop -frerun-loop-opt @gol -fschedule-insns -fschedule-insns2 @gol -fsingle-precision-constant -fssa -fssa-ccp -fssa-dce @gol @@ -3712,6 +3713,23 @@ non-determinism is of paramount import. This switch allows users to reduce non-determinism, possibly at the expense of inferior optimization. +@item -freorder-blocks +@opindex freorder-blocks +Reorder basic blocks in the compiled function in order to reduce the number of +taken branches and improve code locality. + +@item -freorder-functions +@opindex freorder-functions +Reorder functions in order to +improve code locality. This is implemented by using special +subsections @code{text.hot} for most frequently executed functions and +@code{text.unlikely} for unlikely executed functions. Reordering is done by +the linker so object file format must support named sections and linker must +place them in a reasonable way. + +Also profile feedback must be available to make this option effective. See +@option{-fprofile-arcs} for details. 
+ @item -fstrict-aliasing @opindex fstrict-aliasing Allows the compiler to assume the strictest aliasing rules applicable to @@ -3900,6 +3918,13 @@ The maximum number of instructions that a loop should have if that loop is unrolled, and if the loop is unrolled, it determines how many times the loop code is unrolled. +@item hot-bb-count-fraction +Select fraction of the maximal count of repetitions of basic block in program +given basic block needs to have to be considered hot. + +@item hot-bb-frequency-fraction +Select fraction of the maximal frequency of executions of basic block in +function given basic block needs to have to be considered hot @end table @end table @@ -7389,6 +7414,10 @@ boundary. Aligning @code{double} variables on a two word boundary will produce code that runs somewhat faster on a @samp{Pentium} at the expense of more memory. +@strong{Warning:} if you use the @samp{-malign-double} switch, +structures containing the above types will be aligned differently than +the published application binary interface specifications for the 386. + @item -m128bit-long-double @opindex m128bit-long-double Control the size of @code{long double} type. i386 application binary interface diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 8d4e92522c5..bbb2c5010bd 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -5651,6 +5651,17 @@ Normally this is not needed, as simply defining @code{TEXT_SECTION_ASM_OP} is enough. The MIPS port uses this to sort all functions after all data declarations. +@findex HOT_TEXT_SECTION_NAME +@item HOT_TEXT_SECTION_NAME +If defined, a C string constant for the name of the section containing most +frequently executed functions of the program. If not defined, GCC will provide +a default definition if the target supports named sections. 
+ +@findex UNLIKELY_EXECUTED_TEXT_SECTION_NAME +@item UNLIKELY_EXECUTED_TEXT_SECTION_NAME +If defined, a C string constant for the name of the section containing unlikely +executed functions in the program. + @findex DATA_SECTION_ASM_OP @item DATA_SECTION_ASM_OP A C expression whose value is a string, including spacing, containing the diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c index 47edc4339e7..2fa9f64817e 100644 --- a/gcc/dwarf2out.c +++ b/gcc/dwarf2out.c @@ -1968,7 +1968,8 @@ output_call_frame_info (for_eh) fde = &fde_table[i]; /* Don't emit EH unwind info for leaf functions that don't need it. */ - if (for_eh && fde->nothrow && ! fde->uses_eh_lsda) + if (!flag_asynchronous_unwind_tables && for_eh && fde->nothrow + && ! fde->uses_eh_lsda) continue; ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, FDE_LABEL, for_eh + i * 2); diff --git a/gcc/flags.h b/gcc/flags.h index b9fca23d29c..efcc7716e3e 100644 --- a/gcc/flags.h +++ b/gcc/flags.h @@ -204,6 +204,10 @@ extern int flag_branch_probabilities; extern int flag_reorder_blocks; +/* Nonzero if functions should be reordered. */ + +extern int flag_reorder_functions; + /* Nonzero if registers should be renamed. */ extern int flag_rename_registers; diff --git a/gcc/function.c b/gcc/function.c index 9f1c00a08b1..5bd70a0560f 100644 --- a/gcc/function.c +++ b/gcc/function.c @@ -6320,6 +6320,10 @@ prepare_function_start () cfun->arc_profile = profile_arc_flag || flag_test_coverage; + cfun->arc_profile = profile_arc_flag || flag_test_coverage; + + cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL; + (*lang_hooks.function.init) (cfun); if (init_machine_status) (*init_machine_status) (cfun); diff --git a/gcc/function.h b/gcc/function.h index bc789c8181a..912f8513c53 100644 --- a/gcc/function.h +++ b/gcc/function.h @@ -481,6 +481,19 @@ struct function /* Nonzero if code to initialize arg_pointer_save_area has been emited. */ unsigned int arg_pointer_save_area_init : 1; + + /* How commonly executed the function is. 
Initialized during branch + probabilities pass. */ + enum function_frequency { + /* This function most likely won't be executed at all. + (set only when profile feedback is available). */ + FUNCTION_FREQUENCY_UNLIKELY_EXECUTED, + /* The default value. */ + FUNCTION_FREQUENCY_NORMAL, + /* Optimize this function hard + (set only when profile feedback is available). */ + FUNCTION_FREQUENCY_HOT + } function_frequency; }; /* The function currently being compiled. */ diff --git a/gcc/params.def b/gcc/params.def index 2b2cfe67c4d..de55ecc5841 100644 --- a/gcc/params.def +++ b/gcc/params.def @@ -150,6 +150,15 @@ DEFPARAM(PARAM_MAX_UNROLLED_INSNS, "max-unrolled-insns", "The maximum number of instructions to consider to unroll in a loop", 100) + +DEFPARAM(HOT_BB_COUNT_FRACTION, + "hot-bb-count-fraction", + "Select fraction of the maximal count of repetitions of basic block in program given basic block needs to have to be considered hot", + 10000) +DEFPARAM(HOT_BB_FREQUENCY_FRACTION, + "hot-bb-frequency-fraction", + "Select fraction of the maximal frequency of executions of basic block in function given basic block needs to have to be considered hot", + 1000) /* Local variables: mode:c diff --git a/gcc/predict.c b/gcc/predict.c index 5896c10a191..f457817956d 100644 --- a/gcc/predict.c +++ b/gcc/predict.c @@ -45,7 +45,10 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "recog.h" #include "expr.h" #include "predict.h" +#include "profile.h" #include "real.h" +#include "params.h" +#include "target.h" /* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE, 0.5, REAL_BB_FREQ_MAX. 
*/ @@ -75,6 +78,8 @@ static void process_note_predictions PARAMS ((basic_block, int *, int *, static void process_note_prediction PARAMS ((basic_block, int *, int *, sbitmap *, int, int)); static bool last_basic_block_p PARAMS ((basic_block)); +static void compute_function_frequency PARAMS ((void)); +static void choose_function_section PARAMS ((void)); /* Information we hold about each branch predictor. Filled using information from predict.def. */ @@ -103,6 +108,54 @@ static const struct predictor_info predictor_info[]= { {NULL, 0, 0} }; #undef DEF_PREDICTOR + +/* Return true in case BB can be CPU intensive and should be optimized + for maximal perofmrance. */ + +bool +maybe_hot_bb_p (bb) + basic_block bb; +{ + if (profile_info.count_profiles_merged + && flag_branch_probabilities + && (bb->count + < profile_info.max_counter_in_program + / PARAM_VALUE (HOT_BB_COUNT_FRACTION))) + return false; + if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)) + return false; + return true; +} + +/* Return true in case BB is cold and should be optimized for size. */ + +bool +probably_cold_bb_p (bb) + basic_block bb; +{ + if (profile_info.count_profiles_merged + && flag_branch_probabilities + && (bb->count + < profile_info.max_counter_in_program + / PARAM_VALUE (HOT_BB_COUNT_FRACTION))) + return true; + if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION)) + return true; + return false; +} + +/* Return true in case BB is probably never executed. */ +bool +probably_never_executed_bb_p (bb) + basic_block bb; +{ + if (profile_info.count_profiles_merged + && flag_branch_probabilities) + return ((bb->count + profile_info.count_profiles_merged / 2) + / profile_info.count_profiles_merged) == 0; + return false; +} + /* Return true if the one of outgoing edges is already predicted by PREDICTOR. 
*/ @@ -1095,118 +1148,159 @@ estimate_bb_frequencies (loops) REAL_VALUE_TYPE freq_max; enum machine_mode double_mode = TYPE_MODE (double_type_node); - REAL_VALUE_FROM_INT (real_zero, 0, 0, double_mode); - REAL_VALUE_FROM_INT (real_one, 1, 0, double_mode); - REAL_VALUE_FROM_INT (real_br_prob_base, REG_BR_PROB_BASE, 0, double_mode); - REAL_VALUE_FROM_INT (real_bb_freq_max, BB_FREQ_MAX, 0, double_mode); - REAL_VALUE_FROM_INT (real_one_half, 2, 0, double_mode); + if (flag_branch_probabilities) + counts_to_freqs (); + else + { + REAL_VALUE_FROM_INT (real_zero, 0, 0, double_mode); + REAL_VALUE_FROM_INT (real_one, 1, 0, double_mode); + REAL_VALUE_FROM_INT (real_br_prob_base, REG_BR_PROB_BASE, 0, double_mode); + REAL_VALUE_FROM_INT (real_bb_freq_max, BB_FREQ_MAX, 0, double_mode); + REAL_VALUE_FROM_INT (real_one_half, 2, 0, double_mode); - REAL_ARITHMETIC (real_one_half, RDIV_EXPR, real_one, real_one_half); + REAL_ARITHMETIC (real_one_half, RDIV_EXPR, real_one, real_one_half); - REAL_ARITHMETIC (real_almost_one, RDIV_EXPR, real_one, real_br_prob_base); - REAL_ARITHMETIC (real_almost_one, MINUS_EXPR, real_one, real_almost_one); + REAL_ARITHMETIC (real_almost_one, RDIV_EXPR, real_one, real_br_prob_base); + REAL_ARITHMETIC (real_almost_one, MINUS_EXPR, real_one, real_almost_one); - mark_dfs_back_edges (); - if (flag_branch_probabilities) - { - counts_to_freqs (); - return; - } + mark_dfs_back_edges (); + /* Fill in the probability values in flowgraph based on the REG_BR_PROB + notes. */ + for (i = 0; i < n_basic_blocks; i++) + { + rtx last_insn = BLOCK_END (i); - /* Fill in the probability values in flowgraph based on the REG_BR_PROB - notes. */ - for (i = 0; i < n_basic_blocks; i++) - { - rtx last_insn = BLOCK_END (i); + if (GET_CODE (last_insn) != JUMP_INSN || !any_condjump_p (last_insn) + /* Avoid handling of conditional jumps jumping to fallthru edge. */ + || BASIC_BLOCK (i)->succ->succ_next == NULL) + { + /* We can predict only conditional jumps at the moment. 
+ Expect each edge to be equally probable. + ?? In the future we want to make abnormal edges improbable. */ + int nedges = 0; + edge e; - if (GET_CODE (last_insn) != JUMP_INSN || !any_condjump_p (last_insn) - /* Avoid handling of conditional jumps jumping to fallthru edge. */ - || BASIC_BLOCK (i)->succ->succ_next == NULL) + for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next) + { + nedges++; + if (e->probability != 0) + break; + } + if (!e) + for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next) + e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges; + } + } + + ENTRY_BLOCK_PTR->succ->probability = REG_BR_PROB_BASE; + + /* Set up block info for each basic block. */ + alloc_aux_for_blocks (sizeof (struct block_info_def)); + alloc_aux_for_edges (sizeof (struct edge_info_def)); + for (i = -2; i < n_basic_blocks; i++) { - /* We can predict only conditional jumps at the moment. - Expect each edge to be equally probable. - ?? In the future we want to make abnormal edges improbable. */ - int nedges = 0; edge e; + basic_block bb; - for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next) + if (i == -2) + bb = ENTRY_BLOCK_PTR; + else if (i == -1) + bb = EXIT_BLOCK_PTR; + else + bb = BASIC_BLOCK (i); + + BLOCK_INFO (bb)->tovisit = 0; + for (e = bb->succ; e; e = e->succ_next) { - nedges++; - if (e->probability != 0) - break; + + REAL_VALUE_FROM_INT (EDGE_INFO (e)->back_edge_prob, + e->probability, 0, double_mode); + REAL_ARITHMETIC (EDGE_INFO (e)->back_edge_prob, + RDIV_EXPR, EDGE_INFO (e)->back_edge_prob, + real_br_prob_base); } - if (!e) - for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next) - e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges; } - } - ENTRY_BLOCK_PTR->succ->probability = REG_BR_PROB_BASE; + /* First compute probabilities locally for each loop from innermost + to outermost to examine probabilities for back edges. */ + estimate_loops_at_level (loops->tree_root); - /* Set up block info for each basic block. 
*/ - alloc_aux_for_blocks (sizeof (struct block_info_def)); - alloc_aux_for_edges (sizeof (struct edge_info_def)); - for (i = -2; i < n_basic_blocks; i++) - { - edge e; - basic_block bb; + /* Now fake loop around whole function to finalize probabilities. */ + for (i = 0; i < n_basic_blocks; i++) + BLOCK_INFO (BASIC_BLOCK (i))->tovisit = 1; - if (i == -2) - bb = ENTRY_BLOCK_PTR; - else if (i == -1) - bb = EXIT_BLOCK_PTR; - else - bb = BASIC_BLOCK (i); + BLOCK_INFO (ENTRY_BLOCK_PTR)->tovisit = 1; + BLOCK_INFO (EXIT_BLOCK_PTR)->tovisit = 1; + propagate_freq (ENTRY_BLOCK_PTR); - BLOCK_INFO (bb)->tovisit = 0; - for (e = bb->succ; e; e = e->succ_next) + memcpy (&freq_max, &real_zero, sizeof (real_zero)); + for (i = 0; i < n_basic_blocks; i++) + if (REAL_VALUES_LESS + (freq_max, BLOCK_INFO (BASIC_BLOCK (i))->frequency)) + memcpy (&freq_max, &BLOCK_INFO (BASIC_BLOCK (i))->frequency, + sizeof (freq_max)); + + for (i = -2; i < n_basic_blocks; i++) { - - REAL_VALUE_FROM_INT (EDGE_INFO (e)->back_edge_prob, - e->probability, 0, double_mode); - REAL_ARITHMETIC (EDGE_INFO (e)->back_edge_prob, - RDIV_EXPR, EDGE_INFO (e)->back_edge_prob, - real_br_prob_base); - } - } + basic_block bb; + REAL_VALUE_TYPE tmp; - /* First compute probabilities locally for each loop from innermost - to outermost to examine probabilities for back edges. */ - estimate_loops_at_level (loops->tree_root); + if (i == -2) + bb = ENTRY_BLOCK_PTR; + else if (i == -1) + bb = EXIT_BLOCK_PTR; + else + bb = BASIC_BLOCK (i); - /* Now fake loop around whole function to finalize probabilities. 
*/ - for (i = 0; i < n_basic_blocks; i++) - BLOCK_INFO (BASIC_BLOCK (i))->tovisit = 1; + REAL_ARITHMETIC (tmp, MULT_EXPR, BLOCK_INFO (bb)->frequency, + real_bb_freq_max); + REAL_ARITHMETIC (tmp, RDIV_EXPR, tmp, freq_max); + REAL_ARITHMETIC (tmp, PLUS_EXPR, tmp, real_one_half); + bb->frequency = REAL_VALUE_UNSIGNED_FIX (tmp); + } - BLOCK_INFO (ENTRY_BLOCK_PTR)->tovisit = 1; - BLOCK_INFO (EXIT_BLOCK_PTR)->tovisit = 1; - propagate_freq (ENTRY_BLOCK_PTR); + free_aux_for_blocks (); + free_aux_for_edges (); + } + compute_function_frequency (); + if (flag_reorder_functions) + choose_function_section (); +} - memcpy (&freq_max, &real_zero, sizeof (real_zero)); +/* Decide whether function is hot, cold or unlikely executed. */ +static void +compute_function_frequency () +{ + int i; + if (!profile_info.count_profiles_merged + || !flag_branch_probabilities) + return; + cfun->function_frequency = FUNCTION_FREQUENCY_UNLIKELY_EXECUTED; for (i = 0; i < n_basic_blocks; i++) - if (REAL_VALUES_LESS (freq_max, BLOCK_INFO (BASIC_BLOCK (i))->frequency)) - memcpy (&freq_max, &BLOCK_INFO (BASIC_BLOCK (i))->frequency, - sizeof (freq_max)); - - for (i = -2; i < n_basic_blocks; i++) { - basic_block bb; - REAL_VALUE_TYPE tmp; - - if (i == -2) - bb = ENTRY_BLOCK_PTR; - else if (i == -1) - bb = EXIT_BLOCK_PTR; - else - bb = BASIC_BLOCK (i); - - REAL_ARITHMETIC (tmp, MULT_EXPR, BLOCK_INFO (bb)->frequency, - real_bb_freq_max); - REAL_ARITHMETIC (tmp, RDIV_EXPR, tmp, freq_max); - REAL_ARITHMETIC (tmp, PLUS_EXPR, tmp, real_one_half); - bb->frequency = REAL_VALUE_UNSIGNED_FIX (tmp); + basic_block bb = BASIC_BLOCK (i); + if (maybe_hot_bb_p (bb)) + { + cfun->function_frequency = FUNCTION_FREQUENCY_HOT; + return; + } + if (!probably_never_executed_bb_p (bb)) + cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL; } +} - free_aux_for_blocks (); - free_aux_for_edges (); +/* Choose appropriate section for the function. 
*/ +static void +choose_function_section () +{ + if (DECL_SECTION_NAME (current_function_decl) + || !targetm.have_named_sections) + return; + if (cfun->function_frequency == FUNCTION_FREQUENCY_HOT) + DECL_SECTION_NAME (current_function_decl) = + build_string (strlen (HOT_TEXT_SECTION_NAME), HOT_TEXT_SECTION_NAME); + if (cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED) + DECL_SECTION_NAME (current_function_decl) = + build_string (strlen (UNLIKELY_EXECUTED_TEXT_SECTION_NAME), + UNLIKELY_EXECUTED_TEXT_SECTION_NAME); } diff --git a/gcc/toplev.c b/gcc/toplev.c index 9213730f629..5f5eb462811 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -381,6 +381,10 @@ int flag_branch_probabilities = 0; int flag_reorder_blocks = 0; +/* Nonzero if functions should be reordered. */ + +int flag_reorder_functions = 0; + /* Nonzero if registers should be renamed. */ int flag_rename_registers = 0; @@ -1076,6 +1080,8 @@ static const lang_independent_options f_options[] = N_("Enable basic program profiling code") }, {"reorder-blocks", &flag_reorder_blocks, 1, N_("Reorder basic blocks to improve code placement") }, + {"reorder-functions", &flag_reorder_functions, 1, + N_("Reorder functions to improve code placement") }, {"rename-registers", &flag_rename_registers, 1, N_("Do the register renaming optimization pass") }, {"cprop-registers", &flag_cprop_registers, 1, @@ -4657,6 +4663,7 @@ parse_options_and_default_flags (argc, argv) flag_strict_aliasing = 1; flag_delete_null_pointer_checks = 1; flag_reorder_blocks = 1; + flag_reorder_functions = 1; } if (optimize >= 3) diff --git a/gcc/varasm.c b/gcc/varasm.c index 279dc9085f4..6fb663e4e88 100644 --- a/gcc/varasm.c +++ b/gcc/varasm.c @@ -1197,7 +1197,8 @@ assemble_start_function (decl, fnname) /* Handle a user-specified function alignment. Note that we still need to align to FUNCTION_BOUNDARY, as above, because ASM_OUTPUT_MAX_SKIP_ALIGN might not do any alignment at all. 
*/ - if (align_functions_log > align) + if (align_functions_log > align + && cfun->function_frequency != FUNCTION_FREQUENCY_UNLIKELY_EXECUTED) { #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN ASM_OUTPUT_MAX_SKIP_ALIGN (asm_out_file, |