summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJan Hubicka <jh@suse.cz>2002-05-15 11:00:30 +0200
committerJan Hubicka <hubicka@gcc.gnu.org>2002-05-15 09:00:30 +0000
commit194734e9e5501f9a295212b91978eee396879cda (patch)
tree6dab9b09a4ebcfb309dc527384a3d4f4607f240d
parent61ad9a3472b5cafde230efd925bf9c8f61d6b65e (diff)
downloadgcc-194734e9e5501f9a295212b91978eee396879cda.tar.gz
invoke.texi (-malign-double): Re-add lost warning.
* invoke.texi (-malign-double): Re-add lost warning. * i386-protos.h (x86_output_mi_thunk): Declare. * unix.h (ASM_OUTPUT_MI_THUNK): Move offline to ... * i386.c (x86_output_mi_thunk): ... here; handle 64bits. * dwarf2out.c (output_call_frame_info): Do not skip unwind info when flag_asynchronous_unwind_tables is set. * flags.h (flag_reorder_functions): Declare. * function.c (prepare_function_start): Initialize frequency. * params.def (HOT_BB_COUNT_FRACTION, HOT_BB_FREQUENCY_FRACTION): New parameters. * Makefile.in (predict.o): Add dependency on target.h and params.h * defaults.h (HOT_TEXT_SECTION_NAME, UNLIKELY_EXECUTED_TEXT_SECTION_NAME): New macros. * predict.c (choose_function_section): New function. (estimate_bb_frequencies): Use it. * toplev.c (flag_reorder_functions): New global variable. (lang_independent_options): New. (parse_options_and_default_flags): Set. * varasm.c (assemble_start_function): Bypass function alignment for never executed functions. * invoke.texi (-freorder-blocks, -freorder-functions): Document. (param hot-bb-count-fraction, hot-bb-frequency-fraction): New. * tm.texi (HOT_TEXT_SECTION_NAME, UNLIKELY_EXECUTED_TEXT_SECTION_NAME): Document. Thu Jan 3 21:52:09 CET 2002 Jan Hubicka <jh@suse.cz> * predict.c: Include profile.h (MIN_COUNT): Rename to MIN_COUNT_FRACTION (maybe_hot_bb_p, probably_cold_bb_p, probably_never_executed_bb_p): Use the information about maximal counter in the program. Thu Dec 20 22:14:00 CET 2001 Jan Hubicka <jh@suse.cz> * basic-block.h (maybe_hot_bb_p, probably_cold_bb_p, probably_never_executed_bb_p): New functions. * cfgcleanup.c (outgoing_edges_match): Use them. * predict.c (MIN_COUNT, MIN_FREQUENCY): New macros. (maybe_hot_bb_p, probably_cold_bb_p, probably_never_executed_bb_p): New functions. * function.h (function): Add new field function_frequency. * predict.c (compute_function_frequency): New function. (estimate_probability): Call it. From-SVN: r53478
-rw-r--r--gcc/ChangeLog49
-rw-r--r--gcc/Makefile.in3
-rw-r--r--gcc/basic-block.h4
-rw-r--r--gcc/cfgcleanup.c4
-rw-r--r--gcc/config/i386/i386-protos.h1
-rw-r--r--gcc/config/i386/i386.c75
-rw-r--r--gcc/config/i386/unix.h56
-rw-r--r--gcc/defaults.h8
-rw-r--r--gcc/doc/invoke.texi29
-rw-r--r--gcc/doc/tm.texi11
-rw-r--r--gcc/dwarf2out.c3
-rw-r--r--gcc/flags.h4
-rw-r--r--gcc/function.c4
-rw-r--r--gcc/function.h13
-rw-r--r--gcc/params.def9
-rw-r--r--gcc/predict.c274
-rw-r--r--gcc/toplev.c7
-rw-r--r--gcc/varasm.c3
18 files changed, 408 insertions, 149 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 6a37fb5398a..f543a744a91 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,52 @@
+Wed May 15 10:38:27 CEST 2002 Jan Hubicka <jh@suse.cz>
+
+ * invoke.texi (-malign-double): Re-add lost warning.
+
+ * i386-protos.h (x86_output_mi_thunk): Declare.
+ * unix.h (ASM_OUTPUT_MI_THUNK): Move offline to ...
+ * i386.c (x86_output_mi_thunk): ... here; handle 64bits.
+
+ * dwarf2out.c (output_call_frame_info): Do not skip unwind info
+ when flag_asynchronous_unwind_tables is set.
+
+ * flags.h (flag_reorder_functions): Declare.
+ * function.c (prepare_function_start): Initialize frequency.
+ * params.def (HOT_BB_COUNT_FRACTION, HOT_BB_FREQUENCY_FRACTION): New parameters.
+ * Makefile.in (predict.o): Add dependency on target.h and params.h
+ * defaults.h (HOT_TEXT_SECTION_NAME,
+ UNLIKELY_EXECUTED_TEXT_SECTION_NAME): New macros.
+ * predict.c (choose_function_section): New function.
+ (estimate_bb_frequencies): Use it.
+ * toplev.c (flag_reorder_functions): New global variable.
+ (lang_independent_options): New.
+ (parse_options_and_default_flags): Set.
+ * varasm.c (assemble_start_function): Bypass function alignment
+ for never executed functions.
+ * invoke.texi (-freorder-blocks, -freorder-functions): Document.
+ (param hot-bb-count-fraction, hot-bb-frequency-fraction): New.
+ * tm.texi (HOT_TEXT_SECTION_NAME, UNLIKELY_EXECUTED_TEXT_SECTION_NAME):
+ Document.
+
+ Thu Jan 3 21:52:09 CET 2002 Jan Hubicka <jh@suse.cz>
+
+ * predict.c: Include profile.h
+ (MIN_COUNT): Rename to MIN_COUNT_FRACTION
+ (maybe_hot_bb_p, probably_cold_bb_p, probably_never_executed_bb_p):
+ Use the information about maximal counter in the program.
+
+ Thu Dec 20 22:14:00 CET 2001 Jan Hubicka <jh@suse.cz>
+
+ * basic-block.h (maybe_hot_bb_p, probably_cold_bb_p,
+ probably_never_executed_bb_p): New functions.
+ * cfgcleanup.c (outgoing_edges_match): Use them.
+ * predict.c (MIN_COUNT, MIN_FREQUENCY): New macros.
+ (maybe_hot_bb_p, probably_cold_bb_p,
+ probably_never_executed_bb_p): New functions.
+
+ * function.h (function): Add new field function_frequency.
+ * predict.c (compute_function_frequency): New function.
+ (estimate_probability): Call it.
+
2002-03-09 Jakub Jelinek <jakub@redhat.com>
PR optimization/5172, optimization/5200
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 937b6e50d2f..5791e8e238b 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1579,7 +1579,8 @@ reg-stack.o : reg-stack.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TREE_H) $(RECOG_H)
varray.h function.h $(TM_P_H)
predict.o: predict.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TREE_H) flags.h \
insn-config.h $(BASIC_BLOCK_H) $(REGS_H) hard-reg-set.h output.h toplev.h \
- $(RECOG_H) function.h except.h $(EXPR_H) $(TM_P_H) $(PREDICT_H) real.h
+ $(RECOG_H) function.h except.h $(EXPR_H) $(TM_P_H) $(PREDICT_H) real.h \
+ $(PARAMS_H) $(TARGET_H)
lists.o: lists.c $(CONFIG_H) $(SYSTEM_H) toplev.h $(RTL_H) $(GGC_H)
bb-reorder.o : bb-reorder.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TREE_H) \
flags.h $(BASIC_BLOCK_H) hard-reg-set.h output.h cfglayout.h $(TARGET_H)
diff --git a/gcc/basic-block.h b/gcc/basic-block.h
index 05b4b7c9002..5615b145f02 100644
--- a/gcc/basic-block.h
+++ b/gcc/basic-block.h
@@ -628,6 +628,10 @@ extern rtx emit_block_insn_before PARAMS ((rtx, rtx, basic_block));
extern void estimate_probability PARAMS ((struct loops *));
extern void note_prediction_to_br_prob PARAMS ((void));
extern void expected_value_to_br_prob PARAMS ((void));
+extern void note_prediction_to_br_prob PARAMS ((void));
+extern bool maybe_hot_bb_p PARAMS ((basic_block));
+extern bool probably_cold_bb_p PARAMS ((basic_block));
+extern bool probably_never_executed_bb_p PARAMS ((basic_block));
/* In flow.c */
extern void init_flow PARAMS ((void));
diff --git a/gcc/cfgcleanup.c b/gcc/cfgcleanup.c
index 826569ad723..fcf6944d4bb 100644
--- a/gcc/cfgcleanup.c
+++ b/gcc/cfgcleanup.c
@@ -1211,8 +1211,8 @@ outgoing_edges_match (mode, bb1, bb2)
roughly similar. */
if (match
&& !optimize_size
- && bb1->frequency > BB_FREQ_MAX / 1000
- && bb2->frequency > BB_FREQ_MAX / 1000)
+ && maybe_hot_bb_p (bb1)
+ && maybe_hot_bb_p (bb2))
{
int prob2;
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 8321d478ec0..b3b16884579 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -197,4 +197,5 @@ extern tree ix86_handle_shared_attribute PARAMS ((tree *, tree, tree, int, bool
extern unsigned int i386_pe_section_type_flags PARAMS ((tree, const char *,
int));
extern void i386_pe_asm_named_section PARAMS ((const char *, unsigned int));
+extern void x86_output_mi_thunk PARAMS ((FILE *, int, tree));
#endif
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 9c328b72fb2..8d939f2e098 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -13049,3 +13049,78 @@ x86_order_regs_for_local_alloc ()
while (pos < FIRST_PSEUDO_REGISTER)
reg_alloc_order [pos++] = 0;
}
+
+void
+x86_output_mi_thunk (file, delta, function)
+ FILE *file;
+ int delta;
+ tree function;
+{
+ tree parm;
+ rtx xops[3];
+
+ if (ix86_regparm > 0)
+ parm = TYPE_ARG_TYPES (TREE_TYPE (function));
+ else
+ parm = NULL_TREE;
+ for (; parm; parm = TREE_CHAIN (parm))
+ if (TREE_VALUE (parm) == void_type_node)
+ break;
+
+ xops[0] = GEN_INT (delta);
+ if (TARGET_64BIT)
+ {
+ int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
+ xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
+ output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
+ if (flag_pic)
+ {
+ fprintf (file, "\tjmp *");
+ assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
+ fprintf (file, "@GOTPCREL(%%rip)\n");
+ }
+ else
+ {
+ fprintf (file, "\tjmp ");
+ assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
+ fprintf (file, "\n");
+ }
+ }
+ else
+ {
+ if (parm)
+ xops[1] = gen_rtx_REG (SImode, 0);
+ else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
+ xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
+ else
+ xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
+ output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);
+
+ if (flag_pic)
+ {
+ xops[0] = pic_offset_table_rtx;
+ xops[1] = gen_label_rtx ();
+ xops[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
+
+ if (ix86_regparm > 2)
+ abort ();
+ output_asm_insn ("push{l}\t%0", xops);
+ output_asm_insn ("call\t%P1", xops);
+ ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
+ output_asm_insn ("pop{l}\t%0", xops);
+ output_asm_insn
+ ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
+ xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
+ output_asm_insn
+ ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
+ asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
+ asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
+ }
+ else
+ {
+ fprintf (file, "\tjmp ");
+ assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
+ fprintf (file, "\n");
+ }
+ }
+}
diff --git a/gcc/config/i386/unix.h b/gcc/config/i386/unix.h
index 15a07018cf3..f7e38b48e9a 100644
--- a/gcc/config/i386/unix.h
+++ b/gcc/config/i386/unix.h
@@ -79,57 +79,5 @@ Boston, MA 02111-1307, USA. */
/* Output code to add DELTA to the first argument, and then jump to FUNCTION.
Used for C++ multiple inheritance. */
-#define ASM_OUTPUT_MI_THUNK(FILE, THUNK_FNDECL, DELTA, FUNCTION) \
-do { \
- tree parm; \
- rtx xops[3]; \
- \
- if (ix86_regparm > 0) \
- parm = TYPE_ARG_TYPES (TREE_TYPE (function)); \
- else \
- parm = NULL_TREE; \
- for (; parm; parm = TREE_CHAIN (parm)) \
- if (TREE_VALUE (parm) == void_type_node) \
- break; \
- \
- xops[0] = GEN_INT (DELTA); \
- if (parm) \
- xops[1] = gen_rtx_REG (SImode, 0); \
- else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (FUNCTION)))) \
- xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8)); \
- else \
- xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4)); \
- output_asm_insn ("add{l} {%0, %1|%1, %0}", xops); \
- \
- if (flag_pic && !TARGET_64BIT) \
- { \
- xops[0] = pic_offset_table_rtx; \
- xops[1] = gen_label_rtx (); \
- xops[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); \
- \
- if (ix86_regparm > 2) \
- abort (); \
- output_asm_insn ("push{l}\t%0", xops); \
- output_asm_insn ("call\t%P1", xops); \
- ASM_OUTPUT_INTERNAL_LABEL (FILE, "L", CODE_LABEL_NUMBER (xops[1])); \
- output_asm_insn ("pop{l}\t%0", xops); \
- output_asm_insn ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops); \
- xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (FUNCTION), 0)); \
- output_asm_insn ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}",\
- xops); \
- asm_fprintf (FILE, "\tpop{l\t%%ebx|\t%%ebx}\n"); \
- asm_fprintf (FILE, "\tjmp\t{*%%ecx|%%ecx}\n"); \
- } \
- else if (flag_pic && TARGET_64BIT) \
- { \
- fprintf (FILE, "\tjmp *"); \
- assemble_name (FILE, XSTR (XEXP (DECL_RTL (FUNCTION), 0), 0)); \
- fprintf (FILE, "@GOTPCREL(%%rip)\n"); \
- } \
- else \
- { \
- fprintf (FILE, "\tjmp "); \
- assemble_name (FILE, XSTR (XEXP (DECL_RTL (FUNCTION), 0), 0)); \
- fprintf (FILE, "\n"); \
- } \
-} while (0)
+#define ASM_OUTPUT_MI_THUNK(FILE, THUNK_FNDECL, DELTA, FUNCTION) \
+ x86_output_mi_thunk (FILE, DELTA, FUNCTION);
diff --git a/gcc/defaults.h b/gcc/defaults.h
index 7a45877f329..12f363a33d8 100644
--- a/gcc/defaults.h
+++ b/gcc/defaults.h
@@ -517,4 +517,12 @@ You Lose! You must define PREFERRED_DEBUGGING_TYPE!
&& !ROUND_TOWARDS_ZERO)
#endif
+#ifndef HOT_TEXT_SECTION_NAME
+#define HOT_TEXT_SECTION_NAME "text.hot"
+#endif
+
+#ifndef UNLIKELY_EXECUTED_TEXT_SECTION_NAME
+#define UNLIKELY_EXECUTED_TEXT_SECTION_NAME "text.unlikely"
+#endif
+
#endif /* ! GCC_DEFAULTS_H */
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 001ab25b5f8..1190c97965e 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -278,6 +278,7 @@ in the following sections.
-fomit-frame-pointer -foptimize-register-move @gol
-foptimize-sibling-calls -fprefetch-loop-arrays @gol
-freduce-all-givs -fregmove -frename-registers @gol
+-freorder-blocks -freorder-functions @gol
-frerun-cse-after-loop -frerun-loop-opt @gol
-fschedule-insns -fschedule-insns2 @gol
-fsingle-precision-constant -fssa -fssa-ccp -fssa-dce @gol
@@ -3712,6 +3713,23 @@ non-determinism is of paramount import. This switch allows users to
reduce non-determinism, possibly at the expense of inferior
optimization.
+@item -freorder-blocks
+@opindex freorder-blocks
+Reorder basic blocks in the compiled function in order to reduce the number of
+taken branches and improve code locality.
+
+@item -freorder-functions
+@opindex freorder-functions
+Reorder functions in the object file in order to improve code locality.
+This is implemented by using special
+subsections @code{text.hot} for most frequently executed functions and
+@code{text.unlikely} for unlikely executed functions. Reordering is done by
+the linker so object file format must support named sections and linker must
+place them in a reasonable way.
+
+Also profile feedback must be available to make this option effective. See
+@option{-fprofile-arcs} for details.
+
@item -fstrict-aliasing
@opindex fstrict-aliasing
Allows the compiler to assume the strictest aliasing rules applicable to
@@ -3900,6 +3918,13 @@ The maximum number of instructions that a loop should have if that loop
is unrolled, and if the loop is unrolled, it determines how many times
the loop code is unrolled.
+@item hot-bb-count-fraction
+Select the fraction of the maximal count of repetitions of a basic block in the
+program that a given basic block needs to have to be considered hot.
+
+@item hot-bb-frequency-fraction
+Select the fraction of the maximal frequency of executions of a basic block in
+the function that a given basic block needs to have to be considered hot.
@end table
@end table
@@ -7389,6 +7414,10 @@ boundary. Aligning @code{double} variables on a two word boundary will
produce code that runs somewhat faster on a @samp{Pentium} at the
expense of more memory.
+@strong{Warning:} if you use the @samp{-malign-double} switch,
+structures containing the above types will be aligned differently than
+the published application binary interface specifications for the 386.
+
@item -m128bit-long-double
@opindex m128bit-long-double
Control the size of @code{long double} type. i386 application binary interface
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 8d4e92522c5..bbb2c5010bd 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -5651,6 +5651,17 @@ Normally this is not needed, as simply defining @code{TEXT_SECTION_ASM_OP}
is enough. The MIPS port uses this to sort all functions after all data
declarations.
+@findex HOT_TEXT_SECTION_NAME
+@item HOT_TEXT_SECTION_NAME
+If defined, a C string constant for the name of the section containing most
+frequently executed functions of the program. If not defined, GCC will provide
+a default definition if the target supports named sections.
+
+@findex UNLIKELY_EXECUTED_TEXT_SECTION_NAME
+@item UNLIKELY_EXECUTED_TEXT_SECTION_NAME
+If defined, a C string constant for the name of the section containing unlikely
+executed functions in the program.
+
@findex DATA_SECTION_ASM_OP
@item DATA_SECTION_ASM_OP
A C expression whose value is a string, including spacing, containing the
diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c
index 47edc4339e7..2fa9f64817e 100644
--- a/gcc/dwarf2out.c
+++ b/gcc/dwarf2out.c
@@ -1968,7 +1968,8 @@ output_call_frame_info (for_eh)
fde = &fde_table[i];
/* Don't emit EH unwind info for leaf functions that don't need it. */
- if (for_eh && fde->nothrow && ! fde->uses_eh_lsda)
+ if (!flag_asynchronous_unwind_tables && for_eh && fde->nothrow
+ && ! fde->uses_eh_lsda)
continue;
ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, FDE_LABEL, for_eh + i * 2);
diff --git a/gcc/flags.h b/gcc/flags.h
index b9fca23d29c..efcc7716e3e 100644
--- a/gcc/flags.h
+++ b/gcc/flags.h
@@ -204,6 +204,10 @@ extern int flag_branch_probabilities;
extern int flag_reorder_blocks;
+/* Nonzero if functions should be reordered. */
+
+extern int flag_reorder_functions;
+
/* Nonzero if registers should be renamed. */
extern int flag_rename_registers;
diff --git a/gcc/function.c b/gcc/function.c
index 9f1c00a08b1..5bd70a0560f 100644
--- a/gcc/function.c
+++ b/gcc/function.c
@@ -6320,6 +6320,10 @@ prepare_function_start ()
cfun->arc_profile = profile_arc_flag || flag_test_coverage;
+ cfun->arc_profile = profile_arc_flag || flag_test_coverage;
+
+ cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
+
(*lang_hooks.function.init) (cfun);
if (init_machine_status)
(*init_machine_status) (cfun);
diff --git a/gcc/function.h b/gcc/function.h
index bc789c8181a..912f8513c53 100644
--- a/gcc/function.h
+++ b/gcc/function.h
@@ -481,6 +481,19 @@ struct function
/* Nonzero if code to initialize arg_pointer_save_area has been emited. */
unsigned int arg_pointer_save_area_init : 1;
+
+ /* How commonly executed the function is. Initialized during branch
+ probabilities pass. */
+ enum function_frequency {
+ /* This function most likely won't be executed at all.
+ (set only when profile feedback is available). */
+ FUNCTION_FREQUENCY_UNLIKELY_EXECUTED,
+ /* The default value. */
+ FUNCTION_FREQUENCY_NORMAL,
+ /* Optimize this function hard
+ (set only when profile feedback is available). */
+ FUNCTION_FREQUENCY_HOT
+ } function_frequency;
};
/* The function currently being compiled. */
diff --git a/gcc/params.def b/gcc/params.def
index 2b2cfe67c4d..de55ecc5841 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -150,6 +150,15 @@ DEFPARAM(PARAM_MAX_UNROLLED_INSNS,
"max-unrolled-insns",
"The maximum number of instructions to consider to unroll in a loop",
100)
+
+DEFPARAM(HOT_BB_COUNT_FRACTION,
+ "hot-bb-count-fraction",
+ "Select fraction of the maximal count of repetitions of basic block in program given basic block needs to have to be considered hot",
+ 10000)
+DEFPARAM(HOT_BB_FREQUENCY_FRACTION,
+ "hot-bb-frequency-fraction",
+ "Select fraction of the maximal frequency of executions of basic block in function given basic block needs to have to be considered hot",
+ 1000)
/*
Local variables:
mode:c
diff --git a/gcc/predict.c b/gcc/predict.c
index 5896c10a191..f457817956d 100644
--- a/gcc/predict.c
+++ b/gcc/predict.c
@@ -45,7 +45,10 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
#include "recog.h"
#include "expr.h"
#include "predict.h"
+#include "profile.h"
#include "real.h"
+#include "params.h"
+#include "target.h"
/* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE, 0.5,
REAL_BB_FREQ_MAX. */
@@ -75,6 +78,8 @@ static void process_note_predictions PARAMS ((basic_block, int *, int *,
static void process_note_prediction PARAMS ((basic_block, int *, int *,
sbitmap *, int, int));
static bool last_basic_block_p PARAMS ((basic_block));
+static void compute_function_frequency PARAMS ((void));
+static void choose_function_section PARAMS ((void));
/* Information we hold about each branch predictor.
Filled using information from predict.def. */
@@ -103,6 +108,54 @@ static const struct predictor_info predictor_info[]= {
{NULL, 0, 0}
};
#undef DEF_PREDICTOR
+
+/* Return true in case BB can be CPU intensive and should be optimized
+ for maximal performance. */
+
+bool
+maybe_hot_bb_p (bb)
+ basic_block bb;
+{
+ if (profile_info.count_profiles_merged
+ && flag_branch_probabilities
+ && (bb->count
+ < profile_info.max_counter_in_program
+ / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
+ return false;
+ if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
+ return false;
+ return true;
+}
+
+/* Return true in case BB is cold and should be optimized for size. */
+
+bool
+probably_cold_bb_p (bb)
+ basic_block bb;
+{
+ if (profile_info.count_profiles_merged
+ && flag_branch_probabilities
+ && (bb->count
+ < profile_info.max_counter_in_program
+ / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
+ return true;
+ if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
+ return true;
+ return false;
+}
+
+/* Return true in case BB is probably never executed. */
+bool
+probably_never_executed_bb_p (bb)
+ basic_block bb;
+{
+ if (profile_info.count_profiles_merged
+ && flag_branch_probabilities)
+ return ((bb->count + profile_info.count_profiles_merged / 2)
+ / profile_info.count_profiles_merged) == 0;
+ return false;
+}
+
/* Return true if the one of outgoing edges is already predicted by
PREDICTOR. */
@@ -1095,118 +1148,159 @@ estimate_bb_frequencies (loops)
REAL_VALUE_TYPE freq_max;
enum machine_mode double_mode = TYPE_MODE (double_type_node);
- REAL_VALUE_FROM_INT (real_zero, 0, 0, double_mode);
- REAL_VALUE_FROM_INT (real_one, 1, 0, double_mode);
- REAL_VALUE_FROM_INT (real_br_prob_base, REG_BR_PROB_BASE, 0, double_mode);
- REAL_VALUE_FROM_INT (real_bb_freq_max, BB_FREQ_MAX, 0, double_mode);
- REAL_VALUE_FROM_INT (real_one_half, 2, 0, double_mode);
+ if (flag_branch_probabilities)
+ counts_to_freqs ();
+ else
+ {
+ REAL_VALUE_FROM_INT (real_zero, 0, 0, double_mode);
+ REAL_VALUE_FROM_INT (real_one, 1, 0, double_mode);
+ REAL_VALUE_FROM_INT (real_br_prob_base, REG_BR_PROB_BASE, 0, double_mode);
+ REAL_VALUE_FROM_INT (real_bb_freq_max, BB_FREQ_MAX, 0, double_mode);
+ REAL_VALUE_FROM_INT (real_one_half, 2, 0, double_mode);
- REAL_ARITHMETIC (real_one_half, RDIV_EXPR, real_one, real_one_half);
+ REAL_ARITHMETIC (real_one_half, RDIV_EXPR, real_one, real_one_half);
- REAL_ARITHMETIC (real_almost_one, RDIV_EXPR, real_one, real_br_prob_base);
- REAL_ARITHMETIC (real_almost_one, MINUS_EXPR, real_one, real_almost_one);
+ REAL_ARITHMETIC (real_almost_one, RDIV_EXPR, real_one, real_br_prob_base);
+ REAL_ARITHMETIC (real_almost_one, MINUS_EXPR, real_one, real_almost_one);
- mark_dfs_back_edges ();
- if (flag_branch_probabilities)
- {
- counts_to_freqs ();
- return;
- }
+ mark_dfs_back_edges ();
+ /* Fill in the probability values in flowgraph based on the REG_BR_PROB
+ notes. */
+ for (i = 0; i < n_basic_blocks; i++)
+ {
+ rtx last_insn = BLOCK_END (i);
- /* Fill in the probability values in flowgraph based on the REG_BR_PROB
- notes. */
- for (i = 0; i < n_basic_blocks; i++)
- {
- rtx last_insn = BLOCK_END (i);
+ if (GET_CODE (last_insn) != JUMP_INSN || !any_condjump_p (last_insn)
+ /* Avoid handling of conditional jumps jumping to fallthru edge. */
+ || BASIC_BLOCK (i)->succ->succ_next == NULL)
+ {
+ /* We can predict only conditional jumps at the moment.
+ Expect each edge to be equally probable.
+ ?? In the future we want to make abnormal edges improbable. */
+ int nedges = 0;
+ edge e;
- if (GET_CODE (last_insn) != JUMP_INSN || !any_condjump_p (last_insn)
- /* Avoid handling of conditional jumps jumping to fallthru edge. */
- || BASIC_BLOCK (i)->succ->succ_next == NULL)
+ for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
+ {
+ nedges++;
+ if (e->probability != 0)
+ break;
+ }
+ if (!e)
+ for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
+ e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
+ }
+ }
+
+ ENTRY_BLOCK_PTR->succ->probability = REG_BR_PROB_BASE;
+
+ /* Set up block info for each basic block. */
+ alloc_aux_for_blocks (sizeof (struct block_info_def));
+ alloc_aux_for_edges (sizeof (struct edge_info_def));
+ for (i = -2; i < n_basic_blocks; i++)
{
- /* We can predict only conditional jumps at the moment.
- Expect each edge to be equally probable.
- ?? In the future we want to make abnormal edges improbable. */
- int nedges = 0;
edge e;
+ basic_block bb;
- for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
+ if (i == -2)
+ bb = ENTRY_BLOCK_PTR;
+ else if (i == -1)
+ bb = EXIT_BLOCK_PTR;
+ else
+ bb = BASIC_BLOCK (i);
+
+ BLOCK_INFO (bb)->tovisit = 0;
+ for (e = bb->succ; e; e = e->succ_next)
{
- nedges++;
- if (e->probability != 0)
- break;
+
+ REAL_VALUE_FROM_INT (EDGE_INFO (e)->back_edge_prob,
+ e->probability, 0, double_mode);
+ REAL_ARITHMETIC (EDGE_INFO (e)->back_edge_prob,
+ RDIV_EXPR, EDGE_INFO (e)->back_edge_prob,
+ real_br_prob_base);
}
- if (!e)
- for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
- e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
}
- }
- ENTRY_BLOCK_PTR->succ->probability = REG_BR_PROB_BASE;
+ /* First compute probabilities locally for each loop from innermost
+ to outermost to examine probabilities for back edges. */
+ estimate_loops_at_level (loops->tree_root);
- /* Set up block info for each basic block. */
- alloc_aux_for_blocks (sizeof (struct block_info_def));
- alloc_aux_for_edges (sizeof (struct edge_info_def));
- for (i = -2; i < n_basic_blocks; i++)
- {
- edge e;
- basic_block bb;
+ /* Now fake loop around whole function to finalize probabilities. */
+ for (i = 0; i < n_basic_blocks; i++)
+ BLOCK_INFO (BASIC_BLOCK (i))->tovisit = 1;
- if (i == -2)
- bb = ENTRY_BLOCK_PTR;
- else if (i == -1)
- bb = EXIT_BLOCK_PTR;
- else
- bb = BASIC_BLOCK (i);
+ BLOCK_INFO (ENTRY_BLOCK_PTR)->tovisit = 1;
+ BLOCK_INFO (EXIT_BLOCK_PTR)->tovisit = 1;
+ propagate_freq (ENTRY_BLOCK_PTR);
- BLOCK_INFO (bb)->tovisit = 0;
- for (e = bb->succ; e; e = e->succ_next)
+ memcpy (&freq_max, &real_zero, sizeof (real_zero));
+ for (i = 0; i < n_basic_blocks; i++)
+ if (REAL_VALUES_LESS
+ (freq_max, BLOCK_INFO (BASIC_BLOCK (i))->frequency))
+ memcpy (&freq_max, &BLOCK_INFO (BASIC_BLOCK (i))->frequency,
+ sizeof (freq_max));
+
+ for (i = -2; i < n_basic_blocks; i++)
{
-
- REAL_VALUE_FROM_INT (EDGE_INFO (e)->back_edge_prob,
- e->probability, 0, double_mode);
- REAL_ARITHMETIC (EDGE_INFO (e)->back_edge_prob,
- RDIV_EXPR, EDGE_INFO (e)->back_edge_prob,
- real_br_prob_base);
- }
- }
+ basic_block bb;
+ REAL_VALUE_TYPE tmp;
- /* First compute probabilities locally for each loop from innermost
- to outermost to examine probabilities for back edges. */
- estimate_loops_at_level (loops->tree_root);
+ if (i == -2)
+ bb = ENTRY_BLOCK_PTR;
+ else if (i == -1)
+ bb = EXIT_BLOCK_PTR;
+ else
+ bb = BASIC_BLOCK (i);
- /* Now fake loop around whole function to finalize probabilities. */
- for (i = 0; i < n_basic_blocks; i++)
- BLOCK_INFO (BASIC_BLOCK (i))->tovisit = 1;
+ REAL_ARITHMETIC (tmp, MULT_EXPR, BLOCK_INFO (bb)->frequency,
+ real_bb_freq_max);
+ REAL_ARITHMETIC (tmp, RDIV_EXPR, tmp, freq_max);
+ REAL_ARITHMETIC (tmp, PLUS_EXPR, tmp, real_one_half);
+ bb->frequency = REAL_VALUE_UNSIGNED_FIX (tmp);
+ }
- BLOCK_INFO (ENTRY_BLOCK_PTR)->tovisit = 1;
- BLOCK_INFO (EXIT_BLOCK_PTR)->tovisit = 1;
- propagate_freq (ENTRY_BLOCK_PTR);
+ free_aux_for_blocks ();
+ free_aux_for_edges ();
+ }
+ compute_function_frequency ();
+ if (flag_reorder_functions)
+ choose_function_section ();
+}
- memcpy (&freq_max, &real_zero, sizeof (real_zero));
+/* Decide whether function is hot, cold or unlikely executed. */
+static void
+compute_function_frequency ()
+{
+ int i;
+ if (!profile_info.count_profiles_merged
+ || !flag_branch_probabilities)
+ return;
+ cfun->function_frequency = FUNCTION_FREQUENCY_UNLIKELY_EXECUTED;
for (i = 0; i < n_basic_blocks; i++)
- if (REAL_VALUES_LESS (freq_max, BLOCK_INFO (BASIC_BLOCK (i))->frequency))
- memcpy (&freq_max, &BLOCK_INFO (BASIC_BLOCK (i))->frequency,
- sizeof (freq_max));
-
- for (i = -2; i < n_basic_blocks; i++)
{
- basic_block bb;
- REAL_VALUE_TYPE tmp;
-
- if (i == -2)
- bb = ENTRY_BLOCK_PTR;
- else if (i == -1)
- bb = EXIT_BLOCK_PTR;
- else
- bb = BASIC_BLOCK (i);
-
- REAL_ARITHMETIC (tmp, MULT_EXPR, BLOCK_INFO (bb)->frequency,
- real_bb_freq_max);
- REAL_ARITHMETIC (tmp, RDIV_EXPR, tmp, freq_max);
- REAL_ARITHMETIC (tmp, PLUS_EXPR, tmp, real_one_half);
- bb->frequency = REAL_VALUE_UNSIGNED_FIX (tmp);
+ basic_block bb = BASIC_BLOCK (i);
+ if (maybe_hot_bb_p (bb))
+ {
+ cfun->function_frequency = FUNCTION_FREQUENCY_HOT;
+ return;
+ }
+ if (!probably_never_executed_bb_p (bb))
+ cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
}
+}
- free_aux_for_blocks ();
- free_aux_for_edges ();
+/* Choose appropriate section for the function. */
+static void
+choose_function_section ()
+{
+ if (DECL_SECTION_NAME (current_function_decl)
+ || !targetm.have_named_sections)
+ return;
+ if (cfun->function_frequency == FUNCTION_FREQUENCY_HOT)
+ DECL_SECTION_NAME (current_function_decl) =
+ build_string (strlen (HOT_TEXT_SECTION_NAME), HOT_TEXT_SECTION_NAME);
+ if (cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
+ DECL_SECTION_NAME (current_function_decl) =
+ build_string (strlen (UNLIKELY_EXECUTED_TEXT_SECTION_NAME),
+ UNLIKELY_EXECUTED_TEXT_SECTION_NAME);
}
diff --git a/gcc/toplev.c b/gcc/toplev.c
index 9213730f629..5f5eb462811 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -381,6 +381,10 @@ int flag_branch_probabilities = 0;
int flag_reorder_blocks = 0;
+/* Nonzero if functions should be reordered. */
+
+int flag_reorder_functions = 0;
+
/* Nonzero if registers should be renamed. */
int flag_rename_registers = 0;
@@ -1076,6 +1080,8 @@ static const lang_independent_options f_options[] =
N_("Enable basic program profiling code") },
{"reorder-blocks", &flag_reorder_blocks, 1,
N_("Reorder basic blocks to improve code placement") },
+ {"reorder-functions", &flag_reorder_functions, 1,
+ N_("Reorder functions to improve code placement") },
{"rename-registers", &flag_rename_registers, 1,
N_("Do the register renaming optimization pass") },
{"cprop-registers", &flag_cprop_registers, 1,
@@ -4657,6 +4663,7 @@ parse_options_and_default_flags (argc, argv)
flag_strict_aliasing = 1;
flag_delete_null_pointer_checks = 1;
flag_reorder_blocks = 1;
+ flag_reorder_functions = 1;
}
if (optimize >= 3)
diff --git a/gcc/varasm.c b/gcc/varasm.c
index 279dc9085f4..6fb663e4e88 100644
--- a/gcc/varasm.c
+++ b/gcc/varasm.c
@@ -1197,7 +1197,8 @@ assemble_start_function (decl, fnname)
/* Handle a user-specified function alignment.
Note that we still need to align to FUNCTION_BOUNDARY, as above,
because ASM_OUTPUT_MAX_SKIP_ALIGN might not do any alignment at all. */
- if (align_functions_log > align)
+ if (align_functions_log > align
+ && cfun->function_frequency != FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
{
#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
ASM_OUTPUT_MAX_SKIP_ALIGN (asm_out_file,