summaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog27
-rw-r--r--gcc/Makefile.in3
-rw-r--r--gcc/ipa.c276
-rw-r--r--gcc/lto-cgraph.c31
-rw-r--r--gcc/lto-section-in.c1
-rw-r--r--gcc/lto-streamer.h1
-rw-r--r--gcc/predict.c35
-rw-r--r--gcc/profile.h4
8 files changed, 354 insertions, 24 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 0fd0f954e41..3c5351303e9 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,30 @@
+2013-03-29 Jan Hubicka <jh@suse.cz>
+
+ * lto-cgraph.c (output_profile_summary, input_profile_summary): Use
+ gcov streaming; stream hot bb threshold to ltrans.
+ * predict.c (get_hot_bb_threshold): Break out from ....
+ (maybe_hot_count_p): ... here.
+ (set_hot_bb_threshold): New function.
+ * lto-section-in.c (lto_section_name): Add profile.
+ * profile.h (get_hot_bb_threshold, set_hot_bb_threshold): Declare.
+ * ipa.c: Include hash-table.h, tree-inline.h, profile.h, params.h,
+ lto-streamer.h and data-streamer.h.
+ (histogram_entry): New structure.
+ (histogram, histogram_pool): New global vars.
+ (histogram_hash): New structure.
+ (histogram_hash::hash): New method.
+ (histogram_hash::equal): Likewise.
+ (account_time_size): New function.
+ (cmp_counts): New function.
+ (dump_histogram): New function.
+ (ipa_profile_generate_summary): New function.
+ (ipa_profile_write_summary): New function.
+ (ipa_profile_read_summary): New function.
+ (ipa_profile): Decide on threshold.
+ (pass_ipa_profile): Add ipa_profile_write_summary and ipa_profile_read_summary.
+ * Makefile.in (ipa.o): Update dependencies.
+ * lto-streamer.h (LTO_section_ipa_profile): New section.
+
2013-03-29 Gabriel Dos Reis <gdr@integrable-solutions.net>
* tree.h (VAR_P): New.
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 44533772722..19377a9a8a5 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -2903,7 +2903,8 @@ varpool.o : varpool.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
$(TREE_FLOW_H)
ipa.o : ipa.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(CGRAPH_H) \
$(TREE_PASS_H) $(GIMPLE_H) $(TARGET_H) $(GGC_H) pointer-set.h \
- $(IPA_UTILS_H)
+ $(IPA_UTILS_H) tree-inline.h $(HASH_TABLE_H) profile.h $(PARAMS_H) \
+ $(LTO_STREAMER_H) $(DATA_STREAMER_H)
ipa-prop.o : ipa-prop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
langhooks.h $(GGC_H) $(TARGET_H) $(CGRAPH_H) $(IPA_PROP_H) $(DIAGNOSTIC_H) \
$(TREE_FLOW_H) $(TM_H) $(TREE_PASS_H) $(FLAGS_H) $(TREE_H) \
diff --git a/gcc/ipa.c b/gcc/ipa.c
index a9b8fb41988..0ea73dd8e86 100644
--- a/gcc/ipa.c
+++ b/gcc/ipa.c
@@ -32,6 +32,12 @@ along with GCC; see the file COPYING3. If not see
#include "ipa-utils.h"
#include "pointer-set.h"
#include "ipa-inline.h"
+#include "hash-table.h"
+#include "tree-inline.h"
+#include "profile.h"
+#include "params.h"
+#include "lto-streamer.h"
+#include "data-streamer.h"
/* Look for all functions inlined to NODE and update their inlined_to pointers
to INLINED_TO. */
@@ -1040,6 +1046,201 @@ struct ipa_opt_pass_d pass_ipa_whole_program_visibility =
NULL, /* variable_transform */
};
+/* Entry in the histogram.  One entry exists per distinct profile COUNT;
+ TIME and SIZE are the sums of the time and size estimates accounted to
+ that count (see account_time_size). */
+
+struct histogram_entry
+{
+ gcov_type count;
+ int time;
+ int size;
+};
+
+/* Histogram of profile values.
+ The histogram is represented as an ordered vector of entries allocated via
+ histogram_pool; once built it is sorted by decreasing count (see
+ cmp_counts). During construction a separate hashtable is kept to look up
+ duplicate entries. */
+
+vec<histogram_entry *> histogram;
+static alloc_pool histogram_pool;
+
+/* Hashtable support for looking up histogram entries by their profile
+ count. Both the stored value and the lookup key are histogram_entry
+ objects; only their COUNT field takes part in hashing and comparison. */
+
+struct histogram_hash : typed_noop_remove <histogram_entry>
+{
+ typedef histogram_entry value_type;
+ typedef histogram_entry compare_type;
+ static inline hashval_t hash (const value_type *);
+ static inline int equal (const value_type *, const compare_type *);
+};
+
+/* Hash VAL by its profile count. The gcov_type count is implicitly
+ converted to hashval_t by the return. */
+
+inline hashval_t
+histogram_hash::hash (const histogram_entry *val)
+{
+ return val->count;
+}
+
+/* Return nonzero if VAL and VAL2 have the same profile count. TIME and
+ SIZE are deliberately not compared. */
+
+inline int
+histogram_hash::equal (const histogram_entry *val, const histogram_entry *val2)
+{
+ return val->count == val2->count;
+}
+
+/* Account TIME and SIZE executed COUNT times into HISTOGRAM.
+ HASHTABLE is the on-the-side hash kept to avoid duplicates: it is used to
+ find the entry for COUNT that is already present in HISTOGRAM, if any.
+ New entries are allocated from histogram_pool. */
+
+static void
+account_time_size (hash_table <histogram_hash> hashtable,
+ vec<histogram_entry *> &histogram,
+ gcov_type count, int time, int size)
+{
+ histogram_entry key = {count, 0, 0};
+ histogram_entry **val = hashtable.find_slot (&key, INSERT);
+
+ /* First time COUNT is seen: create an entry with zero time and size and
+ record it both in the hash slot and in the vector. */
+ if (!*val)
+ {
+ *val = (histogram_entry *) pool_alloc (histogram_pool);
+ **val = key;
+ histogram.safe_push (*val);
+ }
+ (*val)->time += time;
+ (*val)->size += size;
+}
+
+/* Comparison callback for sorting a vector of histogram_entry pointers.
+ V1 and V2 point to the vector elements. Entries with a larger COUNT
+ sort first, so the result is ordered by decreasing count. */
+
+int
+cmp_counts (const void *v1, const void *v2)
+{
+ const histogram_entry *h1 = *(const histogram_entry * const *)v1;
+ const histogram_entry *h2 = *(const histogram_entry * const *)v2;
+ if (h1->count < h2->count)
+ return 1;
+ if (h1->count > h2->count)
+ return -1;
+ return 0;
+}
+
+/* Dump HISTOGRAM to FILE. For each entry print its count, its time and
+ its size; time and size are each followed by the cumulative percentage
+ of the overall time (count * time summed over all entries) and of the
+ overall size reached up to and including that entry. */
+
+static void
+dump_histogram (FILE *file, vec<histogram_entry *> histogram)
+{
+ unsigned int i;
+ gcov_type overall_time = 0, cumulated_time = 0, cumulated_size = 0, overall_size = 0;
+
+ /* Write the header to FILE like the rest of the dump, not to the global
+ dump_file, so the whole output lands in the stream the caller chose. */
+ fprintf (file, "Histogram:\n");
+ for (i = 0; i < histogram.length (); i++)
+ {
+ overall_time += histogram[i]->count * histogram[i]->time;
+ overall_size += histogram[i]->size;
+ }
+ /* Avoid dividing by zero below when the histogram is empty or all
+ entries have zero time or size. */
+ if (!overall_time)
+ overall_time = 1;
+ if (!overall_size)
+ overall_size = 1;
+ for (i = 0; i < histogram.length (); i++)
+ {
+ cumulated_time += histogram[i]->count * histogram[i]->time;
+ cumulated_size += histogram[i]->size;
+ fprintf (file, " "HOST_WIDEST_INT_PRINT_DEC": time:%i (%2.2f) size:%i (%2.2f)\n",
+ (HOST_WIDEST_INT) histogram[i]->count,
+ histogram[i]->time,
+ cumulated_time * 100.0 / overall_time,
+ histogram[i]->size,
+ cumulated_size * 100.0 / overall_size);
+ }
+}
+
+/* Collect histogram from CFG profiles. For every basic block of every
+ function with a gimple body, sum the estimated time and size of its
+ statements and account them under the block's profile count. The
+ global HISTOGRAM is left sorted by cmp_counts. */
+
+static void
+ipa_profile_generate_summary (void)
+{
+ struct cgraph_node *node;
+ gimple_stmt_iterator gsi;
+ hash_table <histogram_hash> hashtable;
+ basic_block bb;
+
+ hashtable.create (10);
+ histogram_pool = create_alloc_pool ("IPA histogram", sizeof (struct histogram_entry),
+ 10);
+
+ FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
+ FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->symbol.decl))
+ {
+ int time = 0;
+ int size = 0;
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ time += estimate_num_insns (gsi_stmt (gsi), &eni_time_weights);
+ size += estimate_num_insns (gsi_stmt (gsi), &eni_size_weights);
+ }
+ account_time_size (hashtable, histogram, bb->count, time, size);
+ }
+ /* The hash table is only needed while building; the entries themselves
+ remain referenced from HISTOGRAM. */
+ hashtable.dispose ();
+ histogram.qsort (cmp_counts);
+}
+
+/* Serialize the ipa info for lto. The LTO_section_ipa_profile section
+ holds the number of histogram entries followed by the count, time and
+ size of each entry, in that order; ipa_profile_read_summary reads them
+ back in the same order. */
+
+static void
+ipa_profile_write_summary (void)
+{
+ struct lto_simple_output_block *ob
+ = lto_create_simple_output_block (LTO_section_ipa_profile);
+ unsigned int i;
+
+ streamer_write_uhwi_stream (ob->main_stream, histogram.length());
+ for (i = 0; i < histogram.length (); i++)
+ {
+ streamer_write_gcov_count_stream (ob->main_stream, histogram[i]->count);
+ streamer_write_uhwi_stream (ob->main_stream, histogram[i]->time);
+ streamer_write_uhwi_stream (ob->main_stream, histogram[i]->size);
+ }
+ lto_destroy_simple_output_block (ob);
+}
+
+/* Deserialize the ipa info for lto. Reads the LTO_section_ipa_profile
+ section of every input file and merges the entries into the global
+ HISTOGRAM; entries with equal counts coming from different files are
+ combined by account_time_size. The result is left sorted by
+ cmp_counts. */
+
+static void
+ipa_profile_read_summary (void)
+{
+ struct lto_file_decl_data ** file_data_vec
+ = lto_get_file_decl_data ();
+ struct lto_file_decl_data * file_data;
+ hash_table <histogram_hash> hashtable;
+ int j = 0;
+
+ hashtable.create (10);
+ histogram_pool = create_alloc_pool ("IPA histogram", sizeof (struct histogram_entry),
+ 10);
+
+ /* FILE_DATA_VEC is walked until its terminating NULL element. */
+ while ((file_data = file_data_vec[j++]))
+ {
+ const char *data;
+ size_t len;
+ struct lto_input_block *ib
+ = lto_create_simple_input_block (file_data,
+ LTO_section_ipa_profile,
+ &data, &len);
+ /* NOTE(review): IB is presumably NULL when the file carries no such
+ section; such files are simply skipped. */
+ if (ib)
+ {
+ unsigned int num = streamer_read_uhwi (ib);
+ unsigned int n;
+ for (n = 0; n < num; n++)
+ {
+ gcov_type count = streamer_read_gcov_count (ib);
+ int time = streamer_read_uhwi (ib);
+ int size = streamer_read_uhwi (ib);
+ account_time_size (hashtable, histogram,
+ count, time, size);
+ }
+ lto_destroy_simple_input_block (file_data,
+ LTO_section_ipa_profile,
+ ib, data, len);
+ }
+ }
+ hashtable.dispose ();
+ histogram.qsort (cmp_counts);
+}
/* Simple ipa profile pass propagating frequencies across the callgraph. */
@@ -1051,6 +1252,75 @@ ipa_profile (void)
int order_pos;
bool something_changed = false;
int i;
+ gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0;
+
+ if (dump_file)
+ dump_histogram (dump_file, histogram);
+ for (i = 0; i < (int)histogram.length (); i++)
+ {
+ overall_time += histogram[i]->count * histogram[i]->time;
+ overall_size += histogram[i]->size;
+ }
+ if (overall_time)
+ {
+ gcov_type threshold;
+
+ gcc_assert (overall_size);
+ if (dump_file)
+ {
+ gcov_type min, cumulated_time = 0, cumulated_size = 0;
+
+ fprintf (dump_file, "Overall time: "HOST_WIDEST_INT_PRINT_DEC"\n",
+ (HOST_WIDEST_INT)overall_time);
+ min = get_hot_bb_threshold ();
+ for (i = 0; i < (int)histogram.length () && histogram[i]->count >= min;
+ i++)
+ {
+ cumulated_time += histogram[i]->count * histogram[i]->time;
+ cumulated_size += histogram[i]->size;
+ }
+ fprintf (dump_file, "GCOV min count: "HOST_WIDEST_INT_PRINT_DEC
+ " Time:%3.2f%% Size:%3.2f%%\n",
+ (HOST_WIDEST_INT)min,
+ cumulated_time * 100.0 / overall_time,
+ cumulated_size * 100.0 / overall_size);
+ }
+ cutoff = (overall_time * PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE) + 500) / 1000;
+ threshold = 0;
+ for (i = 0; cumulated < cutoff; i++)
+ {
+ cumulated += histogram[i]->count * histogram[i]->time;
+ threshold = histogram[i]->count;
+ }
+ if (!threshold)
+ threshold = 1;
+ if (dump_file)
+ {
+ gcov_type cumulated_time = 0, cumulated_size = 0;
+
+ for (i = 0;
+ i < (int)histogram.length () && histogram[i]->count >= threshold;
+ i++)
+ {
+ cumulated_time += histogram[i]->count * histogram[i]->time;
+ cumulated_size += histogram[i]->size;
+ }
+ fprintf (dump_file, "Determined min count: "HOST_WIDEST_INT_PRINT_DEC
+ " Time:%3.2f%% Size:%3.2f%%\n",
+ (HOST_WIDEST_INT)threshold,
+ cumulated_time * 100.0 / overall_time,
+ cumulated_size * 100.0 / overall_size);
+ }
+ if (threshold > get_hot_bb_threshold ()
+ || in_lto_p)
+ {
+ if (dump_file)
+ fprintf (dump_file, "Threshold updated.\n");
+ set_hot_bb_threshold (threshold);
+ }
+ }
+ histogram.release();
+ free_alloc_pool (histogram_pool);
order_pos = ipa_reverse_postorder (order);
for (i = order_pos - 1; i >= 0; i--)
@@ -1112,9 +1382,9 @@ struct ipa_opt_pass_d pass_ipa_profile =
0, /* todo_flags_start */
0 /* todo_flags_finish */
},
- NULL, /* generate_summary */
- NULL, /* write_summary */
- NULL, /* read_summary */
+ ipa_profile_generate_summary, /* generate_summary */
+ ipa_profile_write_summary, /* write_summary */
+ ipa_profile_read_summary, /* read_summary */
NULL, /* write_optimization_summary */
NULL, /* read_optimization_summary */
NULL, /* stmt_fixup */
diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c
index 6d639a9459d..c619197d5dd 100644
--- a/gcc/lto-cgraph.c
+++ b/gcc/lto-cgraph.c
@@ -604,11 +604,11 @@ output_profile_summary (struct lto_simple_output_block *ob)
units. */
gcc_assert (profile_info->runs);
streamer_write_uhwi_stream (ob->main_stream, profile_info->runs);
- streamer_write_uhwi_stream (ob->main_stream, profile_info->sum_max);
+ streamer_write_gcov_count_stream (ob->main_stream, profile_info->sum_max);
/* sum_all is needed for computing the working set with the
histogram. */
- streamer_write_uhwi_stream (ob->main_stream, profile_info->sum_all);
+ streamer_write_gcov_count_stream (ob->main_stream, profile_info->sum_all);
/* Create and output a bitpack of non-zero histogram entries indices. */
bp = bitpack_create (ob->main_stream);
@@ -620,13 +620,18 @@ output_profile_summary (struct lto_simple_output_block *ob)
{
if (!profile_info->histogram[h_ix].num_counters)
continue;
- streamer_write_uhwi_stream (ob->main_stream,
+ streamer_write_gcov_count_stream (ob->main_stream,
profile_info->histogram[h_ix].num_counters);
- streamer_write_uhwi_stream (ob->main_stream,
+ streamer_write_gcov_count_stream (ob->main_stream,
profile_info->histogram[h_ix].min_value);
- streamer_write_uhwi_stream (ob->main_stream,
+ streamer_write_gcov_count_stream (ob->main_stream,
profile_info->histogram[h_ix].cum_value);
- }
+ }
+ /* IPA-profile computes hot bb threshold based on cumulated
+ whole program profile. We need to stream it down to ltrans. */
+ if (flag_wpa)
+ streamer_write_gcov_count_stream (ob->main_stream,
+ get_hot_bb_threshold ());
}
else
streamer_write_uhwi_stream (ob->main_stream, 0);
@@ -1259,8 +1264,8 @@ input_profile_summary (struct lto_input_block *ib,
if (runs)
{
file_data->profile_info.runs = runs;
- file_data->profile_info.sum_max = streamer_read_uhwi (ib);
- file_data->profile_info.sum_all = streamer_read_uhwi (ib);
+ file_data->profile_info.sum_max = streamer_read_gcov_count (ib);
+ file_data->profile_info.sum_all = streamer_read_gcov_count (ib);
memset (file_data->profile_info.histogram, 0,
sizeof (gcov_bucket_type) * GCOV_HISTOGRAM_SIZE);
@@ -1279,12 +1284,16 @@ input_profile_summary (struct lto_input_block *ib,
continue;
file_data->profile_info.histogram[h_ix].num_counters
- = streamer_read_uhwi (ib);
+ = streamer_read_gcov_count (ib);
file_data->profile_info.histogram[h_ix].min_value
- = streamer_read_uhwi (ib);
+ = streamer_read_gcov_count (ib);
file_data->profile_info.histogram[h_ix].cum_value
- = streamer_read_uhwi (ib);
+ = streamer_read_gcov_count (ib);
}
+ /* IPA-profile computes hot bb threshold based on cumulated
+ whole program profile. We need to stream it down to ltrans. */
+ if (flag_ltrans)
+ set_hot_bb_threshold (streamer_read_gcov_count (ib));
}
}
diff --git a/gcc/lto-section-in.c b/gcc/lto-section-in.c
index 3a861511c16..0ef421f6167 100644
--- a/gcc/lto-section-in.c
+++ b/gcc/lto-section-in.c
@@ -55,6 +55,7 @@ const char *lto_section_name[LTO_N_SECTION_TYPES] =
"jmpfuncs",
"pureconst",
"reference",
+ "profile",
"symbol_nodes",
"opts",
"cgraphopt",
diff --git a/gcc/lto-streamer.h b/gcc/lto-streamer.h
index 919e304e107..95fe33ae1a7 100644
--- a/gcc/lto-streamer.h
+++ b/gcc/lto-streamer.h
@@ -243,6 +243,7 @@ enum lto_section_type
LTO_section_jump_functions,
LTO_section_ipa_pure_const,
LTO_section_ipa_reference,
+ LTO_section_ipa_profile,
LTO_section_symtab_nodes,
LTO_section_opts,
LTO_section_cgraph_opt_sum,
diff --git a/gcc/predict.c b/gcc/predict.c
index 57975d18da0..5394ef587b6 100644
--- a/gcc/predict.c
+++ b/gcc/predict.c
@@ -128,25 +128,42 @@ maybe_hot_frequency_p (struct function *fun, int freq)
return true;
}
+/* Cached threshold for hot BB counts; -1 means it has not been determined
+ yet. */
+static gcov_type min_count = -1;
+
+/* Determine the threshold for hot BB counts. Unless a value has already
+ been computed or installed by set_hot_bb_threshold, it is taken from the
+ working set for HOT_BB_COUNT_WS_PERMILLE and cached in MIN_COUNT. */
+
+gcov_type
+get_hot_bb_threshold ()
+{
+ gcov_working_set_t *ws;
+ if (min_count == -1)
+ {
+ ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
+ gcc_assert (ws);
+ min_count = ws->min_counter;
+ }
+ return min_count;
+}
+
+/* Set the threshold for hot BB counts to MIN. Subsequent calls to
+ get_hot_bb_threshold return MIN instead of deriving the value from the
+ working set (unless MIN is -1, which requests recomputation). */
+
+void
+set_hot_bb_threshold (gcov_type min)
+{
+ min_count = min;
+}
+
/* Return TRUE if profile count COUNT is considered to be hot. */
static inline bool
maybe_hot_count_p (struct function *fun, gcov_type count)
{
- gcov_working_set_t *ws;
- static gcov_type min_count = -1;
if (fun && profile_status_for_function (fun) != PROFILE_READ)
return true;
/* Code executed at most once is not hot. */
if (profile_info->runs >= count)
return false;
- if (min_count == -1)
- {
- ws = find_working_set (PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE));
- gcc_assert (ws);
- min_count = ws->min_counter;
- }
- return (count >= min_count);
+ return (count >= get_hot_bb_threshold ());
}
/* Return true in case BB can be CPU intensive and should be optimized
diff --git a/gcc/profile.h b/gcc/profile.h
index e21250bda39..c7d5f1aa62c 100644
--- a/gcc/profile.h
+++ b/gcc/profile.h
@@ -48,4 +48,8 @@ extern void del_node_map (void);
extern void compute_working_sets (void);
+/* In predict.c. */
+extern gcov_type get_hot_bb_threshold (void);
+extern void set_hot_bb_threshold (gcov_type);
+
#endif /* PROFILE_H */