summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/ChangeLog35
-rw-r--r--gcc/Makefile.in1
-rw-r--r--gcc/auto-profile.c1687
-rw-r--r--gcc/auto-profile.h31
-rw-r--r--gcc/basic-block.h2
-rw-r--r--gcc/common.opt10
-rw-r--r--gcc/coverage.c5
-rw-r--r--gcc/doc/invoke.texi38
-rw-r--r--gcc/gcov-io.h3
-rw-r--r--gcc/ipa-inline.c79
-rw-r--r--gcc/ipa-inline.h1
-rw-r--r--gcc/opts.c100
-rw-r--r--gcc/passes.def1
-rw-r--r--gcc/predict.c7
-rw-r--r--gcc/profile.c8
-rw-r--r--gcc/timevar.def1
-rw-r--r--gcc/toplev.c5
-rw-r--r--gcc/tree-pass.h1
-rw-r--r--gcc/tree-profile.c6
-rw-r--r--gcc/tree-ssa-live.c2
-rw-r--r--gcc/value-prof.c4
-rw-r--r--gcc/value-prof.h3
22 files changed, 1950 insertions, 80 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index bce5f7010eb..931ec2e5253 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,38 @@
+2014-10-21 Dehao Chen <dehao@google.com>
+
+ * auto-profile.c: New file.
+ * auto-profile.h: New file.
+ * basic-block.h (maybe_hot_count_p): New export func.
+ (add_working_set): New export func.
+ * gcov-io.h (GCOV_TAG_AFDO_FILE_NAMES): New tag.
+ (GCOV_TAG_AFDO_FUNCTION): Likewise.
+ (GCOV_TAG_AFDO_WORKING_SET): Likewise.
+ * opts.c (enable_fdo_optimizations): New func.
+ (common_handle_option): Handle -fauto-profile flag.
+ * ipa-inline.c (want_early_inline_function_p): Iterative-einline.
+ (class pass_early_inline): Export early_inliner.
+ (early_inliner): Likewise.
+ (pass_early_inline::execute): Likewise.
+ * ipa-inline.h (early_inliner): Likewise.
+ * predict.c (maybe_hot_count_p): New export func.
+ (counts_to_freqs): AutoFDO logic.
+ (rebuild_frequencies): Likewise.
+ * tree-profile.c (pass_ipa_tree_profile::gate): Likewise.
+ * profile.c (add_working_set): New func.
+ * Makefile.in (auto-profile.o): New object file.
+ * passes.def (pass_ipa_auto_profile): New pass.
+ * tree-ssa-live.c (remove_unused_scope_block_p): AutoFDO logic.
+ * tree-pass.h (make_pass_ipa_auto_profile): New pass.
+ * toplev.c (compile_file): AutoFDO logic.
+ * doc/invoke.texi (-fauto-profile): New doc.
+ * coverage.c (coverage_init): AutoFDO logic.
+ * common.opt (-fauto-profile): New flag.
+ * timevar.def (TV_IPA_AUTOFDO): New tag.
+ * value-prof.c (gimple_alloc_histogram_value): New export func.
+ (check_ic_target): Likewise.
+ * value-prof.h (gimple_alloc_histogram_value): Likewise.
+ (check_ic_target): Likewise.
+
2014-10-21 David Malcolm <dmalcolm@redhat.com>
* cgraph.c (cgraph_c_finalize): New function.
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index de2adc761e4..1fd7915814c 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1153,6 +1153,7 @@ OBJS = \
alias.o \
alloc-pool.o \
auto-inc-dec.o \
+ auto-profile.o \
bb-reorder.o \
bitmap.o \
bt-load.o \
diff --git a/gcc/auto-profile.c b/gcc/auto-profile.c
new file mode 100644
index 00000000000..24d7126a07a
--- /dev/null
+++ b/gcc/auto-profile.c
@@ -0,0 +1,1687 @@
+/* Read and annotate call graph profile from the auto profile data file.
+ Copyright (C) 2014. Free Software Foundation, Inc.
+ Contributed by Dehao Chen (dehao@google.com)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include <string.h>
+#include <map>
+#include <set>
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tree.h"
+#include "tree-pass.h"
+#include "flags.h"
+#include "basic-block.h"
+#include "diagnostic-core.h"
+#include "gcov-io.h"
+#include "input.h"
+#include "profile.h"
+#include "langhooks.h"
+#include "opts.h"
+#include "tree-pass.h"
+#include "cfgloop.h"
+#include "tree-ssa-alias.h"
+#include "tree-cfg.h"
+#include "tree-cfgcleanup.h"
+#include "tree-ssa-operands.h"
+#include "tree-into-ssa.h"
+#include "internal-fn.h"
+#include "is-a.h"
+#include "gimple-expr.h"
+#include "gimple.h"
+#include "gimple-iterator.h"
+#include "gimple-ssa.h"
+#include "cgraph.h"
+#include "value-prof.h"
+#include "coverage.h"
+#include "params.h"
+#include "ipa-inline.h"
+#include "tree-inline.h"
+#include "stringpool.h"
+#include "auto-profile.h"
+#include "vec.h"
+
+/* The following routines implements AutoFDO optimization.
+
+ This optimization uses sampling profiles to annotate basic block counts
+ and uses heuristics to estimate branch probabilities.
+
+ There are three phases in AutoFDO:
+
+ Phase 1: Read profile from the profile data file.
+ The following info is read from the profile datafile:
+ * string_table: a map between function name and its index.
+ * autofdo_source_profile: a map from function_instance name to
+ function_instance. This is represented as a forest of
+ function_instances.
+ * WorkingSet: a histogram of how many instructions are covered for a
+ given percentage of total cycles. This is describing the binary
+ level information (not source level). This info is used to help
+ decide if we want aggressive optimizations that could increase
+ code footprint (e.g. loop unroll etc.)
+ A function instance is an instance of function that could either be a
+ standalone symbol, or a clone of a function that is inlined into another
+ function.
+
+ Phase 2: Early inline + valur profile transformation.
+ Early inline uses autofdo_source_profile to find if a callsite is:
+ * inlined in the profiled binary.
+ * callee body is hot in the profiling run.
+ If both condition satisfies, early inline will inline the callsite
+ regardless of the code growth.
+ Phase 2 is an iterative process. During each iteration, we also check
+ if an indirect callsite is promoted and inlined in the profiling run.
+ If yes, vpt will happen to force promote it and in the next iteration,
+ einline will inline the promoted callsite in the next iteration.
+
+ Phase 3: Annotate control flow graph.
+ AutoFDO uses a separate pass to:
+ * Annotate basic block count
+ * Estimate branch probability
+
+ After the above 3 phases, all profile is readily annotated on the GCC IR.
+ AutoFDO tries to reuse all FDO infrastructure as much as possible to make
+ use of the profile. E.g. it uses existing mechanism to calculate the basic
+ block/edge frequency, as well as the cgraph node/edge count.
+*/
+
+#define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo"
+#define AUTO_PROFILE_VERSION 1
+
+namespace autofdo
+{
+
+/* Represent a source location: (function_decl, lineno). */
+typedef std::pair<tree, unsigned> decl_lineno;
+
+/* Represent an inline stack. vector[0] is the leaf node. */
+typedef auto_vec<decl_lineno> inline_stack;
+
+/* String array that stores function names. */
+typedef auto_vec<char *> string_vector;
+
+/* Map from function name's index in string_table to target's
+ execution count. */
+typedef std::map<unsigned, gcov_type> icall_target_map;
+
+/* Set of gimple stmts. Used to track if the stmt has already been promoted
+ to direct call. */
+typedef std::set<gimple> stmt_set;
+
+/* Represent count info of an inline stack. */
+struct count_info
+{
+ /* Sampled count of the inline stack. */
+ gcov_type count;
+
+ /* Map from indirect call target to its sample count. */
+ icall_target_map targets;
+
+ /* Whether this inline stack is already used in annotation.
+
+ Each inline stack should only be used to annotate IR once.
+ This will be enforced when instruction-level discriminator
+ is supported. */
+ bool annotated;
+};
+
+/* operator< for "const char *". */
+struct string_compare
+{
+ bool operator()(const char *a, const char *b) const
+ {
+ return strcmp (a, b) < 0;
+ }
+};
+
+/* Store a string array, indexed by string position in the array. */
+class string_table
+{
+public:
+ string_table ()
+ {}
+
+ ~string_table ();
+
+ /* For a given string, returns its index. */
+ int get_index (const char *name) const;
+
+ /* For a given decl, returns the index of the decl name. */
+ int get_index_by_decl (tree decl) const;
+
+ /* For a given index, returns the string. */
+ const char *get_name (int index) const;
+
+ /* Read profile, return TRUE on success. */
+ bool read ();
+
+private:
+ typedef std::map<const char *, unsigned, string_compare> string_index_map;
+ string_vector vector_;
+ string_index_map map_;
+};
+
+/* Profile of a function instance:
+ 1. total_count of the function.
+ 2. head_count (entry basic block count) of the function (only valid when
+ function is a top-level function_instance, i.e. it is the original copy
+ instead of the inlined copy).
+ 3. map from source location (decl_lineno) to profile (count_info).
+ 4. map from callsite to callee function_instance. */
+class function_instance
+{
+public:
+ typedef auto_vec<function_instance *> function_instance_stack;
+
+ /* Read the profile and return a function_instance with head count as
+ HEAD_COUNT. Recursively read callsites to create nested function_instances
+ too. STACK is used to track the recursive creation process. */
+ static function_instance *
+ read_function_instance (function_instance_stack *stack,
+ gcov_type head_count);
+
+ /* Recursively deallocate all callsites (nested function_instances). */
+ ~function_instance ();
+
+ /* Accessors. */
+ int
+ name () const
+ {
+ return name_;
+ }
+ gcov_type
+ total_count () const
+ {
+ return total_count_;
+ }
+ gcov_type
+ head_count () const
+ {
+ return head_count_;
+ }
+
+ /* Traverse callsites of the current function_instance to find one at the
+ location of LINENO and callee name represented in DECL. */
+ function_instance *get_function_instance_by_decl (unsigned lineno,
+ tree decl) const;
+
+ /* Store the profile info for LOC in INFO. Return TRUE if profile info
+ is found. */
+ bool get_count_info (location_t loc, count_info *info) const;
+
+ /* Read the inlined indirect call target profile for STMT and store it in
+ MAP, return the total count for all inlined indirect calls. */
+ gcov_type find_icall_target_map (gimple stmt, icall_target_map *map) const;
+
+ /* Sum of counts that is used during annotation. */
+ gcov_type total_annotated_count () const;
+
+ /* Mark LOC as annotated. */
+ void mark_annotated (location_t loc);
+
+private:
+ /* Callsite, represented as (decl_lineno, callee_function_name_index). */
+ typedef std::pair<unsigned, unsigned> callsite;
+
+ /* Map from callsite to callee function_instance. */
+ typedef std::map<callsite, function_instance *> callsite_map;
+
+ function_instance (unsigned name, gcov_type head_count)
+ : name_ (name), total_count_ (0), head_count_ (head_count)
+ {
+ }
+
+ /* Map from source location (decl_lineno) to profile (count_info). */
+ typedef std::map<unsigned, count_info> position_count_map;
+
+ /* function_instance name index in the string_table. */
+ unsigned name_;
+
+ /* Total sample count. */
+ gcov_type total_count_;
+
+ /* Entry BB's sample count. */
+ gcov_type head_count_;
+
+ /* Map from callsite location to callee function_instance. */
+ callsite_map callsites;
+
+ /* Map from source location to count_info. */
+ position_count_map pos_counts;
+};
+
+/* Profile for all functions. */
+class autofdo_source_profile
+{
+public:
+ static autofdo_source_profile *
+ create ()
+ {
+ autofdo_source_profile *map = new autofdo_source_profile ();
+
+ if (map->read ())
+ return map;
+ delete map;
+ return NULL;
+ }
+
+ ~autofdo_source_profile ();
+
+ /* For a given DECL, returns the top-level function_instance. */
+ function_instance *get_function_instance_by_decl (tree decl) const;
+
+ /* Find count_info for a given gimple STMT. If found, store the count_info
+ in INFO and return true; otherwise return false. */
+ bool get_count_info (gimple stmt, count_info *info) const;
+
+ /* Find total count of the callee of EDGE. */
+ gcov_type get_callsite_total_count (struct cgraph_edge *edge) const;
+
+ /* Update value profile INFO for STMT from the inlined indirect callsite.
+ Return true if INFO is updated. */
+ bool update_inlined_ind_target (gimple stmt, count_info *info);
+
+ /* Mark LOC as annotated. */
+ void mark_annotated (location_t loc);
+
+private:
+ /* Map from function_instance name index (in string_table) to
+ function_instance. */
+ typedef std::map<unsigned, function_instance *> name_function_instance_map;
+
+ autofdo_source_profile () {}
+
+ /* Read AutoFDO profile and returns TRUE on success. */
+ bool read ();
+
+ /* Return the function_instance in the profile that correspond to the
+ inline STACK. */
+ function_instance *
+ get_function_instance_by_inline_stack (const inline_stack &stack) const;
+
+ name_function_instance_map map_;
+};
+
+/* Store the strings read from the profile data file. */
+static string_table *afdo_string_table;
+
+/* Store the AutoFDO source profile. */
+static autofdo_source_profile *afdo_source_profile;
+
+/* gcov_ctr_summary structure to store the profile_info. */
+static struct gcov_ctr_summary *afdo_profile_info;
+
+/* Helper functions. */
+
+/* Return the original name of NAME: strip the suffix that starts
+ with '.' Caller is responsible for freeing RET. */
+
+static char *
+get_original_name (const char *name)
+{
+ char *ret = xstrdup (name);
+ char *find = strchr (ret, '.');
+ if (find != NULL)
+ *find = 0;
+ return ret;
+}
+
+/* Return the combined location, which is a 32bit integer in which
+ higher 16 bits stores the line offset of LOC to the start lineno
+ of DECL, The lower 16 bits stores the discrimnator. */
+
+static unsigned
+get_combined_location (location_t loc, tree decl)
+{
+ /* TODO: allow more bits for line and less bits for discriminator. */
+ if (LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl) >= (1<<16))
+ warning_at (loc, OPT_Woverflow, "Offset exceeds 16 bytes.");
+ return ((LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) << 16);
+}
+
+/* Return the function decl of a given lexical BLOCK. */
+
+static tree
+get_function_decl_from_block (tree block)
+{
+ tree decl;
+
+ if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block) == UNKNOWN_LOCATION))
+ return NULL_TREE;
+
+ for (decl = BLOCK_ABSTRACT_ORIGIN (block);
+ decl && (TREE_CODE (decl) == BLOCK);
+ decl = BLOCK_ABSTRACT_ORIGIN (decl))
+ if (TREE_CODE (decl) == FUNCTION_DECL)
+ break;
+ return decl;
+}
+
+/* Store inline stack for STMT in STACK. */
+
+static void
+get_inline_stack (location_t locus, inline_stack *stack)
+{
+ if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
+ return;
+
+ tree block = LOCATION_BLOCK (locus);
+ if (block && TREE_CODE (block) == BLOCK)
+ {
+ int level = 0;
+ for (block = BLOCK_SUPERCONTEXT (block);
+ block && (TREE_CODE (block) == BLOCK);
+ block = BLOCK_SUPERCONTEXT (block))
+ {
+ location_t tmp_locus = BLOCK_SOURCE_LOCATION (block);
+ if (LOCATION_LOCUS (tmp_locus) == UNKNOWN_LOCATION)
+ continue;
+
+ tree decl = get_function_decl_from_block (block);
+ stack->safe_push (
+ std::make_pair (decl, get_combined_location (locus, decl)));
+ locus = tmp_locus;
+ level++;
+ }
+ }
+ stack->safe_push (
+ std::make_pair (current_function_decl,
+ get_combined_location (locus, current_function_decl)));
+}
+
+/* Return STMT's combined location, which is a 32bit integer in which
+ higher 16 bits stores the line offset of LOC to the start lineno
+ of DECL, The lower 16 bits stores the discrimnator. */
+
+static unsigned
+get_relative_location_for_stmt (gimple stmt)
+{
+ location_t locus = gimple_location (stmt);
+ if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
+ return UNKNOWN_LOCATION;
+
+ for (tree block = gimple_block (stmt); block && (TREE_CODE (block) == BLOCK);
+ block = BLOCK_SUPERCONTEXT (block))
+ if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) != UNKNOWN_LOCATION)
+ return get_combined_location (locus,
+ get_function_decl_from_block (block));
+ return get_combined_location (locus, current_function_decl);
+}
+
+/* Return true if BB contains indirect call. */
+
+static bool
+has_indirect_call (basic_block bb)
+{
+ gimple_stmt_iterator gsi;
+
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple stmt = gsi_stmt (gsi);
+ if (gimple_code (stmt) == GIMPLE_CALL && !gimple_call_internal_p (stmt)
+ && (gimple_call_fn (stmt) == NULL
+ || TREE_CODE (gimple_call_fn (stmt)) != FUNCTION_DECL))
+ return true;
+ }
+ return false;
+}
+
+/* Member functions for string_table. */
+
+/* Deconstructor. */
+
+string_table::~string_table ()
+{
+ for (unsigned i = 0; i < vector_.length (); i++)
+ free (vector_[i]);
+}
+
+
+/* Return the index of a given function NAME. Return -1 if NAME is not
+ found in string table. */
+
+int
+string_table::get_index (const char *name) const
+{
+ if (name == NULL)
+ return -1;
+ string_index_map::const_iterator iter = map_.find (name);
+ if (iter == map_.end ())
+ return -1;
+ else
+ return iter->second;
+}
+
+/* Return the index of a given function DECL. Return -1 if DECL is not
+ found in string table. */
+
+int
+string_table::get_index_by_decl (tree decl) const
+{
+ char *name
+ = get_original_name (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
+ int ret = get_index (name);
+ free (name);
+ if (ret != -1)
+ return ret;
+ ret = get_index (lang_hooks.dwarf_name (decl, 0));
+ if (ret != -1)
+ return ret;
+ if (DECL_ABSTRACT_ORIGIN (decl))
+ return get_index_by_decl (DECL_ABSTRACT_ORIGIN (decl));
+ else
+ return -1;
+}
+
+/* Return the function name of a given INDEX. */
+
+const char *
+string_table::get_name (int index) const
+{
+ gcc_assert (index > 0 && index < (int)vector_.length ());
+ return vector_[index];
+}
+
+/* Read the string table. Return TRUE if reading is successful. */
+
+bool
+string_table::read ()
+{
+ if (gcov_read_unsigned () != GCOV_TAG_AFDO_FILE_NAMES)
+ return false;
+ /* Skip the length of the section. */
+ gcov_read_unsigned ();
+ /* Read in the file name table. */
+ unsigned string_num = gcov_read_unsigned ();
+ for (unsigned i = 0; i < string_num; i++)
+ {
+ vector_.safe_push (get_original_name (gcov_read_string ()));
+ map_[vector_.last ()] = i;
+ }
+ return true;
+}
+
+/* Member functions for function_instance. */
+
+function_instance::~function_instance ()
+{
+ for (callsite_map::iterator iter = callsites.begin ();
+ iter != callsites.end (); ++iter)
+ delete iter->second;
+}
+
+/* Traverse callsites of the current function_instance to find one at the
+ location of LINENO and callee name represented in DECL. */
+
+function_instance *
+function_instance::get_function_instance_by_decl (unsigned lineno,
+ tree decl) const
+{
+ int func_name_idx = afdo_string_table->get_index_by_decl (decl);
+ if (func_name_idx != -1)
+ {
+ callsite_map::const_iterator ret
+ = callsites.find (std::make_pair (lineno, func_name_idx));
+ if (ret != callsites.end ())
+ return ret->second;
+ }
+ func_name_idx
+ = afdo_string_table->get_index (lang_hooks.dwarf_name (decl, 0));
+ if (func_name_idx != -1)
+ {
+ callsite_map::const_iterator ret
+ = callsites.find (std::make_pair (lineno, func_name_idx));
+ if (ret != callsites.end ())
+ return ret->second;
+ }
+ if (DECL_ABSTRACT_ORIGIN (decl))
+ return get_function_instance_by_decl (lineno, DECL_ABSTRACT_ORIGIN (decl));
+ else
+ return NULL;
+}
+
+/* Store the profile info for LOC in INFO. Return TRUE if profile info
+ is found. */
+
+bool
+function_instance::get_count_info (location_t loc, count_info *info) const
+{
+ position_count_map::const_iterator iter = pos_counts.find (loc);
+ if (iter == pos_counts.end ())
+ return false;
+ *info = iter->second;
+ return true;
+}
+
+/* Mark LOC as annotated. */
+
+void
+function_instance::mark_annotated (location_t loc)
+{
+ position_count_map::iterator iter = pos_counts.find (loc);
+ if (iter == pos_counts.end ())
+ return;
+ iter->second.annotated = true;
+}
+
+/* Read the inlinied indirect call target profile for STMT and store it in
+ MAP, return the total count for all inlined indirect calls. */
+
+gcov_type
+function_instance::find_icall_target_map (gimple stmt,
+ icall_target_map *map) const
+{
+ gcov_type ret = 0;
+ unsigned stmt_offset = get_relative_location_for_stmt (stmt);
+
+ for (callsite_map::const_iterator iter = callsites.begin ();
+ iter != callsites.end (); ++iter)
+ {
+ unsigned callee = iter->second->name ();
+ /* Check if callsite location match the stmt. */
+ if (iter->first.first != stmt_offset)
+ continue;
+ struct cgraph_node *node = cgraph_node::get_for_asmname (
+ get_identifier (afdo_string_table->get_name (callee)));
+ if (node == NULL)
+ continue;
+ if (!check_ic_target (stmt, node))
+ continue;
+ (*map)[callee] = iter->second->total_count ();
+ ret += iter->second->total_count ();
+ }
+ return ret;
+}
+
+/* Read the profile and create a function_instance with head count as
+ HEAD_COUNT. Recursively read callsites to create nested function_instances
+ too. STACK is used to track the recursive creation process. */
+
+/* function instance profile format:
+
+ ENTRY_COUNT: 8 bytes
+ NAME_INDEX: 4 bytes
+ NUM_POS_COUNTS: 4 bytes
+ NUM_CALLSITES: 4 byte
+ POS_COUNT_1:
+ POS_1_OFFSET: 4 bytes
+ NUM_TARGETS: 4 bytes
+ COUNT: 8 bytes
+ TARGET_1:
+ VALUE_PROFILE_TYPE: 4 bytes
+ TARGET_IDX: 8 bytes
+ COUNT: 8 bytes
+ TARGET_2
+ ...
+ TARGET_n
+ POS_COUNT_2
+ ...
+ POS_COUNT_N
+ CALLSITE_1:
+ CALLSITE_1_OFFSET: 4 bytes
+ FUNCTION_INSTANCE_PROFILE (nested)
+ CALLSITE_2
+ ...
+ CALLSITE_n. */
+
+function_instance *
+function_instance::read_function_instance (function_instance_stack *stack,
+ gcov_type head_count)
+{
+ unsigned name = gcov_read_unsigned ();
+ unsigned num_pos_counts = gcov_read_unsigned ();
+ unsigned num_callsites = gcov_read_unsigned ();
+ function_instance *s = new function_instance (name, head_count);
+ stack->safe_push (s);
+
+ for (unsigned i = 0; i < num_pos_counts; i++)
+ {
+ unsigned offset = gcov_read_unsigned () & 0xffff0000;
+ unsigned num_targets = gcov_read_unsigned ();
+ gcov_type count = gcov_read_counter ();
+ s->pos_counts[offset].count = count;
+ for (unsigned j = 0; j < stack->length (); j++)
+ (*stack)[j]->total_count_ += count;
+ for (unsigned j = 0; j < num_targets; j++)
+ {
+ /* Only indirect call target histogram is supported now. */
+ gcov_read_unsigned ();
+ gcov_type target_idx = gcov_read_counter ();
+ s->pos_counts[offset].targets[target_idx] = gcov_read_counter ();
+ }
+ }
+ for (unsigned i = 0; i < num_callsites; i++)
+ {
+ unsigned offset = gcov_read_unsigned ();
+ function_instance *callee_function_instance
+ = read_function_instance (stack, 0);
+ s->callsites[std::make_pair (offset, callee_function_instance->name ())]
+ = callee_function_instance;
+ }
+ stack->pop ();
+ return s;
+}
+
+/* Sum of counts that is used during annotation. */
+
+gcov_type
+function_instance::total_annotated_count () const
+{
+ gcov_type ret = 0;
+ for (callsite_map::const_iterator iter = callsites.begin ();
+ iter != callsites.end (); ++iter)
+ ret += iter->second->total_annotated_count ();
+ for (position_count_map::const_iterator iter = pos_counts.begin ();
+ iter != pos_counts.end (); ++iter)
+ if (iter->second.annotated)
+ ret += iter->second.count;
+ return ret;
+}
+
+/* Member functions for autofdo_source_profile. */
+
+autofdo_source_profile::~autofdo_source_profile ()
+{
+ for (name_function_instance_map::const_iterator iter = map_.begin ();
+ iter != map_.end (); ++iter)
+ delete iter->second;
+}
+
+/* For a given DECL, returns the top-level function_instance. */
+
+function_instance *
+autofdo_source_profile::get_function_instance_by_decl (tree decl) const
+{
+ int index = afdo_string_table->get_index_by_decl (decl);
+ if (index == -1)
+ return NULL;
+ name_function_instance_map::const_iterator ret = map_.find (index);
+ return ret == map_.end () ? NULL : ret->second;
+}
+
+/* Find count_info for a given gimple STMT. If found, store the count_info
+ in INFO and return true; otherwise return false. */
+
+bool
+autofdo_source_profile::get_count_info (gimple stmt, count_info *info) const
+{
+ if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
+ return false;
+
+ inline_stack stack;
+ get_inline_stack (gimple_location (stmt), &stack);
+ if (stack.length () == 0)
+ return false;
+ function_instance *s = get_function_instance_by_inline_stack (stack);
+ if (s == NULL)
+ return false;
+ return s->get_count_info (stack[0].second, info);
+}
+
+/* Mark LOC as annotated. */
+
+void
+autofdo_source_profile::mark_annotated (location_t loc)
+{
+ inline_stack stack;
+ get_inline_stack (loc, &stack);
+ if (stack.length () == 0)
+ return;
+ function_instance *s = get_function_instance_by_inline_stack (stack);
+ if (s == NULL)
+ return;
+ s->mark_annotated (stack[0].second);
+}
+
+/* Update value profile INFO for STMT from the inlined indirect callsite.
+ Return true if INFO is updated. */
+
+bool
+autofdo_source_profile::update_inlined_ind_target (gimple stmt,
+ count_info *info)
+{
+ if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
+ return false;
+
+ count_info old_info;
+ get_count_info (stmt, &old_info);
+ gcov_type total = 0;
+ for (icall_target_map::const_iterator iter = old_info.targets.begin ();
+ iter != old_info.targets.end (); ++iter)
+ total += iter->second;
+
+ /* Program behavior changed, original promoted (and inlined) target is not
+ hot any more. Will avoid promote the original target.
+
+ To check if original promoted target is still hot, we check the total
+ count of the unpromoted targets (stored in old_info). If it is no less
+ than half of the callsite count (stored in INFO), the original promoted
+ target is considered not hot any more. */
+ if (total >= info->count / 2)
+ return false;
+
+ inline_stack stack;
+ get_inline_stack (gimple_location (stmt), &stack);
+ if (stack.length () == 0)
+ return false;
+ function_instance *s = get_function_instance_by_inline_stack (stack);
+ if (s == NULL)
+ return false;
+ icall_target_map map;
+ if (s->find_icall_target_map (stmt, &map) == 0)
+ return false;
+ for (icall_target_map::const_iterator iter = map.begin ();
+ iter != map.end (); ++iter)
+ info->targets[iter->first] = iter->second;
+ return true;
+}
+
+/* Find total count of the callee of EDGE. */
+
+gcov_type
+autofdo_source_profile::get_callsite_total_count (
+ struct cgraph_edge *edge) const
+{
+ inline_stack stack;
+ stack.safe_push (std::make_pair (edge->callee->decl, 0));
+ get_inline_stack (gimple_location (edge->call_stmt), &stack);
+
+ function_instance *s = get_function_instance_by_inline_stack (stack);
+ if (s == NULL
+ || afdo_string_table->get_index (IDENTIFIER_POINTER (
+ DECL_ASSEMBLER_NAME (edge->callee->decl))) != s->name ())
+ return 0;
+ else
+ return s->total_count ();
+}
+
+/* Read AutoFDO profile and returns TRUE on success. */
+
+/* source profile format:
+
+ GCOV_TAG_AFDO_FUNCTION: 4 bytes
+ LENGTH: 4 bytes
+ NUM_FUNCTIONS: 4 bytes
+ FUNCTION_INSTANCE_1
+ FUNCTION_INSTANCE_2
+ ...
+ FUNCTION_INSTANCE_N. */
+
+bool
+autofdo_source_profile::read ()
+{
+ if (gcov_read_unsigned () != GCOV_TAG_AFDO_FUNCTION)
+ {
+ inform (0, "Not expected TAG.");
+ return false;
+ }
+
+ /* Skip the length of the section. */
+ gcov_read_unsigned ();
+
+ /* Read in the function/callsite profile, and store it in local
+ data structure. */
+ unsigned function_num = gcov_read_unsigned ();
+ for (unsigned i = 0; i < function_num; i++)
+ {
+ function_instance::function_instance_stack stack;
+ function_instance *s = function_instance::read_function_instance (
+ &stack, gcov_read_counter ());
+ afdo_profile_info->sum_all += s->total_count ();
+ map_[s->name ()] = s;
+ }
+ return true;
+}
+
+/* Return the function_instance in the profile that correspond to the
+ inline STACK. */
+
+function_instance *
+autofdo_source_profile::get_function_instance_by_inline_stack (
+ const inline_stack &stack) const
+{
+ name_function_instance_map::const_iterator iter = map_.find (
+ afdo_string_table->get_index_by_decl (stack[stack.length () - 1].first));
+ if (iter == map_.end())
+ return NULL;
+ function_instance *s = iter->second;
+ for (unsigned i = stack.length() - 1; i > 0; i--)
+ {
+ s = s->get_function_instance_by_decl (
+ stack[i].second, stack[i - 1].first);
+ if (s == NULL)
+ return NULL;
+ }
+ return s;
+}
+
+/* Module profile is only used by LIPO. Here we simply ignore it. */
+
+static void
+fake_read_autofdo_module_profile ()
+{
+ /* Read in the module info. */
+ gcov_read_unsigned ();
+
+ /* Skip the length of the section. */
+ gcov_read_unsigned ();
+
+ /* Read in the file name table. */
+ unsigned total_module_num = gcov_read_unsigned ();
+ gcc_assert (total_module_num == 0);
+}
+
+/* Read data from profile data file. */
+
+static void
+read_profile (void)
+{
+ if (gcov_open (auto_profile_file, 1) == 0)
+ error ("Cannot open profile file %s.", auto_profile_file);
+
+ if (gcov_read_unsigned () != GCOV_DATA_MAGIC)
+ error ("AutoFDO profile magic number does not mathch.");
+
+ /* Skip the version number. */
+ unsigned version = gcov_read_unsigned ();
+ if (version != AUTO_PROFILE_VERSION)
+ error ("AutoFDO profile version %u does match %u.",
+ version, AUTO_PROFILE_VERSION);
+
+ /* Skip the empty integer. */
+ gcov_read_unsigned ();
+
+ /* string_table. */
+ afdo_string_table = new string_table ();
+ if (!afdo_string_table->read())
+ error ("Cannot read string table from %s.", auto_profile_file);
+
+ /* autofdo_source_profile. */
+ afdo_source_profile = autofdo_source_profile::create ();
+ if (afdo_source_profile == NULL)
+ error ("Cannot read function profile from %s.", auto_profile_file);
+
+ /* autofdo_module_profile. */
+ fake_read_autofdo_module_profile ();
+
+ /* Read in the working set. */
+ if (gcov_read_unsigned () != GCOV_TAG_AFDO_WORKING_SET)
+ error ("Cannot read working set from %s.", auto_profile_file);
+
+ /* Skip the length of the section. */
+ gcov_read_unsigned ();
+ gcov_working_set_t set[128];
+ for (unsigned i = 0; i < 128; i++)
+ {
+ set[i].num_counters = gcov_read_unsigned ();
+ set[i].min_counter = gcov_read_counter ();
+ }
+ add_working_set (set);
+}
+
+/* From AutoFDO profiles, find values inside STMT for that we want to measure
+ histograms for indirect-call optimization.
+
+ This function is actually served for 2 purposes:
+     * before annotation, we need to mark histogram, promote and inline
+     * after annotation, we just need to mark, and let follow-up logic to
+       decide if it needs to promote and inline. */
+
+static void
+afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map,
+ bool transform)
+{
+ gimple stmt = gsi_stmt (*gsi);
+ tree callee;
+
+ if (map.size () == 0 || gimple_code (stmt) != GIMPLE_CALL
+ || gimple_call_fndecl (stmt) != NULL_TREE)
+ return;
+
+ callee = gimple_call_fn (stmt);
+
+ histogram_value hist = gimple_alloc_histogram_value (
+ cfun, HIST_TYPE_INDIR_CALL, stmt, callee);
+ hist->n_counters = 3;
+ hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters);
+ gimple_add_histogram_value (cfun, stmt, hist);
+
+ gcov_type total = 0;
+ icall_target_map::const_iterator max_iter = map.end ();
+
+ for (icall_target_map::const_iterator iter = map.begin ();
+ iter != map.end (); ++iter)
+ {
+ total += iter->second;
+ if (max_iter == map.end () || max_iter->second < iter->second)
+ max_iter = iter;
+ }
+
+ hist->hvalue.counters[0]
+ = (unsigned long long)afdo_string_table->get_name (max_iter->first);
+ hist->hvalue.counters[1] = max_iter->second;
+ hist->hvalue.counters[2] = total;
+
+ if (!transform)
+ return;
+
+ struct cgraph_edge *indirect_edge
+ = cgraph_node::get (current_function_decl)->get_edge (stmt);
+ struct cgraph_node *direct_call = cgraph_node::get_for_asmname (
+ get_identifier ((const char *) hist->hvalue.counters[0]));
+
+ if (direct_call == NULL || !check_ic_target (stmt, direct_call))
+ return;
+ if (DECL_STRUCT_FUNCTION (direct_call->decl) == NULL)
+ return;
+ struct cgraph_edge *new_edge
+ = indirect_edge->make_speculative (direct_call, 0, 0);
+ new_edge->redirect_call_stmt_to_callee ();
+ gimple_remove_histogram_value (cfun, stmt, hist);
+ inline_call (new_edge, true, NULL, NULL, false);
+}
+
+/* From AutoFDO profiles, find values inside STMT for that we want to measure
+ histograms and adds them to list VALUES. */
+
+static void
+afdo_vpt (gimple_stmt_iterator *gsi, const icall_target_map &map,
+ bool transform)
+{
+ afdo_indirect_call (gsi, map, transform);
+}
+
+typedef std::set<basic_block> bb_set;
+typedef std::set<edge> edge_set;
+
+static bool
+is_bb_annotated (const basic_block bb, const bb_set &annotated)
+{
+ return annotated.find (bb) != annotated.end ();
+}
+
+static void
+set_bb_annotated (basic_block bb, bb_set *annotated)
+{
+ annotated->insert (bb);
+}
+
+static bool
+is_edge_annotated (const edge e, const edge_set &annotated)
+{
+ return annotated.find (e) != annotated.end ();
+}
+
+static void
+set_edge_annotated (edge e, edge_set *annotated)
+{
+ annotated->insert (e);
+}
+
+/* For a given BB, set its execution count. Attach value profile if a stmt
+ is not in PROMOTED, because we only want to promot an indirect call once.
+ Return TRUE if BB is annotated. */
+
+static bool
+afdo_set_bb_count (basic_block bb, const stmt_set &promoted)
+{
+ gimple_stmt_iterator gsi;
+ edge e;
+ edge_iterator ei;
+ gcov_type max_count = 0;
+ bool has_annotated = false;
+
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ count_info info;
+ gimple stmt = gsi_stmt (gsi);
+ if (gimple_clobber_p (stmt) || is_gimple_debug (stmt))
+ continue;
+ if (afdo_source_profile->get_count_info (stmt, &info))
+ {
+ if (info.count > max_count)
+ max_count = info.count;
+ has_annotated = true;
+ if (info.targets.size () > 0
+ && promoted.find (stmt) == promoted.end ())
+ afdo_vpt (&gsi, info.targets, false);
+ }
+ }
+
+ if (!has_annotated)
+ return false;
+
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ afdo_source_profile->mark_annotated (gimple_location (gsi_stmt (gsi)));
+ for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple phi = gsi_stmt (gsi);
+ size_t i;
+ for (i = 0; i < gimple_phi_num_args (phi); i++)
+ afdo_source_profile->mark_annotated (gimple_phi_arg_location (phi, i));
+ }
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ afdo_source_profile->mark_annotated (e->goto_locus);
+
+ bb->count = max_count;
+ return true;
+}
+
+/* BB1 and BB2 are in an equivalent class iff:
+ 1. BB1 dominates BB2.
+ 2. BB2 post-dominates BB1.
+ 3. BB1 and BB2 are in the same loop nest.
+ This function finds the equivalent class for each basic block, and
+ stores a pointer to the first BB in its equivalent class. Meanwhile,
+ set bb counts for the same equivalent class to be idenical. Update
+ ANNOTATED_BB for the first BB in its equivalent class. */
+
+static void
+afdo_find_equiv_class (bb_set *annotated_bb)
+{
+ basic_block bb;
+
+ FOR_ALL_BB_FN (bb, cfun)
+ bb->aux = NULL;
+
+ FOR_ALL_BB_FN (bb, cfun)
+ {
+ vec<basic_block> dom_bbs;
+ basic_block bb1;
+ int i;
+
+ if (bb->aux != NULL)
+ continue;
+ bb->aux = bb;
+ dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
+ FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
+ if (bb1->aux == NULL && dominated_by_p (CDI_POST_DOMINATORS, bb, bb1)
+ && bb1->loop_father == bb->loop_father)
+ {
+ bb1->aux = bb;
+ if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
+ {
+ bb->count = MAX (bb->count, bb1->count);
+ set_bb_annotated (bb, annotated_bb);
+ }
+ }
+ dom_bbs = get_dominated_by (CDI_POST_DOMINATORS, bb);
+ FOR_EACH_VEC_ELT (dom_bbs, i, bb1)
+ if (bb1->aux == NULL && dominated_by_p (CDI_DOMINATORS, bb, bb1)
+ && bb1->loop_father == bb->loop_father)
+ {
+ bb1->aux = bb;
+ if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
+ {
+ bb->count = MAX (bb->count, bb1->count);
+ set_bb_annotated (bb, annotated_bb);
+ }
+ }
+ }
+}
+
+/* If a basic block's count is known, and only one of its in/out edges' count
+ is unknown, its count can be calculated. Meanwhile, if all of the in/out
+ edges' counts are known, then the basic block's unknown count can also be
+ calculated.
+ IS_SUCC is true if out edges of a basic blocks are examined.
+ Update ANNOTATED_BB and ANNOTATED_EDGE accordingly.
+ Return TRUE if any basic block/edge count is changed. */
+
+static bool
+afdo_propagate_edge (bool is_succ, bb_set *annotated_bb,
+ edge_set *annotated_edge)
+{
+ basic_block bb;
+ bool changed = false;
+
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ edge e, unknown_edge = NULL;
+ edge_iterator ei;
+ int num_unknown_edge = 0;
+ gcov_type total_known_count = 0;
+
+ FOR_EACH_EDGE (e, ei, is_succ ? bb->succs : bb->preds)
+ if (!is_edge_annotated (e, *annotated_edge))
+ num_unknown_edge++, unknown_edge = e;
+ else
+ total_known_count += e->count;
+
+ if (num_unknown_edge == 0)
+ {
+ if (total_known_count > bb->count)
+ {
+ bb->count = total_known_count;
+ changed = true;
+ }
+ if (!is_bb_annotated (bb, *annotated_bb))
+ {
+ set_bb_annotated (bb, annotated_bb);
+ changed = true;
+ }
+ }
+ else if (num_unknown_edge == 1 && is_bb_annotated (bb, *annotated_bb))
+ {
+ if (bb->count >= total_known_count)
+ unknown_edge->count = bb->count - total_known_count;
+ else
+ unknown_edge->count = 0;
+ set_edge_annotated (unknown_edge, annotated_edge);
+ changed = true;
+ }
+ }
+ return changed;
+}
+
+/* Special propagation for circuit expressions. Because GCC translates
+ control flow into data flow for circuit expressions. E.g.
+ BB1:
+ if (a && b)
+ BB2
+ else
+ BB3
+
+ will be translated into:
+
+ BB1:
+ if (a)
+ goto BB.t1
+ else
+ goto BB.t3
+ BB.t1:
+ if (b)
+ goto BB.t2
+ else
+ goto BB.t3
+ BB.t2:
+ goto BB.t3
+ BB.t3:
+ tmp = PHI (0 (BB1), 0 (BB.t1), 1 (BB.t2)
+ if (tmp)
+ goto BB2
+ else
+ goto BB3
+
+ In this case, we need to propagate through PHI to determine the edge
+ count of BB1->BB.t1, BB.t1->BB.t2.
+ Update ANNOTATED_EDGE accordingly. */
+
+static void
+afdo_propagate_circuit (const bb_set &annotated_bb, edge_set *annotated_edge)
+{
+ basic_block bb;
+ FOR_ALL_BB_FN (bb, cfun)
+ {
+ gimple phi_stmt;
+ tree cmp_rhs, cmp_lhs;
+ gimple cmp_stmt = last_stmt (bb);
+ edge e;
+ edge_iterator ei;
+
+ if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
+ continue;
+ cmp_rhs = gimple_cond_rhs (cmp_stmt);
+ cmp_lhs = gimple_cond_lhs (cmp_stmt);
+ if (!TREE_CONSTANT (cmp_rhs)
+ || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
+ continue;
+ if (TREE_CODE (cmp_lhs) != SSA_NAME)
+ continue;
+ if (!is_bb_annotated (bb, annotated_bb))
+ continue;
+ phi_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
+ while (phi_stmt && gimple_code (phi_stmt) == GIMPLE_ASSIGN
+ && gimple_assign_single_p (phi_stmt)
+ && TREE_CODE (gimple_assign_rhs1 (phi_stmt)) == SSA_NAME)
+ phi_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (phi_stmt));
+ if (!phi_stmt || gimple_code (phi_stmt) != GIMPLE_PHI)
+ continue;
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ unsigned i, total = 0;
+ edge only_one;
+ bool check_value_one = (((integer_onep (cmp_rhs))
+ ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
+ ^ ((e->flags & EDGE_TRUE_VALUE) != 0));
+ if (!is_edge_annotated (e, *annotated_edge))
+ continue;
+ for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
+ {
+ tree val = gimple_phi_arg_def (phi_stmt, i);
+ edge ep = gimple_phi_arg_edge (phi_stmt, i);
+
+ if (!TREE_CONSTANT (val)
+ || !(integer_zerop (val) || integer_onep (val)))
+ continue;
+ if (check_value_one ^ integer_onep (val))
+ continue;
+ total++;
+ only_one = ep;
+ if (e->probability == 0 && !is_edge_annotated (ep, *annotated_edge))
+ {
+ ep->probability = 0;
+ ep->count = 0;
+ set_edge_annotated (ep, annotated_edge);
+ }
+ }
+ if (total == 1 && !is_edge_annotated (only_one, *annotated_edge))
+ {
+ only_one->probability = e->probability;
+ only_one->count = e->count;
+ set_edge_annotated (only_one, annotated_edge);
+ }
+ }
+ }
+}
+
+/* Propagate the basic block count and edge count on the control flow
+ graph. We do the propagation iteratively until stablize. */
+
+static void
+afdo_propagate (bb_set *annotated_bb, edge_set *annotated_edge)
+{
+ basic_block bb;
+ bool changed = true;
+ int i = 0;
+
+ FOR_ALL_BB_FN (bb, cfun)
+ {
+ bb->count = ((basic_block)bb->aux)->count;
+ if (is_bb_annotated ((const basic_block)bb->aux, *annotated_bb))
+ set_bb_annotated (bb, annotated_bb);
+ }
+
+ while (changed && i++ < 10)
+ {
+ changed = false;
+
+ if (afdo_propagate_edge (true, annotated_bb, annotated_edge))
+ changed = true;
+ if (afdo_propagate_edge (false, annotated_bb, annotated_edge))
+ changed = true;
+ afdo_propagate_circuit (*annotated_bb, annotated_edge);
+ }
+}
+
+/* Propagate counts on control flow graph and calculate branch
+ probabilities. */
+
+static void
+afdo_calculate_branch_prob (bb_set *annotated_bb, edge_set *annotated_edge)
+{
+ basic_block bb;
+ bool has_sample = false;
+
+ FOR_EACH_BB_FN (bb, cfun)
+ if (bb->count > 0)
+ has_sample = true;
+
+ if (!has_sample)
+ return;
+
+ calculate_dominance_info (CDI_POST_DOMINATORS);
+ calculate_dominance_info (CDI_DOMINATORS);
+ loop_optimizer_init (0);
+
+ afdo_find_equiv_class (annotated_bb);
+ afdo_propagate (annotated_bb, annotated_edge);
+
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ edge e;
+ edge_iterator ei;
+ int num_unknown_succ = 0;
+ gcov_type total_count = 0;
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ if (!is_edge_annotated (e, *annotated_edge))
+ num_unknown_succ++;
+ else
+ total_count += e->count;
+ }
+ if (num_unknown_succ == 0 && total_count > 0)
+ {
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ e->probability = (double)e->count * REG_BR_PROB_BASE / total_count;
+ }
+ }
+ FOR_ALL_BB_FN (bb, cfun)
+ {
+ edge e;
+ edge_iterator ei;
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ e->count = (double)bb->count * e->probability / REG_BR_PROB_BASE;
+ bb->aux = NULL;
+ }
+
+ loop_optimizer_finalize ();
+ free_dominance_info (CDI_DOMINATORS);
+ free_dominance_info (CDI_POST_DOMINATORS);
+}
+
+/* Perform value profile transformation using AutoFDO profile. Add the
+ promoted stmts to PROMOTED_STMTS. Return TRUE if there is any
+ indirect call promoted. */
+
+static bool
+afdo_vpt_for_early_inline (stmt_set *promoted_stmts)
+{
+ basic_block bb;
+ if (afdo_source_profile->get_function_instance_by_decl (
+ current_function_decl) == NULL)
+ return false;
+
+ compute_inline_parameters (cgraph_node::get (current_function_decl), true);
+
+ bool has_vpt = false;
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ if (!has_indirect_call (bb))
+ continue;
+ gimple_stmt_iterator gsi;
+
+ gcov_type bb_count = 0;
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ count_info info;
+ gimple stmt = gsi_stmt (gsi);
+ if (afdo_source_profile->get_count_info (stmt, &info))
+ bb_count = MAX (bb_count, info.count);
+ }
+
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple stmt = gsi_stmt (gsi);
+ /* IC_promotion and early_inline_2 is done in multiple iterations.
+ No need to promoted the stmt if its in promoted_stmts (means
+ it is already been promoted in the previous iterations). */
+ if (gimple_code (stmt) != GIMPLE_CALL || gimple_call_fn (stmt) == NULL
+ || TREE_CODE (gimple_call_fn (stmt)) == FUNCTION_DECL
+ || promoted_stmts->find (stmt) != promoted_stmts->end ())
+ continue;
+
+ count_info info;
+ afdo_source_profile->get_count_info (stmt, &info);
+ info.count = bb_count;
+ if (afdo_source_profile->update_inlined_ind_target (stmt, &info))
+ {
+ /* Promote the indirect call and update the promoted_stmts. */
+ promoted_stmts->insert (stmt);
+ afdo_vpt (&gsi, info.targets, true);
+ has_vpt = true;
+ }
+ }
+ }
+ if (has_vpt)
+ {
+ optimize_inline_calls (current_function_decl);
+ return true;
+ }
+ else
+ return false;
+}
+
+/* Annotate auto profile to the control flow graph. Do not annotate value
+ profile for stmts in PROMOTED_STMTS. */
+
+static void
+afdo_annotate_cfg (const stmt_set &promoted_stmts)
+{
+ basic_block bb;
+ bb_set annotated_bb;
+ edge_set annotated_edge;
+ const function_instance *s
+ = afdo_source_profile->get_function_instance_by_decl (
+ current_function_decl);
+
+ if (s == NULL)
+ return;
+ cgraph_node::get (current_function_decl)->count = s->head_count ();
+ ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = s->head_count ();
+ gcov_type max_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
+
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ edge e;
+ edge_iterator ei;
+
+ bb->count = 0;
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ e->count = 0;
+
+ if (afdo_set_bb_count (bb, promoted_stmts))
+ set_bb_annotated (bb, &annotated_bb);
+ if (bb->count > max_count)
+ max_count = bb->count;
+ }
+ if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
+ > ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count)
+ {
+ ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count
+ = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
+ set_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb, &annotated_bb);
+ }
+ if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
+ > EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count)
+ {
+ EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count
+ = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
+ set_bb_annotated (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb, &annotated_bb);
+ }
+ afdo_source_profile->mark_annotated (
+ DECL_SOURCE_LOCATION (current_function_decl));
+ afdo_source_profile->mark_annotated (cfun->function_start_locus);
+ afdo_source_profile->mark_annotated (cfun->function_end_locus);
+ if (max_count > 0)
+ {
+ afdo_calculate_branch_prob (&annotated_bb, &annotated_edge);
+ counts_to_freqs ();
+ profile_status_for_fn (cfun) = PROFILE_READ;
+ }
+ if (flag_value_profile_transformations)
+ gimple_value_profile_transformations ();
+}
+
+/* Wrapper function to invoke early inliner. */
+
+static void
+early_inline ()
+{
+ compute_inline_parameters (cgraph_node::get (current_function_decl), true);
+ unsigned todo = early_inliner (cfun);
+ if (todo & TODO_update_ssa_any)
+ update_ssa (TODO_update_ssa);
+}
+
+/* Use AutoFDO profile to annoate the control flow graph.
+ Return the todo flag. */
+
+static unsigned int
+auto_profile (void)
+{
+ struct cgraph_node *node;
+
+ if (symtab->state == FINISHED)
+ return 0;
+
+ init_node_map (true);
+ profile_info = autofdo::afdo_profile_info;
+
+ FOR_EACH_FUNCTION (node)
+ {
+ if (!gimple_has_body_p (node->decl))
+ continue;
+
+ /* Don't profile functions produced for builtin stuff. */
+ if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
+ continue;
+
+ push_cfun (DECL_STRUCT_FUNCTION (node->decl));
+
+ /* First do indirect call promotion and early inline to make the
+ IR match the profiled binary before actual annotation.
+
+ This is needed because an indirect call might have been promoted
+ and inlined in the profiled binary. If we do not promote and
+ inline these indirect calls before annotation, the profile for
+ these promoted functions will be lost.
+
+ e.g. foo() --indirect_call--> bar()
+ In profiled binary, the callsite is promoted and inlined, making
+ the profile look like:
+
+ foo: {
+ loc_foo_1: count_1
+ bar@loc_foo_2: {
+ loc_bar_1: count_2
+ loc_bar_2: count_3
+ }
+ }
+
+ Before AutoFDO pass, loc_foo_2 is not promoted thus not inlined.
+ If we perform annotation on it, the profile inside bar@loc_foo2
+ will be wasted.
+
+ To avoid this, we promote loc_foo_2 and inline the promoted bar
+ function before annotation, so the profile inside bar@loc_foo2
+ will be useful. */
+ autofdo::stmt_set promoted_stmts;
+ for (int i = 0; i < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS); i++)
+ {
+ if (!flag_value_profile_transformations
+ || !autofdo::afdo_vpt_for_early_inline (&promoted_stmts))
+ break;
+ early_inline ();
+ }
+
+ early_inline ();
+ autofdo::afdo_annotate_cfg (promoted_stmts);
+ compute_function_frequency ();
+ update_ssa (TODO_update_ssa);
+
+ /* Local pure-const may imply need to fixup the cfg. */
+ if (execute_fixup_cfg () & TODO_cleanup_cfg)
+ cleanup_tree_cfg ();
+
+ free_dominance_info (CDI_DOMINATORS);
+ free_dominance_info (CDI_POST_DOMINATORS);
+ cgraph_edge::rebuild_edges ();
+ pop_cfun ();
+ }
+
+ return TODO_rebuild_cgraph_edges;
+}
+} /* namespace autofdo. */
+
+/* Read the profile from the profile data file. */
+
+void
+read_autofdo_file (void)
+{
+ if (auto_profile_file == NULL)
+ auto_profile_file = DEFAULT_AUTO_PROFILE_FILE;
+
+ autofdo::afdo_profile_info = (struct gcov_ctr_summary *)xcalloc (
+ 1, sizeof (struct gcov_ctr_summary));
+ autofdo::afdo_profile_info->runs = 1;
+ autofdo::afdo_profile_info->sum_max = 0;
+ autofdo::afdo_profile_info->sum_all = 0;
+
+ /* Read the profile from the profile file. */
+ autofdo::read_profile ();
+}
+
+/* Free the resources. */
+
+void
+end_auto_profile (void)
+{
+ delete autofdo::afdo_source_profile;
+ delete autofdo::afdo_string_table;
+ profile_info = NULL;
+}
+
+/* Returns TRUE if EDGE is hot enough to be inlined early. */
+
+bool
+afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge)
+{
+ gcov_type count
+ = autofdo::afdo_source_profile->get_callsite_total_count (edge);
+ if (count > 0)
+ {
+ bool is_hot;
+ const struct gcov_ctr_summary *saved_profile_info = profile_info;
+ /* At earling inline stage, profile_info is not set yet. We need to
+ temporarily set it to afdo_profile_info to calculate hotness. */
+ profile_info = autofdo::afdo_profile_info;
+ is_hot = maybe_hot_count_p (NULL, count);
+ profile_info = saved_profile_info;
+ return is_hot;
+ }
+ else
+ return false;
+}
+
+namespace
+{
+
+const pass_data pass_data_ipa_auto_profile = {
+ SIMPLE_IPA_PASS, "afdo", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_IPA_AUTOFDO, /* tv_id */
+ 0, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+};
+
+class pass_ipa_auto_profile : public simple_ipa_opt_pass
+{
+public:
+ pass_ipa_auto_profile (gcc::context *ctxt)
+ : simple_ipa_opt_pass (pass_data_ipa_auto_profile, ctxt)
+ {
+ }
+
+ /* opt_pass methods: */
+ virtual bool
+ gate (function *)
+ {
+ return flag_auto_profile;
+ }
+ virtual unsigned int
+ execute (function *)
+ {
+ return autofdo::auto_profile ();
+ }
+}; // class pass_ipa_auto_profile
+
+} // anon namespace
+
+simple_ipa_opt_pass *
+make_pass_ipa_auto_profile (gcc::context *ctxt)
+{
+ return new pass_ipa_auto_profile (ctxt);
+}
diff --git a/gcc/auto-profile.h b/gcc/auto-profile.h
new file mode 100644
index 00000000000..7b4f2449cb7
--- /dev/null
+++ b/gcc/auto-profile.h
@@ -0,0 +1,31 @@
+/* auto-profile.h - Defines data exported from auto-profile.c
+ Copyright (C) 2014. Free Software Foundation, Inc.
+ Contributed by Dehao Chen (dehao@google.com)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef AUTO_PROFILE_H
+#define AUTO_PROFILE_H
+
+/* Read, process, finalize AutoFDO data structures. */
+extern void read_autofdo_file (void);
+extern void end_auto_profile (void);
+
+/* Returns TRUE if EDGE is hot enough to be inlined early. */
+extern bool afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *);
+
+#endif /* AUTO_PROFILE_H */
diff --git a/gcc/basic-block.h b/gcc/basic-block.h
index b1055c00fa3..a82db077619 100644
--- a/gcc/basic-block.h
+++ b/gcc/basic-block.h
@@ -652,6 +652,7 @@ ei_cond (edge_iterator ei, edge *p)
extern void dump_flow_info (FILE *, int);
/* In predict.c */
+extern bool maybe_hot_count_p (struct function *, gcov_type);
extern bool maybe_hot_bb_p (struct function *, const_basic_block);
extern bool maybe_hot_edge_p (edge);
extern bool probably_never_executed_bb_p (struct function *, const_basic_block);
@@ -805,6 +806,7 @@ extern void default_rtl_profile (void);
/* In profile.c. */
typedef struct gcov_working_set_info gcov_working_set_t;
extern gcov_working_set_t *find_working_set (unsigned pct_times_10);
+extern void add_working_set (gcov_working_set_t *);
/* Check tha probability is sane. */
diff --git a/gcc/common.opt b/gcc/common.opt
index 5db5e1ee6ad..817ac87bf12 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -895,6 +895,16 @@ fauto-inc-dec
Common Report Var(flag_auto_inc_dec) Init(1)
Generate auto-inc/dec instructions
+fauto-profile
+Common Report Var(flag_auto_profile) Optimization
+Use sample profile information for call graph node weights. The default
+profile file is fbdata.afdo in 'pwd'.
+
+fauto-profile=
+Common Joined RejectNegative Var(auto_profile_file)
+Use sample profile information for call graph node weights. The profile
+file is specified in the argument.
+
; -fcheck-bounds causes gcc to generate array bounds checks.
; For C, C++ and ObjC: defaults off.
; For Java: defaults to on.
diff --git a/gcc/coverage.c b/gcc/coverage.c
index 66217ed862d..c02b18c1144 100644
--- a/gcc/coverage.c
+++ b/gcc/coverage.c
@@ -61,6 +61,7 @@ along with GCC; see the file COPYING3. If not see
#include "filenames.h"
#include "target.h"
#include "params.h"
+#include "auto-profile.h"
#include "gcov-io.h"
#include "gcov-io.c"
@@ -1214,7 +1215,9 @@ coverage_init (const char *filename)
bbg_file_stamp = local_tick;
- if (flag_branch_probabilities)
+ if (flag_auto_profile)
+ read_autofdo_file ();
+ else if (flag_branch_probabilities)
read_counts_file ();
/* Name of bbg file. */
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 23f272f4a1f..0b26976c520 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -365,7 +365,8 @@ Objective-C and Objective-C++ Dialects}.
@gccoptlist{-faggressive-loop-optimizations -falign-functions[=@var{n}] @gol
-falign-jumps[=@var{n}] @gol
-falign-labels[=@var{n}] -falign-loops[=@var{n}] @gol
--fassociative-math -fauto-inc-dec -fbranch-probabilities @gol
+-fassociative-math -fauto-profile -fauto-profile[=@var{path}] @gol
+-fauto-inc-dec -fbranch-probabilities @gol
-fbranch-target-load-optimize -fbranch-target-load-optimize2 @gol
-fbtr-bb-exclusive -fcaller-saves @gol
-fcheck-data-deps -fcombine-stack-adjustments -fconserve-stack @gol
@@ -9205,6 +9206,41 @@ code.
If @var{path} is specified, GCC looks at the @var{path} to find
the profile feedback data files. See @option{-fprofile-dir}.
+
+@item -fauto-profile
+@itemx -fauto-profile=@var{path}
+@opindex fauto-profile
+Enable sampling based feedback directed optimizations, and optimizations
+generally profitable only with profile feedback available.
+
+The following options are enabled: @code{-fbranch-probabilities}, @code{-fvpt},
+@code{-funroll-loops}, @code{-fpeel-loops}, @code{-ftracer}, @code{-ftree-vectorize},
+@code{-finline-functions}, @code{-fipa-cp}, @code{-fipa-cp-clone},
+@code{-fpredictive-commoning}, @code{-funswitch-loops},
+@code{-fgcse-after-reload}, @code{-ftree-loop-distribute-patterns},
+
+If @var{path} is specified, GCC looks at the @var{path} to find
+the profile feedback data files.
+
+In order to collect AutoFDO profile, you need to have:
+
+1. A linux system with linux perf support
+2. (optional) An Intel processor with last branch record (LBR) support. This is
+ to guarantee accurate instruction level profile, which is important for
+ AutoFDO performance.
+
+To collect the profile, first use linux perf to collect raw profile
+(see @uref{https://perf.wiki.kernel.org/}).
+
+E.g.
+@code{perf record -e br_inst_retired:near_taken -b -o perf.data -- your_program}
+
+Then use create_gcov tool, which takes raw profile and unstripped binary to
+generate AutoFDO profile that can be used by GCC.
+(see @uref{https://github.com/google/autofdo}).
+
+E.g.
+@code{create_gcov --binary=your_program.unstripped --profile=perf.data --gcov=profile.afdo}
@end table
The following options control compiler behavior regarding floating-point
diff --git a/gcc/gcov-io.h b/gcc/gcov-io.h
index 6fc126851b4..3c2543705d5 100644
--- a/gcc/gcov-io.h
+++ b/gcc/gcov-io.h
@@ -244,6 +244,9 @@ typedef uint64_t gcov_type_unsigned;
#define GCOV_TAG_PROGRAM_SUMMARY ((gcov_unsigned_t)0xa3000000)
#define GCOV_TAG_SUMMARY_LENGTH(NUM) \
(1 + GCOV_COUNTERS_SUMMABLE * (10 + 3 * 2) + (NUM) * 5)
+#define GCOV_TAG_AFDO_FILE_NAMES ((gcov_unsigned_t)0xaa000000)
+#define GCOV_TAG_AFDO_FUNCTION ((gcov_unsigned_t)0xac000000)
+#define GCOV_TAG_AFDO_WORKING_SET ((gcov_unsigned_t)0xaf000000)
/* Counters that are collected. */
diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c
index e79a4dd8e21..f0177dfc60d 100644
--- a/gcc/ipa-inline.c
+++ b/gcc/ipa-inline.c
@@ -122,6 +122,7 @@ along with GCC; see the file COPYING3. If not see
#include "ipa-inline.h"
#include "ipa-utils.h"
#include "sreal.h"
+#include "auto-profile.h"
#include "cilk.h"
#include "builtins.h"
@@ -443,6 +444,14 @@ want_early_inline_function_p (struct cgraph_edge *e)
if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
;
+ /* For AutoFDO, we need to make sure that before profile annotation, all
+ hot paths' IR look exactly the same as profiled binary. As a result,
+ in einliner, we will disregard size limit and inline those callsites
+ that are:
+ * inlined in the profiled binary, and
+ * the cloned callee has enough samples to be considered "hot". */
+ else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
+ ;
else if (!DECL_DECLARED_INLINE_P (callee->decl)
&& !flag_inline_small_functions)
{
@@ -2360,39 +2369,8 @@ early_inline_small_functions (struct cgraph_node *node)
return inlined;
}
-/* Do inlining of small functions. Doing so early helps profiling and other
- passes to be somewhat more effective and avoids some code duplication in
- later real inlining pass for testcases with very many function calls. */
-
-namespace {
-
-const pass_data pass_data_early_inline =
-{
- GIMPLE_PASS, /* type */
- "einline", /* name */
- OPTGROUP_INLINE, /* optinfo_flags */
- TV_EARLY_INLINING, /* tv_id */
- PROP_ssa, /* properties_required */
- 0, /* properties_provided */
- 0, /* properties_destroyed */
- 0, /* todo_flags_start */
- 0, /* todo_flags_finish */
-};
-
-class pass_early_inline : public gimple_opt_pass
-{
-public:
- pass_early_inline (gcc::context *ctxt)
- : gimple_opt_pass (pass_data_early_inline, ctxt)
- {}
-
- /* opt_pass methods: */
- virtual unsigned int execute (function *);
-
-}; // class pass_early_inline
-
unsigned int
-pass_early_inline::execute (function *fun)
+early_inliner (function *fun)
{
struct cgraph_node *node = cgraph_node::get (current_function_decl);
struct cgraph_edge *edge;
@@ -2493,6 +2471,43 @@ pass_early_inline::execute (function *fun)
return todo;
}
+/* Do inlining of small functions. Doing so early helps profiling and other
+ passes to be somewhat more effective and avoids some code duplication in
+ later real inlining pass for testcases with very many function calls. */
+
+namespace {
+
+const pass_data pass_data_early_inline =
+{
+ GIMPLE_PASS, /* type */
+ "einline", /* name */
+ OPTGROUP_INLINE, /* optinfo_flags */
+ TV_EARLY_INLINING, /* tv_id */
+ PROP_ssa, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+};
+
+class pass_early_inline : public gimple_opt_pass
+{
+public:
+ pass_early_inline (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_early_inline, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ virtual unsigned int execute (function *);
+
+}; // class pass_early_inline
+
+unsigned int
+pass_early_inline::execute (function *fun)
+{
+ return early_inliner (fun);
+}
+
} // anon namespace
gimple_opt_pass *
diff --git a/gcc/ipa-inline.h b/gcc/ipa-inline.h
index 75973230bc5..8edf49621ad 100644
--- a/gcc/ipa-inline.h
+++ b/gcc/ipa-inline.h
@@ -238,6 +238,7 @@ void initialize_growth_caches (void);
void free_growth_caches (void);
void compute_inline_parameters (struct cgraph_node *, bool);
bool speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining);
+unsigned int early_inliner (function *fun);
/* In ipa-inline-transform.c */
bool inline_call (struct cgraph_edge *, bool, vec<cgraph_edge *> *, int *, bool,
diff --git a/gcc/opts.c b/gcc/opts.c
index 30541962018..de066bcce07 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -1280,6 +1280,50 @@ print_specific_help (unsigned int include_flags,
opts->x_help_columns, opts, lang_mask);
}
+/* Enable FDO-related flags. */
+
+static void
+enable_fdo_optimizations (struct gcc_options *opts,
+ struct gcc_options *opts_set,
+ int value)
+{
+ if (!opts_set->x_flag_branch_probabilities)
+ opts->x_flag_branch_probabilities = value;
+ if (!opts_set->x_flag_profile_values)
+ opts->x_flag_profile_values = value;
+ if (!opts_set->x_flag_unroll_loops)
+ opts->x_flag_unroll_loops = value;
+ if (!opts_set->x_flag_peel_loops)
+ opts->x_flag_peel_loops = value;
+ if (!opts_set->x_flag_tracer)
+ opts->x_flag_tracer = value;
+ if (!opts_set->x_flag_value_profile_transformations)
+ opts->x_flag_value_profile_transformations = value;
+ if (!opts_set->x_flag_inline_functions)
+ opts->x_flag_inline_functions = value;
+ if (!opts_set->x_flag_ipa_cp)
+ opts->x_flag_ipa_cp = value;
+ if (!opts_set->x_flag_ipa_cp_clone
+ && value && opts->x_flag_ipa_cp)
+ opts->x_flag_ipa_cp_clone = value;
+ if (!opts_set->x_flag_predictive_commoning)
+ opts->x_flag_predictive_commoning = value;
+ if (!opts_set->x_flag_unswitch_loops)
+ opts->x_flag_unswitch_loops = value;
+ if (!opts_set->x_flag_gcse_after_reload)
+ opts->x_flag_gcse_after_reload = value;
+ if (!opts_set->x_flag_tree_loop_vectorize
+ && !opts_set->x_flag_tree_vectorize)
+ opts->x_flag_tree_loop_vectorize = value;
+ if (!opts_set->x_flag_tree_slp_vectorize
+ && !opts_set->x_flag_tree_vectorize)
+ opts->x_flag_tree_slp_vectorize = value;
+ if (!opts_set->x_flag_vect_cost_model)
+ opts->x_flag_vect_cost_model = VECT_COST_MODEL_DYNAMIC;
+ if (!opts_set->x_flag_tree_loop_distribute_patterns)
+ opts->x_flag_tree_loop_distribute_patterns = value;
+}
+
/* Handle target- and language-independent options. Return zero to
generate an "unknown option" message. Only options that need
extra handling need to be listed here; if you simply want
@@ -1749,50 +1793,30 @@ common_handle_option (struct gcc_options *opts,
value = true;
/* No break here - do -fprofile-use processing. */
case OPT_fprofile_use:
- if (!opts_set->x_flag_branch_probabilities)
- opts->x_flag_branch_probabilities = value;
- if (!opts_set->x_flag_profile_values)
- opts->x_flag_profile_values = value;
- if (!opts_set->x_flag_unroll_loops)
- opts->x_flag_unroll_loops = value;
- if (!opts_set->x_flag_peel_loops)
- opts->x_flag_peel_loops = value;
- if (!opts_set->x_flag_tracer)
- opts->x_flag_tracer = value;
- if (!opts_set->x_flag_value_profile_transformations)
- opts->x_flag_value_profile_transformations = value;
- if (!opts_set->x_flag_inline_functions)
- opts->x_flag_inline_functions = value;
- if (!opts_set->x_flag_ipa_cp)
- opts->x_flag_ipa_cp = value;
- if (!opts_set->x_flag_ipa_cp_clone
- && value && opts->x_flag_ipa_cp)
- opts->x_flag_ipa_cp_clone = value;
- if (!opts_set->x_flag_predictive_commoning)
- opts->x_flag_predictive_commoning = value;
- if (!opts_set->x_flag_unswitch_loops)
- opts->x_flag_unswitch_loops = value;
- if (!opts_set->x_flag_gcse_after_reload)
- opts->x_flag_gcse_after_reload = value;
- if (!opts_set->x_flag_tree_loop_vectorize
- && !opts_set->x_flag_tree_vectorize)
- opts->x_flag_tree_loop_vectorize = value;
- if (!opts_set->x_flag_tree_slp_vectorize
- && !opts_set->x_flag_tree_vectorize)
- opts->x_flag_tree_slp_vectorize = value;
- if (!opts_set->x_flag_vect_cost_model)
- opts->x_flag_vect_cost_model = VECT_COST_MODEL_DYNAMIC;
- if (!opts_set->x_flag_tree_loop_distribute_patterns)
- opts->x_flag_tree_loop_distribute_patterns = value;
+ enable_fdo_optimizations (opts, opts_set, value);
if (!opts_set->x_flag_profile_reorder_functions)
- opts->x_flag_profile_reorder_functions = value;
- /* Indirect call profiling should do all useful transformations
- speculative devirtualization does. */
+ opts->x_flag_profile_reorder_functions = value;
+ /* Indirect call profiling should do all useful transformations
+ speculative devirtualization does. */
if (!opts_set->x_flag_devirtualize_speculatively
&& opts->x_flag_value_profile_transformations)
opts->x_flag_devirtualize_speculatively = false;
break;
+ case OPT_fauto_profile_:
+ opts->x_auto_profile_file = xstrdup (arg);
+ opts->x_flag_auto_profile = true;
+ value = true;
+ /* No break here - do -fauto-profile processing. */
+ case OPT_fauto_profile:
+ enable_fdo_optimizations (opts, opts_set, value);
+ if (!opts_set->x_flag_profile_correction)
+ opts->x_flag_profile_correction = value;
+ maybe_set_param_value (
+ PARAM_EARLY_INLINER_MAX_ITERATIONS, 10,
+ opts->x_param_values, opts_set->x_param_values);
+ break;
+
case OPT_fprofile_generate_:
opts->x_profile_data_prefix = xstrdup (arg);
value = true;
diff --git a/gcc/passes.def b/gcc/passes.def
index 57c2c1343b1..9c2542d5352 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -90,6 +90,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_rebuild_cgraph_edges);
NEXT_PASS (pass_inline_parameters);
POP_INSERT_PASSES ()
+ NEXT_PASS (pass_ipa_auto_profile);
NEXT_PASS (pass_ipa_free_inline_summary);
NEXT_PASS (pass_ipa_tree_profile);
PUSH_INSERT_PASSES_WITHIN (pass_ipa_tree_profile)
diff --git a/gcc/predict.c b/gcc/predict.c
index 5f5d4a55c9c..96c7661aebe 100644
--- a/gcc/predict.c
+++ b/gcc/predict.c
@@ -167,7 +167,7 @@ set_hot_bb_threshold (gcov_type min)
/* Return TRUE if frequency FREQ is considered to be hot. */
-static inline bool
+bool
maybe_hot_count_p (struct function *fun, gcov_type count)
{
if (fun && profile_status_for_fn (fun) != PROFILE_READ)
@@ -2859,7 +2859,7 @@ counts_to_freqs (void)
/* Don't overwrite the estimated frequencies when the profile for
the function is missing. We may drop this function PROFILE_GUESSED
later in drop_profile (). */
- if (!ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
+ if (!flag_auto_profile && !ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
return 0;
FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
@@ -3230,7 +3230,8 @@ rebuild_frequencies (void)
count_max = MAX (bb->count, count_max);
if (profile_status_for_fn (cfun) == PROFILE_GUESSED
- || (profile_status_for_fn (cfun) == PROFILE_READ && count_max < REG_BR_PROB_BASE/10))
+ || (!flag_auto_profile && profile_status_for_fn (cfun) == PROFILE_READ
+ && count_max < REG_BR_PROB_BASE/10))
{
loop_optimizer_init (0);
add_noreturn_fake_exit_edges ();
diff --git a/gcc/profile.c b/gcc/profile.c
index bcff4110522..71b8cbdc97f 100644
--- a/gcc/profile.c
+++ b/gcc/profile.c
@@ -112,6 +112,14 @@ static int total_num_times_called;
static int total_hist_br_prob[20];
static int total_num_branches;
+/* Helper function to update gcov_working_sets. */
+
+void add_working_set (gcov_working_set_t *set) {
+ int i = 0;
+ for (; i < NUM_GCOV_WORKING_SETS; i++)
+ gcov_working_sets[i] = set[i];
+}
+
/* Forward declarations. */
static void find_spanning_tree (struct edge_list *);
diff --git a/gcc/timevar.def b/gcc/timevar.def
index 55a230b39c8..96b606951d6 100644
--- a/gcc/timevar.def
+++ b/gcc/timevar.def
@@ -89,6 +89,7 @@ DEFTIMEVAR (TV_WHOPR_PARTITIONING , "whopr partitioning")
DEFTIMEVAR (TV_WHOPR_LTRANS , "whopr ltrans")
DEFTIMEVAR (TV_IPA_REFERENCE , "ipa reference")
DEFTIMEVAR (TV_IPA_PROFILE , "ipa profile")
+DEFTIMEVAR (TV_IPA_AUTOFDO , "auto profile")
DEFTIMEVAR (TV_IPA_PURE_CONST , "ipa pure const")
DEFTIMEVAR (TV_IPA_ICF , "ipa icf")
DEFTIMEVAR (TV_IPA_PTA , "ipa points-to")
diff --git a/gcc/toplev.c b/gcc/toplev.c
index a87ff9a2ff8..adfae0bf93b 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -83,6 +83,7 @@ along with GCC; see the file COPYING3. If not see
#include "diagnostic-color.h"
#include "context.h"
#include "pass_manager.h"
+#include "auto-profile.h"
#include "dwarf2out.h"
#include "ipa-reference.h"
#include "ipa-prop.h"
@@ -671,6 +672,10 @@ compile_file (void)
targetm.asm_out.output_ident (ident_str);
}
+ /* Auto profile finalization. */
+ if (flag_auto_profile)
+ end_auto_profile ();
+
/* Invoke registered plugin callbacks. */
invoke_plugin_callbacks (PLUGIN_FINISH_UNIT, NULL);
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 2ff52cf5243..3db1a08b0dd 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -449,6 +449,7 @@ extern simple_ipa_opt_pass *make_pass_ipa_lower_emutls (gcc::context *ctxt);
extern simple_ipa_opt_pass
*make_pass_ipa_function_and_variable_visibility (gcc::context *ctxt);
extern simple_ipa_opt_pass *make_pass_ipa_tree_profile (gcc::context *ctxt);
+extern simple_ipa_opt_pass *make_pass_ipa_auto_profile (gcc::context *ctxt);
extern simple_ipa_opt_pass *make_pass_early_local_passes (gcc::context *ctxt);
diff --git a/gcc/tree-profile.c b/gcc/tree-profile.c
index c25626c60aa..ceb616968ef 100644
--- a/gcc/tree-profile.c
+++ b/gcc/tree-profile.c
@@ -719,8 +719,10 @@ public:
bool
pass_ipa_tree_profile::gate (function *)
{
- /* When profile instrumentation, use or test coverage shall be performed. */
- return (!in_lto_p
+ /* When profile instrumentation, use or test coverage shall be performed.
+ But for AutoFDO, this there is no instrumentation, thus this pass is
+ diabled. */
+ return (!in_lto_p && !flag_auto_profile
&& (flag_branch_probabilities || flag_test_coverage
|| profile_arc_flag));
}
diff --git a/gcc/tree-ssa-live.c b/gcc/tree-ssa-live.c
index 23eaf262c64..87f5bf84c59 100644
--- a/gcc/tree-ssa-live.c
+++ b/gcc/tree-ssa-live.c
@@ -605,7 +605,7 @@ remove_unused_scope_block_p (tree scope)
;
/* When not generating debug info we can eliminate info on unused
variables. */
- else if (debug_info_level == DINFO_LEVEL_NONE)
+ else if (!flag_auto_profile && debug_info_level == DINFO_LEVEL_NONE)
{
/* Even for -g0 don't prune outer scopes from artificial
functions, otherwise diagnostics using tree_nonartificial_location
diff --git a/gcc/value-prof.c b/gcc/value-prof.c
index 37710ca6da6..8ed6433ce7f 100644
--- a/gcc/value-prof.c
+++ b/gcc/value-prof.c
@@ -139,7 +139,7 @@ static bool gimple_ic_transform (gimple_stmt_iterator *);
/* Allocate histogram value. */
-static histogram_value
+histogram_value
gimple_alloc_histogram_value (struct function *fun ATTRIBUTE_UNUSED,
enum hist_type type, gimple stmt, tree value)
{
@@ -1342,7 +1342,7 @@ find_func_by_profile_id (int profile_id)
may ICE. Here we only do very minimal sanity check just to make compiler happy.
Returns true if TARGET is considered ok for call CALL_STMT. */
-static bool
+bool
check_ic_target (gimple call_stmt, struct cgraph_node *target)
{
location_t locus;
diff --git a/gcc/value-prof.h b/gcc/value-prof.h
index 00a89fab2a4..f4db3d4bb66 100644
--- a/gcc/value-prof.h
+++ b/gcc/value-prof.h
@@ -77,6 +77,8 @@ typedef vec<histogram_value> histogram_values;
extern void gimple_find_values_to_profile (histogram_values *);
extern bool gimple_value_profile_transformations (void);
+histogram_value gimple_alloc_histogram_value (struct function *, enum hist_type,
+ gimple stmt, tree);
histogram_value gimple_histogram_value (struct function *, gimple);
histogram_value gimple_histogram_value_of_type (struct function *, gimple,
enum hist_type);
@@ -91,6 +93,7 @@ void verify_histograms (void);
void free_histograms (void);
void stringop_block_profile (gimple, unsigned int *, HOST_WIDE_INT *);
gimple gimple_ic (gimple, struct cgraph_node *, int, gcov_type, gcov_type);
+bool check_ic_target (gimple, struct cgraph_node *);
/* In tree-profile.c. */