diff options
-rw-r--r-- | gcc/ChangeLog | 38 | ||||
-rw-r--r-- | gcc/Makefile.in | 7 | ||||
-rw-r--r-- | gcc/common.opt | 8 | ||||
-rw-r--r-- | gcc/coverage.c | 26 | ||||
-rw-r--r-- | gcc/coverage.h | 2 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 37 | ||||
-rw-r--r-- | gcc/opts.c | 23 | ||||
-rw-r--r-- | gcc/passes.c | 8 | ||||
-rw-r--r-- | gcc/profile.c | 79 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 4 | ||||
-rw-r--r-- | gcc/testsuite/g++.dg/bprob/bprob.exp | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-prof/inliner-1.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-prof/tree-prof.exp | 6 | ||||
-rw-r--r-- | gcc/testsuite/gcc.misc-tests/bprob.exp | 4 | ||||
-rw-r--r-- | gcc/toplev.c | 23 | ||||
-rw-r--r-- | gcc/toplev.h | 1 | ||||
-rw-r--r-- | gcc/tree-profile.c | 11 | ||||
-rw-r--r-- | gcc/value-prof.c | 1060 | ||||
-rw-r--r-- | gcc/value-prof.h | 25 |
19 files changed, 118 insertions, 1250 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6e0a6e2eca2..d599ec17a31 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,43 @@ 2005-07-28 Jan Hubicka <jh@suse.cz> + * Makefile.in (rtl-profile.o): Kill all traces of it. + * common.opt (fspeculative-prefetching, ftree-based-profiling): Kill. + * coverage.h (rtl_coverage_counter_ref): Kill. + * opts.c (flag_speculative_prefetching_set): Kill. + (flag_loop_optimize_set): New. + (common_handle_option): Disable loop optimizer when profiling; + do not handle speculative prefetching. + * passes.c (init_optimization_passes): Replace pass_profiling combo + by branch_prob pass. + * profile.c (compute_value_histograms): Update for simplified value + profiles. + (rtl_register_profile_hooks): Kill. + (pass_profiling): Kill. + (rest_of_handle_branch_prob): Do not profile. + * toplev.c (process_options): Remove speculative prefetching. + * toplev.h (flag_tree_based_profiling): Kill. + * tree-profile.c (prepare_instrumented_value, + tree_gen_interval_profiler, tree_gen_pow2_profiler, + tree_gen_one_value_profiler, do_tree_profiling): Update for + simplified datastructures. + * value-prof.c: Add comment that speculative prefetching was dropped; + update rest of file for simplified datastructures. + (NOPREFETCH_RANGE_MIN, NOPREFETCH_RANGE_MAX, + rtl_divmod_values_to_profile, insn_prefetch_values_to_profile, + find_mem_reference_1, find_mem_reference_2, find_mem_reference, + rtl_values_to_profile, rtl_divmod_fixed_value, rtl_mod_pow2, + rtl_mod_subtract, gen_speculative_prefetch, + rtl_divmod_fixed_value_transform, rtl_mod_pow2_value_transform, + rtl_mod_subtract_transform, speculative_prefetching_transform): Kill. + (gate_handle_value_profile_transformations, + rest_of_handle_value_profile_transformations, + pass_value_profile_transformations): Kill. + * value-prof.h (histogram_value_t): Remove IL based unions. + (rtl_register_value_prof_hooks, rtl_register_profile_hooks, + rtl_profile_hooks): Remove hooks. + + * invoke.texi (-ftree-based-profiling, -fspeculative-prefetching): Kill. + * cgraph.c (cgraph_clone_edge): New UPDATE_ORIGINAL argument. (cgraph_clone_node): Likewise. * cgraph.h (cgraph_clone_edge): Update prototype. diff --git a/gcc/Makefile.in b/gcc/Makefile.in index f849aab9cae..e465e00550e 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -966,7 +966,7 @@ OBJS-common = \ targhooks.o timevar.o toplev.o tracer.o tree.o tree-dump.o \ varasm.o varray.o vec.o version.o vmsdbgout.o xcoffout.o alloc-pool.o \ et-forest.o cfghooks.o bt-load.o pretty-print.o $(GGC) web.o passes.o \ - rtl-profile.o tree-profile.o rtlhooks.o cfgexpand.o lambda-mat.o \ + tree-profile.o rtlhooks.o cfgexpand.o lambda-mat.o \ lambda-trans.o lambda-code.o tree-loop-linear.o tree-ssa-sink.o \ tree-vrp.o tree-stdarg.o tree-cfgcleanup.o tree-ssa-reassoc.o \ tree-ssa-structalias.o tree-object-size.o @@ -2242,9 +2242,6 @@ tree-profile.o : tree-profile.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(RTL_H) $(TREE_H) $(FLAGS_H) output.h $(REGS_H) $(EXPR_H) \ function.h toplev.h $(COVERAGE_H) $(TREE_H) value-prof.h $(TREE_DUMP_H) \ tree-pass.h $(TREE_FLOW_H) $(TIMEVAR_H) $(GGC_H) gt-tree-profile.h -rtl-profile.o : tree-profile.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ - $(TM_H) $(RTL_H) $(TREE_H) $(FLAGS_H) output.h $(REGS_H) $(EXPR_H) \ - function.h toplev.h $(COVERAGE_H) value-prof.h $(GGC_H) value-prof.o : value-prof.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ $(BASIC_BLOCK_H) hard-reg-set.h value-prof.h $(EXPR_H) output.h $(FLAGS_H) \ $(RECOG_H) insn-config.h $(OPTABS_H) $(REGS_H) $(GGC_H) $(DIAGNOSTIC_H) \ @@ -2732,7 +2729,7 @@ GTFILES = $(srcdir)/input.h $(srcdir)/coretypes.h \ $(srcdir)/tree-iterator.c $(srcdir)/gimplify.c \ $(srcdir)/tree-chrec.h $(srcdir)/tree-vect-generic.c \ $(srcdir)/tree-ssa-operands.h $(srcdir)/tree-ssa-operands.c \ - $(srcdir)/tree-profile.c $(srcdir)/rtl-profile.c $(srcdir)/tree-nested.c \ + $(srcdir)/tree-profile.c $(srcdir)/tree-nested.c \ $(srcdir)/ipa-reference.c \ $(srcdir)/targhooks.c $(out_file) \ @all_gtfiles@ diff --git a/gcc/common.opt b/gcc/common.opt index 53e2a501d6a..d92a8feafa8 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -779,10 +779,6 @@ fsingle-precision-constant Common Report Var(flag_single_precision_constant) Convert floating point constants to single precision constants -fspeculative-prefetching -Common Report Var(flag_speculative_prefetching) -Use value profiling for speculative prefetching - fsplit-ivs-in-unroller Common Report Var(flag_split_ivs_in_unroller) Init(1) Split lifetimes of induction variables when loops are unrolled @@ -863,10 +859,6 @@ ftrapv Common Report Var(flag_trapv) Trap for signed overflow in addition, subtraction and multiplication -ftree-based-profiling -Common Report Var(flag_tree_based_profiling) -Use tree-ssa based implementation of profiling - ftree-ccp Common Report Var(flag_tree_ccp) Enable SSA-CCP optimization on trees diff --git a/gcc/coverage.c b/gcc/coverage.c index 5c1d0922aa5..a558e869add 100644 --- a/gcc/coverage.c +++ b/gcc/coverage.c @@ -410,32 +410,6 @@ coverage_counter_alloc (unsigned counter, unsigned num) return 1; } -/* Generate a MEM rtl to access COUNTER NO. */ - -rtx -rtl_coverage_counter_ref (unsigned counter, unsigned no) -{ - enum machine_mode mode = mode_for_size (GCOV_TYPE_SIZE, MODE_INT, 0); - rtx ref; - - gcc_assert (no < fn_n_ctrs[counter] - fn_b_ctrs[counter]); - no += prg_n_ctrs[counter] + fn_b_ctrs[counter]; - if (!ctr_labels[counter]) - { - ctr_labels[counter] = gen_rtx_SYMBOL_REF (Pmode, - ggc_strdup (IDENTIFIER_POINTER (DECL_NAME - (tree_ctr_tables[counter])))); - SYMBOL_REF_FLAGS (ctr_labels[counter]) = SYMBOL_FLAG_LOCAL; - } - ref = plus_constant (ctr_labels[counter], - GCOV_TYPE_SIZE / BITS_PER_UNIT * no); - ref = gen_rtx_MEM (mode, ref); - set_mem_alias_set (ref, new_alias_set ()); - MEM_NOTRAP_P (ref) = 1; - - return ref; -} - /* Generate a tree to access COUNTER NO. */ tree diff --git a/gcc/coverage.h b/gcc/coverage.h index 5330363f7de..e070d837e25 100644 --- a/gcc/coverage.h +++ b/gcc/coverage.h @@ -38,8 +38,6 @@ extern int coverage_begin_output (void); /* Allocate some counters. Repeatable per function. */ extern int coverage_counter_alloc (unsigned /*counter*/, unsigned/*num*/); /* Use a counter from the most recent allocation. */ -extern rtx rtl_coverage_counter_ref (unsigned /*counter*/, unsigned/*num*/); -/* Use a counter from the most recent allocation. */ extern tree tree_coverage_counter_ref (unsigned /*counter*/, unsigned/*num*/); /* Get all the counters for the current function. */ diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 91a32641d37..398d2fc8ed2 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -281,7 +281,7 @@ Objective-C and Objective-C++ Dialects}. -ftree-vectorizer-verbose=@var{n} @gol -fdump-tree-storeccp@r{[}-@var{n}@r{]} @gol -feliminate-dwarf2-dups -feliminate-unused-debug-types @gol --feliminate-unused-debug-symbols -fmem-report -fprofile-arcs -ftree-based-profiling @gol +-feliminate-unused-debug-symbols -fmem-report -fprofile-arcs @gol -frandom-seed=@var{string} -fsched-verbose=@var{n} @gol -ftest-coverage -ftime-report -fvar-tracking @gol -g -g@var{level} -gcoff -gdwarf-2 @gol @@ -325,7 +325,7 @@ Objective-C and Objective-C++ Dialects}. -fsched-stalled-insns=@var{n} -sched-stalled-insns-dep=@var{n} @gol -fsched2-use-superblocks @gol -fsched2-use-traces -freschedule-modulo-scheduled-loops @gol --fsignaling-nans -fsingle-precision-constant -fspeculative-prefetching @gol +-fsignaling-nans -fsingle-precision-constant @gol -fstrength-reduce -fstrict-aliasing -ftracer -fthread-jumps @gol -funroll-all-loops -funroll-loops -fpeel-loops @gol -fsplit-ivs-in-unroller -funswitch-loops @gol @@ -3580,17 +3580,6 @@ executed. When an arc is the only exit or only entrance to a block, the instrumentation code can be added to the block; otherwise, a new basic block must be created to hold the instrumentation code. -@item -ftree-based-profiling -@opindex ftree-based-profiling -This option is used in addition to @option{-fprofile-arcs} or -@option{-fbranch-probabilities} to control whether those optimizations -are performed on a tree-based or rtl-based internal representation. -If you use this option when compiling with @option{-fprofile-arcs}, -you must also use it when compiling later with @option{-fbranch-probabilities}. -Currently the tree-based optimization is in an early stage of -development, and this option is recommended only for those people -working on improving it. - @need 2000 @item -ftest-coverage @opindex ftest-coverage @@ -5329,8 +5318,9 @@ The following options are enabled: @code{-fprofile-arcs}, @code{-fprofile-values Enable profile feedback directed optimizations, and optimizations generally profitable only with profile feedback available. -The following options are enabled: @code{-fbranch-probabilities}, -@code{-fvpt}, @code{-funroll-loops}, @code{-fpeel-loops}, @code{-ftracer}. +The following options are enabled: @code{-fbranch-probabilities}, @code{-fvpt}, +@code{-funroll-loops}, @code{-fpeel-loops}, @code{-ftracer}, +@code{-fno-loop-optimize}. @end table @@ -5527,23 +5517,6 @@ and actually performs the optimizations based on them. Currently the optimizations include specialization of division operation using the knowledge about the value of the denominator. -@item -fspeculative-prefetching -@opindex fspeculative-prefetching -If combined with @option{-fprofile-arcs}, it instructs the compiler to add -a code to gather information about addresses of memory references in the -program. - -With @option{-fbranch-probabilities}, it reads back the data gathered -and issues prefetch instructions according to them. In addition to the opportunities -noticed by @option{-fprefetch-loop-arrays}, it also notices more complicated -memory access patterns---for example accesses to the data stored in linked -list whose elements are usually allocated sequentially. - -In order to prevent issuing double prefetches, usage of -@option{-fspeculative-prefetching} implies @option{-fno-prefetch-loop-arrays}. - -Enabled with @option{-fprofile-generate} and @option{-fprofile-use}. - @item -frename-registers @opindex frename-registers Attempt to avoid false dependencies in scheduled code by making use diff --git a/gcc/opts.c b/gcc/opts.c index afa25d5bc48..7edaeac5b53 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -94,8 +94,8 @@ static const char undocumented_msg[] = N_("This switch lacks documentation"); static bool profile_arc_flag_set, flag_profile_values_set; static bool flag_unroll_loops_set, flag_tracer_set; static bool flag_value_profile_transformations_set; -bool flag_speculative_prefetching_set; static bool flag_peel_loops_set, flag_branch_probabilities_set; +static bool flag_loop_optimize_set; /* Input file names. */ const char **in_fnames; @@ -807,6 +807,10 @@ common_handle_option (size_t scode, const char *arg, int value) flag_branch_probabilities_set = true; break; + case OPT_floop_optimize: + flag_loop_optimize_set = true; + break; + case OPT_fcall_used_: fix_register (arg, 0, 1); break; @@ -883,10 +887,9 @@ common_handle_option (size_t scode, const char *arg, int value) flag_tracer = value; if (!flag_value_profile_transformations_set) flag_value_profile_transformations = value; -#ifdef HAVE_prefetch - if (0 && !flag_speculative_prefetching_set) - flag_speculative_prefetching = value; -#endif + /* Old loop optimizer is incompatible with tree profiling. */ + if (!flag_loop_optimize_set) + flag_loop_optimize = 0; break; case OPT_fprofile_generate: @@ -896,12 +899,6 @@ common_handle_option (size_t scode, const char *arg, int value) flag_profile_values = value; if (!flag_value_profile_transformations_set) flag_value_profile_transformations = value; - if (!flag_unroll_loops_set) - flag_unroll_loops = value; -#ifdef HAVE_prefetch - if (0 && !flag_speculative_prefetching_set) - flag_speculative_prefetching = value; -#endif break; case OPT_fprofile_values: @@ -927,10 +924,6 @@ common_handle_option (size_t scode, const char *arg, int value) flag_value_profile_transformations_set = true; break; - case OPT_fspeculative_prefetching: - flag_speculative_prefetching_set = true; - break; - case OPT_frandom_seed: /* The real switch is -fno-random-seed. */ if (value) diff --git a/gcc/passes.c b/gcc/passes.c index fcbb8df5a36..16f816c7c16 100644 --- a/gcc/passes.c +++ b/gcc/passes.c @@ -600,7 +600,7 @@ init_optimization_passes (void) NEXT_PASS (pass_loop_optimize); NEXT_PASS (pass_jump_bypass); NEXT_PASS (pass_cfg); - NEXT_PASS (pass_profiling); + NEXT_PASS (pass_branch_prob); NEXT_PASS (pass_rtl_ifcvt); NEXT_PASS (pass_tracer); /* Perform loop optimizations. It might be better to do them a bit @@ -624,12 +624,6 @@ init_optimization_passes (void) NEXT_PASS (pass_postreload); *p = NULL; - p = &pass_profiling.sub; - NEXT_PASS (pass_branch_prob); - NEXT_PASS (pass_value_profile_transformations); - NEXT_PASS (pass_remove_death_notes); - *p = NULL; - p = &pass_postreload.sub; NEXT_PASS (pass_postreload_cse); NEXT_PASS (pass_gcse2); diff --git a/gcc/profile.c b/gcc/profile.c index 1e0c3b87d93..113927fd44a 100644 --- a/gcc/profile.c +++ b/gcc/profile.c @@ -651,7 +651,7 @@ compute_value_histograms (histogram_values values) gcov_type *histogram_counts[GCOV_N_VALUE_COUNTERS]; gcov_type *act_count[GCOV_N_VALUE_COUNTERS]; gcov_type *aact_count; - histogram_value hist; + histogram_value hist = 0; for (t = 0; t < GCOV_N_VALUE_COUNTERS; t++) n_histogram_counters[t] = 0; @@ -683,7 +683,8 @@ compute_value_histograms (histogram_values values) for (i = 0; i < VEC_length (histogram_value, values); i++) { - rtx hist_list = NULL_RTX; + tree stmt = hist->hvalue.stmt; + stmt_ann_t ann = get_stmt_ann (stmt); hist = VEC_index (histogram_value, values, i); t = (int) hist->type; @@ -691,29 +692,12 @@ compute_value_histograms (histogram_values values) aact_count = act_count[t]; act_count[t] += hist->n_counters; - if (!ir_type ()) - { - for (j = hist->n_counters; j > 0; j--) - hist_list = alloc_EXPR_LIST (0, GEN_INT (aact_count[j - 1]), - hist_list); - hist_list = alloc_EXPR_LIST (0, - copy_rtx (hist->hvalue.rtl.value), hist_list); - hist_list = alloc_EXPR_LIST (0, GEN_INT (hist->type), hist_list); - REG_NOTES (hist->hvalue.rtl.insn) = - alloc_EXPR_LIST (REG_VALUE_PROFILE, hist_list, - REG_NOTES (hist->hvalue.rtl.insn)); - } - else - { - tree stmt = hist->hvalue.tree.stmt; - stmt_ann_t ann = get_stmt_ann (stmt); - hist->hvalue.tree.next = ann->histograms; - ann->histograms = hist; - hist->hvalue.tree.counters = - xmalloc (sizeof (gcov_type) * hist->n_counters); - for (j = 0; j < hist->n_counters; j++) - hist->hvalue.tree.counters[j] = aact_count[j]; - } + hist->hvalue.next = ann->histograms; + ann->histograms = hist; + hist->hvalue.counters = + xmalloc (sizeof (gcov_type) * hist->n_counters); + for (j = 0; j < hist->n_counters; j++) + hist->hvalue.counters[j] = aact_count[j]; } for (t = 0; t < GCOV_N_VALUE_COUNTERS; t++) @@ -1324,55 +1308,13 @@ tree_register_profile_hooks (void) profile_hooks = &tree_profile_hooks; } -/* Set up hooks to enable RTL-based profiling. */ - -void -rtl_register_profile_hooks (void) -{ - gcc_assert (!ir_type ()); - profile_hooks = &rtl_profile_hooks; -} -static bool -gate_handle_profiling (void) -{ - return optimize > 0 - || (!flag_tree_based_profiling - && (profile_arc_flag || flag_test_coverage - || flag_branch_probabilities)); -} - -struct tree_opt_pass pass_profiling = -{ - NULL, /* name */ - gate_handle_profiling, /* gate */ - NULL, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - 0, /* tv_id */ - 0, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - 0, /* todo_flags_finish */ - 0 /* letter */ -}; - - /* Do branch profiling and static profile estimation passes. */ static void rest_of_handle_branch_prob (void) { struct loops loops; - rtl_register_profile_hooks (); - rtl_register_value_prof_hooks (); - - if ((profile_arc_flag || flag_test_coverage || flag_branch_probabilities) - && !flag_tree_based_profiling) - branch_prob (); - /* Discover and record the loop depth at the head of each basic block. The loop infrastructure does the real job for us. */ flow_loops_find (&loops); @@ -1382,8 +1324,7 @@ rest_of_handle_branch_prob (void) /* Estimate using heuristics if no profiling info is available. */ if (flag_guess_branch_prob - && (profile_status == PROFILE_ABSENT - || (profile_status == PROFILE_READ && !flag_tree_based_profiling))) + && profile_status == PROFILE_ABSENT) estimate_probability (&loops); flow_loops_free (&loops); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index f45371dd8a6..a8badc528d3 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,9 @@ 2005-07-28 Jan Hubicka <jh@suse.cz> + * inliner-1.c: Do not dump everything. + * tree-prof.exp: Do not pass -ftree-based-profiling. + * bprob.exp: Likewise. + * gcc.dg/tree-prof/update-loopch.c: Fix thinkos. 2005-07-28 Mark Mitchell <mark@codesourcery.com> diff --git a/gcc/testsuite/g++.dg/bprob/bprob.exp b/gcc/testsuite/g++.dg/bprob/bprob.exp index 64df32fd851..f4b971bff3c 100644 --- a/gcc/testsuite/g++.dg/bprob/bprob.exp +++ b/gcc/testsuite/g++.dg/bprob/bprob.exp @@ -52,10 +52,6 @@ load_lib profopt.exp set profile_options "-fprofile-arcs" set feedback_options "-fbranch-probabilities" -if {[check_profiling_available "-ftree-based-profiling"]} { - lappend profile_options "-ftree-based-profiling -fprofile-arcs" - lappend feedback_options "-ftree-based-profiling -fbranch-probabilities" -} # Main loop. foreach profile_option $profile_options feedback_option $feedback_options { diff --git a/gcc/testsuite/gcc.dg/tree-prof/inliner-1.c b/gcc/testsuite/gcc.dg/tree-prof/inliner-1.c index b27b4fdc81b..8d7c87919a5 100644 --- a/gcc/testsuite/gcc.dg/tree-prof/inliner-1.c +++ b/gcc/testsuite/gcc.dg/tree-prof/inliner-1.c @@ -1,4 +1,4 @@ -/* { dg-options "-O2 -fdump-tree-optimized -fdump-tree-all" } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ int a; int b[100]; void abort (void); diff --git a/gcc/testsuite/gcc.dg/tree-prof/tree-prof.exp b/gcc/testsuite/gcc.dg/tree-prof/tree-prof.exp index 5f495a00e63..f7438194396 100644 --- a/gcc/testsuite/gcc.dg/tree-prof/tree-prof.exp +++ b/gcc/testsuite/gcc.dg/tree-prof/tree-prof.exp @@ -20,7 +20,7 @@ load_lib target-supports.exp # Some targets don't support tree profiling. -if { ![check_profiling_available "-ftree-based-profiling"] } { +if { ![check_profiling_available ""] } { return } @@ -41,8 +41,8 @@ load_lib profopt.exp # These are globals used by profopt-execute. The first is options # needed to generate profile data, the second is options to use the # profile data. -set profile_option "-ftree-based-profiling -fprofile-generate" -set feedback_option "-ftree-based-profiling -fprofile-use" +set profile_option "-fprofile-generate" +set feedback_option "-fprofile-use" foreach src [lsort [glob -nocomplain $srcdir/$subdir/*.c]] { # If we're only testing specific files and this isn't one of them, skip it. diff --git a/gcc/testsuite/gcc.misc-tests/bprob.exp b/gcc/testsuite/gcc.misc-tests/bprob.exp index f7f7072c392..745c43993fa 100644 --- a/gcc/testsuite/gcc.misc-tests/bprob.exp +++ b/gcc/testsuite/gcc.misc-tests/bprob.exp @@ -49,10 +49,6 @@ load_lib profopt.exp set profile_options "-fprofile-arcs" set feedback_options "-fbranch-probabilities" -if {[check_profiling_available "-ftree-based-profiling"]} { - lappend profile_options "-ftree-based-profiling -fprofile-arcs" - lappend feedback_options "-ftree-based-profiling -fbranch-probabilities" -} foreach profile_option $profile_options feedback_option $feedback_options { foreach src [lsort [glob -nocomplain $srcdir/$subdir/bprob-*.c]] { diff --git a/gcc/toplev.c b/gcc/toplev.c index b5ee33b33b8..094a2e245ac 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -1562,17 +1562,6 @@ process_options (void) if (flag_value_profile_transformations) flag_profile_values = 1; - /* Speculative prefetching implies the value profiling. We also switch off - the prefetching in the loop optimizer, so that we do not emit double - prefetches. TODO -- we should teach these two to cooperate; the loop - based prefetching may sometimes do a better job, especially in connection - with reuse analysis. */ - if (flag_speculative_prefetching) - { - flag_profile_values = 1; - flag_prefetch_loop_arrays = 0; - } - /* Warn about options that are not supported on this machine. */ #ifndef INSN_SCHEDULING if (flag_schedule_insns || flag_schedule_insns_after_reload) @@ -1732,24 +1721,12 @@ process_options (void) warning (0, "-fprefetch-loop-arrays not supported for this target"); flag_prefetch_loop_arrays = 0; } - if (flag_speculative_prefetching) - { - if (flag_speculative_prefetching_set) - warning (0, "-fspeculative-prefetching not supported for this target"); - flag_speculative_prefetching = 0; - } #else if (flag_prefetch_loop_arrays && !HAVE_prefetch) { warning (0, "-fprefetch-loop-arrays not supported for this target (try -march switches)"); flag_prefetch_loop_arrays = 0; } - if (flag_speculative_prefetching && !HAVE_prefetch) - { - if (flag_speculative_prefetching_set) - warning (0, "-fspeculative-prefetching not supported for this target (try -march switches)"); - flag_speculative_prefetching = 0; - } #endif /* This combination of options isn't handled for i386 targets and doesn't diff --git a/gcc/toplev.h b/gcc/toplev.h index 45797b13381..e293c9ec25a 100644 --- a/gcc/toplev.h +++ b/gcc/toplev.h @@ -128,7 +128,6 @@ extern int flag_unroll_all_loops; extern int flag_unswitch_loops; extern int flag_cprop_registers; extern int time_report; -extern int flag_tree_based_profiling; /* Things to do with target switches. */ extern void print_version (FILE *, const char *); diff --git a/gcc/tree-profile.c b/gcc/tree-profile.c index a19e3a42ad1..578104f4557 100644 --- a/gcc/tree-profile.c +++ b/gcc/tree-profile.c @@ -123,7 +123,7 @@ static tree prepare_instrumented_value (block_stmt_iterator *bsi, histogram_value value) { - tree val = value->hvalue.tree.value; + tree val = value->hvalue.value; return force_gimple_operand_bsi (bsi, fold_convert (gcov_type_node, val), true, NULL_TREE); } @@ -135,7 +135,7 @@ prepare_instrumented_value (block_stmt_iterator *bsi, static void tree_gen_interval_profiler (histogram_value value, unsigned tag, unsigned base) { - tree stmt = value->hvalue.tree.stmt; + tree stmt = value->hvalue.stmt; block_stmt_iterator bsi = bsi_for_stmt (stmt); tree ref = tree_coverage_counter_ref (tag, base), ref_ptr; tree args, call, val; @@ -162,7 +162,7 @@ tree_gen_interval_profiler (histogram_value value, unsigned tag, unsigned base) static void tree_gen_pow2_profiler (histogram_value value, unsigned tag, unsigned base) { - tree stmt = value->hvalue.tree.stmt; + tree stmt = value->hvalue.stmt; block_stmt_iterator bsi = bsi_for_stmt (stmt); tree ref = tree_coverage_counter_ref (tag, base), ref_ptr; tree args, call, val; @@ -185,7 +185,7 @@ tree_gen_pow2_profiler (histogram_value value, unsigned tag, unsigned base) static void tree_gen_one_value_profiler (histogram_value value, unsigned tag, unsigned base) { - tree stmt = value->hvalue.tree.stmt; + tree stmt = value->hvalue.stmt; block_stmt_iterator bsi = bsi_for_stmt (stmt); tree ref = tree_coverage_counter_ref (tag, base), ref_ptr; tree args, call, val; @@ -225,8 +225,7 @@ tree_gen_const_delta_profiler (histogram_value value ATTRIBUTE_UNUSED, static bool do_tree_profiling (void) { - if (flag_tree_based_profiling - && (profile_arc_flag || flag_test_coverage || flag_branch_probabilities)) + if (profile_arc_flag || flag_test_coverage || flag_branch_probabilities) { tree_register_profile_hooks (); tree_register_value_prof_hooks (); diff --git a/gcc/value-prof.c b/gcc/value-prof.c index f436b4a3efd..b9594f1fbe7 100644 --- a/gcc/value-prof.c +++ b/gcc/value-prof.c @@ -61,6 +61,8 @@ static histogram_values static_values = NULL; 2) Speculative prefetching. If we are able to determine that the difference between addresses accessed by a memory reference is usually constant, we may add the prefetch instructions. + FIXME: This transformation was removed together with RTL based value + profiling. Every such optimization should add its requirements for profiled values to insn_values_to_profile function. This function is called from branch_prob @@ -68,68 +70,12 @@ static histogram_values static_values = NULL; compilation with -fprofile-arcs. The optimization may then read the gathered data in the second compilation with -fbranch-probabilities. - There are currently two versions, RTL-based and tree-based. Over time - the RTL-based version may go away. - - In the RTL-based version, the measured data is appended as REG_VALUE_PROFILE - note to the instrumented insn. The argument to the note consists of an - EXPR_LIST where its members have the following meaning (from the first to - the last): - - -- type of information gathered (HIST_TYPE*) - -- the expression that is profiled - -- list of counters starting from the first one. - - In the tree-based version, the measured data is pointed to from the histograms + The measured data is pointed to from the histograms field of the statement annotation of the instrumented insns. It is kept as a linked list of struct histogram_value_t's, which contain the same information as above. */ -/* For speculative prefetching, the range in that we do not prefetch (because - we assume that it will be in cache anyway). The asymmetry between min and - max range is trying to reflect the fact that the sequential prefetching - of the data is commonly done directly by hardware. Nevertheless, these - values are just a guess and should of course be target-specific. - - FIXME: There is no tree form of speculative prefetching as yet. - - FIXME: A better approach to instrumentation in the profile-generation - pass is to generate calls to magic library functions (to be added to - libgcc) rather than inline code. This approach will probably be - necessary to get tree-based speculative prefetching working in a useful - fashion, as inline code bloats things so much the rest of the compiler has - serious problems dealing with it (judging from the rtl behavior). */ - -#ifndef NOPREFETCH_RANGE_MIN -#define NOPREFETCH_RANGE_MIN (-16) -#endif -#ifndef NOPREFETCH_RANGE_MAX -#define NOPREFETCH_RANGE_MAX 32 -#endif - -static void rtl_divmod_values_to_profile (rtx, histogram_values *); -#ifdef HAVE_prefetch -static bool insn_prefetch_values_to_profile (rtx, histogram_values *); -static int find_mem_reference_1 (rtx *, void *); -static void find_mem_reference_2 (rtx, rtx, void *); -static bool find_mem_reference (rtx, rtx *, int *); -#endif - -static void rtl_values_to_profile (rtx, histogram_values *); -static rtx rtl_divmod_fixed_value (enum machine_mode, enum rtx_code, rtx, rtx, - rtx, gcov_type, int); -static rtx rtl_mod_pow2 (enum machine_mode, enum rtx_code, rtx, rtx, rtx, int); -static rtx rtl_mod_subtract (enum machine_mode, enum rtx_code, rtx, rtx, rtx, - int, int, int); -#ifdef HAVE_prefetch -static rtx gen_speculative_prefetch (rtx, gcov_type, int); -#endif -static bool rtl_divmod_fixed_value_transform (rtx); -static bool rtl_mod_pow2_value_transform (rtx); -static bool rtl_mod_subtract_transform (rtx); -#ifdef HAVE_prefetch -static bool speculative_prefetching_transform (rtx); -#endif + static tree tree_divmod_fixed_value (tree, tree, tree, tree, tree, int, gcov_type, gcov_type); static tree tree_mod_pow2 (tree, tree, tree, tree, int, gcov_type, gcov_type); @@ -139,889 +85,6 @@ static bool tree_divmod_fixed_value_transform (tree); static bool tree_mod_pow2_value_transform (tree); static bool tree_mod_subtract_transform (tree); - -/* Find values inside INSN for that we want to measure histograms for - division/modulo optimization and stores them to VALUES. */ -static void -rtl_divmod_values_to_profile (rtx insn, histogram_values *values) -{ - rtx set, set_src, op1, op2; - enum machine_mode mode; - histogram_value hist; - - if (!INSN_P (insn)) - return; - - set = single_set (insn); - if (!set) - return; - - mode = GET_MODE (SET_DEST (set)); - if (!INTEGRAL_MODE_P (mode)) - return; - - set_src = SET_SRC (set); - switch (GET_CODE (set_src)) - { - case DIV: - case MOD: - case UDIV: - case UMOD: - op1 = XEXP (set_src, 0); - op2 = XEXP (set_src, 1); - if (side_effects_p (op2)) - return; - - /* Check for a special case where the divisor is power of 2. */ - if ((GET_CODE (set_src) == UMOD) && !CONSTANT_P (op2)) - { - hist = ggc_alloc (sizeof (*hist)); - hist->hvalue.rtl.value = op2; - hist->hvalue.rtl.seq = NULL_RTX; - hist->hvalue.rtl.mode = mode; - hist->hvalue.rtl.insn = insn; - hist->type = HIST_TYPE_POW2; - VEC_safe_push (histogram_value, heap, *values, hist); - } - - /* Check whether the divisor is not in fact a constant. */ - if (!CONSTANT_P (op2)) - { - hist = ggc_alloc (sizeof (*hist)); - hist->hvalue.rtl.value = op2; - hist->hvalue.rtl.mode = mode; - hist->hvalue.rtl.seq = NULL_RTX; - hist->hvalue.rtl.insn = insn; - hist->type = HIST_TYPE_SINGLE_VALUE; - VEC_safe_push (histogram_value, heap, *values, hist); - } - - /* For mod, check whether it is not often a noop (or replaceable by - a few subtractions). */ - if (GET_CODE (set_src) == UMOD && !side_effects_p (op1)) - { - rtx tmp; - - hist = ggc_alloc (sizeof (*hist)); - start_sequence (); - tmp = simplify_gen_binary (DIV, mode, copy_rtx (op1), copy_rtx (op2)); - hist->hvalue.rtl.value = force_operand (tmp, NULL_RTX); - hist->hvalue.rtl.seq = get_insns (); - end_sequence (); - hist->hvalue.rtl.mode = mode; - hist->hvalue.rtl.insn = insn; - hist->type = HIST_TYPE_INTERVAL; - hist->hdata.intvl.int_start = 0; - hist->hdata.intvl.steps = 2; - VEC_safe_push (histogram_value, heap, *values, hist); - } - return; - - default: - return; - } -} - -#ifdef HAVE_prefetch - -/* Called from find_mem_reference through for_each_rtx, finds a memory - reference. I.e. if *EXPR is a MEM, the reference to this MEM is stored - to *RET and the traversing of the expression is interrupted by returning 1. - Otherwise 0 is returned. */ - -static int -find_mem_reference_1 (rtx *expr, void *ret) -{ - rtx *mem = ret; - - if (MEM_P (*expr)) - { - *mem = *expr; - return 1; - } - return 0; -} - -/* Called form find_mem_reference through note_stores to find out whether - the memory reference MEM is a store. I.e. if EXPR == MEM, the variable - FMR2_WRITE is set to true. */ - -static int fmr2_write; -static void -find_mem_reference_2 (rtx expr, rtx pat ATTRIBUTE_UNUSED, void *mem) -{ - if (expr == mem) - fmr2_write = true; -} - -/* Find a memory reference inside INSN, return it in MEM. Set WRITE to true - if it is a write of the mem. Return false if no memory reference is found, - true otherwise. */ - -static bool -find_mem_reference (rtx insn, rtx *mem, int *write) -{ - *mem = NULL_RTX; - for_each_rtx (&PATTERN (insn), find_mem_reference_1, mem); - - if (!*mem) - return false; - - fmr2_write = false; - note_stores (PATTERN (insn), find_mem_reference_2, *mem); - *write = fmr2_write; - return true; -} - -/* Find values inside INSN for that we want to measure histograms for - a speculative prefetching. Add them to the list VALUES. - Returns true if such we found any such value, false otherwise. */ - -static bool -insn_prefetch_values_to_profile (rtx insn, histogram_values* values) -{ - rtx mem, address; - int write; - histogram_value hist; - - /* It only makes sense to look for memory references in ordinary insns. */ - if (!NONJUMP_INSN_P (insn)) - return false; - - if (!find_mem_reference (insn, &mem, &write)) - return false; - - address = XEXP (mem, 0); - if (side_effects_p (address)) - return false; - - if (CONSTANT_P (address)) - return false; - - hist = ggc_alloc (sizeof (*hist)); - hist->hvalue.rtl.value = address; - hist->hvalue.rtl.mode = GET_MODE (address); - hist->hvalue.rtl.seq = NULL_RTX; - hist->hvalue.rtl.insn = insn; - hist->type = HIST_TYPE_CONST_DELTA; - VEC_safe_push (histogram_value, heap, *values, hist); - - return true; -} -#endif -/* Find values inside INSN for that we want to measure histograms and adds - them to list VALUES (increasing the record of its length in N_VALUES). */ -static void -rtl_values_to_profile (rtx insn, histogram_values *values) -{ - if (flag_value_profile_transformations) - rtl_divmod_values_to_profile (insn, values); - -#ifdef HAVE_prefetch - if (flag_speculative_prefetching) - insn_prefetch_values_to_profile (insn, values); -#endif -} - -/* Find list of values for that we want to measure histograms. */ -static void -rtl_find_values_to_profile (histogram_values *values) -{ - rtx insn; - unsigned i, libcall_level; - histogram_value hist; - - life_analysis (NULL, PROP_DEATH_NOTES); - - *values = NULL; - libcall_level = 0; - for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) - rtl_values_to_profile (insn, values); - static_values = *values; - - for (i = 0; VEC_iterate (histogram_value, *values, i, hist); i++) - { - switch (hist->type) - { - case HIST_TYPE_INTERVAL: - if (dump_file) - fprintf (dump_file, - "Interval counter for insn %d, range %d -- %d.\n", - INSN_UID ((rtx)hist->hvalue.rtl.insn), - hist->hdata.intvl.int_start, - (hist->hdata.intvl.int_start - + hist->hdata.intvl.steps - 1)); - hist->n_counters = hist->hdata.intvl.steps + 2; - break; - - case HIST_TYPE_POW2: - if (dump_file) - fprintf (dump_file, - "Pow2 counter for insn %d.\n", - INSN_UID ((rtx)hist->hvalue.rtl.insn)); - hist->n_counters = 2; - break; - - case HIST_TYPE_SINGLE_VALUE: - if (dump_file) - fprintf (dump_file, - "Single value counter for insn %d.\n", - INSN_UID ((rtx)hist->hvalue.rtl.insn)); - hist->n_counters = 3; - break; - - case HIST_TYPE_CONST_DELTA: - if (dump_file) - fprintf (dump_file, - "Constant delta counter for insn %d.\n", - INSN_UID ((rtx)hist->hvalue.rtl.insn)); - hist->n_counters = 4; - break; - - default: - gcc_unreachable (); - } - } - allocate_reg_info (max_reg_num (), FALSE, FALSE); -} - -/* Main entry point. Finds REG_VALUE_PROFILE notes from profiler and uses - them to identify and exploit properties of values that are hard to analyze - statically. - - We do following transformations: - - 1) - - x = a / b; - - where b is almost always a constant N is transformed to - - if (b == N) - x = a / N; - else - x = a / b; - - Analogically with % - - 2) - - x = a % b - - where b is almost always a power of 2 and the division is unsigned - TODO -- handle signed case as well - - if ((b & (b - 1)) == 0) - x = a & (b - 1); - else - x = x % b; - - Note that when b = 0, no error will occur and x = a; this is correct, - as result of such operation is undefined. - - 3) - - x = a % b - - where a is almost always less then b and the division is unsigned - TODO -- handle signed case as well - - x = a; - if (x >= b) - x %= b; - - 4) - - x = a % b - - where a is almost always less then 2 * b and the division is unsigned - TODO -- handle signed case as well - - x = a; - if (x >= b) - x -= b; - if (x >= b) - x %= b; - - It would be possible to continue analogically for K * b for other small - K's, but it is probably not useful. - - 5) - - Read or write of mem[address], where the value of address changes usually - by a constant C != 0 between the following accesses to the computation; with - -fspeculative-prefetching we then add a prefetch of address + C before - the insn. This handles prefetching of several interesting cases in addition - to a simple prefetching for addresses that are induction variables, e. g. - linked lists allocated sequentially (even in case they are processed - recursively). - - TODO -- we should also check whether there is not (usually) a small - difference with the adjacent memory references, so that we do - not issue overlapping prefetches. Also we should employ some - heuristics to eliminate cases where prefetching evidently spoils - the code. - -- it should somehow cooperate with the loop optimizer prefetching - - TODO: - - There are other useful cases that could be handled by a similar mechanism, - for example: - - for (i = 0; i < n; i++) - ... - - transform to (for constant N): - - if (n == N) - for (i = 0; i < N; i++) - ... - else - for (i = 0; i < n; i++) - ... - making unroller happy. Since this may grow the code significantly, - we would have to be very careful here. */ - -static bool -rtl_value_profile_transformations (void) -{ - rtx insn, next; - int changed = false; - - for (insn = get_insns (); insn; insn = next) - { - next = NEXT_INSN (insn); - - if (!INSN_P (insn)) - continue; - - /* Scan for insn carrying a histogram. */ - if (!find_reg_note (insn, REG_VALUE_PROFILE, 0)) - continue; - - /* Ignore cold areas -- we are growing a code. */ - if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn))) - continue; - - if (dump_file) - { - fprintf (dump_file, "Trying transformations on insn %d\n", - INSN_UID (insn)); - print_rtl_single (dump_file, insn); - } - - /* Transformations: */ - if (flag_value_profile_transformations - && (rtl_mod_subtract_transform (insn) - || rtl_divmod_fixed_value_transform (insn) - || rtl_mod_pow2_value_transform (insn))) - changed = true; -#ifdef HAVE_prefetch - if (flag_speculative_prefetching - && speculative_prefetching_transform (insn)) - changed = true; -#endif - } - - if (changed) - { - commit_edge_insertions (); - allocate_reg_info (max_reg_num (), FALSE, FALSE); - } - - return changed; -} - -/* Generate code for transformation 1 (with MODE and OPERATION, operands OP1 - and OP2, whose value is expected to be VALUE, result TARGET and - probability of taking the optimal path PROB). */ -static rtx -rtl_divmod_fixed_value (enum machine_mode mode, enum rtx_code operation, - rtx target, rtx op1, rtx op2, gcov_type value, - int prob) -{ - rtx tmp, tmp1, jump; - rtx neq_label = gen_label_rtx (); - rtx end_label = gen_label_rtx (); - rtx sequence; - - start_sequence (); - - if (!REG_P (op2)) - { - tmp = gen_reg_rtx (mode); - emit_move_insn (tmp, copy_rtx (op2)); - } - else - tmp = op2; - - do_compare_rtx_and_jump (tmp, GEN_INT (value), NE, 0, mode, NULL_RTX, - NULL_RTX, neq_label); - - /* Add branch probability to jump we just created. */ - jump = get_last_insn (); - REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB, - GEN_INT (REG_BR_PROB_BASE - prob), - REG_NOTES (jump)); - - tmp1 = simplify_gen_binary (operation, mode, - copy_rtx (op1), GEN_INT (value)); - tmp1 = force_operand (tmp1, target); - if (tmp1 != target) - emit_move_insn (copy_rtx (target), copy_rtx (tmp1)); - - emit_jump_insn (gen_jump (end_label)); - emit_barrier (); - - emit_label (neq_label); - tmp1 = simplify_gen_binary (operation, mode, - copy_rtx (op1), copy_rtx (tmp)); - tmp1 = force_operand (tmp1, target); - if (tmp1 != target) - emit_move_insn (copy_rtx (target), copy_rtx (tmp1)); - - emit_label (end_label); - - sequence = get_insns (); - end_sequence (); - rebuild_jump_labels (sequence); - return sequence; -} - -/* Do transform 1) on INSN if applicable. */ -static bool -rtl_divmod_fixed_value_transform (rtx insn) -{ - rtx set, set_src, set_dest, op1, op2, value, histogram; - enum rtx_code code; - enum machine_mode mode; - gcov_type val, count, all; - edge e; - int prob; - - set = single_set (insn); - if (!set) - return false; - - set_src = SET_SRC (set); - set_dest = SET_DEST (set); - code = GET_CODE (set_src); - mode = GET_MODE (set_dest); - - if (code != DIV && code != MOD && code != UDIV && code != UMOD) - return false; - op1 = XEXP (set_src, false); - op2 = XEXP (set_src, 1); - - for (histogram = REG_NOTES (insn); - histogram; - histogram = XEXP (histogram, 1)) - if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE - && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_SINGLE_VALUE)) - break; - - if (!histogram) - return false; - - histogram = XEXP (XEXP (histogram, 0), 1); - value = XEXP (histogram, 0); - histogram = XEXP (histogram, 1); - val = INTVAL (XEXP (histogram, 0)); - histogram = XEXP (histogram, 1); - count = INTVAL (XEXP (histogram, 0)); - histogram = XEXP (histogram, 1); - all = INTVAL (XEXP (histogram, 0)); - - /* We require that count be at least half of all; this means - that for the transformation to fire the value must be constant - at least 50% of time (and 75% gives the guarantee of usage). */ - if (!rtx_equal_p (op2, value) || 2 * count < all) - return false; - - if (dump_file) - fprintf (dump_file, "Div/mod by constant transformation on insn %d\n", - INSN_UID (insn)); - - /* Compute probability of taking the optimal path. */ - prob = (count * REG_BR_PROB_BASE + all / 2) / all; - - e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn)); - delete_insn (insn); - - insert_insn_on_edge ( - rtl_divmod_fixed_value (mode, code, set_dest, - op1, op2, val, prob), e); - - return true; -} - -/* Generate code for transformation 2 (with MODE and OPERATION, operands OP1 - and OP2, result TARGET and probability of taking the optimal path PROB). */ -static rtx -rtl_mod_pow2 (enum machine_mode mode, enum rtx_code operation, rtx target, - rtx op1, rtx op2, int prob) -{ - rtx tmp, tmp1, tmp2, tmp3, jump; - rtx neq_label = gen_label_rtx (); - rtx end_label = gen_label_rtx (); - rtx sequence; - - start_sequence (); - - if (!REG_P (op2)) - { - tmp = gen_reg_rtx (mode); - emit_move_insn (tmp, copy_rtx (op2)); - } - else - tmp = op2; - - tmp1 = expand_simple_binop (mode, PLUS, tmp, constm1_rtx, NULL_RTX, - 0, OPTAB_WIDEN); - tmp2 = expand_simple_binop (mode, AND, tmp, tmp1, NULL_RTX, - 0, OPTAB_WIDEN); - do_compare_rtx_and_jump (tmp2, const0_rtx, NE, 0, mode, NULL_RTX, - NULL_RTX, neq_label); - - /* Add branch probability to jump we just created. */ - jump = get_last_insn (); - REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB, - GEN_INT (REG_BR_PROB_BASE - prob), - REG_NOTES (jump)); - - tmp3 = expand_simple_binop (mode, AND, op1, tmp1, target, - 0, OPTAB_WIDEN); - if (tmp3 != target) - emit_move_insn (copy_rtx (target), tmp3); - emit_jump_insn (gen_jump (end_label)); - emit_barrier (); - - emit_label (neq_label); - tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), copy_rtx (tmp)); - tmp1 = force_operand (tmp1, target); - if (tmp1 != target) - emit_move_insn (target, tmp1); - - emit_label (end_label); - - sequence = get_insns (); - end_sequence (); - rebuild_jump_labels (sequence); - return sequence; -} - -/* Do transform 2) on INSN if applicable. */ -static bool -rtl_mod_pow2_value_transform (rtx insn) -{ - rtx set, set_src, set_dest, op1, op2, value, histogram; - enum rtx_code code; - enum machine_mode mode; - gcov_type wrong_values, count; - edge e; - int all, prob; - - set = single_set (insn); - if (!set) - return false; - - set_src = SET_SRC (set); - set_dest = SET_DEST (set); - code = GET_CODE (set_src); - mode = GET_MODE (set_dest); - - if (code != UMOD) - return false; - op1 = XEXP (set_src, 0); - op2 = XEXP (set_src, 1); - - for (histogram = REG_NOTES (insn); - histogram; - histogram = XEXP (histogram, 1)) - if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE - && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_POW2)) - break; - - if (!histogram) - return false; - - histogram = XEXP (XEXP (histogram, 0), 1); - value = XEXP (histogram, 0); - histogram = XEXP (histogram, 1); - wrong_values = INTVAL (XEXP (histogram, 0)); - histogram = XEXP (histogram, 1); - count = INTVAL (XEXP (histogram, 0)); - - if (!rtx_equal_p (op2, value)) - return false; - - /* We require that we hit a power of two at least half of all evaluations. */ - if (count < wrong_values) - return false; - - if (dump_file) - fprintf (dump_file, "Mod power of 2 transformation on insn %d\n", - INSN_UID (insn)); - - /* Compute probability of taking the optimal path. */ - all = count + wrong_values; - prob = (count * REG_BR_PROB_BASE + all / 2) / all; - - e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn)); - delete_insn (insn); - - insert_insn_on_edge ( - rtl_mod_pow2 (mode, code, set_dest, op1, op2, prob), e); - - return true; -} - -/* Generate code for transformations 3 and 4 (with MODE and OPERATION, - operands OP1 and OP2, result TARGET, at most SUB subtractions, and - probability of taking the optimal path(s) PROB1 and PROB2). */ -static rtx -rtl_mod_subtract (enum machine_mode mode, enum rtx_code operation, - rtx target, rtx op1, rtx op2, int sub, int prob1, int prob2) -{ - rtx tmp, tmp1, jump; - rtx end_label = gen_label_rtx (); - rtx sequence; - int i; - - start_sequence (); - - if (!REG_P (op2)) - { - tmp = gen_reg_rtx (mode); - emit_move_insn (tmp, copy_rtx (op2)); - } - else - tmp = op2; - - emit_move_insn (target, copy_rtx (op1)); - do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX, - NULL_RTX, end_label); - - /* Add branch probability to jump we just created. */ - jump = get_last_insn (); - REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB, - GEN_INT (prob1), REG_NOTES (jump)); - - for (i = 0; i < sub; i++) - { - tmp1 = expand_simple_binop (mode, MINUS, target, tmp, target, - 0, OPTAB_WIDEN); - if (tmp1 != target) - emit_move_insn (target, tmp1); - do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX, - NULL_RTX, end_label); - - /* Add branch probability to jump we just created. */ - jump = get_last_insn (); - REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB, - GEN_INT (prob2), REG_NOTES (jump)); - } - - tmp1 = simplify_gen_binary (operation, mode, copy_rtx (target), copy_rtx (tmp)); - tmp1 = force_operand (tmp1, target); - if (tmp1 != target) - emit_move_insn (target, tmp1); - - emit_label (end_label); - - sequence = get_insns (); - end_sequence (); - rebuild_jump_labels (sequence); - return sequence; -} - -/* Do transforms 3) and 4) on INSN if applicable. */ -static bool -rtl_mod_subtract_transform (rtx insn) -{ - rtx set, set_src, set_dest, op1, op2, histogram; - enum rtx_code code; - enum machine_mode mode; - gcov_type wrong_values, counts[2], count, all; - edge e; - int i, prob1, prob2; - - set = single_set (insn); - if (!set) - return false; - - set_src = SET_SRC (set); - set_dest = SET_DEST (set); - code = GET_CODE (set_src); - mode = GET_MODE (set_dest); - - if (code != UMOD) - return false; - op1 = XEXP (set_src, 0); - op2 = XEXP (set_src, 1); - - for (histogram = REG_NOTES (insn); - histogram; - histogram = XEXP (histogram, 1)) - if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE - && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_INTERVAL)) - break; - - if (!histogram) - return false; - - histogram = XEXP (XEXP (histogram, 0), 1); - histogram = XEXP (histogram, 1); - - all = 0; - for (i = 0; i < 2; i++) - { - counts[i] = INTVAL (XEXP (histogram, 0)); - all += counts[i]; - histogram = XEXP (histogram, 1); - } - wrong_values = INTVAL (XEXP (histogram, 0)); - histogram = XEXP (histogram, 1); - wrong_values += INTVAL (XEXP (histogram, 0)); - all += wrong_values; - - /* We require that we use just subtractions in at least 50% of all - evaluations. */ - count = 0; - for (i = 0; i < 2; i++) - { - count += counts[i]; - if (count * 2 >= all) - break; - } - - if (i == 2) - return false; - - if (dump_file) - fprintf (dump_file, "Mod subtract transformation on insn %d\n", - INSN_UID (insn)); - - /* Compute probability of taking the optimal path(s). */ - prob1 = (counts[0] * REG_BR_PROB_BASE + all / 2) / all; - prob2 = (counts[1] * REG_BR_PROB_BASE + all / 2) / all; - - e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn)); - delete_insn (insn); - - insert_insn_on_edge ( - rtl_mod_subtract (mode, code, set_dest, - op1, op2, i, prob1, prob2), e); - - return true; -} - -#ifdef HAVE_prefetch -/* Generate code for transformation 5 for mem with ADDRESS and a constant - step DELTA. WRITE is true if the reference is a store to mem. */ - -static rtx -gen_speculative_prefetch (rtx address, gcov_type delta, int write) -{ - rtx tmp; - rtx sequence; - - /* TODO: we do the prefetching for just one iteration ahead, which - often is not enough. */ - start_sequence (); - if (offsettable_address_p (0, VOIDmode, address)) - tmp = plus_constant (copy_rtx (address), delta); - else - { - tmp = simplify_gen_binary (PLUS, Pmode, - copy_rtx (address), GEN_INT (delta)); - tmp = force_operand (tmp, NULL); - } - if (! (*insn_data[(int)CODE_FOR_prefetch].operand[0].predicate) - (tmp, insn_data[(int)CODE_FOR_prefetch].operand[0].mode)) - tmp = force_reg (Pmode, tmp); - emit_insn (gen_prefetch (tmp, GEN_INT (write), GEN_INT (3))); - sequence = get_insns (); - end_sequence (); - - return sequence; -} - -/* Do transform 5) on INSN if applicable. */ - -static bool -speculative_prefetching_transform (rtx insn) -{ - rtx histogram, value; - gcov_type val, count, all; - edge e; - rtx mem, address; - int write; - - if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn))) - return false; - - if (!find_mem_reference (insn, &mem, &write)) - return false; - - address = XEXP (mem, 0); - if (side_effects_p (address)) - return false; - - if (CONSTANT_P (address)) - return false; - - for (histogram = REG_NOTES (insn); - histogram; - histogram = XEXP (histogram, 1)) - if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE - && XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_CONST_DELTA)) - break; - - if (!histogram) - return false; - - histogram = XEXP (XEXP (histogram, 0), 1); - value = XEXP (histogram, 0); - histogram = XEXP (histogram, 1); - /* Skip last value referenced. */ - histogram = XEXP (histogram, 1); - val = INTVAL (XEXP (histogram, 0)); - histogram = XEXP (histogram, 1); - count = INTVAL (XEXP (histogram, 0)); - histogram = XEXP (histogram, 1); - all = INTVAL (XEXP (histogram, 0)); - - /* With that few executions we do not really have a reason to optimize the - statement, and more importantly, the data about differences of addresses - are spoiled by the first item that had no previous value to compare - with. */ - if (all < 4) - return false; - - /* We require that count be at least half of all; this means - that for the transformation to fire the value must be constant - at least 50% of time (and 75% gives the guarantee of usage). */ - if (!rtx_equal_p (address, value) || 2 * count < all) - return false; - - /* If the difference is too small, it does not make too much sense to - prefetch, as the memory is probably already in cache. */ - if (val >= NOPREFETCH_RANGE_MIN && val <= NOPREFETCH_RANGE_MAX) - return false; - - if (dump_file) - fprintf (dump_file, "Speculative prefetching for insn %d\n", - INSN_UID (insn)); - - e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn)); - - insert_insn_on_edge (gen_speculative_prefetch (address, val, write), e); - - return true; -} -#endif /* HAVE_prefetch */ - /* Tree based transformations. */ static bool tree_value_profile_transformations (void) @@ -1070,8 +133,8 @@ tree_value_profile_transformations (void) /* Free extra storage from compute_value_histograms. */ while (th) { - free (th->hvalue.tree.counters); - th = th->hvalue.tree.next; + free (th->hvalue.counters); + th = th->hvalue.next; } ann->histograms = 0; } @@ -1205,17 +268,17 @@ tree_divmod_fixed_value_transform (tree stmt) if (!ann->histograms) return false; - for (histogram = ann->histograms; histogram; histogram = histogram->hvalue.tree.next) + for (histogram = ann->histograms; histogram; histogram = histogram->hvalue.next) if (histogram->type == HIST_TYPE_SINGLE_VALUE) break; if (!histogram) return false; - value = histogram->hvalue.tree.value; - val = histogram->hvalue.tree.counters[0]; - count = histogram->hvalue.tree.counters[1]; - all = histogram->hvalue.tree.counters[2]; + value = histogram->hvalue.value; + val = histogram->hvalue.counters[0]; + count = histogram->hvalue.counters[1]; + all = histogram->hvalue.counters[2]; /* We require that count is at least half of all; this means that for the transformation to fire the value must be constant @@ -1371,16 +434,16 @@ tree_mod_pow2_value_transform (tree stmt) if (!ann->histograms) return false; - for (histogram = ann->histograms; histogram; histogram = histogram->hvalue.tree.next) + for (histogram = ann->histograms; histogram; histogram = histogram->hvalue.next) if (histogram->type == HIST_TYPE_POW2) break; if (!histogram) return false; - value = histogram->hvalue.tree.value; - wrong_values = histogram->hvalue.tree.counters[0]; - count = histogram->hvalue.tree.counters[1]; + value = histogram->hvalue.value; + wrong_values = histogram->hvalue.counters[0]; + count = histogram->hvalue.counters[1]; /* We require that we hit a power of 2 at least half of all evaluations. */ if (simple_cst_equal (op2, value) != 1 || count < wrong_values) @@ -1551,21 +614,21 @@ tree_mod_subtract_transform (tree stmt) if (!ann->histograms) return false; - for (histogram = ann->histograms; histogram; histogram = histogram->hvalue.tree.next) + for (histogram = ann->histograms; histogram; histogram = histogram->hvalue.next) if (histogram->type == HIST_TYPE_INTERVAL) break; if (!histogram) return false; - value = histogram->hvalue.tree.value; + value = histogram->hvalue.value; all = 0; wrong_values = 0; for (i = 0; i < histogram->hdata.intvl.steps; i++) - all += histogram->hvalue.tree.counters[i]; + all += histogram->hvalue.counters[i]; - wrong_values += histogram->hvalue.tree.counters[i]; - wrong_values += histogram->hvalue.tree.counters[i+1]; + wrong_values += histogram->hvalue.counters[i]; + wrong_values += histogram->hvalue.counters[i+1]; all += wrong_values; /* We require that we use just subtractions in at least 50% of all @@ -1573,7 +636,7 @@ tree_mod_subtract_transform (tree stmt) count = 0; for (i = 0; i < histogram->hdata.intvl.steps; i++) { - count += histogram->hvalue.tree.counters[i]; + count += histogram->hvalue.counters[i]; if (count * 2 >= all) break; } @@ -1587,22 +650,20 @@ tree_mod_subtract_transform (tree stmt) } /* Compute probability of taking the optimal path(s). */ - prob1 = (histogram->hvalue.tree.counters[0] * REG_BR_PROB_BASE + all / 2) / all; - prob2 = (histogram->hvalue.tree.counters[1] * REG_BR_PROB_BASE + all / 2) / all; + prob1 = (histogram->hvalue.counters[0] * REG_BR_PROB_BASE + all / 2) / all; + prob2 = (histogram->hvalue.counters[1] * REG_BR_PROB_BASE + all / 2) / all; /* In practice, "steps" is always 2. This interface reflects this, and will need to be changed if "steps" can change. */ result = tree_mod_subtract (stmt, op, op1, op2, prob1, prob2, i, - histogram->hvalue.tree.counters[0], - histogram->hvalue.tree.counters[1], all); + histogram->hvalue.counters[0], + histogram->hvalue.counters[1], all); TREE_OPERAND (modify, 1) = result; return true; } - -/* Connection to the outside world. */ -/* Struct for IR-dependent hooks. */ + struct value_prof_hooks { /* Find list of values for which we want to measure histograms. */ void (*find_values_to_profile) (histogram_values *); @@ -1611,20 +672,6 @@ struct value_prof_hooks { statically. See value-prof.c for more detail. */ bool (*value_profile_transformations) (void); }; - -/* Hooks for RTL-based versions (the only ones that currently work). */ -static struct value_prof_hooks rtl_value_prof_hooks = -{ - rtl_find_values_to_profile, - rtl_value_profile_transformations -}; - -void -rtl_register_value_prof_hooks (void) -{ - value_prof_hooks = &rtl_value_prof_hooks; - gcc_assert (!ir_type ()); -} /* Find values inside STMT for that we want to measure histograms for division/modulo optimization. */ @@ -1662,8 +709,8 @@ tree_divmod_values_to_profile (tree stmt, histogram_values *values) /* Check for the case where the divisor is the same value most of the time. */ hist = ggc_alloc (sizeof (*hist)); - hist->hvalue.tree.value = divisor; - hist->hvalue.tree.stmt = stmt; + hist->hvalue.value = divisor; + hist->hvalue.stmt = stmt; hist->type = HIST_TYPE_SINGLE_VALUE; VEC_quick_push (histogram_value, *values, hist); } @@ -1675,14 +722,14 @@ tree_divmod_values_to_profile (tree stmt, histogram_values *values) { /* Check for a special case where the divisor is power of 2. */ hist = ggc_alloc (sizeof (*hist)); - hist->hvalue.tree.value = divisor; - hist->hvalue.tree.stmt = stmt; + hist->hvalue.value = divisor; + hist->hvalue.stmt = stmt; hist->type = HIST_TYPE_POW2; VEC_quick_push (histogram_value, *values, hist); hist = ggc_alloc (sizeof (*hist)); - hist->hvalue.tree.stmt = stmt; - hist->hvalue.tree.value + hist->hvalue.stmt = stmt; + hist->hvalue.value = build2 (TRUNC_DIV_EXPR, type, op0, divisor); hist->type = HIST_TYPE_INTERVAL; hist->hdata.intvl.int_start = 0; @@ -1728,7 +775,7 @@ tree_find_values_to_profile (histogram_values *values) if (dump_file) { fprintf (dump_file, "Interval counter for tree "); - print_generic_expr (dump_file, hist->hvalue.tree.stmt, + print_generic_expr (dump_file, hist->hvalue.stmt, TDF_SLIM); fprintf (dump_file, ", range %d -- %d.\n", hist->hdata.intvl.int_start, @@ -1742,7 +789,7 @@ tree_find_values_to_profile (histogram_values *values) if (dump_file) { fprintf (dump_file, "Pow2 counter for tree "); - print_generic_expr (dump_file, hist->hvalue.tree.stmt, TDF_SLIM); + print_generic_expr (dump_file, hist->hvalue.stmt, TDF_SLIM); fprintf (dump_file, ".\n"); } hist->n_counters = 2; @@ -1752,7 +799,7 @@ tree_find_values_to_profile (histogram_values *values) if (dump_file) { fprintf (dump_file, "Single value counter for tree "); - print_generic_expr (dump_file, hist->hvalue.tree.stmt, TDF_SLIM); + print_generic_expr (dump_file, hist->hvalue.stmt, TDF_SLIM); fprintf (dump_file, ".\n"); } hist->n_counters = 3; @@ -1762,7 +809,7 @@ tree_find_values_to_profile (histogram_values *values) if (dump_file) { fprintf (dump_file, "Constant delta counter for tree "); - print_generic_expr (dump_file, hist->hvalue.tree.stmt, TDF_SLIM); + print_generic_expr (dump_file, hist->hvalue.stmt, TDF_SLIM); fprintf (dump_file, ".\n"); } hist->n_counters = 4; @@ -1801,39 +848,4 @@ value_profile_transformations (void) return retval; } -static bool -gate_handle_value_profile_transformations (void) -{ - return flag_branch_probabilities - && flag_profile_values - && !flag_tree_based_profiling - && (flag_value_profile_transformations - || flag_speculative_prefetching); -} - - -/* Do optimizations based on expression value profiles. */ -static void -rest_of_handle_value_profile_transformations (void) -{ - if (value_profile_transformations ()) - cleanup_cfg (CLEANUP_EXPENSIVE); -} - -struct tree_opt_pass pass_value_profile_transformations = -{ - "vpt", /* name */ - gate_handle_value_profile_transformations, /* gate */ - rest_of_handle_value_profile_transformations, /* execute */ - NULL, /* sub */ - NULL, /* next */ - 0, /* static_pass_number */ - TV_VPT, /* tv_id */ - 0, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_dump_func, /* todo_flags_finish */ - 'V' /* letter */ -}; diff --git a/gcc/value-prof.h b/gcc/value-prof.h index 2a1191daaf2..f54b355ae9a 100644 --- a/gcc/value-prof.h +++ b/gcc/value-prof.h @@ -40,22 +40,12 @@ enum hist_type /* The value to measure. */ struct histogram_value_t { - union + struct { - struct - { - rtx value; /* The value to profile. */ - rtx seq; /* Insns required to count the profiled value. */ - rtx insn; /* Insn before that to measure. */ - enum machine_mode mode; /* Mode of value to profile. */ - } rtl; - struct - { - tree value; /* The value to profile. */ - tree stmt; /* Insn containing the value. */ - gcov_type *counters; /* Pointer to first counter. */ - struct histogram_value_t *next; /* Linked list pointer. */ - } tree; + tree value; /* The value to profile. */ + tree stmt; /* Insn containing the value. */ + gcov_type *counters; /* Pointer to first counter. */ + struct histogram_value_t *next; /* Linked list pointer. */ } hvalue; enum hist_type type; /* Type of information to measure. */ unsigned n_counters; /* Number of required counters. */ @@ -77,7 +67,6 @@ DEF_VEC_ALLOC_P(histogram_value,heap); typedef VEC(histogram_value,heap) *histogram_values; /* Hooks registration. */ -extern void rtl_register_value_prof_hooks (void); extern void tree_register_value_prof_hooks (void); /* IR-independent entry points. */ @@ -113,13 +102,9 @@ extern void init_branch_prob (void); extern void branch_prob (void); extern void end_branch_prob (void); extern void tree_register_profile_hooks (void); -extern void rtl_register_profile_hooks (void); /* In tree-profile.c. */ extern struct profile_hooks tree_profile_hooks; -/* In rtl-profile.c. */ -extern struct profile_hooks rtl_profile_hooks; - #endif /* GCC_VALUE_PROF_H */ |