diff options
-rw-r--r-- | gcc/ChangeLog | 27 | ||||
-rw-r--r-- | gcc/builtins.c | 120 | ||||
-rw-r--r-- | gcc/cfgloop.h | 3 | ||||
-rw-r--r-- | gcc/cfgloopanal.c | 26 | ||||
-rw-r--r-- | gcc/loop-invariant.c | 23 | ||||
-rw-r--r-- | gcc/tree-inline.c | 98 | ||||
-rw-r--r-- | gcc/tree-ssa-loop-ivopts.c | 27 | ||||
-rw-r--r-- | gcc/tree.h | 2 |
8 files changed, 215 insertions, 111 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index f9435971193..201f56ddea6 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,30 @@ +2010-07-10 Sandra Loosemore <sandra@codesourcery.com> + + PR middle-end/42505 + * tree-inline.c (estimate_num_insns): Refactor builtin complexity + lookup code into.... + * builtins.c (is_simple_builtin, is_inexpensive_builtin): ...these + new functions. + * tree.h (is_simple_builtin, is_inexpensive_builtin): Declare. + * cfgloopanal.c (target_clobbered_regs): Define. + (init_set_costs): Initialize target_clobbered_regs. + (estimate_reg_pressure_cost): Add call_p argument. When true, + adjust the number of available registers to exclude the + call-clobbered registers. + * cfgloop.h (target_clobbered_regs): Declare. + (estimate_reg_pressure_cost): Adjust declaration. + * tree-ssa-loop-ivopts.c (struct ivopts_data): Add body_includes_call. + (ivopts_global_cost_for_size): Pass it to estimate_reg_pressure_cost. + (determine_set_costs): Dump target_clobbered_regs. + (loop_body_includes_call): New function. + (tree_ssa_iv_optimize_loop): Use it to initialize new field. + * loop-invariant.c (gain_for_invariant): Adjust arguments to pass + call_p flag through. + (best_gain_for_invariant): Likewise. + (find_invariants_to_move): Likewise. + (move_single_loop_invariants): Likewise, using already-computed + has_call field. + 2010-07-10 Richard Guenther <rguenther@suse.de> Joern Rennecke <joern.rennecke@embecosm.com> diff --git a/gcc/builtins.c b/gcc/builtins.c index 046593e08bb..8039f9b8b11 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -13753,3 +13753,123 @@ set_builtin_user_assembler_name (tree decl, const char *asmspec) break; } } + +/* Return true if DECL is a builtin that expands to a constant or similarly + simple code. */ +bool +is_simple_builtin (tree decl) +{ + if (decl && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL) + switch (DECL_FUNCTION_CODE (decl)) + { + /* Builtins that expand to constants. */ + case BUILT_IN_CONSTANT_P: + case BUILT_IN_EXPECT: + case BUILT_IN_OBJECT_SIZE: + case BUILT_IN_UNREACHABLE: + /* Simple register moves or loads from stack. */ + case BUILT_IN_RETURN_ADDRESS: + case BUILT_IN_EXTRACT_RETURN_ADDR: + case BUILT_IN_FROB_RETURN_ADDR: + case BUILT_IN_RETURN: + case BUILT_IN_AGGREGATE_INCOMING_ADDRESS: + case BUILT_IN_FRAME_ADDRESS: + case BUILT_IN_VA_END: + case BUILT_IN_STACK_SAVE: + case BUILT_IN_STACK_RESTORE: + /* Exception state returns or moves registers around. */ + case BUILT_IN_EH_FILTER: + case BUILT_IN_EH_POINTER: + case BUILT_IN_EH_COPY_VALUES: + return true; + + default: + return false; + } + + return false; +} + +/* Return true if DECL is a builtin that is not expensive, i.e., they are + most probably expanded inline into reasonably simple code. This is a + superset of is_simple_builtin. */ +bool +is_inexpensive_builtin (tree decl) +{ + if (!decl) + return false; + else if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_MD) + return true; + else if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL) + switch (DECL_FUNCTION_CODE (decl)) + { + case BUILT_IN_ABS: + case BUILT_IN_ALLOCA: + case BUILT_IN_BSWAP32: + case BUILT_IN_BSWAP64: + case BUILT_IN_CLZ: + case BUILT_IN_CLZIMAX: + case BUILT_IN_CLZL: + case BUILT_IN_CLZLL: + case BUILT_IN_CTZ: + case BUILT_IN_CTZIMAX: + case BUILT_IN_CTZL: + case BUILT_IN_CTZLL: + case BUILT_IN_FFS: + case BUILT_IN_FFSIMAX: + case BUILT_IN_FFSL: + case BUILT_IN_FFSLL: + case BUILT_IN_IMAXABS: + case BUILT_IN_FINITE: + case BUILT_IN_FINITEF: + case BUILT_IN_FINITEL: + case BUILT_IN_FINITED32: + case BUILT_IN_FINITED64: + case BUILT_IN_FINITED128: + case BUILT_IN_FPCLASSIFY: + case BUILT_IN_ISFINITE: + case BUILT_IN_ISINF_SIGN: + case BUILT_IN_ISINF: + case BUILT_IN_ISINFF: + case BUILT_IN_ISINFL: + case BUILT_IN_ISINFD32: + case BUILT_IN_ISINFD64: + case BUILT_IN_ISINFD128: + case BUILT_IN_ISNAN: + case BUILT_IN_ISNANF: + case BUILT_IN_ISNANL: + case BUILT_IN_ISNAND32: + case BUILT_IN_ISNAND64: + case BUILT_IN_ISNAND128: + case BUILT_IN_ISNORMAL: + case BUILT_IN_ISGREATER: + case BUILT_IN_ISGREATEREQUAL: + case BUILT_IN_ISLESS: + case BUILT_IN_ISLESSEQUAL: + case BUILT_IN_ISLESSGREATER: + case BUILT_IN_ISUNORDERED: + case BUILT_IN_VA_ARG_PACK: + case BUILT_IN_VA_ARG_PACK_LEN: + case BUILT_IN_VA_COPY: + case BUILT_IN_TRAP: + case BUILT_IN_SAVEREGS: + case BUILT_IN_POPCOUNTL: + case BUILT_IN_POPCOUNTLL: + case BUILT_IN_POPCOUNTIMAX: + case BUILT_IN_POPCOUNT: + case BUILT_IN_PARITYL: + case BUILT_IN_PARITYLL: + case BUILT_IN_PARITYIMAX: + case BUILT_IN_PARITY: + case BUILT_IN_LABS: + case BUILT_IN_LLABS: + case BUILT_IN_PREFETCH: + return true; + + default: + return is_simple_builtin (decl); + } + + return false; +} + diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h index 3821ee66c79..46cda11861e 100644 --- a/gcc/cfgloop.h +++ b/gcc/cfgloop.h @@ -627,13 +627,14 @@ fel_init (loop_iterator *li, loop_p *loop, unsigned flags) /* The properties of the target. */ extern unsigned target_avail_regs; +extern unsigned target_clobbered_regs; extern unsigned target_res_regs; extern unsigned target_reg_cost [2]; extern unsigned target_spill_cost [2]; /* Register pressure estimation for induction variable optimizations & loop invariant motion. */ -extern unsigned estimate_reg_pressure_cost (unsigned, unsigned, bool); +extern unsigned estimate_reg_pressure_cost (unsigned, unsigned, bool, bool); extern void init_set_costs (void); /* Loop optimizer initialization. */ diff --git a/gcc/cfgloopanal.c b/gcc/cfgloopanal.c index 129ec25a331..853e6f22dd4 100644 --- a/gcc/cfgloopanal.c +++ b/gcc/cfgloopanal.c @@ -320,6 +320,8 @@ seq_cost (const_rtx seq, bool speed) /* The properties of the target. */ unsigned target_avail_regs; /* Number of available registers. */ +unsigned target_clobbered_regs; /* Number of available registers that are + call-clobbered. */ unsigned target_res_regs; /* Number of registers reserved for temporary expressions. */ unsigned target_reg_cost[2]; /* The cost for register when there still @@ -342,10 +344,15 @@ init_set_costs (void) unsigned i; target_avail_regs = 0; + target_clobbered_regs = 0; for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i) && !fixed_regs[i]) - target_avail_regs++; + { + target_avail_regs++; + if (call_used_regs[i]) + target_clobbered_regs++; + } target_res_regs = 3; @@ -379,20 +386,29 @@ init_set_costs (void) /* Estimates cost of increased register pressure caused by making N_NEW new registers live around the loop. N_OLD is the number of registers live - around the loop. */ + around the loop. If CALL_P is true, also take into account that + call-used registers may be clobbered in the loop body, reducing the + number of available registers before we spill. */ unsigned -estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed) +estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed, + bool call_p) { unsigned cost; unsigned regs_needed = n_new + n_old; + unsigned available_regs = target_avail_regs; + + /* If there is a call in the loop body, the call-clobbered registers + are not available for loop invariants. */ + if (call_p) + available_regs = available_regs - target_clobbered_regs; /* If we have enough registers, we should use them and not restrict the transformations unnecessarily. */ - if (regs_needed + target_res_regs <= target_avail_regs) + if (regs_needed + target_res_regs <= available_regs) return 0; - if (regs_needed <= target_avail_regs) + if (regs_needed <= available_regs) /* If we are close to running out of registers, try to preserve them. */ cost = target_reg_cost [speed] * n_new; diff --git a/gcc/loop-invariant.c b/gcc/loop-invariant.c index 2f38123c7d4..19da7dd6e6d 100644 --- a/gcc/loop-invariant.c +++ b/gcc/loop-invariant.c @@ -1173,11 +1173,13 @@ get_inv_cost (struct invariant *inv, int *comp_cost, unsigned *regs_needed) /* Calculates gain for eliminating invariant INV. REGS_USED is the number of registers used in the loop, NEW_REGS is the number of new variables already added due to the invariant motion. The number of registers needed - for it is stored in *REGS_NEEDED. */ + for it is stored in *REGS_NEEDED. SPEED and CALL_P are flags passed + through to estimate_reg_pressure_cost. */ static int gain_for_invariant (struct invariant *inv, unsigned *regs_needed, - unsigned *new_regs, unsigned regs_used, bool speed) + unsigned *new_regs, unsigned regs_used, + bool speed, bool call_p) { int comp_cost, size_cost; @@ -1188,9 +1190,9 @@ gain_for_invariant (struct invariant *inv, unsigned *regs_needed, if (! flag_ira_loop_pressure) { size_cost = (estimate_reg_pressure_cost (new_regs[0] + regs_needed[0], - regs_used, speed) + regs_used, speed, call_p) - estimate_reg_pressure_cost (new_regs[0], - regs_used, speed)); + regs_used, speed, call_p)); } else { @@ -1245,7 +1247,8 @@ gain_for_invariant (struct invariant *inv, unsigned *regs_needed, static int best_gain_for_invariant (struct invariant **best, unsigned *regs_needed, - unsigned *new_regs, unsigned regs_used, bool speed) + unsigned *new_regs, unsigned regs_used, + bool speed, bool call_p) { struct invariant *inv; int i, gain = 0, again; @@ -1261,7 +1264,7 @@ best_gain_for_invariant (struct invariant **best, unsigned *regs_needed, continue; again = gain_for_invariant (inv, aregs_needed, new_regs, regs_used, - speed); + speed, call_p); if (again > gain) { gain = again; @@ -1314,7 +1317,7 @@ set_move_mark (unsigned invno, int gain) /* Determines which invariants to move. */ static void -find_invariants_to_move (bool speed) +find_invariants_to_move (bool speed, bool call_p) { int gain; unsigned i, regs_used, regs_needed[N_REG_CLASSES], new_regs[N_REG_CLASSES]; @@ -1353,7 +1356,8 @@ find_invariants_to_move (bool speed) new_regs[ira_reg_class_cover[i]] = 0; } while ((gain = best_gain_for_invariant (&inv, regs_needed, - new_regs, regs_used, speed)) > 0) + new_regs, regs_used, + speed, call_p)) > 0) { set_move_mark (inv->invno, gain); if (! flag_ira_loop_pressure) @@ -1573,7 +1577,8 @@ move_single_loop_invariants (struct loop *loop) init_inv_motion_data (); find_invariants (loop); - find_invariants_to_move (optimize_loop_for_speed_p (loop)); + find_invariants_to_move (optimize_loop_for_speed_p (loop), + LOOP_DATA (loop)->has_call); move_invariants (loop); free_inv_motion_data (); diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c index 57364e43721..4298958fb51 100644 --- a/gcc/tree-inline.c +++ b/gcc/tree-inline.c @@ -3478,105 +3478,13 @@ estimate_num_insns (gimple stmt, eni_weights *weights) if (POINTER_TYPE_P (funtype)) funtype = TREE_TYPE (funtype); - if (decl && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_MD) + if (is_simple_builtin (decl)) + return 0; + else if (is_inexpensive_builtin (decl)) cost = weights->target_builtin_call_cost; else cost = weights->call_cost; - if (decl && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL) - switch (DECL_FUNCTION_CODE (decl)) - { - /* Builtins that expand to constants. */ - case BUILT_IN_CONSTANT_P: - case BUILT_IN_EXPECT: - case BUILT_IN_OBJECT_SIZE: - case BUILT_IN_UNREACHABLE: - /* Simple register moves or loads from stack. */ - case BUILT_IN_RETURN_ADDRESS: - case BUILT_IN_EXTRACT_RETURN_ADDR: - case BUILT_IN_FROB_RETURN_ADDR: - case BUILT_IN_RETURN: - case BUILT_IN_AGGREGATE_INCOMING_ADDRESS: - case BUILT_IN_FRAME_ADDRESS: - case BUILT_IN_VA_END: - case BUILT_IN_STACK_SAVE: - case BUILT_IN_STACK_RESTORE: - /* Exception state returns or moves registers around. */ - case BUILT_IN_EH_FILTER: - case BUILT_IN_EH_POINTER: - case BUILT_IN_EH_COPY_VALUES: - return 0; - - /* builtins that are not expensive (that is they are most probably - expanded inline into resonably simple code). */ - case BUILT_IN_ABS: - case BUILT_IN_ALLOCA: - case BUILT_IN_BSWAP32: - case BUILT_IN_BSWAP64: - case BUILT_IN_CLZ: - case BUILT_IN_CLZIMAX: - case BUILT_IN_CLZL: - case BUILT_IN_CLZLL: - case BUILT_IN_CTZ: - case BUILT_IN_CTZIMAX: - case BUILT_IN_CTZL: - case BUILT_IN_CTZLL: - case BUILT_IN_FFS: - case BUILT_IN_FFSIMAX: - case BUILT_IN_FFSL: - case BUILT_IN_FFSLL: - case BUILT_IN_IMAXABS: - case BUILT_IN_FINITE: - case BUILT_IN_FINITEF: - case BUILT_IN_FINITEL: - case BUILT_IN_FINITED32: - case BUILT_IN_FINITED64: - case BUILT_IN_FINITED128: - case BUILT_IN_FPCLASSIFY: - case BUILT_IN_ISFINITE: - case BUILT_IN_ISINF_SIGN: - case BUILT_IN_ISINF: - case BUILT_IN_ISINFF: - case BUILT_IN_ISINFL: - case BUILT_IN_ISINFD32: - case BUILT_IN_ISINFD64: - case BUILT_IN_ISINFD128: - case BUILT_IN_ISNAN: - case BUILT_IN_ISNANF: - case BUILT_IN_ISNANL: - case BUILT_IN_ISNAND32: - case BUILT_IN_ISNAND64: - case BUILT_IN_ISNAND128: - case BUILT_IN_ISNORMAL: - case BUILT_IN_ISGREATER: - case BUILT_IN_ISGREATEREQUAL: - case BUILT_IN_ISLESS: - case BUILT_IN_ISLESSEQUAL: - case BUILT_IN_ISLESSGREATER: - case BUILT_IN_ISUNORDERED: - case BUILT_IN_VA_ARG_PACK: - case BUILT_IN_VA_ARG_PACK_LEN: - case BUILT_IN_VA_COPY: - case BUILT_IN_TRAP: - case BUILT_IN_SAVEREGS: - case BUILT_IN_POPCOUNTL: - case BUILT_IN_POPCOUNTLL: - case BUILT_IN_POPCOUNTIMAX: - case BUILT_IN_POPCOUNT: - case BUILT_IN_PARITYL: - case BUILT_IN_PARITYLL: - case BUILT_IN_PARITYIMAX: - case BUILT_IN_PARITY: - case BUILT_IN_LABS: - case BUILT_IN_LLABS: - case BUILT_IN_PREFETCH: - cost = weights->target_builtin_call_cost; - break; - - default: - break; - } - if (decl) funtype = TREE_TYPE (decl); diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c index c0a2194cfd0..83ec13e4a76 100644 --- a/gcc/tree-ssa-loop-ivopts.c +++ b/gcc/tree-ssa-loop-ivopts.c @@ -260,6 +260,9 @@ struct ivopts_data /* Are we optimizing for speed? */ bool speed; + + /* Whether the loop body includes any function calls. */ + bool body_includes_call; }; /* An assignment of iv candidates to uses. */ @@ -4431,7 +4434,8 @@ ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size) { /* We add size to the cost, so that we prefer eliminating ivs if possible. */ - return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed); + return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed, + data->body_includes_call); } /* For each size of the induction variable set determine the penalty. */ @@ -4450,6 +4454,7 @@ determine_set_costs (struct ivopts_data *data) { fprintf (dump_file, "Global costs:\n"); fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs); + fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs); fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]); fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]); } @@ -5859,6 +5864,25 @@ tree_ssa_iv_optimize_finalize (struct ivopts_data *data) VEC_free (iv_cand_p, heap, data->iv_candidates); } +/* Returns true if the loop body BODY includes any function calls. */ + +static bool +loop_body_includes_call (basic_block *body, unsigned num_nodes) +{ + gimple_stmt_iterator gsi; + unsigned i; + + for (i = 0; i < num_nodes; i++) + for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple stmt = gsi_stmt (gsi); + if (is_gimple_call (stmt) + && !is_inexpensive_builtin (gimple_call_fndecl (stmt))) + return true; + } + return false; +} + /* Optimizes the LOOP. Returns true if anything changed. */ static bool @@ -5890,6 +5914,7 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop) } body = get_loop_body (loop); + data->body_includes_call = loop_body_includes_call (body, loop->num_nodes); renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes); free (body); diff --git a/gcc/tree.h b/gcc/tree.h index e749d696ec4..960ee7d8e76 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -5046,6 +5046,8 @@ extern tree build_range_check (location_t, tree, tree, int, tree, tree); extern bool merge_ranges (int *, tree *, tree *, int, tree, tree, int, tree, tree); extern void set_builtin_user_assembler_name (tree decl, const char *asmspec); +extern bool is_simple_builtin (tree); +extern bool is_inexpensive_builtin (tree); /* In convert.c */ extern tree strip_float_extensions (tree); |