diff options
Diffstat (limited to 'gcc/tree-ssa-math-opts.c')
-rw-r--r-- | gcc/tree-ssa-math-opts.c | 521 |
1 files changed, 391 insertions, 130 deletions
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c index c22a677e80c..028a8320671 100644 --- a/gcc/tree-ssa-math-opts.c +++ b/gcc/tree-ssa-math-opts.c @@ -87,46 +87,22 @@ along with GCC; see the file COPYING3. If not see #include "config.h" #include "system.h" #include "coretypes.h" -#include "tm.h" +#include "backend.h" +#include "predict.h" +#include "tree.h" +#include "gimple.h" +#include "rtl.h" +#include "ssa.h" #include "flags.h" -#include "hash-set.h" -#include "machmode.h" -#include "vec.h" -#include "double-int.h" -#include "input.h" #include "alias.h" -#include "symtab.h" -#include "wide-int.h" -#include "inchash.h" -#include "tree.h" #include "fold-const.h" -#include "predict.h" -#include "hard-reg-set.h" -#include "function.h" -#include "dominance.h" -#include "cfg.h" -#include "basic-block.h" -#include "tree-ssa-alias.h" #include "internal-fn.h" #include "gimple-fold.h" -#include "gimple-expr.h" -#include "is-a.h" -#include "gimple.h" #include "gimple-iterator.h" #include "gimplify.h" #include "gimplify-me.h" #include "stor-layout.h" -#include "gimple-ssa.h" #include "tree-cfg.h" -#include "tree-phinodes.h" -#include "ssa-iterators.h" -#include "stringpool.h" -#include "tree-ssanames.h" -#include "hashtab.h" -#include "rtl.h" -#include "statistics.h" -#include "real.h" -#include "fixed-value.h" #include "insn-config.h" #include "expmed.h" #include "dojump.h" @@ -143,9 +119,9 @@ along with GCC; see the file COPYING3. If not see #include "target.h" #include "gimple-pretty-print.h" #include "builtins.h" +#include "params.h" /* FIXME: RTL headers have to be included here for optabs. */ -#include "rtl.h" /* Because optabs.h wants enum rtx_code. */ #include "expr.h" /* Because optabs.h wants sepops. */ #include "insn-codes.h" #include "optabs.h" @@ -228,7 +204,7 @@ static struct static struct occurrence *occ_head; /* Allocation pool for getting instances of "struct occurrence". */ -static alloc_pool occ_pool; +static object_allocator<occurrence> *occ_pool; @@ -239,7 +215,7 @@ occ_new (basic_block bb, struct occurrence *children) { struct occurrence *occ; - bb->aux = occ = (struct occurrence *) pool_alloc (occ_pool); + bb->aux = occ = occ_pool->allocate (); memset (occ, 0, sizeof (struct occurrence)); occ->bb = bb; @@ -467,7 +443,7 @@ free_bb (struct occurrence *occ) next = occ->next; child = occ->children; occ->bb->aux = NULL; - pool_free (occ_pool, occ); + occ_pool->remove (occ); /* Now ensure that we don't recurse unless it is necessary. */ if (!child) @@ -571,9 +547,8 @@ pass_cse_reciprocals::execute (function *fun) basic_block bb; tree arg; - occ_pool = create_alloc_pool ("dominators for recip", - sizeof (struct occurrence), - n_basic_blocks_for_fn (fun) / 3 + 1); + occ_pool = new object_allocator<occurrence> + ("dominators for recip", n_basic_blocks_for_fn (fun) / 3 + 1); memset (&reciprocal_stats, 0, sizeof (reciprocal_stats)); calculate_dominance_info (CDI_DOMINATORS); @@ -703,7 +678,7 @@ pass_cse_reciprocals::execute (function *fun) free_dominance_info (CDI_DOMINATORS); free_dominance_info (CDI_POST_DOMINATORS); - free_alloc_pool (occ_pool); + delete occ_pool; return 0; } @@ -1148,6 +1123,357 @@ build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc, return result; } +struct pow_synth_sqrt_info +{ + bool *factors; + unsigned int deepest; + unsigned int num_mults; +}; + +/* Return true iff the real value C can be represented as a + sum of powers of 0.5 up to N. That is: + C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1. + Record in INFO the various parameters of the synthesis algorithm such + as the factors a[i], the maximum 0.5 power and the number of + multiplications that will be required. */ + +bool +representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n, + struct pow_synth_sqrt_info *info) +{ + REAL_VALUE_TYPE factor = dconsthalf; + REAL_VALUE_TYPE remainder = c; + + info->deepest = 0; + info->num_mults = 0; + memset (info->factors, 0, n * sizeof (bool)); + + for (unsigned i = 0; i < n; i++) + { + REAL_VALUE_TYPE res; + + /* If something inexact happened bail out now. */ + if (REAL_ARITHMETIC (res, MINUS_EXPR, remainder, factor)) + return false; + + /* We have hit zero. The number is representable as a sum + of powers of 0.5. */ + if (REAL_VALUES_EQUAL (res, dconst0)) + { + info->factors[i] = true; + info->deepest = i + 1; + return true; + } + else if (!REAL_VALUE_NEGATIVE (res)) + { + remainder = res; + info->factors[i] = true; + info->num_mults++; + } + else + info->factors[i] = false; + + REAL_ARITHMETIC (factor, MULT_EXPR, factor, dconsthalf); + } + return false; +} + +/* Return the tree corresponding to FN being applied + to ARG N times at GSI and LOC. + Look up previous results from CACHE if need be. + cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */ + +static tree +get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi, + tree fn, location_t loc, tree *cache) +{ + tree res = cache[n]; + if (!res) + { + tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache); + res = build_and_insert_call (gsi, loc, fn, prev); + cache[n] = res; + } + + return res; +} + +/* Print to STREAM the repeated application of function FNAME to ARG + N times. So, for FNAME = "foo", ARG = "x", N = 2 it would print: + "foo (foo (x))". */ + +static void +print_nested_fn (FILE* stream, const char *fname, const char* arg, + unsigned int n) +{ + if (n == 0) + fprintf (stream, "%s", arg); + else + { + fprintf (stream, "%s (", fname); + print_nested_fn (stream, fname, arg, n - 1); + fprintf (stream, ")"); + } +} + +/* Print to STREAM the fractional sequence of sqrt chains + applied to ARG, described by INFO. Used for the dump file. */ + +static void +dump_fractional_sqrt_sequence (FILE *stream, const char *arg, + struct pow_synth_sqrt_info *info) +{ + for (unsigned int i = 0; i < info->deepest; i++) + { + bool is_set = info->factors[i]; + if (is_set) + { + print_nested_fn (stream, "sqrt", arg, i + 1); + if (i != info->deepest - 1) + fprintf (stream, " * "); + } + } +} + +/* Print to STREAM a representation of raising ARG to an integer + power N. Used for the dump file. */ + +static void +dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n) +{ + if (n > 1) + fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n); + else if (n == 1) + fprintf (stream, "%s", arg); +} + +/* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of + square roots. Place at GSI and LOC. Limit the maximum depth + of the sqrt chains to MAX_DEPTH. Return the tree holding the + result of the expanded sequence or NULL_TREE if the expansion failed. + + This routine assumes that ARG1 is a real number with a fractional part + (the integer exponent case will have been handled earlier in + gimple_expand_builtin_pow). + + For ARG1 > 0.0: + * For ARG1 composed of a whole part WHOLE_PART and a fractional part + FRAC_PART i.e. WHOLE_PART == floor (ARG1) and + FRAC_PART == ARG1 - WHOLE_PART: + Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where + POW (ARG0, FRAC_PART) is expanded as a product of square root chains + if it can be expressed as such, that is if FRAC_PART satisfies: + FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i)) + where integer a[i] is either 0 or 1. + + Example: + POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625) + --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x))) + + For ARG1 < 0.0 there are two approaches: + * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1) + is calculated as above. + + Example: + POW (x, -5.625) == 1.0 / POW (x, 5.625) + --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x)))) + + * (B) : WHOLE_PART := - ceil (abs (ARG1)) + FRAC_PART := ARG1 - WHOLE_PART + and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART). + Example: + POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6) + --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6)) + + For ARG1 < 0.0 we choose between (A) and (B) depending on + how many multiplications we'd have to do. + So, for the example in (B): POW (x, -5.875), if we were to + follow algorithm (A) we would produce: + 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X))) + which contains more multiplications than approach (B). + + Hopefully, this approach will eliminate potentially expensive POW library + calls when unsafe floating point math is enabled and allow the compiler to + further optimise the multiplies, square roots and divides produced by this + function. */ + +static tree +expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc, + tree arg0, tree arg1, HOST_WIDE_INT max_depth) +{ + tree type = TREE_TYPE (arg0); + machine_mode mode = TYPE_MODE (type); + tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT); + bool one_over = true; + + if (!sqrtfn) + return NULL_TREE; + + if (TREE_CODE (arg1) != REAL_CST) + return NULL_TREE; + + REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1); + + gcc_assert (max_depth > 0); + tree *cache = XALLOCAVEC (tree, max_depth + 1); + + struct pow_synth_sqrt_info synth_info; + synth_info.factors = XALLOCAVEC (bool, max_depth + 1); + synth_info.deepest = 0; + synth_info.num_mults = 0; + + bool neg_exp = REAL_VALUE_NEGATIVE (exp_init); + REAL_VALUE_TYPE exp = real_value_abs (&exp_init); + + /* The whole and fractional parts of exp. */ + REAL_VALUE_TYPE whole_part; + REAL_VALUE_TYPE frac_part; + + real_floor (&whole_part, mode, &exp); + REAL_ARITHMETIC (frac_part, MINUS_EXPR, exp, whole_part); + + + REAL_VALUE_TYPE ceil_whole = dconst0; + REAL_VALUE_TYPE ceil_fract = dconst0; + + if (neg_exp) + { + real_ceil (&ceil_whole, mode, &exp); + REAL_ARITHMETIC (ceil_fract, MINUS_EXPR, ceil_whole, exp); + } + + if (!representable_as_half_series_p (frac_part, max_depth, &synth_info)) + return NULL_TREE; + + /* Check whether it's more profitable to not use 1.0 / ... */ + if (neg_exp) + { + struct pow_synth_sqrt_info alt_synth_info; + alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1); + alt_synth_info.deepest = 0; + alt_synth_info.num_mults = 0; + + if (representable_as_half_series_p (ceil_fract, max_depth, + &alt_synth_info) + && alt_synth_info.deepest <= synth_info.deepest + && alt_synth_info.num_mults < synth_info.num_mults) + { + whole_part = ceil_whole; + frac_part = ceil_fract; + synth_info.deepest = alt_synth_info.deepest; + synth_info.num_mults = alt_synth_info.num_mults; + memcpy (synth_info.factors, alt_synth_info.factors, + (max_depth + 1) * sizeof (bool)); + one_over = false; + } + } + + HOST_WIDE_INT n = real_to_integer (&whole_part); + REAL_VALUE_TYPE cint; + real_from_integer (&cint, VOIDmode, n, SIGNED); + + if (!real_identical (&whole_part, &cint)) + return NULL_TREE; + + if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS) + return NULL_TREE; + + memset (cache, 0, (max_depth + 1) * sizeof (tree)); + + tree integer_res = n == 0 ? build_real (type, dconst1) : arg0; + + /* Calculate the integer part of the exponent. */ + if (n > 1) + { + integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n); + if (!integer_res) + return NULL_TREE; + } + + if (dump_file) + { + char string[64]; + + real_to_decimal (string, &exp_init, sizeof (string), 0, 1); + fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string); + + if (neg_exp) + { + if (one_over) + { + fprintf (dump_file, "1.0 / ("); + dump_integer_part (dump_file, "x", n); + if (n > 0) + fprintf (dump_file, " * "); + dump_fractional_sqrt_sequence (dump_file, "x", &synth_info); + fprintf (dump_file, ")"); + } + else + { + dump_fractional_sqrt_sequence (dump_file, "x", &synth_info); + fprintf (dump_file, " / ("); + dump_integer_part (dump_file, "x", n); + fprintf (dump_file, ")"); + } + } + else + { + dump_fractional_sqrt_sequence (dump_file, "x", &synth_info); + if (n > 0) + fprintf (dump_file, " * "); + dump_integer_part (dump_file, "x", n); + } + + fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest); + } + + + tree fract_res = NULL_TREE; + cache[0] = arg0; + + /* Calculate the fractional part of the exponent. */ + for (unsigned i = 0; i < synth_info.deepest; i++) + { + if (synth_info.factors[i]) + { + tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache); + + if (!fract_res) + fract_res = sqrt_chain; + + else + fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR, + fract_res, sqrt_chain); + } + } + + tree res = NULL_TREE; + + if (neg_exp) + { + if (one_over) + { + if (n > 0) + res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR, + fract_res, integer_res); + else + res = fract_res; + + res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR, + build_real (type, dconst1), res); + } + else + { + res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR, + fract_res, integer_res); + } + } + else + res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR, + fract_res, integer_res); + return res; +} + /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI with location info LOC. If possible, create an equivalent and less expensive sequence of statements prior to GSI, and return an @@ -1157,13 +1483,17 @@ static tree gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc, tree arg0, tree arg1) { - REAL_VALUE_TYPE c, cint, dconst1_4, dconst3_4, dconst1_3, dconst1_6; + REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6; REAL_VALUE_TYPE c2, dconst3; HOST_WIDE_INT n; - tree type, sqrtfn, cbrtfn, sqrt_arg0, sqrt_sqrt, result, cbrt_x, powi_cbrt_x; + tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x; machine_mode mode; + bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi)); bool hw_sqrt_exists, c_is_int, c2_is_int; + dconst1_4 = dconst1; + SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2); + /* If the exponent isn't a constant, there's nothing of interest to be done. */ if (TREE_CODE (arg1) != REAL_CST) @@ -1179,7 +1509,7 @@ gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc, if (c_is_int && ((n >= -1 && n <= 2) || (flag_unsafe_math_optimizations - && optimize_bb_for_speed_p (gsi_bb (*gsi)) + && speed_p && powi_cost (n) <= POWI_MAX_MULTS))) return gimple_expand_builtin_powi (gsi, loc, arg0, n); @@ -1196,49 +1526,8 @@ gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc, && !HONOR_SIGNED_ZEROS (mode)) return build_and_insert_call (gsi, loc, sqrtfn, arg0); - /* Optimize pow(x,0.25) = sqrt(sqrt(x)). Assume on most machines that - a builtin sqrt instruction is smaller than a call to pow with 0.25, - so do this optimization even if -Os. Don't do this optimization - if we don't have a hardware sqrt insn. */ - dconst1_4 = dconst1; - SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2); hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing; - if (flag_unsafe_math_optimizations - && sqrtfn - && REAL_VALUES_EQUAL (c, dconst1_4) - && hw_sqrt_exists) - { - /* sqrt(x) */ - sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0); - - /* sqrt(sqrt(x)) */ - return build_and_insert_call (gsi, loc, sqrtfn, sqrt_arg0); - } - - /* Optimize pow(x,0.75) = sqrt(x) * sqrt(sqrt(x)) unless we are - optimizing for space. Don't do this optimization if we don't have - a hardware sqrt insn. */ - real_from_integer (&dconst3_4, VOIDmode, 3, SIGNED); - SET_REAL_EXP (&dconst3_4, REAL_EXP (&dconst3_4) - 2); - - if (flag_unsafe_math_optimizations - && sqrtfn - && optimize_function_for_speed_p (cfun) - && REAL_VALUES_EQUAL (c, dconst3_4) - && hw_sqrt_exists) - { - /* sqrt(x) */ - sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0); - - /* sqrt(sqrt(x)) */ - sqrt_sqrt = build_and_insert_call (gsi, loc, sqrtfn, sqrt_arg0); - - /* sqrt(x) * sqrt(sqrt(x)) */ - return build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR, - sqrt_arg0, sqrt_sqrt); - } - /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math optimizations since 1./3. is not exactly representable. If x is negative and finite, the correct value of pow(x,1./3.) is @@ -1263,7 +1552,7 @@ gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc, && sqrtfn && cbrtfn && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode)) - && optimize_function_for_speed_p (cfun) + && speed_p && hw_sqrt_exists && REAL_VALUES_EQUAL (c, dconst1_6)) { @@ -1274,54 +1563,31 @@ gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc, return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0); } - /* Optimize pow(x,c), where n = 2c for some nonzero integer n - and c not an integer, into - - sqrt(x) * powi(x, n/2), n > 0; - 1.0 / (sqrt(x) * powi(x, abs(n/2))), n < 0. - - Do not calculate the powi factor when n/2 = 0. */ - real_arithmetic (&c2, MULT_EXPR, &c, &dconst2); - n = real_to_integer (&c2); - real_from_integer (&cint, VOIDmode, n, SIGNED); - c2_is_int = real_identical (&c2, &cint); + /* Attempt to expand the POW as a product of square root chains. + Expand the 0.25 case even when otpimising for size. */ if (flag_unsafe_math_optimizations && sqrtfn - && c2_is_int - && !c_is_int - && optimize_function_for_speed_p (cfun)) + && hw_sqrt_exists + && (speed_p || REAL_VALUES_EQUAL (c, dconst1_4)) + && !HONOR_SIGNED_ZEROS (mode)) { - tree powi_x_ndiv2 = NULL_TREE; - - /* Attempt to fold powi(arg0, abs(n/2)) into multiplies. If not - possible or profitable, give up. Skip the degenerate case when - n is 1 or -1, where the result is always 1. */ - if (absu_hwi (n) != 1) - { - powi_x_ndiv2 = gimple_expand_builtin_powi (gsi, loc, arg0, - abs_hwi (n / 2)); - if (!powi_x_ndiv2) - return NULL_TREE; - } + unsigned int max_depth = speed_p + ? PARAM_VALUE (PARAM_MAX_POW_SQRT_DEPTH) + : 2; - /* Calculate sqrt(x). When n is not 1 or -1, multiply it by the - result of the optimal multiply sequence just calculated. */ - sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0); - - if (absu_hwi (n) == 1) - result = sqrt_arg0; - else - result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR, - sqrt_arg0, powi_x_ndiv2); + tree expand_with_sqrts + = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth); - /* If n is negative, reciprocate the result. */ - if (n < 0) - result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR, - build_real (type, dconst1), result); - return result; + if (expand_with_sqrts) + return expand_with_sqrts; } + real_arithmetic (&c2, MULT_EXPR, &c, &dconst2); + n = real_to_integer (&c2); + real_from_integer (&cint, VOIDmode, n, SIGNED); + c2_is_int = real_identical (&c2, &cint); + /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into powi(x, n/3) * powi(cbrt(x), n%3), n > 0; @@ -2721,13 +2987,8 @@ is_widening_mult_p (gimple stmt, /* Ensure that the larger of the two operands comes first. */ if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out)) { - tree tmp; - tmp = *type1_out; - *type1_out = *type2_out; - *type2_out = tmp; - tmp = *rhs1_out; - *rhs1_out = *rhs2_out; - *rhs2_out = tmp; + std::swap (*type1_out, *type2_out); + std::swap (*rhs1_out, *rhs2_out); } return true; |