diff options
author | Topi Pohjolainen <topi.pohjolainen@intel.com> | 2014-10-26 19:51:00 +0200 |
---|---|---|
committer | Topi Pohjolainen <topi.pohjolainen@intel.com> | 2014-11-11 11:15:02 +0200 |
commit | 79ef2470a9f21a16d3b2c06f073b0acc7988a30b (patch) | |
tree | 66efe718490a9bf6ba8c6238090625e484776b60 | |
parent | 2516e15cfb58f5796ab6b6d5d5f7e173539f60ee (diff) | |
download | mesa-79ef2470a9f21a16d3b2c06f073b0acc7988a30b.tar.gz |
glsl: Add lowering pass approximating sqrt(double) with sqrt(float)
Signed-off-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
-rw-r--r-- | src/glsl/ir_optimization.h | 1 | ||||
-rw-r--r-- | src/glsl/lower_instructions.cpp | 212 |
2 files changed, 213 insertions, 0 deletions
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index 54a817eb8b6..180ae6f0aaf 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -43,6 +43,7 @@ #define SAT_TO_CLAMP 0x800 #define DOPS_TO_DFRAC 0x1000 #define DFREXP_DLDEXP_TO_ARITH 0x2000 +#define DSQRT_TO_FSQRT 0x4000 /** * \see class lower_packing_builtins_visitor diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp index 17592d6ca0f..ce98188463c 100644 --- a/src/glsl/lower_instructions.cpp +++ b/src/glsl/lower_instructions.cpp @@ -44,6 +44,7 @@ * - BORROW_TO_ARITH * - SAT_TO_CLAMP * - DOPS_TO_DFRAC + * - DSQRT_TO_FSQRT * * SUB_TO_ADD_NEG: * --------------- @@ -119,6 +120,11 @@ * DOPS_TO_DFRAC: * -------------- * Converts double trunc, ceil, floor, round to fract + * + * DSQRT_TO_FSQRT + * -------------- + * Splits double square root into exponent division and single precision + * square root. */ #include "main/core.h" /* for M_LOG2E */ @@ -156,6 +162,7 @@ private: void dldexp_to_arith(ir_expression *); void dfrexp_sig_to_arith(ir_expression *); void dfrexp_exp_to_arith(ir_expression *); + void dsqrt_to_fsqrt(ir_expression *); void carry_to_arith(ir_expression *); void borrow_to_arith(ir_expression *); void sat_to_clamp(ir_expression *); @@ -166,6 +173,14 @@ private: void dround_even_to_dfrac(ir_expression *); void dtrunc_to_dfrac(ir_expression *); void dsign_to_csel(ir_expression *); + + ir_variable *div_dfloat_exp_by_two(ir_expression *ir, + ir_rvalue *high_32); + ir_variable * dfloat_exp_to_float_exp(ir_expression *ir, + ir_rvalue *high_32); + ir_variable *dfloat_frac_to_float_frac(ir_expression *ir, + ir_rvalue *high_32, + ir_rvalue *low_32); }; } /* anonymous namespace */ @@ -731,6 +746,198 @@ lower_instructions_visitor::dfrexp_exp_to_arith(ir_expression *ir) this->progress = true; } +/** + * Extract even part of the given double precision float exponent and + * divide it by two: + * exponent = (((high_32_bits >> 20) & 0x7ff) + 1023) >> 1 + */ +ir_variable * +lower_instructions_visitor::div_dfloat_exp_by_two(ir_expression *ir, + ir_rvalue *high_32) +{ + ir_constant *exp_mask = new(ir) ir_constant(0x7ffu); + ir_constant *exp_shift = new(ir) ir_constant(20u); + ir_constant *exp_bias = new(ir) ir_constant(1023u); + ir_constant *exp_shift_by_1 = new(ir) ir_constant(1u); + + ir_variable *exp = new(ir) ir_variable( + glsl_type::uint_type, "exponent", ir_var_temporary); + + base_ir->insert_before(exp); + base_ir->insert_before( + assign( + exp, + rshift(add(bit_and(rshift(high_32, exp_shift), exp_mask), exp_bias), + exp_shift_by_1))); + + return exp; +} + +/** + * Set the biased exponent for the single precision. This will be modulo-2 + * of the incoming exponent including the sign. + * + * In single precision biased exponents are: + * -1 126 0x3F000000 + * 0 127 0x3F800000 + * 1 128 0x40000000 + * + * mod_2 = (high_32_bits & 0x100000) << 3 + * + * exp = (mod_2 | 0x3F000000) + ((hi & 0x40000000) >> 6) + */ +ir_variable * +lower_instructions_visitor::dfloat_exp_to_float_exp(ir_expression *ir, + ir_rvalue *high_32) +{ + ir_constant *exp_mask = new(ir) ir_constant(0x100000u); + ir_constant *exp_shift = new(ir) ir_constant(3u); + ir_constant *exp_minus_one = new(ir) ir_constant(0x3F000000u); + ir_constant *exp_sign_mask = new(ir) ir_constant(0x40000000u); + ir_constant *exp_sign_shift = new(ir) ir_constant(6u); + ir_variable *exp = new(ir) ir_variable( + glsl_type::uint_type, "single_precision_exp", ir_var_temporary); + + base_ir->insert_before(exp); + base_ir->insert_before( + assign( + exp, + add(bit_or(lshift(bit_and(high_32->clone(ir, NULL), exp_mask), + exp_shift), + exp_minus_one), + rshift(bit_and(high_32->clone(ir, NULL), exp_sign_mask), + exp_sign_shift)))); + + return exp; +} + +/** + * Construct a single precision float containing the fraction part and + * uneven part of the exponent of a double precision float. + * + * fraction = high_32_bits & 0xfffff + * fraction = (fraction << 3) | (low_32_bits >> 29) | f_exp + */ +ir_variable * +lower_instructions_visitor::dfloat_frac_to_float_frac(ir_expression *ir, + ir_rvalue *high_32, + ir_rvalue *low_32) +{ + ir_constant *frac_mask = new(ir) ir_constant(0xfffffu); + ir_constant *frac_hi_shift = new(ir) ir_constant(3u); + ir_constant *frac_lo_shift = new(ir) ir_constant(29u); + ir_variable *f_exp = dfloat_exp_to_float_exp(ir, high_32); + ir_variable *frac = new(ir) ir_variable( + glsl_type::uint_type, "fraction", ir_var_temporary); + + base_ir->insert_before(frac); + base_ir->insert_before( + assign( + frac, + bit_or(bit_or(lshift(bit_and(high_32->clone(ir, NULL), frac_mask), + frac_hi_shift), + rshift(low_32, frac_lo_shift)), + f_exp))); + + return frac; +} + +/** + * Representation of a double precision floating point number in 64-bits: + * + * 6 6 5 5 + * 3 2 2 1 0 + * +-+--------+---------------------+ + * |S| E | M | + * +-+--------+---------------------+ + * + * giving value = -1^S * (1.0 + 0.M) * 2^(E - bias) where bias is 1023. + * + * The 11 exponent bits are further divided into the most significant bit + * representing sign and lower 10 representing magnitude. + * + * Now let frac = 1.0 + 0.M, exp_s = -1^(S_E - 1) where S_E is the sign of the + * exponent and exp = abs(E - bias). + * + * frac * 2^(exp_s * exp) + * = frac * 2^(exp_s * (2 * floor(exp / 2))) * 2^(exp_s * (exp % 2)) + * = frac * 2^(exp_s * (exp % 2)) * 2^(2 * (exp_s * floor(exp / 2))) + * + * Now for the root square the even part of the exponent can be simply + * divided by two giving: + * + * sqrt(frac * 2^(exp_s * exp)) + * = sqrt(frac * 2^(exp_s * (exp % 2))) * 2^(exp_s * floor(exp / 2)) + * + * In turn "frac * 2^(exp_s * (exp % 2))" is now within the range of single + * precision floating point number. Hence it can be represented with 32-bits + * with precision reduced from 52 bits to 23. + */ +void +lower_instructions_visitor::dsqrt_to_fsqrt(ir_expression *ir) +{ + assert(ir->operands[0]->type->is_double()); + + const unsigned vec_elem = ir->type->vector_elements; + + ir_rvalue *results[4] = {NULL}; + + for (unsigned elem = 0; elem < vec_elem; elem++) { + ir_variable *unpacked = new(ir) ir_variable(glsl_type::uvec2_type, + "unpacked", + ir_var_temporary); + + base_ir->insert_before(unpacked); + base_ir->insert_before( + assign(unpacked, + expr(ir_unop_unpack_double_2x32, + swizzle(ir->operands[0]->clone(ir, NULL), elem, 1)))); + ir_rvalue *lo = swizzle_x(unpacked); + ir_rvalue *hi = swizzle_y(unpacked); + + ir_variable *exp = div_dfloat_exp_by_two(ir, hi); + ir_variable *frac = dfloat_frac_to_float_frac(ir, hi, lo); + ir_constant *frac_f_mask = new(ir) ir_constant(0x7fffffu); + ir_variable *frac_sqrt = new(ir) ir_variable( + glsl_type::uint_type, "sqrt_of_fraction", ir_var_temporary); + + /* Extract the fraction part: fraction &= 0x7FFFFF */ + base_ir->insert_before(frac_sqrt); + base_ir->insert_before( + assign(frac_sqrt, + bit_and(bitcast_f2u(sqrt(bitcast_u2f(frac))), frac_f_mask))); + + ir_constant *exp_shift = new(ir) ir_constant(20u); + ir_constant *frac_hi_shift = new(ir) ir_constant(3u); + ir_constant *frac_lo_mask = new(ir) ir_constant(3u); + ir_constant *frac_lo_shift = new(ir) ir_constant(29u); + + /* unpacked[1] = (exponent << 20) | (fraction >> 3) */ + base_ir->insert_before( + assign(unpacked, + bit_or(lshift(exp, exp_shift), + rshift(frac_sqrt, frac_hi_shift)), + WRITEMASK_Y)); + + /* unpacked[0] = (fraction & 0x3) << 29 */ + base_ir->insert_before( + assign(unpacked, + lshift(bit_and(frac_sqrt, frac_lo_mask), frac_lo_shift), + WRITEMASK_X)); + + results[elem] = expr(ir_unop_pack_double_2x32, unpacked); + } + + /* Put the dvec back together */ + ir->operation = ir_quadop_vector; + ir->operands[0] = results[0]; + ir->operands[1] = results[1]; + ir->operands[2] = results[2]; + ir->operands[3] = results[3]; + + this->progress = true; +} + void lower_instructions_visitor::carry_to_arith(ir_expression *ir) { @@ -1057,6 +1264,11 @@ lower_instructions_visitor::visit_leave(ir_expression *ir) div_to_mul_rcp(ir); break; + case ir_unop_sqrt: + if (lowering(DSQRT_TO_FSQRT)) + dsqrt_to_fsqrt(ir); + break; + case ir_unop_exp: if (lowering(EXP_TO_EXP2)) exp_to_exp2(ir); |