diff options
author | vlefevre <vlefevre@280ebfd0-de03-0410-8827-d642c229c3f4> | 2017-05-04 09:40:05 +0000 |
---|---|---|
committer | vlefevre <vlefevre@280ebfd0-de03-0410-8827-d642c229c3f4> | 2017-05-04 09:40:05 +0000 |
commit | af5a1593331d686b9cc5fbbbbdc47e1733a4644e (patch) | |
tree | ff8210e41ae8ced432dbcd42e8be2a919f8dddc6 /src/sqrt.c | |
parent | 87ff38458263c9a9ed79a7ebd547fd32a66ae843 (diff) | |
parent | d79a8111e6b7851b15bac211d8dca0e67a2979b5 (diff) | |
download | mpfr-af5a1593331d686b9cc5fbbbbdc47e1733a4644e.tar.gz |
Merged the latest changes from the trunk, including some old changesets
related to mpfr_zeta that were skipped, resolving conflicts. Added RNDF
support to new code introduced by this merge:
* mpfr_mul_1n in src/mul.c (from r11281);
* mpfr_sqr_1n in src/sqr.c (from r11283);
* mpfr_div_1n in src/div.c (from r11284);
* mpfr_sqrt1n in src/sqrt.c (from r11293).
git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/branches/faithful@11456 280ebfd0-de03-0410-8827-d642c229c3f4
Diffstat (limited to 'src/sqrt.c')
-rw-r--r-- | src/sqrt.c | 167 |
1 files changed, 154 insertions, 13 deletions
diff --git a/src/sqrt.c b/src/sqrt.c index fa30defc5..9b487af65 100644 --- a/src/sqrt.c +++ b/src/sqrt.c @@ -127,7 +127,8 @@ mpfr_sqrt1 (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode) sb |= (r0 & mask) ^ rb; rp[0] = r0 & ~mask; - /* rounding */ + /* rounding: sb = 0 implies rb = 0, since (rb,sb)=(1,0) is not possible */ + MPFR_ASSERTD (rb == 0 || sb != 0); /* Note: if 1 and 2 are in [emin,emax], no overflow nor underflow is possible */ @@ -154,16 +155,18 @@ mpfr_sqrt1 (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode) rounding: MPFR_EXP (r) = exp_r; - if ((rb == 0 && sb == 0) || (rnd_mode == MPFR_RNDF)) + if (sb == 0 /* implies rb = 0 */ || rnd_mode == MPFR_RNDF) { + MPFR_ASSERTD (rb == 0 || rnd_mode == MPFR_RNDF); MPFR_ASSERTD(exp_r >= __gmpfr_emin); MPFR_ASSERTD(exp_r <= __gmpfr_emax); return 0; /* idem than MPFR_RET(0) but faster */ } else if (rnd_mode == MPFR_RNDN) { - if (rb == 0 || (rb && sb == 0 && - (rp[0] & (MPFR_LIMB_ONE << sh)) == 0)) + /* since sb <> 0, only rb is needed to decide how to round, and the exact + middle is not possible */ + if (rb == 0) goto truncate; else goto add_one_ulp; @@ -192,6 +195,133 @@ mpfr_sqrt1 (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode) } } +/* Special code for prec(r) = GMP_NUMB_BITS and prec(u) <= GMP_NUMB_BITS. */ +static int +mpfr_sqrt1n (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode) +{ + mpfr_prec_t exp_u = MPFR_EXP(u), exp_r; + mp_limb_t u0, r0, rb, sb, low; + mpfr_limb_ptr rp = MPFR_MANT(r); + + MPFR_STAT_STATIC_ASSERT (GMP_NUMB_BITS == 64); + MPFR_ASSERTD(MPFR_PREC(r) == GMP_NUMB_BITS); + MPFR_ASSERTD(MPFR_PREC(u) <= GMP_NUMB_BITS); + + /* first make the exponent even */ + u0 = MPFR_MANT(u)[0]; + if (((unsigned int) exp_u & 1) != 0) + { + low = u0 << (GMP_NUMB_BITS - 1); + u0 >>= 1; + exp_u ++; + } + else + low = 0; /* low part of u0 */ + MPFR_ASSERTD (((unsigned int) exp_u & 1) == 0); + exp_r = exp_u / 2; + + /* then compute an approximation of the integer square root of + u0*2^GMP_NUMB_BITS */ + __gmpfr_sqrt_limb_approx (r0, u0); + + /* the exact square root is in [r0, r0 + 7] */ + + /* first ensure r0 has its most significant bit set */ + if (MPFR_UNLIKELY(r0 < MPFR_LIMB_HIGHBIT)) + r0 = MPFR_LIMB_HIGHBIT; + + umul_ppmm (rb, sb, r0, r0); + sub_ddmmss (rb, sb, u0, low, rb, sb); + /* for the exact square root, we should have 0 <= rb:sb <= 2*r0 */ + while (!(rb == 0 || (rb == 1 && sb <= 2 * r0))) + { + /* subtract 2*r0+1 from rb:sb: subtract r0 before incrementing r0, + then r0 after (which is r0+1) */ + rb -= (sb < r0); + sb -= r0; + r0 ++; + rb -= (sb < r0); + sb -= r0; + } + /* now we have u0*2^64+low = r0^2 + rb*2^64+sb, with rb*2^64+sb <= 2*r0 */ + MPFR_ASSERTD(rb == 0 || (rb == 1 && sb <= 2 * r0)); + + /* We can't have the middle case u0*2^64 = (r0 + 1/2)^2 since + (r0 + 1/2)^2 is not an integer. + We thus rb = 1 whenever u0*2^64 > (r0 + 1/2)^2, thus rb*2^64 + sb > r0 + and the sticky bit is always 1, unless we had rb = sb = 0. */ + + rb = rb || (sb > r0); + sb = rb | sb; + rp[0] = r0; + + /* rounding */ + + /* Note: if 1 and 2 are in [emin,emax], no overflow nor underflow + is possible */ + if (MPFR_UNLIKELY (exp_r > __gmpfr_emax)) + return mpfr_overflow (r, rnd_mode, 1); + + /* See comments in mpfr_div_1 */ + if (MPFR_UNLIKELY (exp_r < __gmpfr_emin)) + { + if (rnd_mode == MPFR_RNDN) + { + /* the case rp[0] = 111...111 and rb = 1 cannot happen, since it + would imply u0 >= (2^64-1/2)^2/2^64 thus u0 >= 2^64 */ + if (exp_r < __gmpfr_emin - 1 || (rp[0] == MPFR_LIMB_HIGHBIT && sb == 0)) + rnd_mode = MPFR_RNDZ; + } + else if (MPFR_IS_LIKE_RNDA(rnd_mode, 0)) + { + if ((exp_r == __gmpfr_emin - 1) && (rp[0] == ~MPFR_LIMB_ZERO) && (rb | sb)) + goto rounding; /* no underflow */ + } + return mpfr_underflow (r, rnd_mode, 1); + } + + /* sb = 0 can only occur when the square root is exact, i.e., rb = 0 */ + + rounding: + MPFR_EXP (r) = exp_r; + if (sb == 0 /* implies rb = 0 */ || rnd_mode == MPFR_RNDF) + { + MPFR_ASSERTD(exp_r >= __gmpfr_emin); + MPFR_ASSERTD(exp_r <= __gmpfr_emax); + return 0; /* idem than MPFR_RET(0) but faster */ + } + else if (rnd_mode == MPFR_RNDN) + { + /* we can't have sb = 0, thus rb is enough */ + if (rb == 0) + goto truncate; + else + goto add_one_ulp; + } + else if (MPFR_IS_LIKE_RNDZ(rnd_mode, 0)) + { + truncate: + MPFR_ASSERTD(exp_r >= __gmpfr_emin); + MPFR_ASSERTD(exp_r <= __gmpfr_emax); + MPFR_RET(-1); + } + else /* round away from zero */ + { + add_one_ulp: + rp[0] += MPFR_LIMB_ONE; + if (rp[0] == 0) + { + rp[0] = MPFR_LIMB_HIGHBIT; + if (MPFR_UNLIKELY(exp_r + 1 > __gmpfr_emax)) + return mpfr_overflow (r, rnd_mode, 1); + MPFR_ASSERTD(exp_r + 1 <= __gmpfr_emax); + MPFR_ASSERTD(exp_r + 1 >= __gmpfr_emin); + MPFR_SET_EXP (r, exp_r + 1); + } + MPFR_RET(1); + } +} + /* Special code for GMP_NUMB_BITS < prec(r) < 2*GMP_NUMB_BITS, and GMP_NUMB_BITS < prec(u) <= 2*GMP_NUMB_BITS. Assumes GMP_NUMB_BITS=64. */ @@ -303,7 +433,7 @@ mpfr_sqrt2 (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode) rounding: MPFR_EXP (r) = exp_r; - if ((rb == 0 && sb == 0) || (rnd_mode == MPFR_RNDF)) + if (sb == 0 /* implies rb = 0 */ || rnd_mode == MPFR_RNDF) { MPFR_ASSERTD(exp_r >= __gmpfr_emin); MPFR_ASSERTD(exp_r <= __gmpfr_emax); @@ -311,8 +441,8 @@ mpfr_sqrt2 (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode) } else if (rnd_mode == MPFR_RNDN) { - if (rb == 0 || (rb && sb == 0 && - (rp[0] & (MPFR_LIMB_ONE << sh)) == 0)) + /* since sb <> 0 now, only rb is needed */ + if (rb == 0) goto truncate; else goto add_one_ulp; @@ -341,6 +471,7 @@ mpfr_sqrt2 (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode) MPFR_RET(1); } } + #endif /* !defined(MPFR_GENERIC_ABI) && GMP_NUMB_BITS == 64 */ int @@ -362,6 +493,7 @@ mpfr_sqrt (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode) int sh; /* number of extra bits in rp[0] */ int inexact; /* return ternary flag */ mpfr_exp_t expr; + mpfr_prec_t rq = MPFR_GET_PREC (r); MPFR_TMP_DECL(marker); MPFR_LOG_FUNC @@ -405,16 +537,25 @@ mpfr_sqrt (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode) MPFR_SET_POS(r); #if !defined(MPFR_GENERIC_ABI) && GMP_NUMB_BITS == 64 - if (MPFR_GET_PREC (r) < GMP_NUMB_BITS && MPFR_GET_PREC (u) < GMP_NUMB_BITS) - return mpfr_sqrt1 (r, u, rnd_mode); + { + mpfr_prec_t uq = MPFR_GET_PREC (u); + + if (rq == uq) + { + if (rq < GMP_NUMB_BITS) + return mpfr_sqrt1 (r, u, rnd_mode); + + if (GMP_NUMB_BITS < rq && rq < 2*GMP_NUMB_BITS) + return mpfr_sqrt2 (r, u, rnd_mode); - if (GMP_NUMB_BITS < MPFR_GET_PREC (r) && MPFR_GET_PREC (r) < 2*GMP_NUMB_BITS - && MPFR_LIMB_SIZE(u) == 2) - return mpfr_sqrt2 (r, u, rnd_mode); + if (rq == GMP_NUMB_BITS) + return mpfr_sqrt1n (r, u, rnd_mode); + } + } #endif MPFR_TMP_MARK (marker); - MPFR_UNSIGNED_MINUS_MODULO (sh, MPFR_GET_PREC (r)); + MPFR_UNSIGNED_MINUS_MODULO (sh, rq); if (sh == 0 && rnd_mode == MPFR_RNDN) sh = GMP_NUMB_BITS; /* ugly case */ rsize = MPFR_LIMB_SIZE(r) + (sh == GMP_NUMB_BITS); |