summaryrefslogtreecommitdiff
path: root/src/sqrt.c
diff options
context:
space:
mode:
authorvlefevre <vlefevre@280ebfd0-de03-0410-8827-d642c229c3f4>2017-05-04 09:40:05 +0000
committervlefevre <vlefevre@280ebfd0-de03-0410-8827-d642c229c3f4>2017-05-04 09:40:05 +0000
commitaf5a1593331d686b9cc5fbbbbdc47e1733a4644e (patch)
treeff8210e41ae8ced432dbcd42e8be2a919f8dddc6 /src/sqrt.c
parent87ff38458263c9a9ed79a7ebd547fd32a66ae843 (diff)
parentd79a8111e6b7851b15bac211d8dca0e67a2979b5 (diff)
downloadmpfr-af5a1593331d686b9cc5fbbbbdc47e1733a4644e.tar.gz
Merged the latest changes from the trunk, including some old changesets
related to mpfr_zeta that were skipped, resolving conflicts. Added RNDF support to new code introduced by this merge: * mpfr_mul_1n in src/mul.c (from r11281); * mpfr_sqr_1n in src/sqr.c (from r11283); * mpfr_div_1n in src/div.c (from r11284); * mpfr_sqrt1n in src/sqrt.c (from r11293). git-svn-id: svn://scm.gforge.inria.fr/svn/mpfr/branches/faithful@11456 280ebfd0-de03-0410-8827-d642c229c3f4
Diffstat (limited to 'src/sqrt.c')
-rw-r--r--src/sqrt.c167
1 file changed, 154 insertions, 13 deletions
diff --git a/src/sqrt.c b/src/sqrt.c
index fa30defc5..9b487af65 100644
--- a/src/sqrt.c
+++ b/src/sqrt.c
@@ -127,7 +127,8 @@ mpfr_sqrt1 (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
sb |= (r0 & mask) ^ rb;
rp[0] = r0 & ~mask;
- /* rounding */
+ /* rounding: sb = 0 implies rb = 0, since (rb,sb)=(1,0) is not possible */
+ MPFR_ASSERTD (rb == 0 || sb != 0);
/* Note: if 1 and 2 are in [emin,emax], no overflow nor underflow
is possible */
@@ -154,16 +155,18 @@ mpfr_sqrt1 (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
rounding:
MPFR_EXP (r) = exp_r;
- if ((rb == 0 && sb == 0) || (rnd_mode == MPFR_RNDF))
+ if (sb == 0 /* implies rb = 0 */ || rnd_mode == MPFR_RNDF)
{
+ MPFR_ASSERTD (rb == 0 || rnd_mode == MPFR_RNDF);
MPFR_ASSERTD(exp_r >= __gmpfr_emin);
MPFR_ASSERTD(exp_r <= __gmpfr_emax);
return 0; /* idem than MPFR_RET(0) but faster */
}
else if (rnd_mode == MPFR_RNDN)
{
- if (rb == 0 || (rb && sb == 0 &&
- (rp[0] & (MPFR_LIMB_ONE << sh)) == 0))
+ /* since sb <> 0, only rb is needed to decide how to round, and the exact
+ middle is not possible */
+ if (rb == 0)
goto truncate;
else
goto add_one_ulp;
@@ -192,6 +195,133 @@ mpfr_sqrt1 (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
}
}
+/* Special code for prec(r) = GMP_NUMB_BITS and prec(u) <= GMP_NUMB_BITS: sets r to sqrt(u) rounded per rnd_mode and returns the ternary inexact value (0 exact, -1 below, 1 above). */
+static int
+mpfr_sqrt1n (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
+{
+ mpfr_prec_t exp_u = MPFR_EXP(u), exp_r;
+ mp_limb_t u0, r0, rb, sb, low;
+ mpfr_limb_ptr rp = MPFR_MANT(r);
+
+ MPFR_STAT_STATIC_ASSERT (GMP_NUMB_BITS == 64);
+ MPFR_ASSERTD(MPFR_PREC(r) == GMP_NUMB_BITS);
+ MPFR_ASSERTD(MPFR_PREC(u) <= GMP_NUMB_BITS);
+
+ /* first make the exponent even: if exp_u is odd, shift the significand
+   right by one bit and keep the shifted-out bit in low */
+ u0 = MPFR_MANT(u)[0];
+ if (((unsigned int) exp_u & 1) != 0)
+ {
+ low = u0 << (GMP_NUMB_BITS - 1);
+ u0 >>= 1;
+ exp_u ++;
+ }
+ else
+ low = 0; /* low part of u0 */
+ MPFR_ASSERTD (((unsigned int) exp_u & 1) == 0);
+ exp_r = exp_u / 2; /* exponent of the result: sqrt halves the (even) exponent */
+
+ /* then compute an approximation of the integer square root of
+ u0*2^GMP_NUMB_BITS */
+ __gmpfr_sqrt_limb_approx (r0, u0);
+
+ /* the exact square root is in [r0, r0 + 7], i.e., r0 never overshoots */
+
+ /* first ensure r0 has its most significant bit set */
+ if (MPFR_UNLIKELY(r0 < MPFR_LIMB_HIGHBIT))
+ r0 = MPFR_LIMB_HIGHBIT;
+
+ umul_ppmm (rb, sb, r0, r0);
+ sub_ddmmss (rb, sb, u0, low, rb, sb); /* rb:sb = u0*2^64 + low - r0^2, the remainder */
+ /* for the exact square root, we should have 0 <= rb:sb <= 2*r0 */
+ while (!(rb == 0 || (rb == 1 && sb <= 2 * r0))) /* increase r0 until rb:sb <= 2*r0 */
+ {
+ /* subtract 2*r0+1 from rb:sb: subtract r0 before incrementing r0,
+ then r0 after (which is r0+1) */
+ rb -= (sb < r0);
+ sb -= r0;
+ r0 ++;
+ rb -= (sb < r0);
+ sb -= r0;
+ }
+ /* now we have u0*2^64+low = r0^2 + rb*2^64+sb, with rb*2^64+sb <= 2*r0 */
+ MPFR_ASSERTD(rb == 0 || (rb == 1 && sb <= 2 * r0));
+
+ /* We can't have the middle case u0*2^64 = (r0 + 1/2)^2 since
+ (r0 + 1/2)^2 is not an integer.
+ We thus have rb = 1 whenever u0*2^64 > (r0 + 1/2)^2, i.e., rb*2^64 + sb > r0,
+ and the sticky bit is always 1, unless we had rb = sb = 0. */
+
+ rb = rb || (sb > r0); /* round bit */
+ sb = rb | sb; /* sticky bit */
+ rp[0] = r0;
+
+ /* rounding */
+
+ /* Note: if 1 and 2 are in [emin,emax], no overflow nor underflow
+ is possible */
+ if (MPFR_UNLIKELY (exp_r > __gmpfr_emax))
+ return mpfr_overflow (r, rnd_mode, 1);
+
+ /* See comments in mpfr_div_1 */
+ if (MPFR_UNLIKELY (exp_r < __gmpfr_emin))
+ {
+ if (rnd_mode == MPFR_RNDN)
+ {
+ /* the case rp[0] = 111...111 and rb = 1 cannot happen, since it
+ would imply u0 >= (2^64-1/2)^2/2^64 thus u0 >= 2^64 */
+ if (exp_r < __gmpfr_emin - 1 || (rp[0] == MPFR_LIMB_HIGHBIT && sb == 0))
+ rnd_mode = MPFR_RNDZ;
+ }
+ else if (MPFR_IS_LIKE_RNDA(rnd_mode, 0))
+ {
+ if ((exp_r == __gmpfr_emin - 1) && (rp[0] == ~MPFR_LIMB_ZERO) && (rb | sb))
+ goto rounding; /* no underflow */
+ }
+ return mpfr_underflow (r, rnd_mode, 1);
+ }
+
+ /* sb = 0 can only occur when the square root is exact, i.e., rb = 0 */
+
+ rounding:
+ MPFR_EXP (r) = exp_r;
+ if (sb == 0 /* implies rb = 0 */ || rnd_mode == MPFR_RNDF)
+ {
+ MPFR_ASSERTD(exp_r >= __gmpfr_emin);
+ MPFR_ASSERTD(exp_r <= __gmpfr_emax);
+ return 0; /* idem than MPFR_RET(0) but faster */
+ }
+ else if (rnd_mode == MPFR_RNDN)
+ {
+ /* we can't have sb = 0, thus rb is enough */
+ if (rb == 0)
+ goto truncate;
+ else
+ goto add_one_ulp;
+ }
+ else if (MPFR_IS_LIKE_RNDZ(rnd_mode, 0))
+ {
+ truncate:
+ MPFR_ASSERTD(exp_r >= __gmpfr_emin);
+ MPFR_ASSERTD(exp_r <= __gmpfr_emax);
+ MPFR_RET(-1);
+ }
+ else /* round away from zero */
+ {
+ add_one_ulp:
+ rp[0] += MPFR_LIMB_ONE; /* may wrap to 0, meaning the significand overflowed */
+ if (rp[0] == 0)
+ {
+ rp[0] = MPFR_LIMB_HIGHBIT;
+ if (MPFR_UNLIKELY(exp_r + 1 > __gmpfr_emax))
+ return mpfr_overflow (r, rnd_mode, 1);
+ MPFR_ASSERTD(exp_r + 1 <= __gmpfr_emax);
+ MPFR_ASSERTD(exp_r + 1 >= __gmpfr_emin);
+ MPFR_SET_EXP (r, exp_r + 1);
+ }
+ MPFR_RET(1);
+ }
+}
+
/* Special code for GMP_NUMB_BITS < prec(r) < 2*GMP_NUMB_BITS,
and GMP_NUMB_BITS < prec(u) <= 2*GMP_NUMB_BITS.
Assumes GMP_NUMB_BITS=64. */
@@ -303,7 +433,7 @@ mpfr_sqrt2 (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
rounding:
MPFR_EXP (r) = exp_r;
- if ((rb == 0 && sb == 0) || (rnd_mode == MPFR_RNDF))
+ if (sb == 0 /* implies rb = 0 */ || rnd_mode == MPFR_RNDF)
{
MPFR_ASSERTD(exp_r >= __gmpfr_emin);
MPFR_ASSERTD(exp_r <= __gmpfr_emax);
@@ -311,8 +441,8 @@ mpfr_sqrt2 (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
}
else if (rnd_mode == MPFR_RNDN)
{
- if (rb == 0 || (rb && sb == 0 &&
- (rp[0] & (MPFR_LIMB_ONE << sh)) == 0))
+ /* since sb <> 0 now, only rb is needed */
+ if (rb == 0)
goto truncate;
else
goto add_one_ulp;
@@ -341,6 +471,7 @@ mpfr_sqrt2 (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
MPFR_RET(1);
}
}
+
#endif /* !defined(MPFR_GENERIC_ABI) && GMP_NUMB_BITS == 64 */
int
@@ -362,6 +493,7 @@ mpfr_sqrt (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
int sh; /* number of extra bits in rp[0] */
int inexact; /* return ternary flag */
mpfr_exp_t expr;
+ mpfr_prec_t rq = MPFR_GET_PREC (r);
MPFR_TMP_DECL(marker);
MPFR_LOG_FUNC
@@ -405,16 +537,25 @@ mpfr_sqrt (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
MPFR_SET_POS(r);
#if !defined(MPFR_GENERIC_ABI) && GMP_NUMB_BITS == 64
- if (MPFR_GET_PREC (r) < GMP_NUMB_BITS && MPFR_GET_PREC (u) < GMP_NUMB_BITS)
- return mpfr_sqrt1 (r, u, rnd_mode);
+ {
+ mpfr_prec_t uq = MPFR_GET_PREC (u);
+
+ if (rq == uq)
+ {
+ if (rq < GMP_NUMB_BITS)
+ return mpfr_sqrt1 (r, u, rnd_mode);
+
+ if (GMP_NUMB_BITS < rq && rq < 2*GMP_NUMB_BITS)
+ return mpfr_sqrt2 (r, u, rnd_mode);
- if (GMP_NUMB_BITS < MPFR_GET_PREC (r) && MPFR_GET_PREC (r) < 2*GMP_NUMB_BITS
- && MPFR_LIMB_SIZE(u) == 2)
- return mpfr_sqrt2 (r, u, rnd_mode);
+ if (rq == GMP_NUMB_BITS)
+ return mpfr_sqrt1n (r, u, rnd_mode);
+ }
+ }
#endif
MPFR_TMP_MARK (marker);
- MPFR_UNSIGNED_MINUS_MODULO (sh, MPFR_GET_PREC (r));
+ MPFR_UNSIGNED_MINUS_MODULO (sh, rq);
if (sh == 0 && rnd_mode == MPFR_RNDN)
sh = GMP_NUMB_BITS; /* ugly case */
rsize = MPFR_LIMB_SIZE(r) + (sh == GMP_NUMB_BITS);