1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
|
#include <stdio.h>
#include <math.h>
#include "gmp.h"
#include "gmp-impl.h"
#include "mpfr.h"
void
#if __STDC__
mpfr_sqrt(mpfr_ptr X, mpfr_srcptr a, unsigned char rnd_mode)
#else
mpfr_sqrt(X, a, rnd_mode)
mpfr_ptr X;
mpfr_srcptr a;
unsigned char rnd_mode;
#endif
{
int p, q, err, i, e, n; mpfr_t t, u; mpfr_ptr x;
/* use Newton's iteration x[n+1] = 1/2*(x[n]+a/x[n]),
the error e[n] = x[n]-sqrt(a) satisfies e[n+1] <= e[n]/2/sqrt(a) */
if (FLAG_NAN(a) || SIGN(a)<0) { SET_NAN(X); return; }
x = (mpfr_ptr) (*_mp_allocate_func) (sizeof(__mpfr_struct));
mpfr_init2(x, PREC(X));
MPN_COPY(x->_mp_d, X->_mp_d, (PREC(X)-1)/BITS_PER_MP_LIMB + 1);
e = EXP(a)/2; if (2*e<EXP(a)) e++;
/* now 2^(2*e-2) <= a < 2^(2*e) i.e. 1/4 <= a/2^(2e) < 1 */
q = p = PREC(x);
for (i=0; i<3; i++)
q = p + (int) ceil(log(4.0*ceil(log((double)q)/log(2.0))+2.0)/log(2.0));
err = q-p; /* the error is at most 2^err ulp */
q = (q/mp_bits_per_limb)*mp_bits_per_limb; /* adjust to entire limb */
mpfr_init2(t, q+mp_bits_per_limb); mpfr_init2(u, q+mp_bits_per_limb);
do {
q += mp_bits_per_limb;
if (q>3*p+mp_bits_per_limb) {
/* try to detect exact roots */
mpfr_round(x, rnd_mode, p);
mpfr_mul(t, x, x, rnd_mode);
if (mpfr_cmp(t, a)==0) goto youpi; /* exact root */
fprintf(stderr, "no convergence in mpfr_sqrt for a=%1.20e, rnd=%d\n",
mpfr_get_d(a), rnd_mode); exit(1);
}
mpfr_set_prec(t, q);
mpfr_set_prec(x, q);
mpfr_set_prec(u, q);
mpfr_set_ui(x, 1, GMP_RNDU);
EXP(x) += e;
n = (int) ceil(log((double) q)/log(2.0));
for (i=0; i<n; i++) {
mpfr_div(t, a, x, GMP_RNDU);
mpfr_add(u, x, t, GMP_RNDU);
mpfr_div_2exp(x, u, 1, GMP_RNDU);
}
} while (mpfr_can_round(x, q-err, GMP_RNDU, rnd_mode, p)==0);
mpfr_round(x, rnd_mode, p);
youpi:
mpfr_clear(t); mpfr_clear(u);
mpfr_set(X, x, rnd_mode);
mpfr_clear(x); (*_mp_free_func)(x, sizeof(__mpfr_struct));
}
|