diff options
-rw-r--r-- | src/bernoulli.c | 57 | ||||
-rw-r--r-- | src/digamma.c | 13 | ||||
-rw-r--r-- | src/free_cache.c | 3 | ||||
-rw-r--r-- | src/li2.c | 15 | ||||
-rw-r--r-- | src/lngamma.c | 31 | ||||
-rw-r--r-- | src/mpfr-impl.h | 4 |
6 files changed, 67 insertions, 56 deletions
diff --git a/src/bernoulli.c b/src/bernoulli.c index 0aa8eba2f..e224c6b5e 100644 --- a/src/bernoulli.c +++ b/src/bernoulli.c @@ -35,12 +35,11 @@ http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., Then C[n] = -sum(binomial(n+1,k)*C[k]*n!/(k+1)!, k=0..n-1), which proves that the C[n] are integers. */ -mpz_t* +void mpfr_bernoulli_internal (mpz_t *b, unsigned long n) { if (n == 0) { - b = (mpz_t *) (*__gmp_allocate_func) (sizeof (mpz_t)); mpz_init_set_ui (b[0], 1); } else @@ -48,8 +47,6 @@ mpfr_bernoulli_internal (mpz_t *b, unsigned long n) mpz_t t; unsigned long k; - b = (mpz_t *) (*__gmp_reallocate_func) - (b, n * sizeof (mpz_t), (n + 1) * sizeof (mpz_t)); mpz_init (b[n]); /* b[n] = -sum(binomial(2n+1,2k)*C[k]*(2n)!/(2k+1)!, k=0..n-1) */ mpz_init_set_ui (t, 2 * n + 1); @@ -76,5 +73,55 @@ mpfr_bernoulli_internal (mpz_t *b, unsigned long n) mpz_neg (b[n], b[n]); mpz_clear (t); } - return b; + return; +} + +static MPFR_THREAD_ATTR mpz_t *bernoulli_table = NULL; +static MPFR_THREAD_ATTR unsigned long bernoulli_size = 0; +static MPFR_THREAD_ATTR unsigned long bernoulli_alloc = 0; + +mpz_srcptr +mpfr_bernoulli_cache (unsigned long n) +{ + unsigned long i; + + if (n >= bernoulli_size) { + if (bernoulli_alloc == 0) + { + bernoulli_alloc = MAX(16, n + n/4); + bernoulli_table = (mpz_t *) + (*__gmp_allocate_func) (bernoulli_alloc * sizeof (mpz_t)); + bernoulli_size = 0; + } + else if (n >= bernoulli_alloc) + { + bernoulli_table = (mpz_t *) (*__gmp_reallocate_func) + (bernoulli_table, bernoulli_alloc * sizeof (mpz_t), + (n + n/4) * sizeof (mpz_t)); + bernoulli_alloc = n + n/4; + } + MPFR_ASSERTD(bernoulli_alloc > n); + MPFR_ASSERTD(bernoulli_size >= 0); + for(i = bernoulli_size; i <= n; i++) + mpfr_bernoulli_internal(bernoulli_table, i); + bernoulli_size = n+1; + } + MPFR_ASSERTD(bernoulli_size > n); + return bernoulli_table[n]; +} + +void +mpfr_bernoulli_freecache(void) +{ + unsigned long i; + + if (bernoulli_table != NULL) { + for(i = 0; i < bernoulli_size; i++) { + mpz_clear (bernoulli_table[i]); + } + (*__gmp_free_func) (bernoulli_table, bernoulli_alloc * sizeof (mpz_t)); + bernoulli_table = NULL; + bernoulli_alloc = 0; + bernoulli_size = 0; + } } diff --git a/src/digamma.c b/src/digamma.c index 9b1c9207c..61ab935c6 100644 --- a/src/digamma.c +++ b/src/digamma.c @@ -34,8 +34,7 @@ mpfr_digamma_approx (mpfr_ptr s, mpfr_srcptr x) mpfr_prec_t p = MPFR_PREC (s); mpfr_t t, u, invxx; mpfr_exp_t e, exps, f, expu; - mpz_t *INITIALIZED(B); /* variable B declared as initialized */ - unsigned long n0, n; /* number of allocated B[] */ + unsigned long n; MPFR_ASSERTN(MPFR_IS_POS(x) && (MPFR_EXP(x) >= 2)); @@ -59,12 +58,9 @@ mpfr_digamma_approx (mpfr_ptr s, mpfr_srcptr x) /* in the following we note err=xxx when the ratio between the approximation and the exact result can be written (1 + theta)^xxx for |theta| <= 2^(-p), following Higham's method */ - B = mpfr_bernoulli_internal ((mpz_t *) 0, 0); mpfr_set_ui (t, 1, MPFR_RNDN); /* err = 0 */ for (n = 1;; n++) { - /* compute next Bernoulli number */ - B = mpfr_bernoulli_internal (B, n); /* The main term is Bernoulli[2n]/(2n)/x^(2n) = B[n]/(2n+1)!(2n)/x^(2n) = B[n]*t[n]/(2n) where t[n]/t[n-1] = 1/(2n)/(2n+1)/x^2. */ mpfr_mul (t, t, invxx, MPFR_RNDU); /* err = err + 3 */ @@ -72,7 +68,7 @@ mpfr_digamma_approx (mpfr_ptr s, mpfr_srcptr x) mpfr_div_ui (t, t, 2 * n + 1, MPFR_RNDU); /* err = err + 1 */ /* we thus have err = 5n here */ mpfr_div_ui (u, t, 2 * n, MPFR_RNDU); /* err = 5n+1 */ - mpfr_mul_z (u, u, B[n], MPFR_RNDU); /* err = 5n+2, and the + mpfr_mul_z (u, u, mpfr_bernoulli_cache(n), MPFR_RNDU);/* err = 5n+2, and the absolute error is bounded by 10n+4 ulp(u) [Rule 11] */ /* if the terms 'u' are decreasing by a factor two at least, @@ -95,11 +91,6 @@ mpfr_digamma_approx (mpfr_ptr s, mpfr_srcptr x) e += f; /* total rounding error coming from 'u' term */ } - n0 = ++n; - while (n--) - mpz_clear (B[n]); - (*__gmp_free_func) (B, n0 * sizeof (mpz_t)); - mpfr_clear (t); mpfr_clear (u); mpfr_clear (invxx); diff --git a/src/free_cache.c b/src/free_cache.c index 84413bb11..78b8eeae1 100644 --- a/src/free_cache.c +++ b/src/free_cache.c @@ -74,6 +74,9 @@ mpfr_mpz_clear (mpz_t z) void mpfr_free_cache (void) { + /* Before mpz caching */ + mpfr_bernoulli_freecache(); + #if MPFR_MY_MPZ_INIT int i; MPFR_ASSERTD (n_alloc >= 0 && n_alloc <= numberof (mpz_tab)); @@ -33,11 +33,10 @@ http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., static int li2_series (mpfr_t sum, mpfr_srcptr z, mpfr_rnd_t rnd_mode) { - int i, Bm, Bmax; + int i; mpfr_t s, u, v, w; mpfr_prec_t sump, p; mpfr_exp_t se, err; - mpz_t *B; MPFR_ZIV_DECL (loop); /* The series converges for |z| < 2 pi, but in mpfr_li2 the argument is @@ -53,9 +52,6 @@ li2_series (mpfr_t sum, mpfr_srcptr z, mpfr_rnd_t rnd_mode) mpfr_init2 (v, p); mpfr_init2 (w, p); - B = mpfr_bernoulli_internal ((mpz_t *) 0, 0); - Bm = Bmax = 1; - MPFR_ZIV_INIT (loop, p); for (;;) { @@ -67,9 +63,6 @@ li2_series (mpfr_t sum, mpfr_srcptr z, mpfr_rnd_t rnd_mode) for (i = 1;; i++) { - if (i >= Bmax) - B = mpfr_bernoulli_internal (B, Bmax++); /* B_2i*(2i+1)!, exact */ - mpfr_mul (v, u, v, MPFR_RNDU); mpfr_div_ui (v, v, 2 * i, MPFR_RNDU); mpfr_div_ui (v, v, 2 * i, MPFR_RNDU); @@ -77,7 +70,7 @@ li2_series (mpfr_t sum, mpfr_srcptr z, mpfr_rnd_t rnd_mode) mpfr_div_ui (v, v, 2 * i + 1, MPFR_RNDU); /* here, v_2i = v_{2i-2} / (2i * (2i+1))^2 */ - mpfr_mul_z (w, v, B[i], MPFR_RNDN); + mpfr_mul_z (w, v, mpfr_bernoulli_cache(i), MPFR_RNDN); /* here, w_2i = v_2i * B_2i * (2i+1)! with error(w_2i) < 2^(5 * i + 8) ulp(w_2i) (see algorithms.tex) */ @@ -107,10 +100,6 @@ li2_series (mpfr_t sum, mpfr_srcptr z, mpfr_rnd_t rnd_mode) MPFR_ZIV_FREE (loop); mpfr_set (sum, s, rnd_mode); - Bm = Bmax; - while (Bm--) - mpz_clear (B[Bm]); - (*__gmp_free_func) (B, Bmax * sizeof (mpz_t)); mpfr_clears (s, u, v, w, (mpfr_ptr) 0); /* Let K be the returned value. diff --git a/src/lngamma.c b/src/lngamma.c index fd011813c..26986b984 100644 --- a/src/lngamma.c +++ b/src/lngamma.c @@ -172,11 +172,8 @@ GAMMA_FUNC (mpfr_ptr y, mpfr_srcptr z0, mpfr_rnd_t rnd) mpfr_prec_t precy, w; /* working precision */ mpfr_t s, t, u, v, z; unsigned long m, k, maxm; - mpz_t *B; int compared, inexact; mpfr_exp_t err_s, err_t; - unsigned long Bm = 0; /* number of allocated B[] */ - unsigned long oldBm; double d; MPFR_SAVE_EXPO_DECL (expo); MPFR_ZIV_DECL (loop); @@ -436,14 +433,12 @@ GAMMA_FUNC (mpfr_ptr y, mpfr_srcptr z0, mpfr_rnd_t rnd) and we need k steps of argument reconstruction. Assuming k is large with respect to z0, and k = n, we get 1/(Pi*e)^(2n) ~ 2^(-w), i.e., k ~ w*log(2)/2/log(Pi*e) ~ 0.1616 * w. - However, since the series is more expensive to compute, the optimal - value seems to be k ~ 4.5 * w experimentally. - Note added February 12, 2014: for a target precision of 1000 bits, - gamma(pi^2) with k = 4.5*w gives m=55 and k=4639 which is about 60% - slower than k=1.5*w (m=69 and k=1540), thus we change for 1.5*w. */ + However, since the series is slightly more expensive to compute, + the optimal value seems to be k ~ 0.25 * w experimentally (with + caching of Bernoulli numbers). */ mpfr_set_prec (s, 53); mpfr_gamma_alpha (s, w); - mpfr_set_ui_2exp (s, 3, -1, MPFR_RNDU); + mpfr_set_ui_2exp (s, 1, -2, MPFR_RNDU); mpfr_mul_ui (s, s, w, MPFR_RNDU); if (mpfr_cmp (z0, s) < 0) { @@ -491,13 +486,6 @@ GAMMA_FUNC (mpfr_ptr y, mpfr_srcptr z0, mpfr_rnd_t rnd) mpfr_mul (u, u, u, MPFR_RNDN); /* 1/z^2 * (1+u)^3 */ - if (Bm == 0) - { - B = mpfr_bernoulli_internal ((mpz_t *) 0, 0); - B = mpfr_bernoulli_internal (B, 1); - Bm = 2; - } - /* m <= maxm ensures that 2*m*(2*m+1) <= ULONG_MAX */ maxm = 1UL << (GMP_NUMB_BITS / 2 - 1); @@ -523,12 +511,7 @@ GAMMA_FUNC (mpfr_ptr y, mpfr_srcptr z0, mpfr_rnd_t rnd) } /* (1+u)^(10m-8) */ /* invariant: t=1/(2m)/(2m-1)/z^(2m-1)/(2m+1)! */ - if (Bm <= m) - { - B = mpfr_bernoulli_internal (B, m); /* B[2m]*(2m+1)!, exact */ - Bm ++; - } - mpfr_mul_z (v, t, B[m], MPFR_RNDN); /* (1+u)^(10m-7) */ + mpfr_mul_z (v, t, mpfr_bernoulli_cache(m), MPFR_RNDN); /* (1+u)^(10m-7) */ MPFR_ASSERTD(MPFR_GET_EXP(v) <= - (2 * m + 3)); mpfr_add (s, s, v, MPFR_RNDN); } @@ -630,10 +613,6 @@ GAMMA_FUNC (mpfr_ptr y, mpfr_srcptr z0, mpfr_rnd_t rnd) #ifdef IS_GAMMA end0: #endif - oldBm = Bm; - while (Bm--) - mpz_clear (B[Bm]); - (*__gmp_free_func) (B, oldBm * sizeof (mpz_t)); end: if (inexact == 0) diff --git a/src/mpfr-impl.h b/src/mpfr-impl.h index 1afdf213d..f7ec07a66 100644 --- a/src/mpfr-impl.h +++ b/src/mpfr-impl.h @@ -1986,8 +1986,10 @@ __MPFR_DECLSPEC MPFR_COLD_FUNCTION_ATTR MPFR_NORETURN void __MPFR_DECLSPEC void mpfr_rand_raw _MPFR_PROTO((mpfr_limb_ptr, gmp_randstate_t, mpfr_prec_t)); -__MPFR_DECLSPEC mpz_t* mpfr_bernoulli_internal _MPFR_PROTO((mpz_t*, +__MPFR_DECLSPEC void mpfr_bernoulli_internal _MPFR_PROTO((mpz_t*, unsigned long)); + __MPFR_DECLSPEC mpz_srcptr mpfr_bernoulli_cache (unsigned long n); + __MPFR_DECLSPEC void mpfr_bernoulli_freecache(void); __MPFR_DECLSPEC int mpfr_sincos_fast _MPFR_PROTO((mpfr_t, mpfr_t, mpfr_srcptr, mpfr_rnd_t)); |