summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/bernoulli.c57
-rw-r--r--src/digamma.c13
-rw-r--r--src/free_cache.c3
-rw-r--r--src/li2.c15
-rw-r--r--src/lngamma.c31
-rw-r--r--src/mpfr-impl.h4
6 files changed, 67 insertions, 56 deletions
diff --git a/src/bernoulli.c b/src/bernoulli.c
index 0aa8eba2f..e224c6b5e 100644
--- a/src/bernoulli.c
+++ b/src/bernoulli.c
@@ -35,12 +35,11 @@ http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
Then C[n] = -sum(binomial(n+1,k)*C[k]*n!/(k+1)!, k=0..n-1),
which proves that the C[n] are integers.
*/
-mpz_t*
+void
mpfr_bernoulli_internal (mpz_t *b, unsigned long n)
{
if (n == 0)
{
- b = (mpz_t *) (*__gmp_allocate_func) (sizeof (mpz_t));
mpz_init_set_ui (b[0], 1);
}
else
@@ -48,8 +47,6 @@ mpfr_bernoulli_internal (mpz_t *b, unsigned long n)
mpz_t t;
unsigned long k;
- b = (mpz_t *) (*__gmp_reallocate_func)
- (b, n * sizeof (mpz_t), (n + 1) * sizeof (mpz_t));
mpz_init (b[n]);
/* b[n] = -sum(binomial(2n+1,2k)*C[k]*(2n)!/(2k+1)!, k=0..n-1) */
mpz_init_set_ui (t, 2 * n + 1);
@@ -76,5 +73,55 @@ mpfr_bernoulli_internal (mpz_t *b, unsigned long n)
mpz_neg (b[n], b[n]);
mpz_clear (t);
}
- return b;
+ return;
+}
+
+static MPFR_THREAD_ATTR mpz_t *bernoulli_table = NULL;
+static MPFR_THREAD_ATTR unsigned long bernoulli_size = 0;
+static MPFR_THREAD_ATTR unsigned long bernoulli_alloc = 0;
+
+mpz_srcptr
+mpfr_bernoulli_cache (unsigned long n)
+{
+ unsigned long i;
+
+ if (n >= bernoulli_size) {
+ if (bernoulli_alloc == 0)
+ {
+ bernoulli_alloc = MAX(16, n + n/4);
+ bernoulli_table = (mpz_t *)
+ (*__gmp_allocate_func) (bernoulli_alloc * sizeof (mpz_t));
+ bernoulli_size = 0;
+ }
+ else if (n >= bernoulli_alloc)
+ {
+ bernoulli_table = (mpz_t *) (*__gmp_reallocate_func)
+ (bernoulli_table, bernoulli_alloc * sizeof (mpz_t),
+ (n + n/4) * sizeof (mpz_t));
+ bernoulli_alloc = n + n/4;
+ }
+ MPFR_ASSERTD(bernoulli_alloc > n);
+ MPFR_ASSERTD(bernoulli_size >= 0);
+ for(i = bernoulli_size; i <= n; i++)
+ mpfr_bernoulli_internal(bernoulli_table, i);
+ bernoulli_size = n+1;
+ }
+ MPFR_ASSERTD(bernoulli_size > n);
+ return bernoulli_table[n];
+}
+
+void
+mpfr_bernoulli_freecache(void)
+{
+ unsigned long i;
+
+ if (bernoulli_table != NULL) {
+ for(i = 0; i < bernoulli_size; i++) {
+ mpz_clear (bernoulli_table[i]);
+ }
+ (*__gmp_free_func) (bernoulli_table, bernoulli_alloc * sizeof (mpz_t));
+ bernoulli_table = NULL;
+ bernoulli_alloc = 0;
+ bernoulli_size = 0;
+ }
}
diff --git a/src/digamma.c b/src/digamma.c
index 9b1c9207c..61ab935c6 100644
--- a/src/digamma.c
+++ b/src/digamma.c
@@ -34,8 +34,7 @@ mpfr_digamma_approx (mpfr_ptr s, mpfr_srcptr x)
mpfr_prec_t p = MPFR_PREC (s);
mpfr_t t, u, invxx;
mpfr_exp_t e, exps, f, expu;
- mpz_t *INITIALIZED(B); /* variable B declared as initialized */
- unsigned long n0, n; /* number of allocated B[] */
+ unsigned long n;
MPFR_ASSERTN(MPFR_IS_POS(x) && (MPFR_EXP(x) >= 2));
@@ -59,12 +58,9 @@ mpfr_digamma_approx (mpfr_ptr s, mpfr_srcptr x)
/* in the following we note err=xxx when the ratio between the approximation
and the exact result can be written (1 + theta)^xxx for |theta| <= 2^(-p),
following Higham's method */
- B = mpfr_bernoulli_internal ((mpz_t *) 0, 0);
mpfr_set_ui (t, 1, MPFR_RNDN); /* err = 0 */
for (n = 1;; n++)
{
- /* compute next Bernoulli number */
- B = mpfr_bernoulli_internal (B, n);
/* The main term is Bernoulli[2n]/(2n)/x^(2n) = B[n]/(2n+1)!(2n)/x^(2n)
= B[n]*t[n]/(2n) where t[n]/t[n-1] = 1/(2n)/(2n+1)/x^2. */
mpfr_mul (t, t, invxx, MPFR_RNDU); /* err = err + 3 */
@@ -72,7 +68,7 @@ mpfr_digamma_approx (mpfr_ptr s, mpfr_srcptr x)
mpfr_div_ui (t, t, 2 * n + 1, MPFR_RNDU); /* err = err + 1 */
/* we thus have err = 5n here */
mpfr_div_ui (u, t, 2 * n, MPFR_RNDU); /* err = 5n+1 */
- mpfr_mul_z (u, u, B[n], MPFR_RNDU); /* err = 5n+2, and the
+ mpfr_mul_z (u, u, mpfr_bernoulli_cache(n), MPFR_RNDU);/* err = 5n+2, and the
absolute error is bounded
by 10n+4 ulp(u) [Rule 11] */
/* if the terms 'u' are decreasing by a factor two at least,
@@ -95,11 +91,6 @@ mpfr_digamma_approx (mpfr_ptr s, mpfr_srcptr x)
e += f; /* total rounding error coming from 'u' term */
}
- n0 = ++n;
- while (n--)
- mpz_clear (B[n]);
- (*__gmp_free_func) (B, n0 * sizeof (mpz_t));
-
mpfr_clear (t);
mpfr_clear (u);
mpfr_clear (invxx);
diff --git a/src/free_cache.c b/src/free_cache.c
index 84413bb11..78b8eeae1 100644
--- a/src/free_cache.c
+++ b/src/free_cache.c
@@ -74,6 +74,9 @@ mpfr_mpz_clear (mpz_t z)
void
mpfr_free_cache (void)
{
+ /* Before mpz caching */
+ mpfr_bernoulli_freecache();
+
#if MPFR_MY_MPZ_INIT
int i;
MPFR_ASSERTD (n_alloc >= 0 && n_alloc <= numberof (mpz_tab));
diff --git a/src/li2.c b/src/li2.c
index 789a61892..43ccd18e0 100644
--- a/src/li2.c
+++ b/src/li2.c
@@ -33,11 +33,10 @@ http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
static int
li2_series (mpfr_t sum, mpfr_srcptr z, mpfr_rnd_t rnd_mode)
{
- int i, Bm, Bmax;
+ int i;
mpfr_t s, u, v, w;
mpfr_prec_t sump, p;
mpfr_exp_t se, err;
- mpz_t *B;
MPFR_ZIV_DECL (loop);
/* The series converges for |z| < 2 pi, but in mpfr_li2 the argument is
@@ -53,9 +52,6 @@ li2_series (mpfr_t sum, mpfr_srcptr z, mpfr_rnd_t rnd_mode)
mpfr_init2 (v, p);
mpfr_init2 (w, p);
- B = mpfr_bernoulli_internal ((mpz_t *) 0, 0);
- Bm = Bmax = 1;
-
MPFR_ZIV_INIT (loop, p);
for (;;)
{
@@ -67,9 +63,6 @@ li2_series (mpfr_t sum, mpfr_srcptr z, mpfr_rnd_t rnd_mode)
for (i = 1;; i++)
{
- if (i >= Bmax)
- B = mpfr_bernoulli_internal (B, Bmax++); /* B_2i*(2i+1)!, exact */
-
mpfr_mul (v, u, v, MPFR_RNDU);
mpfr_div_ui (v, v, 2 * i, MPFR_RNDU);
mpfr_div_ui (v, v, 2 * i, MPFR_RNDU);
@@ -77,7 +70,7 @@ li2_series (mpfr_t sum, mpfr_srcptr z, mpfr_rnd_t rnd_mode)
mpfr_div_ui (v, v, 2 * i + 1, MPFR_RNDU);
/* here, v_2i = v_{2i-2} / (2i * (2i+1))^2 */
- mpfr_mul_z (w, v, B[i], MPFR_RNDN);
+ mpfr_mul_z (w, v, mpfr_bernoulli_cache(i), MPFR_RNDN);
/* here, w_2i = v_2i * B_2i * (2i+1)! with
error(w_2i) < 2^(5 * i + 8) ulp(w_2i) (see algorithms.tex) */
@@ -107,10 +100,6 @@ li2_series (mpfr_t sum, mpfr_srcptr z, mpfr_rnd_t rnd_mode)
MPFR_ZIV_FREE (loop);
mpfr_set (sum, s, rnd_mode);
- Bm = Bmax;
- while (Bm--)
- mpz_clear (B[Bm]);
- (*__gmp_free_func) (B, Bmax * sizeof (mpz_t));
mpfr_clears (s, u, v, w, (mpfr_ptr) 0);
/* Let K be the returned value.
diff --git a/src/lngamma.c b/src/lngamma.c
index fd011813c..26986b984 100644
--- a/src/lngamma.c
+++ b/src/lngamma.c
@@ -172,11 +172,8 @@ GAMMA_FUNC (mpfr_ptr y, mpfr_srcptr z0, mpfr_rnd_t rnd)
mpfr_prec_t precy, w; /* working precision */
mpfr_t s, t, u, v, z;
unsigned long m, k, maxm;
- mpz_t *B;
int compared, inexact;
mpfr_exp_t err_s, err_t;
- unsigned long Bm = 0; /* number of allocated B[] */
- unsigned long oldBm;
double d;
MPFR_SAVE_EXPO_DECL (expo);
MPFR_ZIV_DECL (loop);
@@ -436,14 +433,12 @@ GAMMA_FUNC (mpfr_ptr y, mpfr_srcptr z0, mpfr_rnd_t rnd)
and we need k steps of argument reconstruction. Assuming k is large
with respect to z0, and k = n, we get 1/(Pi*e)^(2n) ~ 2^(-w), i.e.,
k ~ w*log(2)/2/log(Pi*e) ~ 0.1616 * w.
- However, since the series is more expensive to compute, the optimal
- value seems to be k ~ 4.5 * w experimentally.
- Note added February 12, 2014: for a target precision of 1000 bits,
- gamma(pi^2) with k = 4.5*w gives m=55 and k=4639 which is about 60%
- slower than k=1.5*w (m=69 and k=1540), thus we change for 1.5*w. */
+ However, since the series is slightly more expensive to compute,
+ the optimal value seems to be k ~ 0.25 * w experimentally (with
+ caching of Bernoulli numbers). */
mpfr_set_prec (s, 53);
mpfr_gamma_alpha (s, w);
- mpfr_set_ui_2exp (s, 3, -1, MPFR_RNDU);
+ mpfr_set_ui_2exp (s, 1, -2, MPFR_RNDU);
mpfr_mul_ui (s, s, w, MPFR_RNDU);
if (mpfr_cmp (z0, s) < 0)
{
@@ -491,13 +486,6 @@ GAMMA_FUNC (mpfr_ptr y, mpfr_srcptr z0, mpfr_rnd_t rnd)
mpfr_mul (u, u, u, MPFR_RNDN); /* 1/z^2 * (1+u)^3 */
- if (Bm == 0)
- {
- B = mpfr_bernoulli_internal ((mpz_t *) 0, 0);
- B = mpfr_bernoulli_internal (B, 1);
- Bm = 2;
- }
-
/* m <= maxm ensures that 2*m*(2*m+1) <= ULONG_MAX */
maxm = 1UL << (GMP_NUMB_BITS / 2 - 1);
@@ -523,12 +511,7 @@ GAMMA_FUNC (mpfr_ptr y, mpfr_srcptr z0, mpfr_rnd_t rnd)
}
/* (1+u)^(10m-8) */
/* invariant: t=1/(2m)/(2m-1)/z^(2m-1)/(2m+1)! */
- if (Bm <= m)
- {
- B = mpfr_bernoulli_internal (B, m); /* B[2m]*(2m+1)!, exact */
- Bm ++;
- }
- mpfr_mul_z (v, t, B[m], MPFR_RNDN); /* (1+u)^(10m-7) */
+ mpfr_mul_z (v, t, mpfr_bernoulli_cache(m), MPFR_RNDN); /* (1+u)^(10m-7) */
MPFR_ASSERTD(MPFR_GET_EXP(v) <= - (2 * m + 3));
mpfr_add (s, s, v, MPFR_RNDN);
}
@@ -630,10 +613,6 @@ GAMMA_FUNC (mpfr_ptr y, mpfr_srcptr z0, mpfr_rnd_t rnd)
#ifdef IS_GAMMA
end0:
#endif
- oldBm = Bm;
- while (Bm--)
- mpz_clear (B[Bm]);
- (*__gmp_free_func) (B, oldBm * sizeof (mpz_t));
end:
if (inexact == 0)
diff --git a/src/mpfr-impl.h b/src/mpfr-impl.h
index 1afdf213d..f7ec07a66 100644
--- a/src/mpfr-impl.h
+++ b/src/mpfr-impl.h
@@ -1986,8 +1986,10 @@ __MPFR_DECLSPEC MPFR_COLD_FUNCTION_ATTR MPFR_NORETURN void
__MPFR_DECLSPEC void mpfr_rand_raw _MPFR_PROTO((mpfr_limb_ptr, gmp_randstate_t,
mpfr_prec_t));
-__MPFR_DECLSPEC mpz_t* mpfr_bernoulli_internal _MPFR_PROTO((mpz_t*,
+__MPFR_DECLSPEC void mpfr_bernoulli_internal _MPFR_PROTO((mpz_t*,
unsigned long));
+ __MPFR_DECLSPEC mpz_srcptr mpfr_bernoulli_cache (unsigned long n);
+ __MPFR_DECLSPEC void mpfr_bernoulli_freecache(void);
__MPFR_DECLSPEC int mpfr_sincos_fast _MPFR_PROTO((mpfr_t, mpfr_t,
mpfr_srcptr, mpfr_rnd_t));