6 files changed, 67 insertions, 56 deletions
diff --git a/src/bernoulli.c b/src/bernoulli.c
index 0aa8eba2f..e224c6b5e 100644
--- a/src/bernoulli.c
+++ b/src/bernoulli.c
@@ -35,12 +35,11 @@ http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
    Then C[n] = -sum(binomial(n+1,k)*C[k]*n!/(k+1)!,  k=0..n-1),
    which proves that the C[n] are integers.
 */
-mpz_t*
+void
 mpfr_bernoulli_internal (mpz_t *b, unsigned long n)
 {
   if (n == 0)
     {
-      b = (mpz_t *) (*__gmp_allocate_func) (sizeof (mpz_t));
       mpz_init_set_ui (b[0], 1);
     }
   else
@@ -48,8 +47,6 @@ mpfr_bernoulli_internal (mpz_t *b, unsigned long n)
       mpz_t t;
       unsigned long k;
 
-      b = (mpz_t *) (*__gmp_reallocate_func)
-        (b, n * sizeof (mpz_t), (n + 1) * sizeof (mpz_t));
       mpz_init (b[n]);
       /* b[n] = -sum(binomial(2n+1,2k)*C[k]*(2n)!/(2k+1)!,  k=0..n-1) */
       mpz_init_set_ui (t, 2 * n + 1);
@@ -76,5 +73,55 @@ mpfr_bernoulli_internal (mpz_t *b, unsigned long n)
       mpz_neg (b[n], b[n]);
       mpz_clear (t);
     }
-  return b;
+  return;
+}
+
+static MPFR_THREAD_ATTR mpz_t *bernoulli_table = NULL;
+static MPFR_THREAD_ATTR unsigned long bernoulli_size = 0;
+static MPFR_THREAD_ATTR unsigned long bernoulli_alloc = 0;
+
+mpz_srcptr
+mpfr_bernoulli_cache (unsigned long n)
+{
+  unsigned long i;
+
+  if (n >= bernoulli_size) {
+    if (bernoulli_alloc == 0)
+      {
+	bernoulli_alloc = MAX(16, n + n/4);
+	bernoulli_table = (mpz_t *) 
+	  (*__gmp_allocate_func) (bernoulli_alloc * sizeof (mpz_t));
+	bernoulli_size  = 0;
+      }
+    else if (n >= bernoulli_alloc)
+      {
+	bernoulli_table = (mpz_t *) (*__gmp_reallocate_func)
+	  (bernoulli_table, bernoulli_alloc * sizeof (mpz_t),
+	   (n + n/4) * sizeof (mpz_t));
+	bernoulli_alloc = n + n/4;
+      }
+    MPFR_ASSERTD(bernoulli_alloc > n);
+    MPFR_ASSERTD(bernoulli_size >= 0);
+    for(i = bernoulli_size; i <= n; i++)
+      mpfr_bernoulli_internal(bernoulli_table, i);
+    bernoulli_size = n+1;
+  }
+  MPFR_ASSERTD(bernoulli_size > n);
+  return bernoulli_table[n];
+}
+
+void
+mpfr_bernoulli_freecache(void)
+{
+  unsigned long i;
+
+  if (bernoulli_table != NULL) {
+    for(i = 0; i < bernoulli_size; i++) {
+      mpz_clear (bernoulli_table[i]);
+    }
+    (*__gmp_free_func) (bernoulli_table, bernoulli_alloc * sizeof (mpz_t)); 
+    bernoulli_table = NULL;
+    bernoulli_alloc = 0;
+    bernoulli_size = 0;
+  }
 }
diff --git a/src/digamma.c b/src/digamma.c
index 9b1c9207c..61ab935c6 100644
--- a/src/digamma.c
+++ b/src/digamma.c
@@ -34,8 +34,7 @@ mpfr_digamma_approx (mpfr_ptr s, mpfr_srcptr x)
   mpfr_prec_t p = MPFR_PREC (s);
   mpfr_t t, u, invxx;
   mpfr_exp_t e, exps, f, expu;
-  mpz_t *INITIALIZED(B);  /* variable B declared as initialized */
-  unsigned long n0, n; /* number of allocated B[] */
+  unsigned long n;
 
   MPFR_ASSERTN(MPFR_IS_POS(x) && (MPFR_EXP(x) >= 2));
 
@@ -59,12 +58,9 @@ mpfr_digamma_approx (mpfr_ptr s, mpfr_srcptr x)
   /* in the following we note err=xxx when the ratio between the approximation
      and the exact result can be written (1 + theta)^xxx for |theta| <= 2^(-p),
      following Higham's method */
-  B = mpfr_bernoulli_internal ((mpz_t *) 0, 0);
   mpfr_set_ui (t, 1, MPFR_RNDN); /* err = 0 */
   for (n = 1;; n++)
     {
-      /* compute next Bernoulli number */
-      B = mpfr_bernoulli_internal (B, n);
       /* The main term is Bernoulli[2n]/(2n)/x^(2n) = B[n]/(2n+1)!(2n)/x^(2n)
          = B[n]*t[n]/(2n) where t[n]/t[n-1] = 1/(2n)/(2n+1)/x^2. */
       mpfr_mul (t, t, invxx, MPFR_RNDU);        /* err = err + 3 */
@@ -72,7 +68,7 @@ mpfr_digamma_approx (mpfr_ptr s, mpfr_srcptr x)
       mpfr_div_ui (t, t, 2 * n + 1, MPFR_RNDU); /* err = err + 1 */
       /* we thus have err = 5n here */
       mpfr_div_ui (u, t, 2 * n, MPFR_RNDU);     /* err = 5n+1 */
-      mpfr_mul_z (u, u, B[n], MPFR_RNDU);       /* err = 5n+2, and the
+      mpfr_mul_z (u, u, mpfr_bernoulli_cache(n), MPFR_RNDU);/* err = 5n+2, and the
                                                    absolute error is bounded
                                                    by 10n+4 ulp(u) [Rule 11] */
       /* if the terms 'u' are decreasing by a factor two at least,
@@ -95,11 +91,6 @@ mpfr_digamma_approx (mpfr_ptr s, mpfr_srcptr x)
       e += f; /* total rounding error coming from 'u' term */
     }
 
-  n0 = ++n;
-  while (n--)
-    mpz_clear (B[n]);
-  (*__gmp_free_func) (B, n0 * sizeof (mpz_t));
-
   mpfr_clear (t);
   mpfr_clear (u);
   mpfr_clear (invxx);
diff --git a/src/free_cache.c b/src/free_cache.c
index 84413bb11..78b8eeae1 100644
--- a/src/free_cache.c
+++ b/src/free_cache.c
@@ -74,6 +74,9 @@ mpfr_mpz_clear (mpz_t z)
 void
 mpfr_free_cache (void)
 {
+  /* Before mpz caching */
+  mpfr_bernoulli_freecache();
+
 #if MPFR_MY_MPZ_INIT
   int i;
   MPFR_ASSERTD (n_alloc >= 0 && n_alloc <= numberof (mpz_tab));
diff --git a/src/li2.c b/src/li2.c
index 789a61892..43ccd18e0 100644
--- a/src/li2.c
+++ b/src/li2.c
@@ -33,11 +33,10 @@ http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
 static int
 li2_series (mpfr_t sum, mpfr_srcptr z, mpfr_rnd_t rnd_mode)
 {
-  int i, Bm, Bmax;
+  int i;
   mpfr_t s, u, v, w;
   mpfr_prec_t sump, p;
   mpfr_exp_t se, err;
-  mpz_t *B;
   MPFR_ZIV_DECL (loop);
 
   /* The series converges for |z| < 2 pi, but in mpfr_li2 the argument is
@@ -53,9 +52,6 @@ li2_series (mpfr_t sum, mpfr_srcptr z, mpfr_rnd_t rnd_mode)
   mpfr_init2 (v, p);
   mpfr_init2 (w, p);
 
-  B = mpfr_bernoulli_internal ((mpz_t *) 0, 0);
-  Bm = Bmax = 1;
-
   MPFR_ZIV_INIT (loop, p);
   for (;;)
     {
@@ -67,9 +63,6 @@ li2_series (mpfr_t sum, mpfr_srcptr z, mpfr_rnd_t rnd_mode)
 
       for (i = 1;; i++)
         {
-          if (i >= Bmax)
-            B = mpfr_bernoulli_internal (B, Bmax++); /* B_2i*(2i+1)!, exact */
-
           mpfr_mul (v, u, v, MPFR_RNDU);
           mpfr_div_ui (v, v, 2 * i, MPFR_RNDU);
           mpfr_div_ui (v, v, 2 * i, MPFR_RNDU);
@@ -77,7 +70,7 @@ li2_series (mpfr_t sum, mpfr_srcptr z, mpfr_rnd_t rnd_mode)
           mpfr_div_ui (v, v, 2 * i + 1, MPFR_RNDU);
           /* here, v_2i = v_{2i-2} / (2i * (2i+1))^2 */
 
-          mpfr_mul_z (w, v, B[i], MPFR_RNDN);
+          mpfr_mul_z (w, v, mpfr_bernoulli_cache(i), MPFR_RNDN);
           /* here, w_2i = v_2i * B_2i * (2i+1)! with
              error(w_2i) < 2^(5 * i + 8) ulp(w_2i) (see algorithms.tex) */
 
@@ -107,10 +100,6 @@ li2_series (mpfr_t sum, mpfr_srcptr z, mpfr_rnd_t rnd_mode)
   MPFR_ZIV_FREE (loop);
   mpfr_set (sum, s, rnd_mode);
 
-  Bm = Bmax;
-  while (Bm--)
-    mpz_clear (B[Bm]);
-  (*__gmp_free_func) (B, Bmax * sizeof (mpz_t));
   mpfr_clears (s, u, v, w, (mpfr_ptr) 0);
 
   /* Let K be the returned value.
diff --git a/src/lngamma.c b/src/lngamma.c
index fd011813c..26986b984 100644
--- a/src/lngamma.c
+++ b/src/lngamma.c
@@ -172,11 +172,8 @@ GAMMA_FUNC (mpfr_ptr y, mpfr_srcptr z0, mpfr_rnd_t rnd)
   mpfr_prec_t precy, w; /* working precision */
   mpfr_t s, t, u, v, z;
   unsigned long m, k, maxm;
-  mpz_t *B;
   int compared, inexact;
   mpfr_exp_t err_s, err_t;
-  unsigned long Bm = 0; /* number of allocated B[] */
-  unsigned long oldBm;
   double d;
   MPFR_SAVE_EXPO_DECL (expo);
   MPFR_ZIV_DECL (loop);
@@ -436,14 +433,12 @@ GAMMA_FUNC (mpfr_ptr y, mpfr_srcptr z0, mpfr_rnd_t rnd)
          and we need k steps of argument reconstruction. Assuming k is large
          with respect to z0, and k = n, we get 1/(Pi*e)^(2n) ~ 2^(-w), i.e.,
          k ~ w*log(2)/2/log(Pi*e) ~ 0.1616 * w.
-         However, since the series is more expensive to compute, the optimal
-         value seems to be k ~ 4.5 * w experimentally.
-         Note added February 12, 2014: for a target precision of 1000 bits,
-         gamma(pi^2) with k = 4.5*w gives m=55 and k=4639 which is about 60%
-         slower than k=1.5*w (m=69 and k=1540), thus we change for 1.5*w. */
+         However, since the series is slightly more expensive to compute,
+         the optimal value seems to be k ~ 0.25 * w experimentally (with
+         caching of Bernoulli numbers). */
       mpfr_set_prec (s, 53);
       mpfr_gamma_alpha (s, w);
-      mpfr_set_ui_2exp (s, 3, -1, MPFR_RNDU);
+      mpfr_set_ui_2exp (s, 1, -2, MPFR_RNDU);
       mpfr_mul_ui (s, s, w, MPFR_RNDU);
       if (mpfr_cmp (z0, s) < 0)
         {
@@ -491,13 +486,6 @@ GAMMA_FUNC (mpfr_ptr y, mpfr_srcptr z0, mpfr_rnd_t rnd)
 
       mpfr_mul (u, u, u, MPFR_RNDN); /* 1/z^2 * (1+u)^3 */
 
-      if (Bm == 0)
-        {
-          B = mpfr_bernoulli_internal ((mpz_t *) 0, 0);
-          B = mpfr_bernoulli_internal (B, 1);
-          Bm = 2;
-        }
-
       /* m <= maxm ensures that 2*m*(2*m+1) <= ULONG_MAX */
       maxm = 1UL << (GMP_NUMB_BITS / 2 - 1);
 
@@ -523,12 +511,7 @@ GAMMA_FUNC (mpfr_ptr y, mpfr_srcptr z0, mpfr_rnd_t rnd)
             }
           /* (1+u)^(10m-8) */
           /* invariant: t=1/(2m)/(2m-1)/z^(2m-1)/(2m+1)! */
-          if (Bm <= m)
-            {
-              B = mpfr_bernoulli_internal (B, m); /* B[2m]*(2m+1)!, exact */
-              Bm ++;
-            }
-          mpfr_mul_z (v, t, B[m], MPFR_RNDN); /* (1+u)^(10m-7) */
+          mpfr_mul_z (v, t, mpfr_bernoulli_cache(m), MPFR_RNDN); /* (1+u)^(10m-7) */
           MPFR_ASSERTD(MPFR_GET_EXP(v) <= - (2 * m + 3));
           mpfr_add (s, s, v, MPFR_RNDN);
         }
@@ -630,10 +613,6 @@ GAMMA_FUNC (mpfr_ptr y, mpfr_srcptr z0, mpfr_rnd_t rnd)
 #ifdef IS_GAMMA
  end0:
 #endif
-  oldBm = Bm;
-  while (Bm--)
-    mpz_clear (B[Bm]);
-  (*__gmp_free_func) (B, oldBm * sizeof (mpz_t));
 
  end:
   if (inexact == 0)
diff --git a/src/mpfr-impl.h b/src/mpfr-impl.h
index 1afdf213d..f7ec07a66 100644
--- a/src/mpfr-impl.h
+++ b/src/mpfr-impl.h
@@ -1986,8 +1986,10 @@ __MPFR_DECLSPEC MPFR_COLD_FUNCTION_ATTR MPFR_NORETURN void
 __MPFR_DECLSPEC void mpfr_rand_raw _MPFR_PROTO((mpfr_limb_ptr, gmp_randstate_t,
                                                 mpfr_prec_t));
 
-__MPFR_DECLSPEC mpz_t* mpfr_bernoulli_internal _MPFR_PROTO((mpz_t*,
+__MPFR_DECLSPEC void mpfr_bernoulli_internal _MPFR_PROTO((mpz_t*,
                                                             unsigned long));
+  __MPFR_DECLSPEC  mpz_srcptr mpfr_bernoulli_cache (unsigned long n);
+  __MPFR_DECLSPEC  void mpfr_bernoulli_freecache(void);
 
 __MPFR_DECLSPEC int mpfr_sincos_fast _MPFR_PROTO((mpfr_t, mpfr_t,
                                                   mpfr_srcptr, mpfr_rnd_t));