Complete rewrite (made last April).

author: tege <tege@gmplib.org> 2006-10-25 22:06:00 +0200
committer: tege <tege@gmplib.org> 2006-10-25 22:06:00 +0200
commit: 243ce6ef556df8b35e58d6661eb630ed628da203 (patch)
tree: 5c0ce14fc0c514049d2d457613e318b4d12dd156
parent: af20c3e4f5e89a61827be795e903b54be3a8a034 (diff)
download: gmp-243ce6ef556df8b35e58d6661eb630ed628da203.tar.gz
2 files changed, 391 insertions, 340 deletions
diff --git a/mpn/generic/get_str.c b/mpn/generic/get_str.c
index 8ea256ac3..c52e2fffc 100644
--- a/mpn/generic/get_str.c
+++ b/mpn/generic/get_str.c
@@ -1,32 +1,32 @@
 /* mpn_get_str -- Convert a MSIZE long limb vector pointed to by MPTR
    to a printable string in STR in base BASE.
 
-Copyright 1991, 1992, 1993, 1994, 1996, 2000, 2001, 2002 Free Software
-Foundation, Inc.
+Copyright 1991, 1992, 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2006 Free
+Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 2.1 of the License, or (at your
+The GNU MP Library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by the
+Free Software Foundation; either version 2.1 of the License, or (at your
 option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
+for more details.
 
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
-the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
-MA 02110-1301, USA. */
+You should have received a copy of the GNU Lesser General Public License along
+with the GNU MP Library; see the file COPYING.LIB.  If not, write to the Free
+Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301,
+USA. */
 
 #include "gmp.h"
 #include "gmp-impl.h"
 #include "longlong.h"
 
 /* Conversion of U {up,un} to a string in base b.  Internally, we convert to
-     base B = b^m, the largest power of b that fits a limb.  Basic algorithms:
+   base B = b^m, the largest power of b that fits a limb.  Basic algorithms:
 
   A) Divide U repeatedly by B, generating a quotient and remainder, until the
      quotient becomes zero.  The remainders hold the converted digits.  Digits
@@ -37,11 +37,10 @@ MA 02110-1301, USA. */
      come out from left to right.  (Currently not used herein, except for in
      code for converting single limbs to individual digits.)
 
-  C) Compute B^1, B^2, B^4, ..., B^(2^s), for s such that B^(2^s) > sqrt(U).
-     Then divide U by B^(2^k), generating an integer quotient and remainder.
-     Recursively convert the quotient, then the remainder, using the
-     precomputed powers.  Digits come out from left to right.  (Used in
-     mpn_dc_get_str.)
+  C) Compute B^1, B^2, B^4, ..., B^s, for s such that B^s just above sqrt(U).
+     Then divide U by B^s, generating quotient and remainder.  Recursively
+     convert the quotient, then the remainder, using the precomputed powers.
+     Digits come out from left to right.  (Used in mpn_dc_get_str.)
 
   When using algorithm C, algorithm B might be suitable for basecase code,
   since the required b^g power will be readily accessible.
@@ -135,37 +134,26 @@ MA 02110-1301, USA. */
 #define GET_STR_PRECOMPUTE_THRESHOLD 30
 #endif
 
-struct powers
-{
-  size_t digits_in_base;
-  mp_ptr p;
-  mp_size_t n;		/* mpz_struct uses int for sizes, but not mpn! */
-  int base;
-};
-typedef struct powers powers_t;
-
 
-/* Convert {UP,UN} to a string with a base as represented in POWTAB, and put
-   the string in STR.  Generate LEN characters, possibly padding with zeros to
-   the left.  If LEN is zero, generate as many characters as required.
-   Return a pointer immediately after the last digit of the result string.
-   Complexity is O(UN^2); intended for small conversions.  */
+/* Convert {up,un} to a string in base base, and put the result in str.
+   Generate len characters, possibly padding with zeros to the left.  If len is
+   zero, generate as many characters as required.  Return a pointer immediately
+   after the last digit of the result string.  Complexity is O(un^2); intended
+   for small conversions.  */
 static unsigned char *
 mpn_sb_get_str (unsigned char *str, size_t len,
-		mp_ptr up, mp_size_t un,
-		const powers_t *powtab)
+		mp_ptr up, mp_size_t un, int base)
 {
   mp_limb_t rl, ul;
   unsigned char *s;
-  int base;
   size_t l;
   /* Allocate memory for largest possible string, given that we only get here
      for operands with un < GET_STR_PRECOMPUTE_THRESHOLD and that the smallest
      base is 3.  7/11 is an approximation to 1/log2(3).  */
 #if TUNE_PROGRAM_BUILD
-#define BUF_ALLOC (GET_STR_THRESHOLD_LIMIT * BITS_PER_MP_LIMB * 7 / 11)
+#define BUF_ALLOC (GET_STR_THRESHOLD_LIMIT * GMP_LIMB_BITS * 7 / 11)
 #else
-#define BUF_ALLOC (GET_STR_PRECOMPUTE_THRESHOLD * BITS_PER_MP_LIMB * 7 / 11)
+#define BUF_ALLOC (GET_STR_PRECOMPUTE_THRESHOLD * GMP_LIMB_BITS * 7 / 11)
 #endif
   unsigned char buf[BUF_ALLOC];
 #if TUNE_PROGRAM_BUILD
@@ -174,7 +162,6 @@ mpn_sb_get_str (unsigned char *str, size_t len,
   mp_limb_t rp[GET_STR_PRECOMPUTE_THRESHOLD];
 #endif
 
-  base = powtab->base;
   if (base == 10)
     {
       /* Special case code for base==10 so that the compiler has a chance to
@@ -194,10 +181,10 @@ mpn_sb_get_str (unsigned char *str, size_t len,
 	  un -= rp[un] == 0;
 	  frac = (rp[0] + 1) << GMP_NAIL_BITS;
 	  s -= MP_BASES_CHARS_PER_LIMB_10;
-	  i = MP_BASES_CHARS_PER_LIMB_10;
 #if HAVE_HOST_CPU_FAMILY_x86
 	  /* The code below turns out to be a bit slower for x86 using gcc.
 	     Use plain code.  */
+	  i = MP_BASES_CHARS_PER_LIMB_10;
 	  do
 	    {
 	      umul_ppmm (digit, frac, frac, 10);
@@ -212,31 +199,28 @@ mpn_sb_get_str (unsigned char *str, size_t len,
 	    {
 	      umul_ppmm (digit, frac, frac, 10);
 	      *s++ = digit;
-	      i--;
 	    }
 	  if (MP_BASES_NORMALIZATION_STEPS_10 <= 1)
 	    {
 	      umul_ppmm (digit, frac, frac, 10);
 	      *s++ = digit;
-	      i--;
 	    }
 	  if (MP_BASES_NORMALIZATION_STEPS_10 <= 2)
 	    {
 	      umul_ppmm (digit, frac, frac, 10);
 	      *s++ = digit;
-	      i--;
 	    }
 	  if (MP_BASES_NORMALIZATION_STEPS_10 <= 3)
 	    {
 	      umul_ppmm (digit, frac, frac, 10);
 	      *s++ = digit;
-	      i--;
 	    }
+	  i = MP_BASES_CHARS_PER_LIMB_10 - (4-MP_BASES_NORMALIZATION_STEPS_10);
 	  frac = (frac + 0xf) >> 4;
 	  do
 	    {
 	      frac *= 10;
-	      digit = frac >> (BITS_PER_MP_LIMB - 4);
+	      digit = frac >> (GMP_LIMB_BITS - 4);
 	      *s++ = digit;
 	      frac &= (~(mp_limb_t) 0) >> 4;
 	    }
@@ -323,7 +307,7 @@ mpn_dc_get_str (unsigned char *str, size_t len,
   if (un < GET_STR_DC_THRESHOLD)
     {
       if (un != 0)
-	str = mpn_sb_get_str (str, len, up, un, powtab);
+	str = mpn_sb_get_str (str, len, up, un, powtab->base);
       else
 	{
 	  while (len != 0)
@@ -352,8 +336,12 @@ mpn_dc_get_str (unsigned char *str, size_t len,
 
 	  mpn_tdiv_qr (qp, rp, 0L, up, un, pwp, pwn);
 	  qn = un - pwn; qn += qp[qn] != 0;		/* quotient size */
+
+	  ASSERT (qn < pwn || (qn == pwn && mpn_cmp (qp, pwp, pwn) < 0));
+
 	  if (len != 0)
 	    len = len - powtab->digits_in_base;
+
 	  str = mpn_dc_get_str (str, len, qp, qn, powtab - 1, tmp + un - pwn + 1);
 	  str = mpn_dc_get_str (str, powtab->digits_in_base, rp, pwn, powtab - 1, tmp);
 	}
@@ -362,6 +350,27 @@ mpn_dc_get_str (unsigned char *str, size_t len,
 }
 
 
+/*
+   Example sequences:
+
+   1  1  2  3  5  9  17		(worst case for un = 33,34)
+   1  1  1  2s
+         2a    4s
+            3a    8s
+               5a    16s
+                  9a
+                     17a
+
+   1  1  2  4  8  16  32	(best case for un = 63,64)
+   1  1  2s 4s 8s 16s 32s
+
+   1  1  2  4  7  14  28	(medium)
+   1  1  1  3sa
+         2a    7sa
+            4a    17s
+                      28s
+*/
+
 /* There are no leading zeros on the digits generated at str, but that's not
    currently a documented feature.  */
 
@@ -371,12 +380,13 @@ mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un)
   mp_ptr powtab_mem, powtab_mem_ptr;
   mp_limb_t big_base;
   size_t digits_in_base;
-  powers_t powtab[30];
+  powers_t powtab[GMP_LIMB_BITS];
   int pi;
   mp_size_t n;
   mp_ptr p, t;
   size_t out_len;
   mp_ptr tmp;
+  TMP_DECL;
 
   /* Special case zero, as the code below doesn't handle it.  */
   if (un == 0)
@@ -437,18 +447,16 @@ mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un)
   /* General case.  The base is not a power of 2.  */
 
   if (un < GET_STR_PRECOMPUTE_THRESHOLD)
-    {
-      struct powers ptab[1];
-      ptab[0].base = base;
-      return mpn_sb_get_str (str, (size_t) 0, up, un, ptab) - str;
-    }
+    return mpn_sb_get_str (str, (size_t) 0, up, un, base) - str;
+
+  TMP_MARK;
 
   /* Allocate one large block for the powers of big_base.  With the current
      scheme, we need to allocate twice as much as would be possible if a
      minimal set of powers were generated.  */
-#define POWTAB_ALLOC_SIZE (2 * un + 30)
-#define TMP_ALLOC_SIZE (un + 30)
-  powtab_mem = __GMP_ALLOCATE_FUNC_LIMBS (POWTAB_ALLOC_SIZE);
+#define POWTAB_ALLOC_SIZE (un + 2 * GMP_LIMB_BITS)
+#define TMP_ALLOC_SIZE (un)
+  powtab_mem = TMP_BALLOC_LIMBS (POWTAB_ALLOC_SIZE);
   powtab_mem_ptr = powtab_mem;
 
   /* Compute a table of powers: big_base^1, big_base^2, big_base^4, ...,
@@ -458,43 +466,84 @@ mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un)
   big_base = __mp_bases[base].big_base;
   digits_in_base = __mp_bases[base].chars_per_limb;
 
-  powtab[0].base = base; /* FIXME: hack for getting base to mpn_sb_get_str */
-  powtab[1].p = &big_base;
-  powtab[1].n = 1;
-  powtab[1].digits_in_base = digits_in_base;
-  powtab[1].base = base;
-  powtab[2].p = &big_base;
-  powtab[2].n = 1;
-  powtab[2].digits_in_base = digits_in_base;
-  powtab[2].base = base;
-  n = 1;
-  pi = 2;
-  p = &big_base;
-  for (;;)
-    {
-      ++pi;
-      t = powtab_mem_ptr;
-      powtab_mem_ptr += 2 * n;
-      mpn_sqr_n (t, p, n);
-      n *= 2; n -= t[n - 1] == 0;
-      digits_in_base *= 2;
-      p = t;
-      powtab[pi].p = p;
-      powtab[pi].n = n;
-      powtab[pi].digits_in_base = digits_in_base;
-      powtab[pi].base = base;
-
-      if (2 * n > un)
-	break;
+  {
+    mp_size_t n_pows, xn, pn, exptab[GMP_LIMB_BITS], bexp;
+    mp_limb_t cy;
+    n_pows = 0;
+    xn = 1 + un*(__mp_bases[base].chars_per_bit_exactly*GMP_NUMB_BITS)/__mp_bases[base].chars_per_limb;
+    for (pn = xn; pn != 1; pn = (pn + 1) >> 1)
+      {
+	exptab[n_pows] = pn;
+	n_pows++;
+      }
+    exptab[n_pows] = 1;
+
+    powtab[0].p = &big_base;
+    powtab[0].n = 1;
+    powtab[0].digits_in_base = digits_in_base;
+    powtab[0].base = base;
+
+    powtab[1].p = powtab_mem_ptr;  powtab_mem_ptr += 2;
+    powtab[1].p[0] = big_base;
+    powtab[1].n = 1;
+    powtab[1].digits_in_base = digits_in_base;
+    powtab[1].base = base;
+
+    n = 1;
+    p = &big_base;
+    bexp = 1;
+    for (pi = 2; pi < n_pows; pi++)
+      {
+	t = powtab_mem_ptr;
+	powtab_mem_ptr += 2 * n + 2;
+
+	ASSERT_ALWAYS (powtab_mem_ptr < powtab_mem + POWTAB_ALLOC_SIZE);
+
+	mpn_sqr_n (t, p, n);
+
+	digits_in_base *= 2;
+	n *= 2;  n -= t[n - 1] == 0;
+	bexp *= 2;
+
+	if (bexp + 1 < exptab[n_pows - pi])
+	  {
+	    digits_in_base += __mp_bases[base].chars_per_limb;
+	    cy = mpn_mul_1 (t, t, n, big_base);
+	    t[n] = cy;
+	    n += cy != 0;
+	    bexp += 1;
+	  }
+	p = t;
+	powtab[pi].p = p;
+	powtab[pi].n = n;
+	powtab[pi].digits_in_base = digits_in_base;
+	powtab[pi].base = base;
+      }
+
+    for (pi = 1; pi < n_pows; pi++)
+      {
+	t = powtab[pi].p;
+	n = powtab[pi].n;
+	cy = mpn_mul_1 (t, t, n, big_base);
+	t[n] = cy;
+	n += cy != 0;
+	powtab[pi].n = n;
+	powtab[pi].digits_in_base += __mp_bases[base].chars_per_limb;
+      }
+
+#if 0
+    { int i;
+      printf ("Computed table values for base=%d, un=%d, xn=%d:\n", base, un, xn);
+      for (i = 0; i < n_pows; i++)
+	printf ("%2d: %10ld %10ld %11ld\n", i, exptab[n_pows-i], powtab[i].n, powtab[i].digits_in_base);
     }
-  ASSERT_ALWAYS (POWTAB_ALLOC_SIZE > powtab_mem_ptr - powtab_mem);
+#endif
+  }
 
   /* Using our precomputed powers, now in powtab[], convert our number.  */
-  tmp = __GMP_ALLOCATE_FUNC_LIMBS (TMP_ALLOC_SIZE);
-  out_len = mpn_dc_get_str (str, 0, up, un, powtab + pi, tmp) - str;
-  __GMP_FREE_FUNC_LIMBS (tmp, TMP_ALLOC_SIZE);
-
-  __GMP_FREE_FUNC_LIMBS (powtab_mem, POWTAB_ALLOC_SIZE);
+  tmp = TMP_BALLOC_LIMBS (TMP_ALLOC_SIZE);
+  out_len = mpn_dc_get_str (str, 0, up, un, powtab - 1 + pi, tmp) - str;
+  TMP_FREE;
 
   return out_len;
 }
diff --git a/mpn/generic/set_str.c b/mpn/generic/set_str.c
index 4166bb1d7..f08965237 100644
--- a/mpn/generic/set_str.c
+++ b/mpn/generic/set_str.c
@@ -2,72 +2,76 @@
    Convert a STR_LEN long base BASE byte string pointed to by STR to a limb
    vector pointed to by RES_PTR.  Return the number of limbs in RES_PTR.
 
-Copyright 1991, 1992, 1993, 1994, 1996, 2000, 2001, 2002 Free Software
-Foundation, Inc.
+Copyright 1991, 1992, 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2006 Free
+Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
 The GNU MP Library is free software; you can redistribute it and/or modify it
-under the terms of the GNU Library General Public License as published by the
-Free Software Foundation; either version 2 of the License, or (at your option)
-any later version.
+under the terms of the GNU Lesser General Public License as published by the
+Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
 
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public License
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 for more details.
 
-You should have received a copy of the GNU Library General Public License along
+You should have received a copy of the GNU Lesser General Public License along
 with the GNU MP Library; see the file COPYING.LIB.  If not, write to the Free
-Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301,
 USA. */
 
+/* Originally written 2004, but it was left non-working.
+   Fixed 2006-03 at LORIA.
+
+   TODO:
+
+      Perhaps do not compute the highest power?
+      Instead, multiply twice by the 2nd highest power:
+
+	       _______
+	      |_______|  hp
+	      |_______|  pow
+       _______________
+      |_______________|  final result
+
+
+	       _______
+	      |_______|  hp
+		  |___|  pow[-1]
+	   ___________
+	  |___________|  intermediate result
+		  |___|  pow[-1]
+       _______________
+      |_______________|  final result
+
+      Generalizing that idea, perhaps we should make powtab contain successive
+      cubes, not squares.
+*/
+
 #include "gmp.h"
 #include "gmp-impl.h"
+#include "longlong.h"
 
-static mp_size_t convert_blocks __GMP_PROTO ((mp_ptr, const unsigned char *, size_t, int));
 
-/* When to switch to sub-quadratic code.  This counts characters/digits in
-   the input string, not limbs as most other *_THRESHOLD.  */
-#ifndef SET_STR_THRESHOLD
-#define SET_STR_THRESHOLD 4000
+#ifndef SET_STR_DC_THRESHOLD
+#define SET_STR_DC_THRESHOLD 2000
 #endif
 
-/* Don't define this to anything but 1 for now.  In order to make other values
-   work well, either the convert_blocks function should be generalized to
-   handle larger blocks than chars_per_limb, or the basecase code should be
-   broken out of the main function.  Also note that this must be a power of
-   2.  */
-#ifndef SET_STR_BLOCK_SIZE
-#define SET_STR_BLOCK_SIZE 1	/* Must be a power of 2. */
+#ifndef SET_STR_PRECOMPUTE_THRESHOLD
+#define SET_STR_PRECOMPUTE_THRESHOLD 5000
 #endif
 
+mp_size_t mpn_bc_set_str (mp_ptr, const unsigned char *, size_t, int);
+mp_size_t mpn_dc_set_str (mp_ptr, const unsigned char *, size_t,
+			  const powers_t *, mp_ptr);
+void mpn_set_str_compute_powtab (powers_t *, mp_ptr, mp_size_t, int);
 
-/* This check interferes with expression based values of SET_STR_THRESHOLD
-   used for tuning and measuring.
-#if SET_STR_BLOCK_SIZE >= SET_STR_THRESHOLD
-These values are silly.
-The sub-quadratic code would recurse to itself.
-#endif
-*/
 
 mp_size_t
 mpn_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, int base)
 {
-  mp_size_t size;
-  mp_limb_t big_base;
-  int chars_per_limb;
-  mp_limb_t res_digit;
-
-  ASSERT (base >= 2);
-  ASSERT (base < numberof (__mp_bases));
-  ASSERT (str_len >= 1);
-
-  big_base = __mp_bases[base].big_base;
-  chars_per_limb = __mp_bases[base].chars_per_limb;
-
-  size = 0;
-
   if (POW2_P (base))
     {
       /* The base is a power of 2.  Read the input string from least to most
@@ -75,8 +79,11 @@ mpn_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, int base)
 
       const unsigned char *s;
       int next_bitpos;
-      int bits_per_indigit = big_base;
+      mp_limb_t res_digit;
+      mp_size_t size;
+      int bits_per_indigit = __mp_bases[base].big_base;
 
+      size = 0;
       res_digit = 0;
       next_bitpos = 0;
 
@@ -98,251 +105,246 @@ mpn_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, int base)
 	rp[size++] = res_digit;
       return size;
     }
+
+  if (BELOW_THRESHOLD (str_len, SET_STR_PRECOMPUTE_THRESHOLD))
+    return mpn_bc_set_str (rp, str, str_len, base);
   else
     {
-      /* General case.  The base is not a power of 2.  */
+      mp_ptr powtab_mem, tp;
+      powers_t powtab[GMP_LIMB_BITS];
+      int chars_per_limb;
+      mp_size_t size;
+      mp_size_t un;
+      TMP_DECL;
 
-      if (str_len < SET_STR_THRESHOLD)
-	{
-	  size_t i;
-	  int j;
-	  mp_limb_t cy_limb;
+      TMP_MARK;
 
-	  for (i = chars_per_limb; i < str_len; i += chars_per_limb)
-	    {
-	      res_digit = *str++;
-	      if (base == 10)
-		{ /* This is a common case.
-		     Help the compiler to avoid multiplication.  */
-		  for (j = MP_BASES_CHARS_PER_LIMB_10 - 1; j != 0; j--)
-		    res_digit = res_digit * 10 + *str++;
-		}
-	      else
-		{
-		  for (j = chars_per_limb - 1; j != 0; j--)
-		    res_digit = res_digit * base + *str++;
-		}
-
-	      if (size == 0)
-		{
-		  if (res_digit != 0)
-		    {
-		      rp[0] = res_digit;
-		      size = 1;
-		    }
-		}
-	      else
-		{
-#if HAVE_NATIVE_mpn_mul_1c
-		  cy_limb = mpn_mul_1c (rp, rp, size, big_base, res_digit);
-#else
-		  cy_limb = mpn_mul_1 (rp, rp, size, big_base);
-		  cy_limb += mpn_add_1 (rp, rp, size, res_digit);
-#endif
-		  if (cy_limb != 0)
-		    rp[size++] = cy_limb;
-		}
-	    }
+      chars_per_limb = __mp_bases[base].chars_per_limb;
 
-	  big_base = base;
-	  res_digit = *str++;
-	  if (base == 10)
-	    { /* This is a common case.
-		 Help the compiler to avoid multiplication.  */
-	      for (j = str_len - (i - MP_BASES_CHARS_PER_LIMB_10) - 1; j > 0; j--)
-		{
-		  res_digit = res_digit * 10 + *str++;
-		  big_base *= 10;
-		}
-	    }
-	  else
-	    {
-	      for (j = str_len - (i - chars_per_limb) - 1; j > 0; j--)
-		{
-		  res_digit = res_digit * base + *str++;
-		  big_base *= base;
-		}
-	    }
+      un = str_len / chars_per_limb + 1;
 
-	  if (size == 0)
-	    {
-	      if (res_digit != 0)
-		{
-		  rp[0] = res_digit;
-		  size = 1;
-		}
-	    }
-	  else
-	    {
-#if HAVE_NATIVE_mpn_mul_1c
-	      cy_limb = mpn_mul_1c (rp, rp, size, big_base, res_digit);
+      /* Allocate one large block for the powers of big_base.  */
+      powtab_mem = TMP_BALLOC_LIMBS (mpn_dc_set_str_powtab_alloc (un));
+
+      mpn_set_str_compute_powtab (powtab, powtab_mem, un, base);
+
+      tp = TMP_BALLOC_LIMBS (mpn_dc_set_str_itch (un));
+      size = mpn_dc_set_str (rp, str, str_len, powtab, tp);
+
+      TMP_FREE;
+      return size;
+    }
+}
+
+void
+mpn_set_str_compute_powtab (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, int base)
+{
+  mp_ptr powtab_mem_ptr;
+  long i, pi;
+  mp_size_t n;
+  mp_ptr p, t;
+  unsigned normalization_steps;
+  mp_limb_t big_base, big_base_inverted;
+  int chars_per_limb;
+  size_t digits_in_base;
+
+  powtab_mem_ptr = powtab_mem;
+
+  chars_per_limb = __mp_bases[base].chars_per_limb;
+  big_base = __mp_bases[base].big_base;
+  big_base_inverted = __mp_bases[base].big_base_inverted;
+  count_leading_zeros (normalization_steps, big_base);
+
+  p = powtab_mem_ptr;
+  powtab_mem_ptr += 1;
+
+  digits_in_base = chars_per_limb;
+
+  p[0] = big_base;
+  n = 1;
+
+  count_leading_zeros (i, un - 1);
+  i = GMP_LIMB_BITS - 1 - i;
+
+  powtab[i].p = p;
+  powtab[i].n = n;
+  powtab[i].digits_in_base = digits_in_base;
+  powtab[i].base = base;
+
+  for (pi = i - 1; pi >= 0; pi--)
+    {
+      t = powtab_mem_ptr;
+      powtab_mem_ptr += 2 * n;
+
+      ASSERT_ALWAYS (powtab_mem_ptr < powtab_mem + mpn_dc_set_str_powtab_alloc (un));
+
+      mpn_sqr_n (t, p, n);
+      n = 2 * n - 1; n += t[n] != 0;
+      digits_in_base *= 2;
+#if 1
+      if ((((un - 1) >> pi) & 2) == 0)
+	{
+	  mpn_divexact_1 (t, t, n, big_base);
+	  n -= t[n - 1] == 0;
+	  digits_in_base -= chars_per_limb;
+	}
 #else
-	      cy_limb = mpn_mul_1 (rp, rp, size, big_base);
-	      cy_limb += mpn_add_1 (rp, rp, size, res_digit);
-#endif
-	      if (cy_limb != 0)
-		rp[size++] = cy_limb;
-	    }
-	  return size;
+      if (CLEVER_CONDITION_1 ())
+	{
+	  // perform adjustment operation of previous
+	  cy = mpn_mul_1 (p, p, n, big_base);
 	}
-      else
+      if (CLEVER_CONDITION_2 ())
 	{
-	  /* Sub-quadratic code.  */
-
-	  mp_ptr dp;
-	  mp_size_t dsize;
-	  mp_ptr xp, tp;
-	  mp_size_t step;
-	  mp_size_t i;
-	  size_t alloc;
-	  mp_size_t n;
-	  mp_ptr pow_mem;
-
-	  alloc = (str_len + chars_per_limb - 1) / chars_per_limb;
-	  alloc = 2 * alloc;
-	  dp = __GMP_ALLOCATE_FUNC_LIMBS (alloc);
-
-#if SET_STR_BLOCK_SIZE == 1
-	  dsize = convert_blocks (dp, str, str_len, base);
-#else
-	  {
-	    const unsigned char *s;
-	    mp_ptr ddp = dp;
-
-	    s = str + str_len;
-	    while (s - str >  SET_STR_BLOCK_SIZE * chars_per_limb)
-	      {
-		s -= SET_STR_BLOCK_SIZE * chars_per_limb;
-		mpn_set_str (ddp, s, SET_STR_BLOCK_SIZE * chars_per_limb, base);
-		ddp += SET_STR_BLOCK_SIZE;
-	      }
-	    ddp += mpn_set_str (ddp, str, s - str, base);
-	    dsize = ddp - dp;
-	  }
+	  // perform adjustment operation of new
+	  cy = mpn_mul_1 (t, t, n, big_base);
+	}
 #endif
+      p = t;
+      powtab[pi].p = p;
+      powtab[pi].n = n;
+      powtab[pi].digits_in_base = digits_in_base;
+      powtab[pi].base = base;
+    }
+}
 
-	  /* Allocate space for powers of big_base.  Could trim this in two
-	     ways:
-	     1. Only really need 2^ceil(log2(dsize)) bits for the largest
-		power.
-	     2. Only the variable to get the largest power needs that much
-		memory.  The other variable needs half as much.  We need just
-		figure out which of xp and tp will hold the last one.
-	     Net space savings would be in the range 1/4 to 5/8 of current
-	     allocation, depending on how close dsize is to the next greater
-	     power of 2.  */
-	  pow_mem = __GMP_ALLOCATE_FUNC_LIMBS (2 * alloc);
-	  xp = pow_mem;
-	  tp = pow_mem + alloc;
-
-	  xp[0] = big_base;
-	  n = 1;
-	  step = 1;
-#if SET_STR_BLOCK_SIZE != 1
-	  for (i = SET_STR_BLOCK_SIZE; i > 1; i >>= 1)
-	    {
-	      mpn_sqr_n (tp, xp, n);
-	      n = 2 * n;
-	      n -= tp[n - 1] == 0;
+mp_size_t
+mpn_dc_set_str (mp_ptr rp, const unsigned char *str, size_t str_len,
+		const powers_t *powtab, mp_ptr tp)
+{
+  size_t len_lo, len_hi;
+  mp_limb_t cy;
+  mp_size_t ln, hn, n;
 
-	      step = 2 * step;
-	      MP_PTR_SWAP (tp, xp);
-	    }
-#endif
+  len_lo = powtab->digits_in_base;
+  len_hi = str_len - len_lo;
 
-	  /* Multiply every second limb block, each `step' limbs large by the
-	     base power currently in xp[], then add this to the adjacent block.
-	     We thereby convert from dsize blocks in base big_base, to dsize/2
-	     blocks in base big_base^2, then to dsize/4 blocks in base
-	     big_base^4, etc, etc.  */
+  if (str_len <= len_lo)
+    {
+      if (BELOW_THRESHOLD (str_len, SET_STR_DC_THRESHOLD))
+	return mpn_bc_set_str (rp, str, str_len, powtab->base);
+      else
+	return mpn_dc_set_str (rp, str, str_len, powtab + 1, tp);
+    }
 
-	  if (step < dsize)
-	    {
-	      for (;;)
-		{
-		  for (i = 0; i < dsize - step; i += 2 * step)
-		    {
-		      mp_ptr bp = dp + i;
-		      mp_size_t m = dsize - i - step;
-		      mp_size_t hi;
-		      if (n >= m)
-			{
-			  mpn_mul (tp, xp, n, bp + step, m);
-			  mpn_add (bp, tp, n + m, bp, n);
-			  hi = i + n + m;
-			  dsize = hi;
-			  dsize -= dp[dsize - 1] == 0;
-			}
-		      else
-			{
-			  mpn_mul_n (tp, xp, bp + step, n);
-			  mpn_add (bp, tp, n + n, bp, n);
-			}
-		    }
-
-		  step = 2 * step;
-		  if (! (step < dsize))
-		    break;
-
-		  mpn_sqr_n (tp, xp, n);
-		  n = 2 * n;
-		  n -= tp[n - 1] == 0;
-		  MP_PTR_SWAP (tp, xp);
-		}
-	    }
+  ASSERT_ALWAYS (len_lo >= len_hi);
 
-	  MPN_NORMALIZE (dp, dsize);
-	  MPN_COPY (rp, dp, dsize);
-	  __GMP_FREE_FUNC_LIMBS (pow_mem, 2 * alloc);
-	  __GMP_FREE_FUNC_LIMBS (dp, alloc);
-	  return dsize;
-	}
-    }
+  if (BELOW_THRESHOLD (len_hi, SET_STR_DC_THRESHOLD))
+    hn = mpn_bc_set_str (tp, str, len_hi, powtab->base);
+  else
+    hn = mpn_dc_set_str (tp, str, len_hi, powtab + 1, rp);
+
+  if (hn == 0)
+    MPN_ZERO (rp, powtab->n);
+  else
+    mpn_mul (rp, powtab->p, powtab->n, tp, hn);
+
+  str = str + str_len - len_lo;
+  if (BELOW_THRESHOLD (len_lo, SET_STR_DC_THRESHOLD))
+    ln = mpn_bc_set_str (tp, str, len_lo, powtab->base);
+  else
+    ln = mpn_dc_set_str (tp, str, len_lo, powtab + 1, tp + powtab->n + 1);
+
+  cy = mpn_add_n (rp, rp, tp, ln);
+  mpn_incr_u (rp + ln, cy);
+  n = hn + powtab->n;
+  return n - (rp[n - 1] == 0);
 }
 
-static mp_size_t
-convert_blocks (mp_ptr dp, const unsigned char *str, size_t str_len, int base)
+mp_size_t
+mpn_bc_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, int base)
 {
+  mp_size_t size;
+  size_t i;
+  long j;
+  mp_limb_t cy_limb;
+
+  mp_limb_t big_base;
   int chars_per_limb;
-  mp_size_t i;
-  int j;
-  int ds;
-  mp_size_t dsize;
   mp_limb_t res_digit;
 
-  chars_per_limb = __mp_bases[base].chars_per_limb;
+  ASSERT (base >= 2);
+  ASSERT (base < numberof (__mp_bases));
+  ASSERT (str_len >= 1);
 
-  dsize = str_len / chars_per_limb;
-  ds = str_len % chars_per_limb;
+  big_base = __mp_bases[base].big_base;
+  chars_per_limb = __mp_bases[base].chars_per_limb;
 
-  if (ds != 0)
+  size = 0;
+  for (i = chars_per_limb; i < str_len; i += chars_per_limb)
     {
       res_digit = *str++;
-      for (j = ds - 1; j != 0; j--)
-	res_digit = res_digit * base + *str++;
-      dp[dsize] = res_digit;
+      if (base == 10)
+	{ /* This is a common case.
+	     Help the compiler to avoid multiplication.  */
+	  for (j = MP_BASES_CHARS_PER_LIMB_10 - 1; j != 0; j--)
+	    res_digit = res_digit * 10 + *str++;
+	}
+      else
+	{
+	  for (j = chars_per_limb - 1; j != 0; j--)
+	    res_digit = res_digit * base + *str++;
+	}
+
+      if (size == 0)
+	{
+	  if (res_digit != 0)
+	    {
+	      rp[0] = res_digit;
+	      size = 1;
+	    }
+	}
+      else
+	{
+#if HAVE_NATIVE_mpn_mul_1c
+	  cy_limb = mpn_mul_1c (rp, rp, size, big_base, res_digit);
+#else
+	  cy_limb = mpn_mul_1 (rp, rp, size, big_base);
+	  cy_limb += mpn_add_1 (rp, rp, size, res_digit);
+#endif
+	  if (cy_limb != 0)
+	    rp[size++] = cy_limb;
+	}
     }
 
+  big_base = base;
+  res_digit = *str++;
   if (base == 10)
-    {
-      for (i = dsize - 1; i >= 0; i--)
+    { /* This is a common case.
+	 Help the compiler to avoid multiplication.  */
+      for (j = str_len - (i - MP_BASES_CHARS_PER_LIMB_10) - 1; j > 0; j--)
 	{
-	  res_digit = *str++;
-	  for (j = MP_BASES_CHARS_PER_LIMB_10 - 1; j != 0; j--)
-	    res_digit = res_digit * 10 + *str++;
-	  dp[i] = res_digit;
+	  res_digit = res_digit * 10 + *str++;
+	  big_base *= 10;
 	}
     }
   else
     {
-      for (i = dsize - 1; i >= 0; i--)
+      for (j = str_len - (i - chars_per_limb) - 1; j > 0; j--)
 	{
-	  res_digit = *str++;
-	  for (j = chars_per_limb - 1; j != 0; j--)
-	    res_digit = res_digit * base + *str++;
-	  dp[i] = res_digit;
+	  res_digit = res_digit * base + *str++;
+	  big_base *= base;
 	}
     }
 
-  return dsize + (ds != 0);
+  if (size == 0)
+    {
+      if (res_digit != 0)
+	{
+	  rp[0] = res_digit;
+	  size = 1;
+	}
+    }
+  else
+    {
+#if HAVE_NATIVE_mpn_mul_1c
+      cy_limb = mpn_mul_1c (rp, rp, size, big_base, res_digit);
+#else
+      cy_limb = mpn_mul_1 (rp, rp, size, big_base);
+      cy_limb += mpn_add_1 (rp, rp, size, res_digit);
+#endif
+      if (cy_limb != 0)
+	rp[size++] = cy_limb;
+    }
+  return size;
 }
author	tege <tege@gmplib.org>	2006-10-25 22:06:00 +0200
committer	tege <tege@gmplib.org>	2006-10-25 22:06:00 +0200
commit	243ce6ef556df8b35e58d6661eb630ed628da203 (patch)
tree	5c0ce14fc0c514049d2d457613e318b4d12dd156
parent	af20c3e4f5e89a61827be795e903b54be3a8a034 (diff)
download	gmp-243ce6ef556df8b35e58d6661eb630ed628da203.tar.gz