summaryrefslogtreecommitdiff
path: root/mpn/generic/set_str.c
diff options
context:
space:
mode:
authortege <tege@gmplib.org>2006-10-25 22:06:00 +0200
committertege <tege@gmplib.org>2006-10-25 22:06:00 +0200
commit243ce6ef556df8b35e58d6661eb630ed628da203 (patch)
tree5c0ce14fc0c514049d2d457613e318b4d12dd156 /mpn/generic/set_str.c
parentaf20c3e4f5e89a61827be795e903b54be3a8a034 (diff)
downloadgmp-243ce6ef556df8b35e58d6661eb630ed628da203.tar.gz
Complete rewrite (made last April).
Diffstat (limited to 'mpn/generic/set_str.c')
-rw-r--r--mpn/generic/set_str.c506
1 files changed, 254 insertions, 252 deletions
diff --git a/mpn/generic/set_str.c b/mpn/generic/set_str.c
index 4166bb1d7..f08965237 100644
--- a/mpn/generic/set_str.c
+++ b/mpn/generic/set_str.c
@@ -2,72 +2,76 @@
Convert a STR_LEN long base BASE byte string pointed to by STR to a limb
vector pointed to by RES_PTR. Return the number of limbs in RES_PTR.
-Copyright 1991, 1992, 1993, 1994, 1996, 2000, 2001, 2002 Free Software
-Foundation, Inc.
+Copyright 1991, 1992, 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2006 Free
+Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify it
-under the terms of the GNU Library General Public License as published by the
-Free Software Foundation; either version 2 of the License, or (at your option)
-any later version.
+under the terms of the GNU Lesser General Public License as published by the
+Free Software Foundation; either version 2.1 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
for more details.
-You should have received a copy of the GNU Library General Public License along
+You should have received a copy of the GNU Lesser General Public License along
with the GNU MP Library; see the file COPYING.LIB. If not, write to the Free
-Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301,
USA. */
+/* Originally written 2004, but it was left non-working.
+ Fixed 2006-03 at LORIA.
+
+ TODO:
+
+ Perhaps do not compute the highest power?
+ Instead, multiply twice by the 2nd highest power:
+
+ _______
+ |_______| hp
+ |_______| pow
+ _______________
+ |_______________| final result
+
+
+ _______
+ |_______| hp
+ |___| pow[-1]
+ ___________
+ |___________| intermediate result
+ |___| pow[-1]
+ _______________
+ |_______________| final result
+
+ Generalizing that idea, perhaps we should make powtab contain successive
+ cubes, not squares.
+*/
+
#include "gmp.h"
#include "gmp-impl.h"
+#include "longlong.h"
-static mp_size_t convert_blocks __GMP_PROTO ((mp_ptr, const unsigned char *, size_t, int));
-/* When to switch to sub-quadratic code. This counts characters/digits in
- the input string, not limbs as most other *_THRESHOLD. */
-#ifndef SET_STR_THRESHOLD
-#define SET_STR_THRESHOLD 4000
+#ifndef SET_STR_DC_THRESHOLD
+#define SET_STR_DC_THRESHOLD 2000
#endif
-/* Don't define this to anything but 1 for now. In order to make other values
- work well, either the convert_blocks function should be generalized to
- handle larger blocks than chars_per_limb, or the basecase code should be
- broken out of the main function. Also note that this must be a power of
- 2. */
-#ifndef SET_STR_BLOCK_SIZE
-#define SET_STR_BLOCK_SIZE 1 /* Must be a power of 2. */
+#ifndef SET_STR_PRECOMPUTE_THRESHOLD
+#define SET_STR_PRECOMPUTE_THRESHOLD 5000
#endif
+mp_size_t mpn_bc_set_str (mp_ptr, const unsigned char *, size_t, int);
+mp_size_t mpn_dc_set_str (mp_ptr, const unsigned char *, size_t,
+ const powers_t *, mp_ptr);
+void mpn_set_str_compute_powtab (powers_t *, mp_ptr, mp_size_t, int);
-/* This check interferes with expression based values of SET_STR_THRESHOLD
- used for tuning and measuring.
-#if SET_STR_BLOCK_SIZE >= SET_STR_THRESHOLD
-These values are silly.
-The sub-quadratic code would recurse to itself.
-#endif
-*/
mp_size_t
mpn_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, int base)
{
- mp_size_t size;
- mp_limb_t big_base;
- int chars_per_limb;
- mp_limb_t res_digit;
-
- ASSERT (base >= 2);
- ASSERT (base < numberof (__mp_bases));
- ASSERT (str_len >= 1);
-
- big_base = __mp_bases[base].big_base;
- chars_per_limb = __mp_bases[base].chars_per_limb;
-
- size = 0;
-
if (POW2_P (base))
{
/* The base is a power of 2. Read the input string from least to most
@@ -75,8 +79,11 @@ mpn_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, int base)
const unsigned char *s;
int next_bitpos;
- int bits_per_indigit = big_base;
+ mp_limb_t res_digit;
+ mp_size_t size;
+ int bits_per_indigit = __mp_bases[base].big_base;
+ size = 0;
res_digit = 0;
next_bitpos = 0;
@@ -98,251 +105,246 @@ mpn_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, int base)
rp[size++] = res_digit;
return size;
}
+
+ if (BELOW_THRESHOLD (str_len, SET_STR_PRECOMPUTE_THRESHOLD))
+ return mpn_bc_set_str (rp, str, str_len, base);
else
{
- /* General case. The base is not a power of 2. */
+ mp_ptr powtab_mem, tp;
+ powers_t powtab[GMP_LIMB_BITS];
+ int chars_per_limb;
+ mp_size_t size;
+ mp_size_t un;
+ TMP_DECL;
- if (str_len < SET_STR_THRESHOLD)
- {
- size_t i;
- int j;
- mp_limb_t cy_limb;
+ TMP_MARK;
- for (i = chars_per_limb; i < str_len; i += chars_per_limb)
- {
- res_digit = *str++;
- if (base == 10)
- { /* This is a common case.
- Help the compiler to avoid multiplication. */
- for (j = MP_BASES_CHARS_PER_LIMB_10 - 1; j != 0; j--)
- res_digit = res_digit * 10 + *str++;
- }
- else
- {
- for (j = chars_per_limb - 1; j != 0; j--)
- res_digit = res_digit * base + *str++;
- }
-
- if (size == 0)
- {
- if (res_digit != 0)
- {
- rp[0] = res_digit;
- size = 1;
- }
- }
- else
- {
-#if HAVE_NATIVE_mpn_mul_1c
- cy_limb = mpn_mul_1c (rp, rp, size, big_base, res_digit);
-#else
- cy_limb = mpn_mul_1 (rp, rp, size, big_base);
- cy_limb += mpn_add_1 (rp, rp, size, res_digit);
-#endif
- if (cy_limb != 0)
- rp[size++] = cy_limb;
- }
- }
+ chars_per_limb = __mp_bases[base].chars_per_limb;
- big_base = base;
- res_digit = *str++;
- if (base == 10)
- { /* This is a common case.
- Help the compiler to avoid multiplication. */
- for (j = str_len - (i - MP_BASES_CHARS_PER_LIMB_10) - 1; j > 0; j--)
- {
- res_digit = res_digit * 10 + *str++;
- big_base *= 10;
- }
- }
- else
- {
- for (j = str_len - (i - chars_per_limb) - 1; j > 0; j--)
- {
- res_digit = res_digit * base + *str++;
- big_base *= base;
- }
- }
+ un = str_len / chars_per_limb + 1;
- if (size == 0)
- {
- if (res_digit != 0)
- {
- rp[0] = res_digit;
- size = 1;
- }
- }
- else
- {
-#if HAVE_NATIVE_mpn_mul_1c
- cy_limb = mpn_mul_1c (rp, rp, size, big_base, res_digit);
+ /* Allocate one large block for the powers of big_base. */
+ powtab_mem = TMP_BALLOC_LIMBS (mpn_dc_set_str_powtab_alloc (un));
+
+ mpn_set_str_compute_powtab (powtab, powtab_mem, un, base);
+
+ tp = TMP_BALLOC_LIMBS (mpn_dc_set_str_itch (un));
+ size = mpn_dc_set_str (rp, str, str_len, powtab, tp);
+
+ TMP_FREE;
+ return size;
+ }
+}
+
+void
+mpn_set_str_compute_powtab (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, int base)
+{
+ mp_ptr powtab_mem_ptr;
+ long i, pi;
+ mp_size_t n;
+ mp_ptr p, t;
+ unsigned normalization_steps;
+ mp_limb_t big_base, big_base_inverted;
+ int chars_per_limb;
+ size_t digits_in_base;
+
+ powtab_mem_ptr = powtab_mem;
+
+ chars_per_limb = __mp_bases[base].chars_per_limb;
+ big_base = __mp_bases[base].big_base;
+ big_base_inverted = __mp_bases[base].big_base_inverted;
+ count_leading_zeros (normalization_steps, big_base);
+
+ p = powtab_mem_ptr;
+ powtab_mem_ptr += 1;
+
+ digits_in_base = chars_per_limb;
+
+ p[0] = big_base;
+ n = 1;
+
+ count_leading_zeros (i, un - 1);
+ i = GMP_LIMB_BITS - 1 - i;
+
+ powtab[i].p = p;
+ powtab[i].n = n;
+ powtab[i].digits_in_base = digits_in_base;
+ powtab[i].base = base;
+
+ for (pi = i - 1; pi >= 0; pi--)
+ {
+ t = powtab_mem_ptr;
+ powtab_mem_ptr += 2 * n;
+
+ ASSERT_ALWAYS (powtab_mem_ptr < powtab_mem + mpn_dc_set_str_powtab_alloc (un));
+
+ mpn_sqr_n (t, p, n);
+ n = 2 * n - 1; n += t[n] != 0;
+ digits_in_base *= 2;
+#if 1
+ if ((((un - 1) >> pi) & 2) == 0)
+ {
+ mpn_divexact_1 (t, t, n, big_base);
+ n -= t[n - 1] == 0;
+ digits_in_base -= chars_per_limb;
+ }
#else
- cy_limb = mpn_mul_1 (rp, rp, size, big_base);
- cy_limb += mpn_add_1 (rp, rp, size, res_digit);
-#endif
- if (cy_limb != 0)
- rp[size++] = cy_limb;
- }
- return size;
+ if (CLEVER_CONDITION_1 ())
+ {
+ // perform adjustment operation of previous
+ cy = mpn_mul_1 (p, p, n, big_base);
}
- else
+ if (CLEVER_CONDITION_2 ())
{
- /* Sub-quadratic code. */
-
- mp_ptr dp;
- mp_size_t dsize;
- mp_ptr xp, tp;
- mp_size_t step;
- mp_size_t i;
- size_t alloc;
- mp_size_t n;
- mp_ptr pow_mem;
-
- alloc = (str_len + chars_per_limb - 1) / chars_per_limb;
- alloc = 2 * alloc;
- dp = __GMP_ALLOCATE_FUNC_LIMBS (alloc);
-
-#if SET_STR_BLOCK_SIZE == 1
- dsize = convert_blocks (dp, str, str_len, base);
-#else
- {
- const unsigned char *s;
- mp_ptr ddp = dp;
-
- s = str + str_len;
- while (s - str > SET_STR_BLOCK_SIZE * chars_per_limb)
- {
- s -= SET_STR_BLOCK_SIZE * chars_per_limb;
- mpn_set_str (ddp, s, SET_STR_BLOCK_SIZE * chars_per_limb, base);
- ddp += SET_STR_BLOCK_SIZE;
- }
- ddp += mpn_set_str (ddp, str, s - str, base);
- dsize = ddp - dp;
- }
+ // perform adjustment operation of new
+ cy = mpn_mul_1 (t, t, n, big_base);
+ }
#endif
+ p = t;
+ powtab[pi].p = p;
+ powtab[pi].n = n;
+ powtab[pi].digits_in_base = digits_in_base;
+ powtab[pi].base = base;
+ }
+}
- /* Allocate space for powers of big_base. Could trim this in two
- ways:
- 1. Only really need 2^ceil(log2(dsize)) bits for the largest
- power.
- 2. Only the variable to get the largest power needs that much
- memory. The other variable needs half as much. We need just
- figure out which of xp and tp will hold the last one.
- Net space savings would be in the range 1/4 to 5/8 of current
- allocation, depending on how close dsize is to the next greater
- power of 2. */
- pow_mem = __GMP_ALLOCATE_FUNC_LIMBS (2 * alloc);
- xp = pow_mem;
- tp = pow_mem + alloc;
-
- xp[0] = big_base;
- n = 1;
- step = 1;
-#if SET_STR_BLOCK_SIZE != 1
- for (i = SET_STR_BLOCK_SIZE; i > 1; i >>= 1)
- {
- mpn_sqr_n (tp, xp, n);
- n = 2 * n;
- n -= tp[n - 1] == 0;
+mp_size_t
+mpn_dc_set_str (mp_ptr rp, const unsigned char *str, size_t str_len,
+ const powers_t *powtab, mp_ptr tp)
+{
+ size_t len_lo, len_hi;
+ mp_limb_t cy;
+ mp_size_t ln, hn, n;
- step = 2 * step;
- MP_PTR_SWAP (tp, xp);
- }
-#endif
+ len_lo = powtab->digits_in_base;
+ len_hi = str_len - len_lo;
- /* Multiply every second limb block, each `step' limbs large by the
- base power currently in xp[], then add this to the adjacent block.
- We thereby convert from dsize blocks in base big_base, to dsize/2
- blocks in base big_base^2, then to dsize/4 blocks in base
- big_base^4, etc, etc. */
+ if (str_len <= len_lo)
+ {
+ if (BELOW_THRESHOLD (str_len, SET_STR_DC_THRESHOLD))
+ return mpn_bc_set_str (rp, str, str_len, powtab->base);
+ else
+ return mpn_dc_set_str (rp, str, str_len, powtab + 1, tp);
+ }
- if (step < dsize)
- {
- for (;;)
- {
- for (i = 0; i < dsize - step; i += 2 * step)
- {
- mp_ptr bp = dp + i;
- mp_size_t m = dsize - i - step;
- mp_size_t hi;
- if (n >= m)
- {
- mpn_mul (tp, xp, n, bp + step, m);
- mpn_add (bp, tp, n + m, bp, n);
- hi = i + n + m;
- dsize = hi;
- dsize -= dp[dsize - 1] == 0;
- }
- else
- {
- mpn_mul_n (tp, xp, bp + step, n);
- mpn_add (bp, tp, n + n, bp, n);
- }
- }
-
- step = 2 * step;
- if (! (step < dsize))
- break;
-
- mpn_sqr_n (tp, xp, n);
- n = 2 * n;
- n -= tp[n - 1] == 0;
- MP_PTR_SWAP (tp, xp);
- }
- }
+ ASSERT_ALWAYS (len_lo >= len_hi);
- MPN_NORMALIZE (dp, dsize);
- MPN_COPY (rp, dp, dsize);
- __GMP_FREE_FUNC_LIMBS (pow_mem, 2 * alloc);
- __GMP_FREE_FUNC_LIMBS (dp, alloc);
- return dsize;
- }
- }
+ if (BELOW_THRESHOLD (len_hi, SET_STR_DC_THRESHOLD))
+ hn = mpn_bc_set_str (tp, str, len_hi, powtab->base);
+ else
+ hn = mpn_dc_set_str (tp, str, len_hi, powtab + 1, rp);
+
+ if (hn == 0)
+ MPN_ZERO (rp, powtab->n);
+ else
+ mpn_mul (rp, powtab->p, powtab->n, tp, hn);
+
+ str = str + str_len - len_lo;
+ if (BELOW_THRESHOLD (len_lo, SET_STR_DC_THRESHOLD))
+ ln = mpn_bc_set_str (tp, str, len_lo, powtab->base);
+ else
+ ln = mpn_dc_set_str (tp, str, len_lo, powtab + 1, tp + powtab->n + 1);
+
+ cy = mpn_add_n (rp, rp, tp, ln);
+ mpn_incr_u (rp + ln, cy);
+ n = hn + powtab->n;
+ return n - (rp[n - 1] == 0);
}
-static mp_size_t
-convert_blocks (mp_ptr dp, const unsigned char *str, size_t str_len, int base)
+mp_size_t
+mpn_bc_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, int base)
{
+ mp_size_t size;
+ size_t i;
+ long j;
+ mp_limb_t cy_limb;
+
+ mp_limb_t big_base;
int chars_per_limb;
- mp_size_t i;
- int j;
- int ds;
- mp_size_t dsize;
mp_limb_t res_digit;
- chars_per_limb = __mp_bases[base].chars_per_limb;
+ ASSERT (base >= 2);
+ ASSERT (base < numberof (__mp_bases));
+ ASSERT (str_len >= 1);
- dsize = str_len / chars_per_limb;
- ds = str_len % chars_per_limb;
+ big_base = __mp_bases[base].big_base;
+ chars_per_limb = __mp_bases[base].chars_per_limb;
- if (ds != 0)
+ size = 0;
+ for (i = chars_per_limb; i < str_len; i += chars_per_limb)
{
res_digit = *str++;
- for (j = ds - 1; j != 0; j--)
- res_digit = res_digit * base + *str++;
- dp[dsize] = res_digit;
+ if (base == 10)
+ { /* This is a common case.
+ Help the compiler to avoid multiplication. */
+ for (j = MP_BASES_CHARS_PER_LIMB_10 - 1; j != 0; j--)
+ res_digit = res_digit * 10 + *str++;
+ }
+ else
+ {
+ for (j = chars_per_limb - 1; j != 0; j--)
+ res_digit = res_digit * base + *str++;
+ }
+
+ if (size == 0)
+ {
+ if (res_digit != 0)
+ {
+ rp[0] = res_digit;
+ size = 1;
+ }
+ }
+ else
+ {
+#if HAVE_NATIVE_mpn_mul_1c
+ cy_limb = mpn_mul_1c (rp, rp, size, big_base, res_digit);
+#else
+ cy_limb = mpn_mul_1 (rp, rp, size, big_base);
+ cy_limb += mpn_add_1 (rp, rp, size, res_digit);
+#endif
+ if (cy_limb != 0)
+ rp[size++] = cy_limb;
+ }
}
+ big_base = base;
+ res_digit = *str++;
if (base == 10)
- {
- for (i = dsize - 1; i >= 0; i--)
+ { /* This is a common case.
+ Help the compiler to avoid multiplication. */
+ for (j = str_len - (i - MP_BASES_CHARS_PER_LIMB_10) - 1; j > 0; j--)
{
- res_digit = *str++;
- for (j = MP_BASES_CHARS_PER_LIMB_10 - 1; j != 0; j--)
- res_digit = res_digit * 10 + *str++;
- dp[i] = res_digit;
+ res_digit = res_digit * 10 + *str++;
+ big_base *= 10;
}
}
else
{
- for (i = dsize - 1; i >= 0; i--)
+ for (j = str_len - (i - chars_per_limb) - 1; j > 0; j--)
{
- res_digit = *str++;
- for (j = chars_per_limb - 1; j != 0; j--)
- res_digit = res_digit * base + *str++;
- dp[i] = res_digit;
+ res_digit = res_digit * base + *str++;
+ big_base *= base;
}
}
- return dsize + (ds != 0);
+ if (size == 0)
+ {
+ if (res_digit != 0)
+ {
+ rp[0] = res_digit;
+ size = 1;
+ }
+ }
+ else
+ {
+#if HAVE_NATIVE_mpn_mul_1c
+ cy_limb = mpn_mul_1c (rp, rp, size, big_base, res_digit);
+#else
+ cy_limb = mpn_mul_1 (rp, rp, size, big_base);
+ cy_limb += mpn_add_1 (rp, rp, size, res_digit);
+#endif
+ if (cy_limb != 0)
+ rp[size++] = cy_limb;
+ }
+ return size;
}