diff options
-rw-r--r-- | mpn/generic/get_str.c | 225 | ||||
-rw-r--r-- | mpn/generic/set_str.c | 506 |
2 files changed, 391 insertions, 340 deletions
diff --git a/mpn/generic/get_str.c b/mpn/generic/get_str.c index 8ea256ac3..c52e2fffc 100644 --- a/mpn/generic/get_str.c +++ b/mpn/generic/get_str.c @@ -1,32 +1,32 @@ /* mpn_get_str -- Convert a MSIZE long limb vector pointed to by MPTR to a printable string in STR in base BASE. -Copyright 1991, 1992, 1993, 1994, 1996, 2000, 2001, 2002 Free Software -Foundation, Inc. +Copyright 1991, 1992, 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2006 Free +Software Foundation, Inc. This file is part of the GNU MP Library. -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of the GNU Lesser General Public License as published by -the Free Software Foundation; either version 2.1 of the License, or (at your +The GNU MP Library is free software; you can redistribute it and/or modify it +under the terms of the GNU Lesser General Public License as published by the +Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -License for more details. +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +for more details. -You should have received a copy of the GNU Lesser General Public License -along with the GNU MP Library; see the file COPYING.LIB. If not, write to -the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, -MA 02110-1301, USA. */ +You should have received a copy of the GNU Lesser General Public License along +with the GNU MP Library; see the file COPYING.LIB. If not, write to the Free +Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, +USA. */ #include "gmp.h" #include "gmp-impl.h" #include "longlong.h" /* Conversion of U {up,un} to a string in base b. Internally, we convert to - base B = b^m, the largest power of b that fits a limb. Basic algorithms: + base B = b^m, the largest power of b that fits a limb. Basic algorithms: A) Divide U repeatedly by B, generating a quotient and remainder, until the quotient becomes zero. The remainders hold the converted digits. Digits @@ -37,11 +37,10 @@ MA 02110-1301, USA. */ come out from left to right. (Currently not used herein, except for in code for converting single limbs to individual digits.) - C) Compute B^1, B^2, B^4, ..., B^(2^s), for s such that B^(2^s) > sqrt(U). - Then divide U by B^(2^k), generating an integer quotient and remainder. - Recursively convert the quotient, then the remainder, using the - precomputed powers. Digits come out from left to right. (Used in - mpn_dc_get_str.) + C) Compute B^1, B^2, B^4, ..., B^s, for s such that B^s just above sqrt(U). + Then divide U by B^s, generating quotient and remainder. Recursively + convert the quotient, then the remainder, using the precomputed powers. + Digits come out from left to right. (Used in mpn_dc_get_str.) When using algorithm C, algorithm B might be suitable for basecase code, since the required b^g power will be readily accessible. @@ -135,37 +134,26 @@ MA 02110-1301, USA. */ #define GET_STR_PRECOMPUTE_THRESHOLD 30 #endif -struct powers -{ - size_t digits_in_base; - mp_ptr p; - mp_size_t n; /* mpz_struct uses int for sizes, but not mpn! */ - int base; -}; -typedef struct powers powers_t; - -/* Convert {UP,UN} to a string with a base as represented in POWTAB, and put - the string in STR. Generate LEN characters, possibly padding with zeros to - the left. If LEN is zero, generate as many characters as required. - Return a pointer immediately after the last digit of the result string. - Complexity is O(UN^2); intended for small conversions. */ +/* Convert {up,un} to a string in base base, and put the result in str. + Generate len characters, possibly padding with zeros to the left. If len is + zero, generate as many characters as required. Return a pointer immediately + after the last digit of the result string. Complexity is O(un^2); intended + for small conversions. */ static unsigned char * mpn_sb_get_str (unsigned char *str, size_t len, - mp_ptr up, mp_size_t un, - const powers_t *powtab) + mp_ptr up, mp_size_t un, int base) { mp_limb_t rl, ul; unsigned char *s; - int base; size_t l; /* Allocate memory for largest possible string, given that we only get here for operands with un < GET_STR_PRECOMPUTE_THRESHOLD and that the smallest base is 3. 7/11 is an approximation to 1/log2(3). */ #if TUNE_PROGRAM_BUILD -#define BUF_ALLOC (GET_STR_THRESHOLD_LIMIT * BITS_PER_MP_LIMB * 7 / 11) +#define BUF_ALLOC (GET_STR_THRESHOLD_LIMIT * GMP_LIMB_BITS * 7 / 11) #else -#define BUF_ALLOC (GET_STR_PRECOMPUTE_THRESHOLD * BITS_PER_MP_LIMB * 7 / 11) +#define BUF_ALLOC (GET_STR_PRECOMPUTE_THRESHOLD * GMP_LIMB_BITS * 7 / 11) #endif unsigned char buf[BUF_ALLOC]; #if TUNE_PROGRAM_BUILD @@ -174,7 +162,6 @@ mpn_sb_get_str (unsigned char *str, size_t len, mp_limb_t rp[GET_STR_PRECOMPUTE_THRESHOLD]; #endif - base = powtab->base; if (base == 10) { /* Special case code for base==10 so that the compiler has a chance to @@ -194,10 +181,10 @@ mpn_sb_get_str (unsigned char *str, size_t len, un -= rp[un] == 0; frac = (rp[0] + 1) << GMP_NAIL_BITS; s -= MP_BASES_CHARS_PER_LIMB_10; - i = MP_BASES_CHARS_PER_LIMB_10; #if HAVE_HOST_CPU_FAMILY_x86 /* The code below turns out to be a bit slower for x86 using gcc. Use plain code. */ + i = MP_BASES_CHARS_PER_LIMB_10; do { umul_ppmm (digit, frac, frac, 10); @@ -212,31 +199,28 @@ mpn_sb_get_str (unsigned char *str, size_t len, { umul_ppmm (digit, frac, frac, 10); *s++ = digit; - i--; } if (MP_BASES_NORMALIZATION_STEPS_10 <= 1) { umul_ppmm (digit, frac, frac, 10); *s++ = digit; - i--; } if (MP_BASES_NORMALIZATION_STEPS_10 <= 2) { umul_ppmm (digit, frac, frac, 10); *s++ = digit; - i--; } if (MP_BASES_NORMALIZATION_STEPS_10 <= 3) { umul_ppmm (digit, frac, frac, 10); *s++ = digit; - i--; } + i = MP_BASES_CHARS_PER_LIMB_10 - (4-MP_BASES_NORMALIZATION_STEPS_10); frac = (frac + 0xf) >> 4; do { frac *= 10; - digit = frac >> (BITS_PER_MP_LIMB - 4); + digit = frac >> (GMP_LIMB_BITS - 4); *s++ = digit; frac &= (~(mp_limb_t) 0) >> 4; } @@ -323,7 +307,7 @@ mpn_dc_get_str (unsigned char *str, size_t len, if (un < GET_STR_DC_THRESHOLD) { if (un != 0) - str = mpn_sb_get_str (str, len, up, un, powtab); + str = mpn_sb_get_str (str, len, up, un, powtab->base); else { while (len != 0) @@ -352,8 +336,12 @@ mpn_dc_get_str (unsigned char *str, size_t len, mpn_tdiv_qr (qp, rp, 0L, up, un, pwp, pwn); qn = un - pwn; qn += qp[qn] != 0; /* quotient size */ + + ASSERT (qn < pwn || (qn == pwn && mpn_cmp (qp, pwp, pwn) < 0)); + if (len != 0) len = len - powtab->digits_in_base; + str = mpn_dc_get_str (str, len, qp, qn, powtab - 1, tmp + un - pwn + 1); str = mpn_dc_get_str (str, powtab->digits_in_base, rp, pwn, powtab - 1, tmp); } @@ -362,6 +350,27 @@ mpn_dc_get_str (unsigned char *str, size_t len, } +/* + Example sequences: + + 1 1 2 3 5 9 17 (worst case for un = 33,34) + 1 1 1 2s + 2a 4s + 3a 8s + 5a 16s + 9a + 17a + + 1 1 2 4 8 16 32 (best case for un = 63,64) + 1 1 2s 4s 8s 16s 32s + + 1 1 2 4 7 14 28 (medium) + 1 1 1 3sa + 2a 7sa + 4a 17s + 28s +*/ + /* There are no leading zeros on the digits generated at str, but that's not currently a documented feature. */ @@ -371,12 +380,13 @@ mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un) mp_ptr powtab_mem, powtab_mem_ptr; mp_limb_t big_base; size_t digits_in_base; - powers_t powtab[30]; + powers_t powtab[GMP_LIMB_BITS]; int pi; mp_size_t n; mp_ptr p, t; size_t out_len; mp_ptr tmp; + TMP_DECL; /* Special case zero, as the code below doesn't handle it. */ if (un == 0) @@ -437,18 +447,16 @@ mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un) /* General case. The base is not a power of 2. */ if (un < GET_STR_PRECOMPUTE_THRESHOLD) - { - struct powers ptab[1]; - ptab[0].base = base; - return mpn_sb_get_str (str, (size_t) 0, up, un, ptab) - str; - } + return mpn_sb_get_str (str, (size_t) 0, up, un, base) - str; + + TMP_MARK; /* Allocate one large block for the powers of big_base. With the current scheme, we need to allocate twice as much as would be possible if a minimal set of powers were generated. */ -#define POWTAB_ALLOC_SIZE (2 * un + 30) -#define TMP_ALLOC_SIZE (un + 30) - powtab_mem = __GMP_ALLOCATE_FUNC_LIMBS (POWTAB_ALLOC_SIZE); +#define POWTAB_ALLOC_SIZE (un + 2 * GMP_LIMB_BITS) +#define TMP_ALLOC_SIZE (un) + powtab_mem = TMP_BALLOC_LIMBS (POWTAB_ALLOC_SIZE); powtab_mem_ptr = powtab_mem; /* Compute a table of powers: big_base^1, big_base^2, big_base^4, ..., @@ -458,43 +466,84 @@ mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un) big_base = __mp_bases[base].big_base; digits_in_base = __mp_bases[base].chars_per_limb; - powtab[0].base = base; /* FIXME: hack for getting base to mpn_sb_get_str */ - powtab[1].p = &big_base; - powtab[1].n = 1; - powtab[1].digits_in_base = digits_in_base; - powtab[1].base = base; - powtab[2].p = &big_base; - powtab[2].n = 1; - powtab[2].digits_in_base = digits_in_base; - powtab[2].base = base; - n = 1; - pi = 2; - p = &big_base; - for (;;) - { - ++pi; - t = powtab_mem_ptr; - powtab_mem_ptr += 2 * n; - mpn_sqr_n (t, p, n); - n *= 2; n -= t[n - 1] == 0; - digits_in_base *= 2; - p = t; - powtab[pi].p = p; - powtab[pi].n = n; - powtab[pi].digits_in_base = digits_in_base; - powtab[pi].base = base; - - if (2 * n > un) - break; + { + mp_size_t n_pows, xn, pn, exptab[GMP_LIMB_BITS], bexp; + mp_limb_t cy; + n_pows = 0; + xn = 1 + un*(__mp_bases[base].chars_per_bit_exactly*GMP_NUMB_BITS)/__mp_bases[base].chars_per_limb; + for (pn = xn; pn != 1; pn = (pn + 1) >> 1) + { + exptab[n_pows] = pn; + n_pows++; + } + exptab[n_pows] = 1; + + powtab[0].p = &big_base; + powtab[0].n = 1; + powtab[0].digits_in_base = digits_in_base; + powtab[0].base = base; + + powtab[1].p = powtab_mem_ptr; powtab_mem_ptr += 2; + powtab[1].p[0] = big_base; + powtab[1].n = 1; + powtab[1].digits_in_base = digits_in_base; + powtab[1].base = base; + + n = 1; + p = &big_base; + bexp = 1; + for (pi = 2; pi < n_pows; pi++) + { + t = powtab_mem_ptr; + powtab_mem_ptr += 2 * n + 2; + + ASSERT_ALWAYS (powtab_mem_ptr < powtab_mem + POWTAB_ALLOC_SIZE); + + mpn_sqr_n (t, p, n); + + digits_in_base *= 2; + n *= 2; n -= t[n - 1] == 0; + bexp *= 2; + + if (bexp + 1 < exptab[n_pows - pi]) + { + digits_in_base += __mp_bases[base].chars_per_limb; + cy = mpn_mul_1 (t, t, n, big_base); + t[n] = cy; + n += cy != 0; + bexp += 1; + } + p = t; + powtab[pi].p = p; + powtab[pi].n = n; + powtab[pi].digits_in_base = digits_in_base; + powtab[pi].base = base; + } + + for (pi = 1; pi < n_pows; pi++) + { + t = powtab[pi].p; + n = powtab[pi].n; + cy = mpn_mul_1 (t, t, n, big_base); + t[n] = cy; + n += cy != 0; + powtab[pi].n = n; + powtab[pi].digits_in_base += __mp_bases[base].chars_per_limb; + } + +#if 0 + { int i; + printf ("Computed table values for base=%d, un=%d, xn=%d:\n", base, un, xn); + for (i = 0; i < n_pows; i++) + printf ("%2d: %10ld %10ld %11ld\n", i, exptab[n_pows-i], powtab[i].n, powtab[i].digits_in_base); } - ASSERT_ALWAYS (POWTAB_ALLOC_SIZE > powtab_mem_ptr - powtab_mem); +#endif + } /* Using our precomputed powers, now in powtab[], convert our number. */ - tmp = __GMP_ALLOCATE_FUNC_LIMBS (TMP_ALLOC_SIZE); - out_len = mpn_dc_get_str (str, 0, up, un, powtab + pi, tmp) - str; - __GMP_FREE_FUNC_LIMBS (tmp, TMP_ALLOC_SIZE); - - __GMP_FREE_FUNC_LIMBS (powtab_mem, POWTAB_ALLOC_SIZE); + tmp = TMP_BALLOC_LIMBS (TMP_ALLOC_SIZE); + out_len = mpn_dc_get_str (str, 0, up, un, powtab - 1 + pi, tmp) - str; + TMP_FREE; return out_len; } diff --git a/mpn/generic/set_str.c b/mpn/generic/set_str.c index 4166bb1d7..f08965237 100644 --- a/mpn/generic/set_str.c +++ b/mpn/generic/set_str.c @@ -2,72 +2,76 @@ Convert a STR_LEN long base BASE byte string pointed to by STR to a limb vector pointed to by RES_PTR. Return the number of limbs in RES_PTR. -Copyright 1991, 1992, 1993, 1994, 1996, 2000, 2001, 2002 Free Software -Foundation, Inc. +Copyright 1991, 1992, 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2006 Free +Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify it -under the terms of the GNU Library General Public License as published by the -Free Software Foundation; either version 2 of the License, or (at your option) -any later version. +under the terms of the GNU Lesser General Public License as published by the +Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License +FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. -You should have received a copy of the GNU Library General Public License along +You should have received a copy of the GNU Lesser General Public License along with the GNU MP Library; see the file COPYING.LIB. If not, write to the Free -Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ +/* Originally written 2004, but it was left non-working. + Fixed 2006-03 at LORIA. + + TODO: + + Perhaps do not compute the highest power? + Instead, multiply twice by the 2nd highest power: + + _______ + |_______| hp + |_______| pow + _______________ + |_______________| final result + + + _______ + |_______| hp + |___| pow[-1] + ___________ + |___________| intermediate result + |___| pow[-1] + _______________ + |_______________| final result + + Generalizing that idea, perhaps we should make powtab contain successive + cubes, not squares. +*/ + #include "gmp.h" #include "gmp-impl.h" +#include "longlong.h" -static mp_size_t convert_blocks __GMP_PROTO ((mp_ptr, const unsigned char *, size_t, int)); -/* When to switch to sub-quadratic code. This counts characters/digits in - the input string, not limbs as most other *_THRESHOLD. */ -#ifndef SET_STR_THRESHOLD -#define SET_STR_THRESHOLD 4000 +#ifndef SET_STR_DC_THRESHOLD +#define SET_STR_DC_THRESHOLD 2000 #endif -/* Don't define this to anything but 1 for now. In order to make other values - work well, either the convert_blocks function should be generalized to - handle larger blocks than chars_per_limb, or the basecase code should be - broken out of the main function. Also note that this must be a power of - 2. */ -#ifndef SET_STR_BLOCK_SIZE -#define SET_STR_BLOCK_SIZE 1 /* Must be a power of 2. */ +#ifndef SET_STR_PRECOMPUTE_THRESHOLD +#define SET_STR_PRECOMPUTE_THRESHOLD 5000 #endif +mp_size_t mpn_bc_set_str (mp_ptr, const unsigned char *, size_t, int); +mp_size_t mpn_dc_set_str (mp_ptr, const unsigned char *, size_t, + const powers_t *, mp_ptr); +void mpn_set_str_compute_powtab (powers_t *, mp_ptr, mp_size_t, int); -/* This check interferes with expression based values of SET_STR_THRESHOLD - used for tuning and measuring. -#if SET_STR_BLOCK_SIZE >= SET_STR_THRESHOLD -These values are silly. -The sub-quadratic code would recurse to itself. -#endif -*/ mp_size_t mpn_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, int base) { - mp_size_t size; - mp_limb_t big_base; - int chars_per_limb; - mp_limb_t res_digit; - - ASSERT (base >= 2); - ASSERT (base < numberof (__mp_bases)); - ASSERT (str_len >= 1); - - big_base = __mp_bases[base].big_base; - chars_per_limb = __mp_bases[base].chars_per_limb; - - size = 0; - if (POW2_P (base)) { /* The base is a power of 2. Read the input string from least to most @@ -75,8 +79,11 @@ mpn_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, int base) const unsigned char *s; int next_bitpos; - int bits_per_indigit = big_base; + mp_limb_t res_digit; + mp_size_t size; + int bits_per_indigit = __mp_bases[base].big_base; + size = 0; res_digit = 0; next_bitpos = 0; @@ -98,251 +105,246 @@ mpn_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, int base) rp[size++] = res_digit; return size; } + + if (BELOW_THRESHOLD (str_len, SET_STR_PRECOMPUTE_THRESHOLD)) + return mpn_bc_set_str (rp, str, str_len, base); else { - /* General case. The base is not a power of 2. */ + mp_ptr powtab_mem, tp; + powers_t powtab[GMP_LIMB_BITS]; + int chars_per_limb; + mp_size_t size; + mp_size_t un; + TMP_DECL; - if (str_len < SET_STR_THRESHOLD) - { - size_t i; - int j; - mp_limb_t cy_limb; + TMP_MARK; - for (i = chars_per_limb; i < str_len; i += chars_per_limb) - { - res_digit = *str++; - if (base == 10) - { /* This is a common case. - Help the compiler to avoid multiplication. */ - for (j = MP_BASES_CHARS_PER_LIMB_10 - 1; j != 0; j--) - res_digit = res_digit * 10 + *str++; - } - else - { - for (j = chars_per_limb - 1; j != 0; j--) - res_digit = res_digit * base + *str++; - } - - if (size == 0) - { - if (res_digit != 0) - { - rp[0] = res_digit; - size = 1; - } - } - else - { -#if HAVE_NATIVE_mpn_mul_1c - cy_limb = mpn_mul_1c (rp, rp, size, big_base, res_digit); -#else - cy_limb = mpn_mul_1 (rp, rp, size, big_base); - cy_limb += mpn_add_1 (rp, rp, size, res_digit); -#endif - if (cy_limb != 0) - rp[size++] = cy_limb; - } - } + chars_per_limb = __mp_bases[base].chars_per_limb; - big_base = base; - res_digit = *str++; - if (base == 10) - { /* This is a common case. - Help the compiler to avoid multiplication. */ - for (j = str_len - (i - MP_BASES_CHARS_PER_LIMB_10) - 1; j > 0; j--) - { - res_digit = res_digit * 10 + *str++; - big_base *= 10; - } - } - else - { - for (j = str_len - (i - chars_per_limb) - 1; j > 0; j--) - { - res_digit = res_digit * base + *str++; - big_base *= base; - } - } + un = str_len / chars_per_limb + 1; - if (size == 0) - { - if (res_digit != 0) - { - rp[0] = res_digit; - size = 1; - } - } - else - { -#if HAVE_NATIVE_mpn_mul_1c - cy_limb = mpn_mul_1c (rp, rp, size, big_base, res_digit); + /* Allocate one large block for the powers of big_base. */ + powtab_mem = TMP_BALLOC_LIMBS (mpn_dc_set_str_powtab_alloc (un)); + + mpn_set_str_compute_powtab (powtab, powtab_mem, un, base); + + tp = TMP_BALLOC_LIMBS (mpn_dc_set_str_itch (un)); + size = mpn_dc_set_str (rp, str, str_len, powtab, tp); + + TMP_FREE; + return size; + } +} + +void +mpn_set_str_compute_powtab (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, int base) +{ + mp_ptr powtab_mem_ptr; + long i, pi; + mp_size_t n; + mp_ptr p, t; + unsigned normalization_steps; + mp_limb_t big_base, big_base_inverted; + int chars_per_limb; + size_t digits_in_base; + + powtab_mem_ptr = powtab_mem; + + chars_per_limb = __mp_bases[base].chars_per_limb; + big_base = __mp_bases[base].big_base; + big_base_inverted = __mp_bases[base].big_base_inverted; + count_leading_zeros (normalization_steps, big_base); + + p = powtab_mem_ptr; + powtab_mem_ptr += 1; + + digits_in_base = chars_per_limb; + + p[0] = big_base; + n = 1; + + count_leading_zeros (i, un - 1); + i = GMP_LIMB_BITS - 1 - i; + + powtab[i].p = p; + powtab[i].n = n; + powtab[i].digits_in_base = digits_in_base; + powtab[i].base = base; + + for (pi = i - 1; pi >= 0; pi--) + { + t = powtab_mem_ptr; + powtab_mem_ptr += 2 * n; + + ASSERT_ALWAYS (powtab_mem_ptr < powtab_mem + mpn_dc_set_str_powtab_alloc (un)); + + mpn_sqr_n (t, p, n); + n = 2 * n - 1; n += t[n] != 0; + digits_in_base *= 2; +#if 1 + if ((((un - 1) >> pi) & 2) == 0) + { + mpn_divexact_1 (t, t, n, big_base); + n -= t[n - 1] == 0; + digits_in_base -= chars_per_limb; + } #else - cy_limb = mpn_mul_1 (rp, rp, size, big_base); - cy_limb += mpn_add_1 (rp, rp, size, res_digit); -#endif - if (cy_limb != 0) - rp[size++] = cy_limb; - } - return size; + if (CLEVER_CONDITION_1 ()) + { + // perform adjustment operation of previous + cy = mpn_mul_1 (p, p, n, big_base); } - else + if (CLEVER_CONDITION_2 ()) { - /* Sub-quadratic code. */ - - mp_ptr dp; - mp_size_t dsize; - mp_ptr xp, tp; - mp_size_t step; - mp_size_t i; - size_t alloc; - mp_size_t n; - mp_ptr pow_mem; - - alloc = (str_len + chars_per_limb - 1) / chars_per_limb; - alloc = 2 * alloc; - dp = __GMP_ALLOCATE_FUNC_LIMBS (alloc); - -#if SET_STR_BLOCK_SIZE == 1 - dsize = convert_blocks (dp, str, str_len, base); -#else - { - const unsigned char *s; - mp_ptr ddp = dp; - - s = str + str_len; - while (s - str > SET_STR_BLOCK_SIZE * chars_per_limb) - { - s -= SET_STR_BLOCK_SIZE * chars_per_limb; - mpn_set_str (ddp, s, SET_STR_BLOCK_SIZE * chars_per_limb, base); - ddp += SET_STR_BLOCK_SIZE; - } - ddp += mpn_set_str (ddp, str, s - str, base); - dsize = ddp - dp; - } + // perform adjustment operation of new + cy = mpn_mul_1 (t, t, n, big_base); + } #endif + p = t; + powtab[pi].p = p; + powtab[pi].n = n; + powtab[pi].digits_in_base = digits_in_base; + powtab[pi].base = base; + } +} - /* Allocate space for powers of big_base. Could trim this in two - ways: - 1. Only really need 2^ceil(log2(dsize)) bits for the largest - power. - 2. Only the variable to get the largest power needs that much - memory. The other variable needs half as much. We need just - figure out which of xp and tp will hold the last one. - Net space savings would be in the range 1/4 to 5/8 of current - allocation, depending on how close dsize is to the next greater - power of 2. */ - pow_mem = __GMP_ALLOCATE_FUNC_LIMBS (2 * alloc); - xp = pow_mem; - tp = pow_mem + alloc; - - xp[0] = big_base; - n = 1; - step = 1; -#if SET_STR_BLOCK_SIZE != 1 - for (i = SET_STR_BLOCK_SIZE; i > 1; i >>= 1) - { - mpn_sqr_n (tp, xp, n); - n = 2 * n; - n -= tp[n - 1] == 0; +mp_size_t +mpn_dc_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, + const powers_t *powtab, mp_ptr tp) +{ + size_t len_lo, len_hi; + mp_limb_t cy; + mp_size_t ln, hn, n; - step = 2 * step; - MP_PTR_SWAP (tp, xp); - } -#endif + len_lo = powtab->digits_in_base; + len_hi = str_len - len_lo; - /* Multiply every second limb block, each `step' limbs large by the - base power currently in xp[], then add this to the adjacent block. - We thereby convert from dsize blocks in base big_base, to dsize/2 - blocks in base big_base^2, then to dsize/4 blocks in base - big_base^4, etc, etc. */ + if (str_len <= len_lo) + { + if (BELOW_THRESHOLD (str_len, SET_STR_DC_THRESHOLD)) + return mpn_bc_set_str (rp, str, str_len, powtab->base); + else + return mpn_dc_set_str (rp, str, str_len, powtab + 1, tp); + } - if (step < dsize) - { - for (;;) - { - for (i = 0; i < dsize - step; i += 2 * step) - { - mp_ptr bp = dp + i; - mp_size_t m = dsize - i - step; - mp_size_t hi; - if (n >= m) - { - mpn_mul (tp, xp, n, bp + step, m); - mpn_add (bp, tp, n + m, bp, n); - hi = i + n + m; - dsize = hi; - dsize -= dp[dsize - 1] == 0; - } - else - { - mpn_mul_n (tp, xp, bp + step, n); - mpn_add (bp, tp, n + n, bp, n); - } - } - - step = 2 * step; - if (! (step < dsize)) - break; - - mpn_sqr_n (tp, xp, n); - n = 2 * n; - n -= tp[n - 1] == 0; - MP_PTR_SWAP (tp, xp); - } - } + ASSERT_ALWAYS (len_lo >= len_hi); - MPN_NORMALIZE (dp, dsize); - MPN_COPY (rp, dp, dsize); - __GMP_FREE_FUNC_LIMBS (pow_mem, 2 * alloc); - __GMP_FREE_FUNC_LIMBS (dp, alloc); - return dsize; - } - } + if (BELOW_THRESHOLD (len_hi, SET_STR_DC_THRESHOLD)) + hn = mpn_bc_set_str (tp, str, len_hi, powtab->base); + else + hn = mpn_dc_set_str (tp, str, len_hi, powtab + 1, rp); + + if (hn == 0) + MPN_ZERO (rp, powtab->n); + else + mpn_mul (rp, powtab->p, powtab->n, tp, hn); + + str = str + str_len - len_lo; + if (BELOW_THRESHOLD (len_lo, SET_STR_DC_THRESHOLD)) + ln = mpn_bc_set_str (tp, str, len_lo, powtab->base); + else + ln = mpn_dc_set_str (tp, str, len_lo, powtab + 1, tp + powtab->n + 1); + + cy = mpn_add_n (rp, rp, tp, ln); + mpn_incr_u (rp + ln, cy); + n = hn + powtab->n; + return n - (rp[n - 1] == 0); } -static mp_size_t -convert_blocks (mp_ptr dp, const unsigned char *str, size_t str_len, int base) +mp_size_t +mpn_bc_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, int base) { + mp_size_t size; + size_t i; + long j; + mp_limb_t cy_limb; + + mp_limb_t big_base; int chars_per_limb; - mp_size_t i; - int j; - int ds; - mp_size_t dsize; mp_limb_t res_digit; - chars_per_limb = __mp_bases[base].chars_per_limb; + ASSERT (base >= 2); + ASSERT (base < numberof (__mp_bases)); + ASSERT (str_len >= 1); - dsize = str_len / chars_per_limb; - ds = str_len % chars_per_limb; + big_base = __mp_bases[base].big_base; + chars_per_limb = __mp_bases[base].chars_per_limb; - if (ds != 0) + size = 0; + for (i = chars_per_limb; i < str_len; i += chars_per_limb) { res_digit = *str++; - for (j = ds - 1; j != 0; j--) - res_digit = res_digit * base + *str++; - dp[dsize] = res_digit; + if (base == 10) + { /* This is a common case. + Help the compiler to avoid multiplication. */ + for (j = MP_BASES_CHARS_PER_LIMB_10 - 1; j != 0; j--) + res_digit = res_digit * 10 + *str++; + } + else + { + for (j = chars_per_limb - 1; j != 0; j--) + res_digit = res_digit * base + *str++; + } + + if (size == 0) + { + if (res_digit != 0) + { + rp[0] = res_digit; + size = 1; + } + } + else + { +#if HAVE_NATIVE_mpn_mul_1c + cy_limb = mpn_mul_1c (rp, rp, size, big_base, res_digit); +#else + cy_limb = mpn_mul_1 (rp, rp, size, big_base); + cy_limb += mpn_add_1 (rp, rp, size, res_digit); +#endif + if (cy_limb != 0) + rp[size++] = cy_limb; + } } + big_base = base; + res_digit = *str++; if (base == 10) - { - for (i = dsize - 1; i >= 0; i--) + { /* This is a common case. + Help the compiler to avoid multiplication. */ + for (j = str_len - (i - MP_BASES_CHARS_PER_LIMB_10) - 1; j > 0; j--) { - res_digit = *str++; - for (j = MP_BASES_CHARS_PER_LIMB_10 - 1; j != 0; j--) - res_digit = res_digit * 10 + *str++; - dp[i] = res_digit; + res_digit = res_digit * 10 + *str++; + big_base *= 10; } } else { - for (i = dsize - 1; i >= 0; i--) + for (j = str_len - (i - chars_per_limb) - 1; j > 0; j--) { - res_digit = *str++; - for (j = chars_per_limb - 1; j != 0; j--) - res_digit = res_digit * base + *str++; - dp[i] = res_digit; + res_digit = res_digit * base + *str++; + big_base *= base; } } - return dsize + (ds != 0); + if (size == 0) + { + if (res_digit != 0) + { + rp[0] = res_digit; + size = 1; + } + } + else + { +#if HAVE_NATIVE_mpn_mul_1c + cy_limb = mpn_mul_1c (rp, rp, size, big_base, res_digit); +#else + cy_limb = mpn_mul_1 (rp, rp, size, big_base); + cy_limb += mpn_add_1 (rp, rp, size, res_digit); +#endif + if (cy_limb != 0) + rp[size++] = cy_limb; + } + return size; } |