diff options
author | tege <tege@gmplib.org> | 2000-04-14 18:44:08 +0200 |
---|---|---|
committer | tege <tege@gmplib.org> | 2000-04-14 18:44:08 +0200 |
commit | 248ef3ff95df5402d4b3928f3be72b0214b7e1c6 (patch) | |
tree | 6e152f942bcf6e9855599de4148e6584cc02df95 /mpn | |
parent | 66bdebf2af8ae1c445249fc07561b0dc624ff33c (diff) | |
download | gmp-248ef3ff95df5402d4b3928f3be72b0214b7e1c6.tar.gz |
New files replacing old division code.
Diffstat (limited to 'mpn')
-rw-r--r-- | mpn/generic/bz_divrem_n.c | 203 | ||||
-rw-r--r-- | mpn/generic/sb_divrem_mn.c | 188 | ||||
-rw-r--r-- | mpn/generic/tdiv_qr.c | 391 |
3 files changed, 782 insertions, 0 deletions
diff --git a/mpn/generic/bz_divrem_n.c b/mpn/generic/bz_divrem_n.c new file mode 100644 index 000000000..cc92d7130 --- /dev/null +++ b/mpn/generic/bz_divrem_n.c @@ -0,0 +1,203 @@ +/* mpn_bz_divrem_n and auxilliary routines. + +Copyright (C) 2000 Free Software Foundation, Inc. +Contributed by Paul Zimmermann. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" + +/* +[1] Fast Recursive Division, by Christoph Burnikel and Joachim Ziegler, + Technical report MPI-I-98-1-022, october 1998. + http://www.mpi-sb.mpg.de/~ziegler/TechRep.ps.gz +*/ + +static mp_limb_t mpn_bz_div_3_halves_by_2 (); +static mp_limb_t mpn_bz_divrem_aux (); + +/* mpn_bz_divrem_n(n) calls 2*mul(n/2)+2*div(n/2), thus to be faster than + div(n) = 4*div(n/2), we need mul(n/2) to be faster than the classic way, + i.e. n/2 >= KARATSUBA_MUL_THRESHOLD */ +#ifndef BZ_THRESHOLD +#define BZ_THRESHOLD (7 * KARATSUBA_MUL_THRESHOLD) +#endif + +#if 0 +static +unused_mpn_divrem (qp, qxn, np, nn, dp, dn) + mp_ptr qp; + mp_size_t qxn; + mp_ptr np; + mp_size_t nn; + mp_srcptr dp; + mp_size_t dn; +{ + /* This might be useful: */ + if (qxn != 0) + { + mp_limb_t c; + mp_ptr tp = alloca ((nn + qxn) * BYTES_PER_MP_LIMB); + MPN_COPY (tp + qxn - nn, np, nn); + MPN_ZERO (tp, qxn); + c = mpn_divrem (qp, 0L, tp, nn + qxn, dp, dn); + /* Maybe copy proper part of tp to np? Documentation is unclear about + the returned np value when qxn != 0 */ + return c; + } +} +#endif + +/* mpn_bz_divrem_n - Implements algorithm of page 8 in [1]: divides (np,2n) + by (dp,n) and puts the quotient in (qp,n), the remainder in (np,n). + Returns most significant limb of the quotient, which is 0 or 1. + Requires that the most significant bit of the divisor is set. */ + +mp_limb_t +mpn_bz_divrem_n (qp, np, dp, n) + mp_ptr qp; + mp_ptr np; + mp_srcptr dp; + mp_size_t n; +{ + mp_limb_t qhl = 0; + if (mpn_cmp (np + n, dp, n) >= 0) + { + qhl = 1; + mpn_sub_n (np + n, np + n, dp, n); + abort (); + } + if (n % 2 != 0) + { + /* divide (2n - 2) most significant limbs from np by those (n - 1) from dp */ + if (n < BZ_THRESHOLD) + qhl += mpn_sb_divrem_mn (qp + 1, np + 2, 2 * (n - 1), dp + 1, n - 1); + else + qhl += mpn_bz_divrem_n (qp + 1, np + 2, dp + 1, n - 1); + /* now (qp + 1, n - 1) contains the quotient of (np + 2, 2n - 2) by + (dp + 1, n - 1) and (np + 2, n - 1) contains the remainder */ + if (mpn_sub_1 (np + n, np + n, 1, + mpn_submul_1 (np + 1, qp + 1, n - 1, dp[0]))) + { + /* quotient too large */ + qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, 1L); + if (mpn_add_n (np + 1, np + 1, dp, n) == 0) + { /* still too large */ + qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, 1L); + mpn_add_n (np + 1, np + 1, dp, n); /* always carry here */ + } + } + /* now divide (np, n + 1) by (dp, n) */ + qhl += mpn_add_1 (qp + 1, qp + 1, n - 1, + mpn_sb_divrem_mn (qp, np, n + 1, dp, n)); + } + else + { + mp_ptr tmp; + mp_size_t n2 = n/2; + TMP_DECL (marker); + TMP_MARK (marker); + tmp = (mp_ptr) TMP_ALLOC (n * BYTES_PER_MP_LIMB); + qhl = mpn_bz_div_3_halves_by_2 (qp + n2, np + n2, dp, n2, tmp); + ASSERT_ALWAYS (qhl == 0); + qhl = mpn_add_1 (qp + n2, qp + n2, n2, + mpn_bz_div_3_halves_by_2 (qp, np, dp, n2, tmp)); + TMP_FREE (marker); + } + return qhl; +} + +/* Like mpn_bz_divrem_n, but without memory allocation. Also + assumes mpn_cmp (np + n, dp, n) < 0 */ + +static mp_limb_t +mpn_bz_divrem_aux (qp, np, dp, n, tmp) + mp_ptr qp; + mp_ptr np; + mp_srcptr dp; + mp_size_t n; + mp_ptr tmp; +{ + mp_limb_t qhl; + + if (n % 2 != 0) + { + /* divide (2n - 2) most significant limbs from np by those (n - 1) from dp */ + qhl = mpn_bz_divrem_aux (qp + 1, np + 2, dp + 1, n - 1, tmp); + /* now (qp + 1, n - 1) contains the quotient of (np + 2, 2n - 2) by + (dp + 1, n - 1) and (np + 2, n - 1) contains the remainder */ + if (mpn_sub_1 (np + n, np + n, 1, + mpn_submul_1 (np + 1, qp + 1, n - 1, dp[0]))) + { + /* quotient too large */ + qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, 1L); + if (mpn_add_n (np + 1, np + 1, dp, n) == 0) + { /* still too large */ + qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, 1L); + mpn_add_n (np + 1, np + 1, dp, n); /* always carry here */ + } + } + /* now divide (np, n + 1) by (dp, n) */ + qhl += mpn_add_1 (qp + 1, qp + 1, n - 1, + mpn_sb_divrem_mn (qp, np, n + 1, dp, n)); + } + else + { + mp_size_t n2 = n/2; + qhl = mpn_bz_div_3_halves_by_2 (qp + n2, np + n2, dp, n2, tmp); + ASSERT_ALWAYS (qhl == 0); + qhl = mpn_add_1 (qp + n2, qp + n2, n2, + mpn_bz_div_3_halves_by_2 (qp, np, dp, n2, tmp)); + } + return qhl; +} + +/* divides (np, 3n) by (dp, 2n) and puts the quotient in (qp, n), + the remainder in (np, 2n) */ + +static mp_limb_t +mpn_bz_div_3_halves_by_2 (qp, np, dp, n, tmp) + mp_ptr qp; + mp_ptr np; + mp_srcptr dp; + mp_size_t n; + mp_ptr tmp; +{ + mp_size_t twon = n + n; + mp_limb_t qhl; + + if (n < BZ_THRESHOLD) + qhl = mpn_sb_divrem_mn (qp, np + n, twon, dp + n, n); + else + qhl = mpn_bz_divrem_aux (qp, np + n, dp + n, n, tmp); + /* q = (qp, n), c = (np + n, n) with the notations of [1] */ + mpn_mul_n (tmp, qp, dp, n); + if (qhl != 0) + mpn_add_n (tmp + n, tmp + n, dp, n); + if (mpn_sub_n (np, np, tmp, twon)) /* R = (np, 2n) */ + { + qhl -= mpn_sub_1 (qp, qp, n, 1L); + if (mpn_add_n (np, np, dp, twon) == 0) + { /* qp still too large */ + qhl -= mpn_sub_1 (qp, qp, n, 1L); + mpn_add_n (np, np, dp, twon); /* always carry here */ + } + } + return qhl; +} diff --git a/mpn/generic/sb_divrem_mn.c b/mpn/generic/sb_divrem_mn.c new file mode 100644 index 000000000..80298cd2a --- /dev/null +++ b/mpn/generic/sb_divrem_mn.c @@ -0,0 +1,188 @@ +/* mpn_divrem_classic -- Divide natural numbers, producing both remainder and + quotient. + +Copyright (C) 1993, 1994, 1995, 1996 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +/* Divide num (NP/NSIZE) by den (DP/DSIZE) and write + the NSIZE-DSIZE least significant quotient limbs at QP + and the DSIZE long remainder at NP. If QEXTRA_LIMBS is + non-zero, generate that many fraction bits and append them after the + other quotient limbs. + Return the most significant limb of the quotient, this is always 0 or 1. + + Preconditions: + 0. NSIZE >= DSIZE. + 1. The most significant bit of the divisor must be set. + 2. QP must either not overlap with the input operands at all, or + QP + DSIZE >= NP must hold true. (This means that it's + possible to put the quotient in the high part of NUM, right after the + remainder in NUM. + 3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero. + 4. DSIZE >= 2. */ + + +#define PREINVERT_VIABLE \ + (UDIV_TIME > 2 * UMUL_TIME + 6 /* && ! TARGET_REGISTER_STARVED */) + +mp_limb_t +#if __STDC__ +mpn_sb_divrem_mn (mp_ptr qp, + mp_ptr np, mp_size_t nsize, + mp_srcptr dp, mp_size_t dsize) +#else +mpn_sb_divrem_mn (qp, np, nsize, dp, dsize) + mp_ptr qp; + mp_ptr np; + mp_size_t nsize; + mp_srcptr dp; + mp_size_t dsize; +#endif +{ + mp_limb_t most_significant_q_limb = 0; + mp_size_t i; + mp_limb_t dx, d1, n0; + mp_limb_t dxinv; + int have_preinv; + + ASSERT_ALWAYS (dsize > 2); + + np += nsize - dsize; + dx = dp[dsize - 1]; + d1 = dp[dsize - 2]; + n0 = np[dsize - 1]; + + if (n0 >= dx) + { + if (n0 > dx || mpn_cmp (np, dp, dsize - 1) >= 0) + { + mpn_sub_n (np, np, dp, dsize); + // n0 = np[dsize - 1]; + most_significant_q_limb = 1; + } + } + + /* If multiplication is much faster than division, preinvert the + most significant divisor limb before entering the loop. */ + if (PREINVERT_VIABLE) + { + have_preinv = 0; + if ((UDIV_TIME - (2 * UMUL_TIME + 6)) * (nsize - dsize) > UDIV_TIME) + { + invert_limb (dxinv, dx); + have_preinv = 1; + } + } + + for (i = nsize - dsize - 1; i >= 0; i--) + { + mp_limb_t q; + mp_limb_t nx; + mp_limb_t cy_limb; + + nx = np[dsize - 1]; + np--; + + if (nx == dx) + { + /* This might over-estimate q, but it's probably not worth + the extra code here to find out. */ + q = ~(mp_limb_t) 0; + +#if 1 + cy_limb = mpn_submul_1 (np, dp, dsize, q); +#else + /* This should be faster on many machines */ + cy_limb = mpn_sub_n (np + 1, np + 1, dp, dsize); + cy = mpn_add_n (np, np, dp, dsize); + np[dsize] += cy; +#endif + + if (nx != cy_limb) + { + mpn_add_n (np, np, dp, dsize); + q--; + } + + qp[i] = q; + } + else + { + mp_limb_t rx, r1, r0, p1, p0; + + if (PREINVERT_VIABLE && have_preinv) + udiv_qrnnd_preinv (q, r1, nx, np[dsize - 1], dx, dxinv); + else + udiv_qrnnd (q, r1, nx, np[dsize - 1], dx); + umul_ppmm (p1, p0, d1, q); + + r0 = np[dsize - 2]; + rx = 0; + if (r1 < p1 || (r1 == p1 && r0 < p0)) + { + p1 -= p0 < d1; + p0 -= d1; + q--; + r1 += dx; + rx = r1 < dx; + } + + p1 += r0 < p0; /* cannot carry! */ + rx -= r1 < p1; /* may become 11..1 if q is still too large */ + r1 -= p1; + r0 -= p0; + + cy_limb = mpn_submul_1 (np, dp, dsize - 2, q); + + { + mp_limb_t cy1, cy2; + cy1 = r0 < cy_limb; + r0 -= cy_limb; + cy2 = r1 < cy1; + r1 -= cy1; + np[dsize - 1] = r1; + np[dsize - 2] = r0; + if (cy2 != rx) + { + mpn_add_n (np, np, dp, dsize); + q--; + } + } + qp[i] = q; + } + } + + /* ______ ______ ______ + |__rx__|__r1__|__r0__| partial remainder + ______ ______ + - |__p1__|__p0__| partial product to subtract + ______ ______ + - |______|cylimb| + + rx is -1, 0 or 1. If rx=1, then q is correct (it should match + carry out). If rx=-1 then q is too large. If rx=0, then q might + be too large, but it is most likely correct. + */ + + return most_significant_q_limb; +} diff --git a/mpn/generic/tdiv_qr.c b/mpn/generic/tdiv_qr.c new file mode 100644 index 000000000..2592e431c --- /dev/null +++ b/mpn/generic/tdiv_qr.c @@ -0,0 +1,391 @@ +/* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and + write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp. If + qxn is non-zero, generate that many fraction limbs and append them after the + other quotient limbs, and update the remainder accordningly. The input + operands are unaffected. + + Preconditions: + 1. The most significant limb of of the divisor must be non-zero. + 2. No argument overlap is permitted. (??? relax this ???) + 3. nn >= dn, even if qxn is non-zero. (??? relax this ???) + + The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time + complexity of multiplication. + + THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY + SAFE TO CALL THIS FUNCTION THROUGH DOCUMENTED INTERFACES. IN FACT, + IT IS ALMOST GUARANTEED THAT THIS FUNCTION WILL CHANGE INCOMPATIBLY + OR DISAPPEAR IN A FUTURE GNU MP RELEASE. + + (We're considering to: + 1. Make it return the most significant quotient limb instead of store it + 2. Change the name to mpn_tdiv_qr_mn.) + +Copyright (C) 1997, 2000 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +#include "gmp.h" +#include "gmp-impl.h" +#include "longlong.h" + +#ifndef BZ_THRESHOLD +#define BZ_THRESHOLD (7 * KARATSUBA_MUL_THRESHOLD) +#endif + +/* Extract the middle limb from ((h,,l) << cnt) */ +#define SHL(h,l,cnt) \ + ((h << cnt) | ((l >> 1) >> ((~cnt) & (BITS_PER_MP_LIMB - 1)))) + +void +#if __STDC__ +mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn, + mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn) +#else +mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn) + mp_ptr qp; + mp_ptr rp; + mp_size_t qxn; + mp_srcptr np; + mp_size_t nn; + mp_srcptr dp; + mp_size_t dn; +#endif +{ + /* FIXME: + 1. qxn + 2. pass allocated storage in additional parameter? + */ + + if (qxn != 0) + abort (); + + switch (dn) + { + case 0: + DIVIDE_BY_ZERO; + + case 1: + { + rp[0] = mpn_divmod_1 (qp, np, nn, dp[0]); + return; + } + + case 2: + { + int cnt; + mp_ptr n2p, d2p; + mp_limb_t qhl, cy; + count_leading_zeros (cnt, dp[dn - 1]); + if (cnt != 0) + { + d2p = alloca (dn * BYTES_PER_MP_LIMB); + mpn_lshift (d2p, dp, dn, cnt); + n2p = alloca ((nn + 1) * BYTES_PER_MP_LIMB); + cy = mpn_lshift (n2p, np, nn, cnt); + n2p[nn] = cy; + qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p); + if (cy == 0) + qp[nn - 2] = qhl; /* always store nn-dn+1 quotient limbs */ + } + else + { + d2p = (mp_ptr) dp; + n2p = alloca (nn * BYTES_PER_MP_LIMB); + MPN_COPY (n2p, np, nn); + qhl = mpn_divrem_2 (qp, 0L, n2p, nn, d2p); + qp[nn - 2] = qhl; /* always store nn-dn+1 quotient limbs */ + } + + if (cnt != 0) + mpn_rshift (rp, n2p, dn, cnt); + else + MPN_COPY (rp, n2p, dn); + return; + } + + default: + { + int adjust; + adjust = np[nn - 1] >= dp[dn - 1]; /* conservative tests for quotient size */ + if (nn + adjust >= 2 * dn) + { + mp_ptr n2p, d2p; + mp_limb_t cy; + int cnt; + count_leading_zeros (cnt, dp[dn - 1]); + + qp[nn - dn] = 0; /* zero high quotient limb */ + if (cnt != 0) /* normalize divisor if needed */ + { + d2p = alloca (dn * BYTES_PER_MP_LIMB); + mpn_lshift (d2p, dp, dn, cnt); + n2p = alloca ((nn + 1) * BYTES_PER_MP_LIMB); + cy = mpn_lshift (n2p, np, nn, cnt); + n2p[nn] = cy; + nn += adjust; + } + else + { + d2p = (mp_ptr) dp; + n2p = alloca ((nn + 1) * BYTES_PER_MP_LIMB); + MPN_COPY (n2p, np, nn); + n2p[nn] = 0; + nn += adjust; + } + + if (dn == 2) + mpn_divrem_2 (qp, 0L, n2p, nn, d2p); + else if (dn < BZ_THRESHOLD) + mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn); + else + { + /* Perform 2*dn / dn limb divisions as long as the limbs + in np last. */ + mp_ptr q2p = qp + nn - 2 * dn; + n2p += nn - 2 * dn; + mpn_bz_divrem_n (q2p, n2p, d2p, dn); + nn -= dn; + while (nn >= 2 * dn) + { + mp_limb_t c; + q2p -= dn; n2p -= dn; + c = mpn_bz_divrem_n (q2p, n2p, d2p, dn); + ASSERT_ALWAYS (c == 0); + nn -= dn; + } + + if (nn != dn) + { + n2p -= nn - dn; + /* In theory, we could fall out to the cute code below + since we now have exactly the situation that code + is designed to handle. We botch this badly and call + the basic mpn_sb_divrem_mn! */ + if (dn == 2) + mpn_divrem_2 (qp, 0L, n2p, nn, d2p); + else + mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn); + } + } + + + if (cnt != 0) + mpn_rshift (rp, n2p, dn, cnt); + else + MPN_COPY (rp, n2p, dn); + return; + } + + /* When we come here, the numerator/partial remainder is less + than twice the size of the denominator. */ + + { + /* Problem: + + Divide a numerator N with nn limbs by a denominator D with dn + limbs forming a quotient of nn-dn+1 limbs. When qn is small + compared to dn, conventional division algorithms perform poorly. + We want an algorithm that has an expected running time that is + dependent only on qn. It is assumed that the most significant + limb of the numerator is smaller than the most significant limb + of the denominator. + + Algorithm (very informally stated): + + 1) Divide the 2 x qn most significant limbs from the numerator + by the qn most significant limbs from the denominator. Call + the result qest. This is either the correct quotient, but + might be 1 or 2 too large. Compute the remainder from the + division. (This step is implemented by a mpn_divrem call.) + + 2) Is the most significant limb from the remainder < p, where p + is the product of the most significant limb from the quotient + and the next(d). (Next(d) denotes the next ignored limb from + the denominator.) If it is, decrement qest, and adjust the + remainder accordingly. + + 3) Is the remainder >= qest? If it is, qest is the desired + quotient. The algorithm terminates. + + 4) Subtract qest x next(d) from the remainder. If there is + borrow out, decrement qest, and adjust the remainder + accordingly. + + 5) Skip one word from the denominator (i.e., let next(d) denote + the next less significant limb. */ + + mp_size_t qn; + mp_ptr n2p, d2p; + mp_ptr tp; + mp_limb_t cy; + mp_size_t in, rn; + mp_limb_t quotient_too_large; + int cnt; + + qn = nn - dn; + qp[qn] = 0; /* zero high quotient limb */ + qn += adjust; /* qn cannot become bigger */ + + if (qn == 0) + { + MPN_COPY (rp, np, dn); + return; + } + + in = dn - qn; /* (at least partially) ignored # of limbs in ops */ + /* Normalize denominator by shifting it to the left such that its + most significant bit is set. Then shift the numerator the same + amount, to mathematically preserve quotient. */ + count_leading_zeros (cnt, dp[dn - 1]); + if (cnt != 0) + { + d2p = alloca (qn * BYTES_PER_MP_LIMB); + + mpn_lshift (d2p, dp + in, qn, cnt); + d2p[0] |= dp[in - 1] >> (BITS_PER_MP_LIMB - cnt); + + n2p = alloca ((2 * qn + 1) * BYTES_PER_MP_LIMB); + cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt); + if (adjust) + { + n2p[2 * qn] = cy; + n2p++; + } + else + { + n2p[0] |= np[nn - 2 * qn - 1] >> (BITS_PER_MP_LIMB - cnt); + } + } + else + { + d2p = (mp_ptr) dp + in; + + n2p = alloca ((2 * qn + 1) * BYTES_PER_MP_LIMB); + MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn); + if (adjust) + { + n2p[2 * qn] = 0; + n2p++; + } + } + + /* Get an approximate quotient using the extracted operands. */ + if (qn == 1) + n2p[0] = mpn_divmod_1 (qp, n2p, 2L, d2p[0]); + else if (qn == 2) + mpn_divrem_2 (qp, 0L, n2p, 4L, d2p); + else if (qn < BZ_THRESHOLD) + mpn_sb_divrem_mn (qp, n2p, qn * 2, d2p, qn); + else + mpn_bz_divrem_n (qp, n2p, d2p, qn); + + rn = qn; + /* Multiply the first ignored divisor limb by the most significant + quotient limb. If that product is > the partial remainder's + most significant limb, we know the quotient is too large. This + test quickly catches most cases where the quotient is too large; + it catches all cases where the quotient is 2 too large. */ + { + mp_limb_t dl, x; + mp_limb_t h, l; + + if (in - 2 < 0) + dl = 0; + else + dl = dp[in - 2]; + + x = SHL (dp[in - 1], dl, cnt); + umul_ppmm (h, l, x, qp[qn - 1]); + + if (n2p[qn - 1] < h) + { + mp_limb_t cy; + + mpn_decr_u (qp, (mp_limb_t) 1); + cy = mpn_add_n (n2p, n2p, d2p, qn); + if (cy) + { + /* The partial remainder is safely large. */ + n2p[qn] = cy; + ++rn; + } + } + } + + quotient_too_large = 0; + if (cnt != 0) + { + mp_limb_t cy1, cy2; + + /* Append partially used numerator limb to partial remainder. */ + cy1 = mpn_lshift (n2p, n2p, rn, BITS_PER_MP_LIMB - cnt); + n2p[0] |= np[in - 1] & (~(mp_limb_t) 0 >> cnt); + + /* Update partial remainder with partially used divisor limb. */ + cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (~(mp_limb_t) 0 >> cnt)); + if (qn != rn) + { + if (n2p[qn] < cy2) + abort (); + n2p[qn] -= cy2; + } + else + { + n2p[qn] = cy1 - cy2; + + quotient_too_large = (cy1 < cy2); + ++rn; + } + --in; + } + /* True: partial remainder now is neutral, i.e., it is not shifted up. */ + + tp = alloca (dn * BYTES_PER_MP_LIMB); + + if (in < qn) + { + if (in == 0) + { + MPN_COPY (rp, n2p, rn); + if (rn != dn) + abort (); + goto foo; + } + mpn_mul (tp, qp, qn, dp, in); + } + else + mpn_mul (tp, dp, in, qp, qn); + + cy = mpn_sub (n2p, n2p, rn, tp + in, qn); + MPN_COPY (rp + in, n2p, dn - in); + quotient_too_large |= cy; + cy = mpn_sub_n (rp, np, tp, in); + cy = mpn_sub_1 (rp + in, rp + in, rn, cy); + quotient_too_large |= cy; + foo: + if (quotient_too_large) + { + mpn_decr_u (qp, (mp_limb_t) 1); + mpn_add_n (rp, rp, dp, dn); + } + } + return; + } + } +} |