summaryrefslogtreecommitdiff
path: root/mpn
diff options
context:
space:
mode:
authortege <tege@gmplib.org>2000-04-14 18:44:08 +0200
committertege <tege@gmplib.org>2000-04-14 18:44:08 +0200
commit248ef3ff95df5402d4b3928f3be72b0214b7e1c6 (patch)
tree6e152f942bcf6e9855599de4148e6584cc02df95 /mpn
parent66bdebf2af8ae1c445249fc07561b0dc624ff33c (diff)
downloadgmp-248ef3ff95df5402d4b3928f3be72b0214b7e1c6.tar.gz
New files replacing old division code.
Diffstat (limited to 'mpn')
-rw-r--r--mpn/generic/bz_divrem_n.c203
-rw-r--r--mpn/generic/sb_divrem_mn.c188
-rw-r--r--mpn/generic/tdiv_qr.c391
3 files changed, 782 insertions, 0 deletions
diff --git a/mpn/generic/bz_divrem_n.c b/mpn/generic/bz_divrem_n.c
new file mode 100644
index 000000000..cc92d7130
--- /dev/null
+++ b/mpn/generic/bz_divrem_n.c
@@ -0,0 +1,203 @@
+/* mpn_bz_divrem_n and auxilliary routines.
+
+Copyright (C) 2000 Free Software Foundation, Inc.
+Contributed by Paul Zimmermann.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/*
+[1] Fast Recursive Division, by Christoph Burnikel and Joachim Ziegler,
+ Technical report MPI-I-98-1-022, october 1998.
+ http://www.mpi-sb.mpg.de/~ziegler/TechRep.ps.gz
+*/
+
+static mp_limb_t mpn_bz_div_3_halves_by_2 ();
+static mp_limb_t mpn_bz_divrem_aux ();
+
+/* mpn_bz_divrem_n(n) calls 2*mul(n/2)+2*div(n/2), thus to be faster than
+ div(n) = 4*div(n/2), we need mul(n/2) to be faster than the classic way,
+ i.e. n/2 >= KARATSUBA_MUL_THRESHOLD */
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD (7 * KARATSUBA_MUL_THRESHOLD)
+#endif
+
+#if 0
+static
+unused_mpn_divrem (qp, qxn, np, nn, dp, dn)
+ mp_ptr qp;
+ mp_size_t qxn;
+ mp_ptr np;
+ mp_size_t nn;
+ mp_srcptr dp;
+ mp_size_t dn;
+{
+ /* This might be useful: */
+ if (qxn != 0)
+ {
+ mp_limb_t c;
+ mp_ptr tp = alloca ((nn + qxn) * BYTES_PER_MP_LIMB);
+ MPN_COPY (tp + qxn - nn, np, nn);
+ MPN_ZERO (tp, qxn);
+ c = mpn_divrem (qp, 0L, tp, nn + qxn, dp, dn);
+ /* Maybe copy proper part of tp to np? Documentation is unclear about
+ the returned np value when qxn != 0 */
+ return c;
+ }
+}
+#endif
+
+/* mpn_bz_divrem_n - Implements algorithm of page 8 in [1]: divides (np,2n)
+ by (dp,n) and puts the quotient in (qp,n), the remainder in (np,n).
+ Returns most significant limb of the quotient, which is 0 or 1.
+ Requires that the most significant bit of the divisor is set. */
+
+mp_limb_t
+mpn_bz_divrem_n (qp, np, dp, n)
+ mp_ptr qp;
+ mp_ptr np;
+ mp_srcptr dp;
+ mp_size_t n;
+{
+ mp_limb_t qhl = 0;
+ if (mpn_cmp (np + n, dp, n) >= 0)
+ {
+ qhl = 1;
+ mpn_sub_n (np + n, np + n, dp, n);
+ abort ();
+ }
+ if (n % 2 != 0)
+ {
+ /* divide (2n - 2) most significant limbs from np by those (n - 1) from dp */
+ if (n < BZ_THRESHOLD)
+ qhl += mpn_sb_divrem_mn (qp + 1, np + 2, 2 * (n - 1), dp + 1, n - 1);
+ else
+ qhl += mpn_bz_divrem_n (qp + 1, np + 2, dp + 1, n - 1);
+ /* now (qp + 1, n - 1) contains the quotient of (np + 2, 2n - 2) by
+ (dp + 1, n - 1) and (np + 2, n - 1) contains the remainder */
+ if (mpn_sub_1 (np + n, np + n, 1,
+ mpn_submul_1 (np + 1, qp + 1, n - 1, dp[0])))
+ {
+ /* quotient too large */
+ qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, 1L);
+ if (mpn_add_n (np + 1, np + 1, dp, n) == 0)
+ { /* still too large */
+ qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, 1L);
+ mpn_add_n (np + 1, np + 1, dp, n); /* always carry here */
+ }
+ }
+ /* now divide (np, n + 1) by (dp, n) */
+ qhl += mpn_add_1 (qp + 1, qp + 1, n - 1,
+ mpn_sb_divrem_mn (qp, np, n + 1, dp, n));
+ }
+ else
+ {
+ mp_ptr tmp;
+ mp_size_t n2 = n/2;
+ TMP_DECL (marker);
+ TMP_MARK (marker);
+ tmp = (mp_ptr) TMP_ALLOC (n * BYTES_PER_MP_LIMB);
+ qhl = mpn_bz_div_3_halves_by_2 (qp + n2, np + n2, dp, n2, tmp);
+ ASSERT_ALWAYS (qhl == 0);
+ qhl = mpn_add_1 (qp + n2, qp + n2, n2,
+ mpn_bz_div_3_halves_by_2 (qp, np, dp, n2, tmp));
+ TMP_FREE (marker);
+ }
+ return qhl;
+}
+
+/* Like mpn_bz_divrem_n, but without memory allocation. Also
+ assumes mpn_cmp (np + n, dp, n) < 0 */
+
+static mp_limb_t
+mpn_bz_divrem_aux (qp, np, dp, n, tmp)
+ mp_ptr qp;
+ mp_ptr np;
+ mp_srcptr dp;
+ mp_size_t n;
+ mp_ptr tmp;
+{
+ mp_limb_t qhl;
+
+ if (n % 2 != 0)
+ {
+ /* divide (2n - 2) most significant limbs from np by those (n - 1) from dp */
+ qhl = mpn_bz_divrem_aux (qp + 1, np + 2, dp + 1, n - 1, tmp);
+ /* now (qp + 1, n - 1) contains the quotient of (np + 2, 2n - 2) by
+ (dp + 1, n - 1) and (np + 2, n - 1) contains the remainder */
+ if (mpn_sub_1 (np + n, np + n, 1,
+ mpn_submul_1 (np + 1, qp + 1, n - 1, dp[0])))
+ {
+ /* quotient too large */
+ qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, 1L);
+ if (mpn_add_n (np + 1, np + 1, dp, n) == 0)
+ { /* still too large */
+ qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, 1L);
+ mpn_add_n (np + 1, np + 1, dp, n); /* always carry here */
+ }
+ }
+ /* now divide (np, n + 1) by (dp, n) */
+ qhl += mpn_add_1 (qp + 1, qp + 1, n - 1,
+ mpn_sb_divrem_mn (qp, np, n + 1, dp, n));
+ }
+ else
+ {
+ mp_size_t n2 = n/2;
+ qhl = mpn_bz_div_3_halves_by_2 (qp + n2, np + n2, dp, n2, tmp);
+ ASSERT_ALWAYS (qhl == 0);
+ qhl = mpn_add_1 (qp + n2, qp + n2, n2,
+ mpn_bz_div_3_halves_by_2 (qp, np, dp, n2, tmp));
+ }
+ return qhl;
+}
+
+/* divides (np, 3n) by (dp, 2n) and puts the quotient in (qp, n),
+ the remainder in (np, 2n) */
+
+static mp_limb_t
+mpn_bz_div_3_halves_by_2 (qp, np, dp, n, tmp)
+ mp_ptr qp;
+ mp_ptr np;
+ mp_srcptr dp;
+ mp_size_t n;
+ mp_ptr tmp;
+{
+ mp_size_t twon = n + n;
+ mp_limb_t qhl;
+
+ if (n < BZ_THRESHOLD)
+ qhl = mpn_sb_divrem_mn (qp, np + n, twon, dp + n, n);
+ else
+ qhl = mpn_bz_divrem_aux (qp, np + n, dp + n, n, tmp);
+ /* q = (qp, n), c = (np + n, n) with the notations of [1] */
+ mpn_mul_n (tmp, qp, dp, n);
+ if (qhl != 0)
+ mpn_add_n (tmp + n, tmp + n, dp, n);
+ if (mpn_sub_n (np, np, tmp, twon)) /* R = (np, 2n) */
+ {
+ qhl -= mpn_sub_1 (qp, qp, n, 1L);
+ if (mpn_add_n (np, np, dp, twon) == 0)
+ { /* qp still too large */
+ qhl -= mpn_sub_1 (qp, qp, n, 1L);
+ mpn_add_n (np, np, dp, twon); /* always carry here */
+ }
+ }
+ return qhl;
+}
diff --git a/mpn/generic/sb_divrem_mn.c b/mpn/generic/sb_divrem_mn.c
new file mode 100644
index 000000000..80298cd2a
--- /dev/null
+++ b/mpn/generic/sb_divrem_mn.c
@@ -0,0 +1,188 @@
+/* mpn_divrem_classic -- Divide natural numbers, producing both remainder and
+ quotient.
+
+Copyright (C) 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Divide num (NP/NSIZE) by den (DP/DSIZE) and write
+ the NSIZE-DSIZE least significant quotient limbs at QP
+ and the DSIZE long remainder at NP. If QEXTRA_LIMBS is
+ non-zero, generate that many fraction bits and append them after the
+ other quotient limbs.
+ Return the most significant limb of the quotient, this is always 0 or 1.
+
+ Preconditions:
+ 0. NSIZE >= DSIZE.
+ 1. The most significant bit of the divisor must be set.
+ 2. QP must either not overlap with the input operands at all, or
+ QP + DSIZE >= NP must hold true. (This means that it's
+ possible to put the quotient in the high part of NUM, right after the
+ remainder in NUM.
+ 3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero.
+ 4. DSIZE >= 2. */
+
+
+#define PREINVERT_VIABLE \
+ (UDIV_TIME > 2 * UMUL_TIME + 6 /* && ! TARGET_REGISTER_STARVED */)
+
+mp_limb_t
+#if __STDC__
+mpn_sb_divrem_mn (mp_ptr qp,
+ mp_ptr np, mp_size_t nsize,
+ mp_srcptr dp, mp_size_t dsize)
+#else
+mpn_sb_divrem_mn (qp, np, nsize, dp, dsize)
+ mp_ptr qp;
+ mp_ptr np;
+ mp_size_t nsize;
+ mp_srcptr dp;
+ mp_size_t dsize;
+#endif
+{
+ mp_limb_t most_significant_q_limb = 0;
+ mp_size_t i;
+ mp_limb_t dx, d1, n0;
+ mp_limb_t dxinv;
+ int have_preinv;
+
+ ASSERT_ALWAYS (dsize > 2);
+
+ np += nsize - dsize;
+ dx = dp[dsize - 1];
+ d1 = dp[dsize - 2];
+ n0 = np[dsize - 1];
+
+ if (n0 >= dx)
+ {
+ if (n0 > dx || mpn_cmp (np, dp, dsize - 1) >= 0)
+ {
+ mpn_sub_n (np, np, dp, dsize);
+ // n0 = np[dsize - 1];
+ most_significant_q_limb = 1;
+ }
+ }
+
+ /* If multiplication is much faster than division, preinvert the
+ most significant divisor limb before entering the loop. */
+ if (PREINVERT_VIABLE)
+ {
+ have_preinv = 0;
+ if ((UDIV_TIME - (2 * UMUL_TIME + 6)) * (nsize - dsize) > UDIV_TIME)
+ {
+ invert_limb (dxinv, dx);
+ have_preinv = 1;
+ }
+ }
+
+ for (i = nsize - dsize - 1; i >= 0; i--)
+ {
+ mp_limb_t q;
+ mp_limb_t nx;
+ mp_limb_t cy_limb;
+
+ nx = np[dsize - 1];
+ np--;
+
+ if (nx == dx)
+ {
+ /* This might over-estimate q, but it's probably not worth
+ the extra code here to find out. */
+ q = ~(mp_limb_t) 0;
+
+#if 1
+ cy_limb = mpn_submul_1 (np, dp, dsize, q);
+#else
+ /* This should be faster on many machines */
+ cy_limb = mpn_sub_n (np + 1, np + 1, dp, dsize);
+ cy = mpn_add_n (np, np, dp, dsize);
+ np[dsize] += cy;
+#endif
+
+ if (nx != cy_limb)
+ {
+ mpn_add_n (np, np, dp, dsize);
+ q--;
+ }
+
+ qp[i] = q;
+ }
+ else
+ {
+ mp_limb_t rx, r1, r0, p1, p0;
+
+ if (PREINVERT_VIABLE && have_preinv)
+ udiv_qrnnd_preinv (q, r1, nx, np[dsize - 1], dx, dxinv);
+ else
+ udiv_qrnnd (q, r1, nx, np[dsize - 1], dx);
+ umul_ppmm (p1, p0, d1, q);
+
+ r0 = np[dsize - 2];
+ rx = 0;
+ if (r1 < p1 || (r1 == p1 && r0 < p0))
+ {
+ p1 -= p0 < d1;
+ p0 -= d1;
+ q--;
+ r1 += dx;
+ rx = r1 < dx;
+ }
+
+ p1 += r0 < p0; /* cannot carry! */
+ rx -= r1 < p1; /* may become 11..1 if q is still too large */
+ r1 -= p1;
+ r0 -= p0;
+
+ cy_limb = mpn_submul_1 (np, dp, dsize - 2, q);
+
+ {
+ mp_limb_t cy1, cy2;
+ cy1 = r0 < cy_limb;
+ r0 -= cy_limb;
+ cy2 = r1 < cy1;
+ r1 -= cy1;
+ np[dsize - 1] = r1;
+ np[dsize - 2] = r0;
+ if (cy2 != rx)
+ {
+ mpn_add_n (np, np, dp, dsize);
+ q--;
+ }
+ }
+ qp[i] = q;
+ }
+ }
+
+ /* ______ ______ ______
+ |__rx__|__r1__|__r0__| partial remainder
+ ______ ______
+ - |__p1__|__p0__| partial product to subtract
+ ______ ______
+ - |______|cylimb|
+
+ rx is -1, 0 or 1. If rx=1, then q is correct (it should match
+ carry out). If rx=-1 then q is too large. If rx=0, then q might
+ be too large, but it is most likely correct.
+ */
+
+ return most_significant_q_limb;
+}
diff --git a/mpn/generic/tdiv_qr.c b/mpn/generic/tdiv_qr.c
new file mode 100644
index 000000000..2592e431c
--- /dev/null
+++ b/mpn/generic/tdiv_qr.c
@@ -0,0 +1,391 @@
+/* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and
+ write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp. If
+ qxn is non-zero, generate that many fraction limbs and append them after the
+ other quotient limbs, and update the remainder accordningly. The input
+ operands are unaffected.
+
+ Preconditions:
+ 1. The most significant limb of of the divisor must be non-zero.
+ 2. No argument overlap is permitted. (??? relax this ???)
+ 3. nn >= dn, even if qxn is non-zero. (??? relax this ???)
+
+ The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time
+ complexity of multiplication.
+
+ THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO CALL THIS FUNCTION THROUGH DOCUMENTED INTERFACES. IN FACT,
+ IT IS ALMOST GUARANTEED THAT THIS FUNCTION WILL CHANGE INCOMPATIBLY
+ OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+ (We're considering to:
+ 1. Make it return the most significant quotient limb instead of store it
+ 2. Change the name to mpn_tdiv_qr_mn.)
+
+Copyright (C) 1997, 2000 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef BZ_THRESHOLD
+#define BZ_THRESHOLD (7 * KARATSUBA_MUL_THRESHOLD)
+#endif
+
+/* Extract the middle limb from ((h,,l) << cnt) */
+#define SHL(h,l,cnt) \
+ ((h << cnt) | ((l >> 1) >> ((~cnt) & (BITS_PER_MP_LIMB - 1))))
+
+void
+#if __STDC__
+mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
+ mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
+#else
+mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn)
+ mp_ptr qp;
+ mp_ptr rp;
+ mp_size_t qxn;
+ mp_srcptr np;
+ mp_size_t nn;
+ mp_srcptr dp;
+ mp_size_t dn;
+#endif
+{
+ /* FIXME:
+ 1. qxn
+ 2. pass allocated storage in additional parameter?
+ */
+
+ if (qxn != 0)
+ abort ();
+
+ switch (dn)
+ {
+ case 0:
+ DIVIDE_BY_ZERO;
+
+ case 1:
+ {
+ rp[0] = mpn_divmod_1 (qp, np, nn, dp[0]);
+ return;
+ }
+
+ case 2:
+ {
+ int cnt;
+ mp_ptr n2p, d2p;
+ mp_limb_t qhl, cy;
+ count_leading_zeros (cnt, dp[dn - 1]);
+ if (cnt != 0)
+ {
+ d2p = alloca (dn * BYTES_PER_MP_LIMB);
+ mpn_lshift (d2p, dp, dn, cnt);
+ n2p = alloca ((nn + 1) * BYTES_PER_MP_LIMB);
+ cy = mpn_lshift (n2p, np, nn, cnt);
+ n2p[nn] = cy;
+ qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p);
+ if (cy == 0)
+ qp[nn - 2] = qhl; /* always store nn-dn+1 quotient limbs */
+ }
+ else
+ {
+ d2p = (mp_ptr) dp;
+ n2p = alloca (nn * BYTES_PER_MP_LIMB);
+ MPN_COPY (n2p, np, nn);
+ qhl = mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
+ qp[nn - 2] = qhl; /* always store nn-dn+1 quotient limbs */
+ }
+
+ if (cnt != 0)
+ mpn_rshift (rp, n2p, dn, cnt);
+ else
+ MPN_COPY (rp, n2p, dn);
+ return;
+ }
+
+ default:
+ {
+ int adjust;
+ adjust = np[nn - 1] >= dp[dn - 1]; /* conservative tests for quotient size */
+ if (nn + adjust >= 2 * dn)
+ {
+ mp_ptr n2p, d2p;
+ mp_limb_t cy;
+ int cnt;
+ count_leading_zeros (cnt, dp[dn - 1]);
+
+ qp[nn - dn] = 0; /* zero high quotient limb */
+ if (cnt != 0) /* normalize divisor if needed */
+ {
+ d2p = alloca (dn * BYTES_PER_MP_LIMB);
+ mpn_lshift (d2p, dp, dn, cnt);
+ n2p = alloca ((nn + 1) * BYTES_PER_MP_LIMB);
+ cy = mpn_lshift (n2p, np, nn, cnt);
+ n2p[nn] = cy;
+ nn += adjust;
+ }
+ else
+ {
+ d2p = (mp_ptr) dp;
+ n2p = alloca ((nn + 1) * BYTES_PER_MP_LIMB);
+ MPN_COPY (n2p, np, nn);
+ n2p[nn] = 0;
+ nn += adjust;
+ }
+
+ if (dn == 2)
+ mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
+ else if (dn < BZ_THRESHOLD)
+ mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn);
+ else
+ {
+ /* Perform 2*dn / dn limb divisions as long as the limbs
+ in np last. */
+ mp_ptr q2p = qp + nn - 2 * dn;
+ n2p += nn - 2 * dn;
+ mpn_bz_divrem_n (q2p, n2p, d2p, dn);
+ nn -= dn;
+ while (nn >= 2 * dn)
+ {
+ mp_limb_t c;
+ q2p -= dn; n2p -= dn;
+ c = mpn_bz_divrem_n (q2p, n2p, d2p, dn);
+ ASSERT_ALWAYS (c == 0);
+ nn -= dn;
+ }
+
+ if (nn != dn)
+ {
+ n2p -= nn - dn;
+ /* In theory, we could fall out to the cute code below
+ since we now have exactly the situation that code
+ is designed to handle. We botch this badly and call
+ the basic mpn_sb_divrem_mn! */
+ if (dn == 2)
+ mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
+ else
+ mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn);
+ }
+ }
+
+
+ if (cnt != 0)
+ mpn_rshift (rp, n2p, dn, cnt);
+ else
+ MPN_COPY (rp, n2p, dn);
+ return;
+ }
+
+ /* When we come here, the numerator/partial remainder is less
+ than twice the size of the denominator. */
+
+ {
+ /* Problem:
+
+ Divide a numerator N with nn limbs by a denominator D with dn
+ limbs forming a quotient of nn-dn+1 limbs. When qn is small
+ compared to dn, conventional division algorithms perform poorly.
+ We want an algorithm that has an expected running time that is
+ dependent only on qn. It is assumed that the most significant
+ limb of the numerator is smaller than the most significant limb
+ of the denominator.
+
+ Algorithm (very informally stated):
+
+ 1) Divide the 2 x qn most significant limbs from the numerator
+ by the qn most significant limbs from the denominator. Call
+ the result qest. This is either the correct quotient, but
+ might be 1 or 2 too large. Compute the remainder from the
+ division. (This step is implemented by a mpn_divrem call.)
+
+ 2) Is the most significant limb from the remainder < p, where p
+ is the product of the most significant limb from the quotient
+ and the next(d). (Next(d) denotes the next ignored limb from
+ the denominator.) If it is, decrement qest, and adjust the
+ remainder accordingly.
+
+ 3) Is the remainder >= qest? If it is, qest is the desired
+ quotient. The algorithm terminates.
+
+ 4) Subtract qest x next(d) from the remainder. If there is
+ borrow out, decrement qest, and adjust the remainder
+ accordingly.
+
+ 5) Skip one word from the denominator (i.e., let next(d) denote
+ the next less significant limb. */
+
+ mp_size_t qn;
+ mp_ptr n2p, d2p;
+ mp_ptr tp;
+ mp_limb_t cy;
+ mp_size_t in, rn;
+ mp_limb_t quotient_too_large;
+ int cnt;
+
+ qn = nn - dn;
+ qp[qn] = 0; /* zero high quotient limb */
+ qn += adjust; /* qn cannot become bigger */
+
+ if (qn == 0)
+ {
+ MPN_COPY (rp, np, dn);
+ return;
+ }
+
+ in = dn - qn; /* (at least partially) ignored # of limbs in ops */
+ /* Normalize denominator by shifting it to the left such that its
+ most significant bit is set. Then shift the numerator the same
+ amount, to mathematically preserve quotient. */
+ count_leading_zeros (cnt, dp[dn - 1]);
+ if (cnt != 0)
+ {
+ d2p = alloca (qn * BYTES_PER_MP_LIMB);
+
+ mpn_lshift (d2p, dp + in, qn, cnt);
+ d2p[0] |= dp[in - 1] >> (BITS_PER_MP_LIMB - cnt);
+
+ n2p = alloca ((2 * qn + 1) * BYTES_PER_MP_LIMB);
+ cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt);
+ if (adjust)
+ {
+ n2p[2 * qn] = cy;
+ n2p++;
+ }
+ else
+ {
+ n2p[0] |= np[nn - 2 * qn - 1] >> (BITS_PER_MP_LIMB - cnt);
+ }
+ }
+ else
+ {
+ d2p = (mp_ptr) dp + in;
+
+ n2p = alloca ((2 * qn + 1) * BYTES_PER_MP_LIMB);
+ MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn);
+ if (adjust)
+ {
+ n2p[2 * qn] = 0;
+ n2p++;
+ }
+ }
+
+ /* Get an approximate quotient using the extracted operands. */
+ if (qn == 1)
+ n2p[0] = mpn_divmod_1 (qp, n2p, 2L, d2p[0]);
+ else if (qn == 2)
+ mpn_divrem_2 (qp, 0L, n2p, 4L, d2p);
+ else if (qn < BZ_THRESHOLD)
+ mpn_sb_divrem_mn (qp, n2p, qn * 2, d2p, qn);
+ else
+ mpn_bz_divrem_n (qp, n2p, d2p, qn);
+
+ rn = qn;
+ /* Multiply the first ignored divisor limb by the most significant
+ quotient limb. If that product is > the partial remainder's
+ most significant limb, we know the quotient is too large. This
+ test quickly catches most cases where the quotient is too large;
+ it catches all cases where the quotient is 2 too large. */
+ {
+ mp_limb_t dl, x;
+ mp_limb_t h, l;
+
+ if (in - 2 < 0)
+ dl = 0;
+ else
+ dl = dp[in - 2];
+
+ x = SHL (dp[in - 1], dl, cnt);
+ umul_ppmm (h, l, x, qp[qn - 1]);
+
+ if (n2p[qn - 1] < h)
+ {
+ mp_limb_t cy;
+
+ mpn_decr_u (qp, (mp_limb_t) 1);
+ cy = mpn_add_n (n2p, n2p, d2p, qn);
+ if (cy)
+ {
+ /* The partial remainder is safely large. */
+ n2p[qn] = cy;
+ ++rn;
+ }
+ }
+ }
+
+ quotient_too_large = 0;
+ if (cnt != 0)
+ {
+ mp_limb_t cy1, cy2;
+
+ /* Append partially used numerator limb to partial remainder. */
+ cy1 = mpn_lshift (n2p, n2p, rn, BITS_PER_MP_LIMB - cnt);
+ n2p[0] |= np[in - 1] & (~(mp_limb_t) 0 >> cnt);
+
+ /* Update partial remainder with partially used divisor limb. */
+ cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (~(mp_limb_t) 0 >> cnt));
+ if (qn != rn)
+ {
+ if (n2p[qn] < cy2)
+ abort ();
+ n2p[qn] -= cy2;
+ }
+ else
+ {
+ n2p[qn] = cy1 - cy2;
+
+ quotient_too_large = (cy1 < cy2);
+ ++rn;
+ }
+ --in;
+ }
+ /* True: partial remainder now is neutral, i.e., it is not shifted up. */
+
+ tp = alloca (dn * BYTES_PER_MP_LIMB);
+
+ if (in < qn)
+ {
+ if (in == 0)
+ {
+ MPN_COPY (rp, n2p, rn);
+ if (rn != dn)
+ abort ();
+ goto foo;
+ }
+ mpn_mul (tp, qp, qn, dp, in);
+ }
+ else
+ mpn_mul (tp, dp, in, qp, qn);
+
+ cy = mpn_sub (n2p, n2p, rn, tp + in, qn);
+ MPN_COPY (rp + in, n2p, dn - in);
+ quotient_too_large |= cy;
+ cy = mpn_sub_n (rp, np, tp, in);
+ cy = mpn_sub_1 (rp + in, rp + in, rn, cy);
+ quotient_too_large |= cy;
+ foo:
+ if (quotient_too_large)
+ {
+ mpn_decr_u (qp, (mp_limb_t) 1);
+ mpn_add_n (rp, rp, dp, dn);
+ }
+ }
+ return;
+ }
+ }
+}