summaryrefslogtreecommitdiff
path: root/gmp/mpn/generic
diff options
context:
space:
mode:
Diffstat (limited to 'gmp/mpn/generic')
-rw-r--r--gmp/mpn/generic/add.c25
-rw-r--r--gmp/mpn/generic/add_1.c25
-rw-r--r--gmp/mpn/generic/add_err1_n.c101
-rw-r--r--gmp/mpn/generic/add_err2_n.c117
-rw-r--r--gmp/mpn/generic/add_err3_n.c132
-rw-r--r--gmp/mpn/generic/add_n.c35
-rw-r--r--gmp/mpn/generic/addmul_1.c28
-rw-r--r--gmp/mpn/generic/addsub_n.c (renamed from gmp/mpn/generic/add_n_sub_n.c)51
-rw-r--r--gmp/mpn/generic/bdiv_dbm1c.c25
-rw-r--r--gmp/mpn/generic/bdiv_q.c77
-rw-r--r--gmp/mpn/generic/bdiv_q_1.c126
-rw-r--r--gmp/mpn/generic/bdiv_qr.c85
-rw-r--r--gmp/mpn/generic/bdivmod.c124
-rw-r--r--gmp/mpn/generic/binvert.c91
-rw-r--r--gmp/mpn/generic/broot.c196
-rw-r--r--gmp/mpn/generic/brootinv.c140
-rw-r--r--gmp/mpn/generic/bsqrt.c48
-rw-r--r--gmp/mpn/generic/bsqrtinv.c105
-rw-r--r--gmp/mpn/generic/cmp.c25
-rw-r--r--gmp/mpn/generic/cnd_add_n.c70
-rw-r--r--gmp/mpn/generic/cnd_sub_n.c70
-rw-r--r--gmp/mpn/generic/com.c45
-rw-r--r--gmp/mpn/generic/comb_tables.c48
-rw-r--r--gmp/mpn/generic/copyd.c41
-rw-r--r--gmp/mpn/generic/copyi.c43
-rw-r--r--gmp/mpn/generic/dc_bdiv_q.c137
-rw-r--r--gmp/mpn/generic/dc_bdiv_qr.c (renamed from gmp/mpn/generic/dcpi1_bdiv_qr.c)70
-rw-r--r--gmp/mpn/generic/dc_div_q.c57
-rw-r--r--gmp/mpn/generic/dc_div_qr.c203
-rw-r--r--gmp/mpn/generic/dc_divappr_q.c196
-rw-r--r--gmp/mpn/generic/dc_divrem_n.c121
-rw-r--r--gmp/mpn/generic/dcpi1_bdiv_q.c160
-rw-r--r--gmp/mpn/generic/dcpi1_div_q.c87
-rw-r--r--gmp/mpn/generic/dcpi1_div_qr.c249
-rw-r--r--gmp/mpn/generic/dcpi1_divappr_q.c257
-rw-r--r--gmp/mpn/generic/div_q.c323
-rw-r--r--gmp/mpn/generic/div_qr_1.c126
-rw-r--r--gmp/mpn/generic/div_qr_1n_pi1.c277
-rw-r--r--gmp/mpn/generic/div_qr_1n_pi2.c195
-rw-r--r--gmp/mpn/generic/div_qr_1u_pi2.c228
-rw-r--r--gmp/mpn/generic/div_qr_2.c332
-rw-r--r--gmp/mpn/generic/div_qr_2n_pi1.c85
-rw-r--r--gmp/mpn/generic/div_qr_2u_pi1.c77
-rw-r--r--gmp/mpn/generic/dive_1.c60
-rw-r--r--gmp/mpn/generic/diveby3.c27
-rw-r--r--gmp/mpn/generic/divexact.c138
-rw-r--r--gmp/mpn/generic/divis.c175
-rw-r--r--gmp/mpn/generic/divrem.c43
-rw-r--r--gmp/mpn/generic/divrem_1.c60
-rw-r--r--gmp/mpn/generic/divrem_2.c186
-rw-r--r--gmp/mpn/generic/dump.c27
-rw-r--r--gmp/mpn/generic/fib2_ui.c108
-rw-r--r--gmp/mpn/generic/gcd.c312
-rw-r--r--gmp/mpn/generic/gcd_1.c100
-rw-r--r--gmp/mpn/generic/gcd_lehmer.c160
-rw-r--r--gmp/mpn/generic/gcd_subdiv_step.c209
-rw-r--r--gmp/mpn/generic/gcdext.c168
-rw-r--r--gmp/mpn/generic/gcdext_1.c265
-rw-r--r--gmp/mpn/generic/gcdext_lehmer.c168
-rw-r--r--gmp/mpn/generic/gcdext_subdiv_step.c197
-rw-r--r--gmp/mpn/generic/get_d.c564
-rw-r--r--gmp/mpn/generic/get_str.c47
-rw-r--r--gmp/mpn/generic/gmp-mparam.h27
-rw-r--r--gmp/mpn/generic/hgcd.c632
-rw-r--r--gmp/mpn/generic/hgcd2.c62
-rw-r--r--gmp/mpn/generic/hgcd2_jacobi.c366
-rw-r--r--gmp/mpn/generic/hgcd_appr.c268
-rw-r--r--gmp/mpn/generic/hgcd_jacobi.c244
-rw-r--r--gmp/mpn/generic/hgcd_matrix.c266
-rw-r--r--gmp/mpn/generic/hgcd_reduce.c247
-rw-r--r--gmp/mpn/generic/hgcd_step.c128
-rw-r--r--gmp/mpn/generic/invert.c113
-rw-r--r--gmp/mpn/generic/invertappr.c314
-rw-r--r--gmp/mpn/generic/jacbase.c117
-rw-r--r--gmp/mpn/generic/jacobi.c295
-rw-r--r--gmp/mpn/generic/jacobi_2.c352
-rw-r--r--gmp/mpn/generic/logops_n.c78
-rw-r--r--gmp/mpn/generic/lshift.c28
-rw-r--r--gmp/mpn/generic/lshiftc.c74
-rw-r--r--gmp/mpn/generic/matrix22_mul.c258
-rw-r--r--gmp/mpn/generic/matrix22_mul1_inverse_vector.c65
-rw-r--r--gmp/mpn/generic/mod_1.c136
-rw-r--r--gmp/mpn/generic/mod_1_1.c310
-rw-r--r--gmp/mpn/generic/mod_1_2.c96
-rw-r--r--gmp/mpn/generic/mod_1_3.c123
-rw-r--r--gmp/mpn/generic/mod_1_4.c145
-rw-r--r--gmp/mpn/generic/mod_34lsub1.c27
-rw-r--r--gmp/mpn/generic/mode1o.c29
-rw-r--r--gmp/mpn/generic/mu_bdiv_q.c234
-rw-r--r--gmp/mpn/generic/mu_bdiv_qr.c290
-rw-r--r--gmp/mpn/generic/mu_div_q.c192
-rw-r--r--gmp/mpn/generic/mu_div_qr.c405
-rw-r--r--gmp/mpn/generic/mu_divappr_q.c329
-rw-r--r--gmp/mpn/generic/mul.c410
-rw-r--r--gmp/mpn/generic/mul_1.c28
-rw-r--r--gmp/mpn/generic/mul_basecase.c31
-rw-r--r--gmp/mpn/generic/mul_fft.c484
-rw-r--r--gmp/mpn/generic/mul_n.c803
-rw-r--r--gmp/mpn/generic/mullo_basecase.c52
-rw-r--r--gmp/mpn/generic/mullo_n.c256
-rw-r--r--gmp/mpn/generic/mullow_basecase.c41
-rw-r--r--gmp/mpn/generic/mullow_n.c111
-rw-r--r--gmp/mpn/generic/mulmid.c256
-rw-r--r--gmp/mpn/generic/mulmid_basecase.c83
-rw-r--r--gmp/mpn/generic/mulmid_n.c62
-rw-r--r--gmp/mpn/generic/mulmod_bnm1.c355
-rw-r--r--gmp/mpn/generic/neg.c34
-rw-r--r--gmp/mpn/generic/neg_n.c23
-rw-r--r--gmp/mpn/generic/nussbaumer_mul.c71
-rw-r--r--gmp/mpn/generic/perfpow.c417
-rw-r--r--gmp/mpn/generic/perfsqr.c61
-rw-r--r--gmp/mpn/generic/popham.c34
-rw-r--r--gmp/mpn/generic/pow_1.c48
-rw-r--r--gmp/mpn/generic/powlo.c72
-rw-r--r--gmp/mpn/generic/powm.c546
-rw-r--r--gmp/mpn/generic/powm_sec.c272
-rw-r--r--gmp/mpn/generic/pre_divrem_1.c35
-rw-r--r--gmp/mpn/generic/pre_mod_1.c34
-rw-r--r--gmp/mpn/generic/random.c25
-rw-r--r--gmp/mpn/generic/random2.c36
-rw-r--r--gmp/mpn/generic/redc_1.c40
-rw-r--r--gmp/mpn/generic/redc_2.c41
-rw-r--r--gmp/mpn/generic/redc_n.c81
-rw-r--r--gmp/mpn/generic/remove.c172
-rw-r--r--gmp/mpn/generic/rootrem.c152
-rw-r--r--gmp/mpn/generic/rshift.c28
-rw-r--r--gmp/mpn/generic/sb_bdiv_q.c91
-rw-r--r--gmp/mpn/generic/sb_bdiv_qr.c (renamed from gmp/mpn/generic/sbpi1_bdiv_qr.c)51
-rw-r--r--gmp/mpn/generic/sb_div_q.c240
-rw-r--r--gmp/mpn/generic/sb_div_qr.c91
-rw-r--r--gmp/mpn/generic/sb_divappr_q.c136
-rw-r--r--gmp/mpn/generic/sb_divrem_mn.c205
-rw-r--r--gmp/mpn/generic/sbpi1_bdiv_q.c100
-rw-r--r--gmp/mpn/generic/sbpi1_div_q.c303
-rw-r--r--gmp/mpn/generic/sbpi1_div_qr.c110
-rw-r--r--gmp/mpn/generic/sbpi1_divappr_q.c199
-rw-r--r--gmp/mpn/generic/scan0.c30
-rw-r--r--gmp/mpn/generic/scan1.c30
-rw-r--r--gmp/mpn/generic/sec_aors_1.c60
-rw-r--r--gmp/mpn/generic/sec_div.c133
-rw-r--r--gmp/mpn/generic/sec_invert.c195
-rw-r--r--gmp/mpn/generic/sec_mul.c49
-rw-r--r--gmp/mpn/generic/sec_pi1_div.c173
-rw-r--r--gmp/mpn/generic/sec_powm.c438
-rw-r--r--gmp/mpn/generic/sec_sqr.c48
-rw-r--r--gmp/mpn/generic/sec_tabselect.c55
-rw-r--r--gmp/mpn/generic/set_str.c54
-rw-r--r--gmp/mpn/generic/sizeinbase.c54
-rw-r--r--gmp/mpn/generic/sqr.c99
-rw-r--r--gmp/mpn/generic/sqr_basecase.c95
-rw-r--r--gmp/mpn/generic/sqrmod_bnm1.c313
-rw-r--r--gmp/mpn/generic/sqrtrem.c141
-rw-r--r--gmp/mpn/generic/sub.c25
-rw-r--r--gmp/mpn/generic/sub_1.c25
-rw-r--r--gmp/mpn/generic/sub_err1_n.c101
-rw-r--r--gmp/mpn/generic/sub_err2_n.c117
-rw-r--r--gmp/mpn/generic/sub_err3_n.c132
-rw-r--r--gmp/mpn/generic/sub_n.c35
-rw-r--r--gmp/mpn/generic/subcnd_n.c85
-rw-r--r--gmp/mpn/generic/submul_1.c28
-rw-r--r--gmp/mpn/generic/tdiv_qr.c144
-rw-r--r--gmp/mpn/generic/toom22_mul.c53
-rw-r--r--gmp/mpn/generic/toom2_sqr.c49
-rw-r--r--gmp/mpn/generic/toom32_mul.c316
-rw-r--r--gmp/mpn/generic/toom33_mul.c143
-rw-r--r--gmp/mpn/generic/toom3_sqr.c127
-rw-r--r--gmp/mpn/generic/toom42_mul.c97
-rw-r--r--gmp/mpn/generic/toom42_mulmid.c238
-rw-r--r--gmp/mpn/generic/toom43_mul.c234
-rw-r--r--gmp/mpn/generic/toom44_mul.c348
-rw-r--r--gmp/mpn/generic/toom4_sqr.c213
-rw-r--r--gmp/mpn/generic/toom52_mul.c257
-rw-r--r--gmp/mpn/generic/toom53_mul.c286
-rw-r--r--gmp/mpn/generic/toom54_mul.c143
-rw-r--r--gmp/mpn/generic/toom62_mul.c291
-rw-r--r--gmp/mpn/generic/toom63_mul.c232
-rw-r--r--gmp/mpn/generic/toom6_sqr.c182
-rw-r--r--gmp/mpn/generic/toom6h_mul.c263
-rw-r--r--gmp/mpn/generic/toom8_sqr.c226
-rw-r--r--gmp/mpn/generic/toom8h_mul.c306
-rw-r--r--gmp/mpn/generic/toom_couple_handling.c81
-rw-r--r--gmp/mpn/generic/toom_eval_dgr3_pm1.c73
-rw-r--r--gmp/mpn/generic/toom_eval_dgr3_pm2.c98
-rw-r--r--gmp/mpn/generic/toom_eval_pm1.c90
-rw-r--r--gmp/mpn/generic/toom_eval_pm2.c131
-rw-r--r--gmp/mpn/generic/toom_eval_pm2exp.c128
-rw-r--r--gmp/mpn/generic/toom_eval_pm2rexp.c102
-rw-r--r--gmp/mpn/generic/toom_interpolate_12pts.c361
-rw-r--r--gmp/mpn/generic/toom_interpolate_16pts.c527
-rw-r--r--gmp/mpn/generic/toom_interpolate_5pts.c149
-rw-r--r--gmp/mpn/generic/toom_interpolate_6pts.c240
-rw-r--r--gmp/mpn/generic/toom_interpolate_7pts.c239
-rw-r--r--gmp/mpn/generic/toom_interpolate_8pts.c212
-rw-r--r--gmp/mpn/generic/trialdiv.c132
-rw-r--r--gmp/mpn/generic/udiv_w_sdiv.c42
-rw-r--r--gmp/mpn/generic/zero.c42
196 files changed, 8327 insertions, 22020 deletions
diff --git a/gmp/mpn/generic/add.c b/gmp/mpn/generic/add.c
index 559f26133c..8065ccf3c2 100644
--- a/gmp/mpn/generic/add.c
+++ b/gmp/mpn/generic/add.c
@@ -5,28 +5,17 @@ Copyright 2001 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#define __GMP_FORCE_mpn_add 1
diff --git a/gmp/mpn/generic/add_1.c b/gmp/mpn/generic/add_1.c
index ca2d866852..2d3fa76c2e 100644
--- a/gmp/mpn/generic/add_1.c
+++ b/gmp/mpn/generic/add_1.c
@@ -5,28 +5,17 @@ Copyright 2001 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#define __GMP_FORCE_mpn_add_1 1
diff --git a/gmp/mpn/generic/add_err1_n.c b/gmp/mpn/generic/add_err1_n.c
deleted file mode 100644
index b8cb75f6e8..0000000000
--- a/gmp/mpn/generic/add_err1_n.c
+++ /dev/null
@@ -1,101 +0,0 @@
-/* mpn_add_err1_n -- add_n with one error term
-
- Contributed by David Harvey.
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/*
- Computes:
-
- (1) {rp,n} := {up,n} + {vp,n} (just like mpn_add_n) with incoming carry cy,
- return value is carry out.
-
- (2) Let c[i+1] = carry from i-th limb addition (c[0] = cy).
- Computes c[1]*yp[n-1] + ... + c[n]*yp[0], stores two-limb result at ep.
-
- Requires n >= 1.
-
- None of the outputs may overlap each other or any of the inputs, except
- that {rp,n} may be equal to {up,n} or {vp,n}.
-*/
-mp_limb_t
-mpn_add_err1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
- mp_ptr ep, mp_srcptr yp,
- mp_size_t n, mp_limb_t cy)
-{
- mp_limb_t el, eh, ul, vl, yl, zl, rl, sl, cy1, cy2;
-
- ASSERT (n >= 1);
- ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
- ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
- ASSERT (! MPN_OVERLAP_P (rp, n, yp, n));
- ASSERT (! MPN_OVERLAP_P (ep, 2, up, n));
- ASSERT (! MPN_OVERLAP_P (ep, 2, vp, n));
- ASSERT (! MPN_OVERLAP_P (ep, 2, yp, n));
- ASSERT (! MPN_OVERLAP_P (ep, 2, rp, n));
-
- yp += n - 1;
- el = eh = 0;
-
- do
- {
- yl = *yp--;
- ul = *up++;
- vl = *vp++;
-
- /* ordinary add_n */
- ADDC_LIMB (cy1, sl, ul, vl);
- ADDC_LIMB (cy2, rl, sl, cy);
- cy = cy1 | cy2;
- *rp++ = rl;
-
- /* update (eh:el) */
- zl = (-cy) & yl;
- el += zl;
- eh += el < zl;
- }
- while (--n);
-
-#if GMP_NAIL_BITS != 0
- eh = (eh << GMP_NAIL_BITS) + (el >> GMP_NUMB_BITS);
- el &= GMP_NUMB_MASK;
-#endif
-
- ep[0] = el;
- ep[1] = eh;
-
- return cy;
-}
diff --git a/gmp/mpn/generic/add_err2_n.c b/gmp/mpn/generic/add_err2_n.c
deleted file mode 100644
index 4b0242a32d..0000000000
--- a/gmp/mpn/generic/add_err2_n.c
+++ /dev/null
@@ -1,117 +0,0 @@
-/* mpn_add_err2_n -- add_n with two error terms
-
- Contributed by David Harvey.
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/*
- Computes:
-
- (1) {rp,n} := {up,n} + {vp,n} (just like mpn_add_n) with incoming carry cy,
- return value is carry out.
-
- (2) Let c[i+1] = carry from i-th limb addition (c[0] = cy).
- Computes c[1]*yp1[n-1] + ... + c[n]*yp1[0],
- c[1]*yp2[n-1] + ... + c[n]*yp2[0],
- stores two-limb results at {ep,2} and {ep+2,2} respectively.
-
- Requires n >= 1.
-
- None of the outputs may overlap each other or any of the inputs, except
- that {rp,n} may be equal to {up,n} or {vp,n}.
-*/
-mp_limb_t
-mpn_add_err2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
- mp_ptr ep, mp_srcptr yp1, mp_srcptr yp2,
- mp_size_t n, mp_limb_t cy)
-{
- mp_limb_t el1, eh1, el2, eh2, ul, vl, yl1, yl2, zl1, zl2, rl, sl, cy1, cy2;
-
- ASSERT (n >= 1);
- ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
- ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
- ASSERT (! MPN_OVERLAP_P (rp, n, yp1, n));
- ASSERT (! MPN_OVERLAP_P (rp, n, yp2, n));
- ASSERT (! MPN_OVERLAP_P (ep, 4, up, n));
- ASSERT (! MPN_OVERLAP_P (ep, 4, vp, n));
- ASSERT (! MPN_OVERLAP_P (ep, 4, yp1, n));
- ASSERT (! MPN_OVERLAP_P (ep, 4, yp2, n));
- ASSERT (! MPN_OVERLAP_P (ep, 4, rp, n));
-
- yp1 += n - 1;
- yp2 += n - 1;
- el1 = eh1 = 0;
- el2 = eh2 = 0;
-
- do
- {
- yl1 = *yp1--;
- yl2 = *yp2--;
- ul = *up++;
- vl = *vp++;
-
- /* ordinary add_n */
- ADDC_LIMB (cy1, sl, ul, vl);
- ADDC_LIMB (cy2, rl, sl, cy);
- cy = cy1 | cy2;
- *rp++ = rl;
-
- /* update (eh1:el1) */
- zl1 = (-cy) & yl1;
- el1 += zl1;
- eh1 += el1 < zl1;
-
- /* update (eh2:el2) */
- zl2 = (-cy) & yl2;
- el2 += zl2;
- eh2 += el2 < zl2;
- }
- while (--n);
-
-#if GMP_NAIL_BITS != 0
- eh1 = (eh1 << GMP_NAIL_BITS) + (el1 >> GMP_NUMB_BITS);
- el1 &= GMP_NUMB_MASK;
- eh2 = (eh2 << GMP_NAIL_BITS) + (el2 >> GMP_NUMB_BITS);
- el2 &= GMP_NUMB_MASK;
-#endif
-
- ep[0] = el1;
- ep[1] = eh1;
- ep[2] = el2;
- ep[3] = eh2;
-
- return cy;
-}
diff --git a/gmp/mpn/generic/add_err3_n.c b/gmp/mpn/generic/add_err3_n.c
deleted file mode 100644
index 28cd7facf9..0000000000
--- a/gmp/mpn/generic/add_err3_n.c
+++ /dev/null
@@ -1,132 +0,0 @@
-/* mpn_add_err3_n -- add_n with three error terms
-
- Contributed by David Harvey.
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/*
- Computes:
-
- (1) {rp,n} := {up,n} + {vp,n} (just like mpn_add_n) with incoming carry cy,
- return value is carry out.
-
- (2) Let c[i+1] = carry from i-th limb addition (c[0] = cy).
- Computes c[1]*yp1[n-1] + ... + c[n]*yp1[0],
- c[1]*yp2[n-1] + ... + c[n]*yp2[0],
- c[1]*yp3[n-1] + ... + c[n]*yp3[0],
- stores two-limb results at {ep,2}, {ep+2,2} and {ep+4,2} respectively.
-
- Requires n >= 1.
-
- None of the outputs may overlap each other or any of the inputs, except
- that {rp,n} may be equal to {up,n} or {vp,n}.
-*/
-mp_limb_t
-mpn_add_err3_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
- mp_ptr ep, mp_srcptr yp1, mp_srcptr yp2, mp_srcptr yp3,
- mp_size_t n, mp_limb_t cy)
-{
- mp_limb_t el1, eh1, el2, eh2, el3, eh3, ul, vl, yl1, yl2, yl3, zl1, zl2, zl3, rl, sl, cy1, cy2;
-
- ASSERT (n >= 1);
- ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
- ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
- ASSERT (! MPN_OVERLAP_P (rp, n, yp1, n));
- ASSERT (! MPN_OVERLAP_P (rp, n, yp2, n));
- ASSERT (! MPN_OVERLAP_P (rp, n, yp3, n));
- ASSERT (! MPN_OVERLAP_P (ep, 6, up, n));
- ASSERT (! MPN_OVERLAP_P (ep, 6, vp, n));
- ASSERT (! MPN_OVERLAP_P (ep, 6, yp1, n));
- ASSERT (! MPN_OVERLAP_P (ep, 6, yp2, n));
- ASSERT (! MPN_OVERLAP_P (ep, 6, yp3, n));
- ASSERT (! MPN_OVERLAP_P (ep, 6, rp, n));
-
- yp1 += n - 1;
- yp2 += n - 1;
- yp3 += n - 1;
- el1 = eh1 = 0;
- el2 = eh2 = 0;
- el3 = eh3 = 0;
-
- do
- {
- yl1 = *yp1--;
- yl2 = *yp2--;
- yl3 = *yp3--;
- ul = *up++;
- vl = *vp++;
-
- /* ordinary add_n */
- ADDC_LIMB (cy1, sl, ul, vl);
- ADDC_LIMB (cy2, rl, sl, cy);
- cy = cy1 | cy2;
- *rp++ = rl;
-
- /* update (eh1:el1) */
- zl1 = (-cy) & yl1;
- el1 += zl1;
- eh1 += el1 < zl1;
-
- /* update (eh2:el2) */
- zl2 = (-cy) & yl2;
- el2 += zl2;
- eh2 += el2 < zl2;
-
- /* update (eh3:el3) */
- zl3 = (-cy) & yl3;
- el3 += zl3;
- eh3 += el3 < zl3;
- }
- while (--n);
-
-#if GMP_NAIL_BITS != 0
- eh1 = (eh1 << GMP_NAIL_BITS) + (el1 >> GMP_NUMB_BITS);
- el1 &= GMP_NUMB_MASK;
- eh2 = (eh2 << GMP_NAIL_BITS) + (el2 >> GMP_NUMB_BITS);
- el2 &= GMP_NUMB_MASK;
- eh3 = (eh3 << GMP_NAIL_BITS) + (el3 >> GMP_NUMB_BITS);
- el3 &= GMP_NUMB_MASK;
-#endif
-
- ep[0] = el1;
- ep[1] = eh1;
- ep[2] = el2;
- ep[3] = eh2;
- ep[4] = el3;
- ep[5] = eh3;
-
- return cy;
-}
diff --git a/gmp/mpn/generic/add_n.c b/gmp/mpn/generic/add_n.c
index 1a07670900..5006e27780 100644
--- a/gmp/mpn/generic/add_n.c
+++ b/gmp/mpn/generic/add_n.c
@@ -1,32 +1,21 @@
/* mpn_add_n -- Add equal length limb vectors.
-Copyright 1992-1994, 1996, 2000, 2002, 2009 Free Software Foundation, Inc.
+Copyright 1992, 1993, 1994, 1996, 2000, 2002 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
@@ -40,8 +29,8 @@ mpn_add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
mp_limb_t ul, vl, sl, rl, cy, cy1, cy2;
ASSERT (n >= 1);
- ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
- ASSERT (MPN_SAME_OR_INCR_P (rp, vp, n));
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
cy = 0;
do
@@ -70,8 +59,8 @@ mpn_add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
mp_limb_t ul, vl, rl, cy;
ASSERT (n >= 1);
- ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
- ASSERT (MPN_SAME_OR_INCR_P (rp, vp, n));
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
cy = 0;
do
diff --git a/gmp/mpn/generic/addmul_1.c b/gmp/mpn/generic/addmul_1.c
index d76b4ad135..861e1bc830 100644
--- a/gmp/mpn/generic/addmul_1.c
+++ b/gmp/mpn/generic/addmul_1.c
@@ -3,33 +3,23 @@
pointed to by RP. Return the most significant limb of the product,
adjusted for carry-out from the addition.
-Copyright 1992-1994, 1996, 2000, 2002, 2004 Free Software Foundation, Inc.
+Copyright 1992, 1993, 1994, 1996, 2000, 2002, 2004 Free Software Foundation,
+Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
diff --git a/gmp/mpn/generic/add_n_sub_n.c b/gmp/mpn/generic/addsub_n.c
index 012eb3e33a..452cf7b211 100644
--- a/gmp/mpn/generic/add_n_sub_n.c
+++ b/gmp/mpn/generic/addsub_n.c
@@ -1,36 +1,25 @@
-/* mpn_add_n_sub_n -- Add and Subtract two limb vectors of equal, non-zero length.
+/* mpn_addsub_n -- Add and Subtract two limb vectors of equal, non-zero length.
THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 1999-2001, 2006 Free Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2006 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
@@ -39,16 +28,16 @@ see https://www.gnu.org/licenses/. */
#define L1_CACHE_SIZE 8192 /* only 68040 has less than this */
#endif
-#define PART_SIZE (L1_CACHE_SIZE / GMP_LIMB_BYTES / 6)
+#define PART_SIZE (L1_CACHE_SIZE / BYTES_PER_MP_LIMB / 6)
-/* mpn_add_n_sub_n.
+/* mpn_addsub_n.
r1[] = s1[] + s2[]
r2[] = s1[] - s2[]
All operands have n limbs.
In-place operations allowed. */
mp_limb_t
-mpn_add_n_sub_n (mp_ptr r1p, mp_ptr r2p, mp_srcptr s1p, mp_srcptr s2p, mp_size_t n)
+mpn_addsub_n (mp_ptr r1p, mp_ptr r2p, mp_srcptr s1p, mp_srcptr s2p, mp_size_t n)
{
mp_limb_t acyn, acyo; /* carry for add */
mp_limb_t scyn, scyo; /* carry for subtract */
@@ -153,19 +142,19 @@ main (int argc, char **argv)
n = strtol (argv[1], 0, 0);
- r1p = malloc (n * GMP_LIMB_BYTES);
- r2p = malloc (n * GMP_LIMB_BYTES);
- s1p = malloc (n * GMP_LIMB_BYTES);
- s2p = malloc (n * GMP_LIMB_BYTES);
+ r1p = malloc (n * BYTES_PER_MP_LIMB);
+ r2p = malloc (n * BYTES_PER_MP_LIMB);
+ s1p = malloc (n * BYTES_PER_MP_LIMB);
+ s2p = malloc (n * BYTES_PER_MP_LIMB);
TIME (t,(mpn_add_n(r1p,s1p,s2p,n),mpn_sub_n(r1p,s1p,s2p,n)));
printf (" separate add and sub: %.3f\n", t);
- TIME (t,mpn_add_n_sub_n(r1p,r2p,s1p,s2p,n));
+ TIME (t,mpn_addsub_n(r1p,r2p,s1p,s2p,n));
printf ("combined addsub separate variables: %.3f\n", t);
- TIME (t,mpn_add_n_sub_n(r1p,r2p,r1p,s2p,n));
+ TIME (t,mpn_addsub_n(r1p,r2p,r1p,s2p,n));
printf (" combined addsub r1 overlap: %.3f\n", t);
- TIME (t,mpn_add_n_sub_n(r1p,r2p,r1p,s2p,n));
+ TIME (t,mpn_addsub_n(r1p,r2p,r1p,s2p,n));
printf (" combined addsub r2 overlap: %.3f\n", t);
- TIME (t,mpn_add_n_sub_n(r1p,r2p,r1p,r2p,n));
+ TIME (t,mpn_addsub_n(r1p,r2p,r1p,r2p,n));
printf (" combined addsub in-place: %.3f\n", t);
return 0;
diff --git a/gmp/mpn/generic/bdiv_dbm1c.c b/gmp/mpn/generic/bdiv_dbm1c.c
index 22c3cfd2c8..23cb6f1c9e 100644
--- a/gmp/mpn/generic/bdiv_dbm1c.c
+++ b/gmp/mpn/generic/bdiv_dbm1c.c
@@ -10,28 +10,17 @@ Copyright 2008, 2009 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
diff --git a/gmp/mpn/generic/bdiv_q.c b/gmp/mpn/generic/bdiv_q.c
deleted file mode 100644
index 1fc1bb7c09..0000000000
--- a/gmp/mpn/generic/bdiv_q.c
+++ /dev/null
@@ -1,77 +0,0 @@
-/* mpn_bdiv_q -- Hensel division with precomputed inverse, returning quotient.
-
- Contributed to the GNU project by Torbjorn Granlund.
-
- THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-
-Copyright 2006, 2007, 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-/* Computes Q = N / D mod B^n. */
-
-void
-mpn_bdiv_q (mp_ptr qp,
- mp_srcptr np, mp_size_t nn,
- mp_srcptr dp, mp_size_t dn,
- mp_ptr tp)
-{
- mp_limb_t di;
-
- if (BELOW_THRESHOLD (dn, DC_BDIV_Q_THRESHOLD))
- {
- MPN_COPY (tp, np, nn);
- binvert_limb (di, dp[0]); di = -di;
- mpn_sbpi1_bdiv_q (qp, tp, nn, dp, dn, di);
- }
- else if (BELOW_THRESHOLD (dn, MU_BDIV_Q_THRESHOLD))
- {
- MPN_COPY (tp, np, nn);
- binvert_limb (di, dp[0]); di = -di;
- mpn_dcpi1_bdiv_q (qp, tp, nn, dp, dn, di);
- }
- else
- {
- mpn_mu_bdiv_q (qp, np, nn, dp, dn, tp);
- }
- return;
-}
-
-mp_size_t
-mpn_bdiv_q_itch (mp_size_t nn, mp_size_t dn)
-{
- if (BELOW_THRESHOLD (dn, MU_BDIV_Q_THRESHOLD))
- return nn;
- else
- return mpn_mu_bdiv_q_itch (nn, dn);
-}
diff --git a/gmp/mpn/generic/bdiv_q_1.c b/gmp/mpn/generic/bdiv_q_1.c
deleted file mode 100644
index 74b247d5a9..0000000000
--- a/gmp/mpn/generic/bdiv_q_1.c
+++ /dev/null
@@ -1,126 +0,0 @@
-/* mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- schoolbook Hensel division by 1-limb
- divisor, returning quotient only.
-
- THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST
- CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
- FUTURE GNU MP RELEASES.
-
-Copyright 2000-2003, 2005, 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-mp_limb_t
-mpn_pi1_bdiv_q_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t d,
- mp_limb_t di, int shift)
-{
- mp_size_t i;
- mp_limb_t c, h, l, u, u_next, dummy;
-
- ASSERT (n >= 1);
- ASSERT (d != 0);
- ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
- ASSERT_MPN (up, n);
- ASSERT_LIMB (d);
-
- d <<= GMP_NAIL_BITS;
-
- if (shift != 0)
- {
- c = 0;
-
- u = up[0];
- rp--;
- for (i = 1; i < n; i++)
- {
- u_next = up[i];
- u = ((u >> shift) | (u_next << (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;
-
- SUBC_LIMB (c, l, u, c);
-
- l = (l * di) & GMP_NUMB_MASK;
- rp[i] = l;
-
- umul_ppmm (h, dummy, l, d);
- c += h;
- u = u_next;
- }
-
- u = u >> shift;
- l = u - c;
- l = (l * di) & GMP_NUMB_MASK;
- rp[i] = l;
- }
- else
- {
- u = up[0];
- l = (u * di) & GMP_NUMB_MASK;
- rp[0] = l;
- c = 0;
-
- for (i = 1; i < n; i++)
- {
- umul_ppmm (h, dummy, l, d);
- c += h;
-
- u = up[i];
- SUBC_LIMB (c, l, u, c);
-
- l = (l * di) & GMP_NUMB_MASK;
- rp[i] = l;
- }
- }
-
- return c;
-}
-
-mp_limb_t
-mpn_bdiv_q_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t d)
-{
- mp_limb_t di;
- int shift;
-
- ASSERT (n >= 1);
- ASSERT (d != 0);
- ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
- ASSERT_MPN (up, n);
- ASSERT_LIMB (d);
-
- if ((d & 1) == 0)
- {
- count_trailing_zeros (shift, d);
- d >>= shift;
- }
- else
- shift = 0;
-
- binvert_limb (di, d);
- return mpn_pi1_bdiv_q_1 (rp, up, n, d, di, shift);
-}
diff --git a/gmp/mpn/generic/bdiv_qr.c b/gmp/mpn/generic/bdiv_qr.c
deleted file mode 100644
index 6a5eedbbc2..0000000000
--- a/gmp/mpn/generic/bdiv_qr.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/* mpn_bdiv_qr -- Hensel division with precomputed inverse, returning quotient
- and remainder.
-
- Contributed to the GNU project by Torbjorn Granlund.
-
- THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-
-Copyright 2006, 2007, 2009, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-/* Computes Q = N / D mod B^n,
- R = N - QD. */
-
-mp_limb_t
-mpn_bdiv_qr (mp_ptr qp, mp_ptr rp,
- mp_srcptr np, mp_size_t nn,
- mp_srcptr dp, mp_size_t dn,
- mp_ptr tp)
-{
- mp_limb_t di;
- mp_limb_t rh;
-
- ASSERT (nn > dn);
- if (BELOW_THRESHOLD (dn, DC_BDIV_QR_THRESHOLD) ||
- BELOW_THRESHOLD (nn - dn, DC_BDIV_QR_THRESHOLD))
- {
- MPN_COPY (tp, np, nn);
- binvert_limb (di, dp[0]); di = -di;
- rh = mpn_sbpi1_bdiv_qr (qp, tp, nn, dp, dn, di);
- MPN_COPY (rp, tp + nn - dn, dn);
- }
- else if (BELOW_THRESHOLD (dn, MU_BDIV_QR_THRESHOLD))
- {
- MPN_COPY (tp, np, nn);
- binvert_limb (di, dp[0]); di = -di;
- rh = mpn_dcpi1_bdiv_qr (qp, tp, nn, dp, dn, di);
- MPN_COPY (rp, tp + nn - dn, dn);
- }
- else
- {
- rh = mpn_mu_bdiv_qr (qp, rp, np, nn, dp, dn, tp);
- }
-
- return rh;
-}
-
-mp_size_t
-mpn_bdiv_qr_itch (mp_size_t nn, mp_size_t dn)
-{
- if (BELOW_THRESHOLD (dn, MU_BDIV_QR_THRESHOLD))
- return nn;
- else
- return mpn_mu_bdiv_qr_itch (nn, dn);
-}
diff --git a/gmp/mpn/generic/bdivmod.c b/gmp/mpn/generic/bdivmod.c
new file mode 100644
index 0000000000..783b594082
--- /dev/null
+++ b/gmp/mpn/generic/bdivmod.c
@@ -0,0 +1,124 @@
+/* mpn/bdivmod.c: mpn_bdivmod for computing U/V mod 2^d.
+
+Copyright 1991, 1993, 1994, 1995, 1996, 1999, 2000, 2001, 2002 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+/* q_high = mpn_bdivmod (qp, up, usize, vp, vsize, d).
+
+ Puts the low d/BITS_PER_MP_LIMB limbs of Q = U / V mod 2^d at qp, and
+ returns the high d%BITS_PER_MP_LIMB bits of Q as the result.
+
+ Also, U - Q * V mod 2^(usize*BITS_PER_MP_LIMB) is placed at up. Since the
+ low d/BITS_PER_MP_LIMB limbs of this difference are zero, the code allows
+ the limb vectors at qp to overwrite the low limbs at up, provided qp <= up.
+
+ Preconditions:
+ 1. V is odd.
+ 2. usize * BITS_PER_MP_LIMB >= d.
+ 3. If Q and U overlap, qp <= up.
+
+ Ken Weber (kweber@mat.ufrgs.br, kweber@mcs.kent.edu)
+
+ Funding for this work has been partially provided by Conselho Nacional
+ de Desenvolvimento Cienti'fico e Tecnolo'gico (CNPq) do Brazil, Grant
+ 301314194-2, and was done while I was a visiting researcher in the Instituto
+ de Matema'tica at Universidade Federal do Rio Grande do Sul (UFRGS).
+
+ References:
+ T. Jebelean, An algorithm for exact division, Journal of Symbolic
+ Computation, v. 15, 1993, pp. 169-180.
+
+ K. Weber, The accelerated integer GCD algorithm, ACM Transactions on
+ Mathematical Software, v. 21 (March), 1995, pp. 111-122. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+mp_limb_t
+mpn_bdivmod (mp_ptr qp, mp_ptr up, mp_size_t usize,
+ mp_srcptr vp, mp_size_t vsize, unsigned long int d)
+{
+ mp_limb_t v_inv;
+
+ ASSERT (usize >= 1);
+ ASSERT (vsize >= 1);
+ ASSERT (usize * GMP_NUMB_BITS >= d);
+ ASSERT (! MPN_OVERLAP_P (up, usize, vp, vsize));
+ ASSERT (! MPN_OVERLAP_P (qp, d/GMP_NUMB_BITS, vp, vsize));
+ ASSERT (MPN_SAME_OR_INCR2_P (qp, d/GMP_NUMB_BITS, up, usize));
+ ASSERT_MPN (up, usize);
+ ASSERT_MPN (vp, vsize);
+
+ /* 1/V mod 2^GMP_NUMB_BITS. */
+ binvert_limb (v_inv, vp[0]);
+
+ /* Fast code for two cases previously used by the accel part of mpn_gcd.
+ (Could probably remove this now it's inlined there.) */
+ if (usize == 2 && vsize == 2 &&
+ (d == GMP_NUMB_BITS || d == 2*GMP_NUMB_BITS))
+ {
+ mp_limb_t hi, lo;
+ mp_limb_t q = (up[0] * v_inv) & GMP_NUMB_MASK;
+ umul_ppmm (hi, lo, q, vp[0] << GMP_NAIL_BITS);
+ up[0] = 0;
+ up[1] -= hi + q*vp[1];
+ qp[0] = q;
+ if (d == 2*GMP_NUMB_BITS)
+ {
+ q = (up[1] * v_inv) & GMP_NUMB_MASK;
+ up[1] = 0;
+ qp[1] = q;
+ }
+ return 0;
+ }
+
+ /* Main loop. */
+ while (d >= GMP_NUMB_BITS)
+ {
+ mp_limb_t q = (up[0] * v_inv) & GMP_NUMB_MASK;
+ mp_limb_t b = mpn_submul_1 (up, vp, MIN (usize, vsize), q);
+ if (usize > vsize)
+ mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);
+ d -= GMP_NUMB_BITS;
+ up += 1, usize -= 1;
+ *qp++ = q;
+ }
+
+ if (d)
+ {
+ mp_limb_t b;
+ mp_limb_t q = (up[0] * v_inv) & (((mp_limb_t)1<<d) - 1);
+ if (q <= 1)
+ {
+ if (q == 0)
+ return 0;
+ else
+ b = mpn_sub_n (up, up, vp, MIN (usize, vsize));
+ }
+ else
+ b = mpn_submul_1 (up, vp, MIN (usize, vsize), q);
+
+ if (usize > vsize)
+ mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);
+ return q;
+ }
+
+ return 0;
+}
diff --git a/gmp/mpn/generic/binvert.c b/gmp/mpn/generic/binvert.c
index be27ea552e..24d4dcdb6f 100644
--- a/gmp/mpn/generic/binvert.c
+++ b/gmp/mpn/generic/binvert.c
@@ -1,38 +1,28 @@
-/* Compute {up,n}^(-1) mod B^n.
+/* Compute {up,n}^(-1) mod 2(n*GMP_NUMB_BITS).
Contributed to the GNU project by Torbjorn Granlund.
- THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE. IT IS
+ ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
+ ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
+ RELEASE.
-Copyright (C) 2004-2007, 2009, 2012 Free Software Foundation, Inc.
+Copyright (C) 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
@@ -43,6 +33,14 @@ see https://www.gnu.org/licenses/. */
r[k+1] = r[k] + r[k] - r[k]*(u*r[k])
*/
+/* This is intended for constant THRESHOLDs only, where the compiler can
+ completely fold the result. */
+#define LOG2C(n) \
+ (((n) >= 0x1) + ((n) >= 0x2) + ((n) >= 0x4) + ((n) >= 0x8) + \
+ ((n) >= 0x10) + ((n) >= 0x20) + ((n) >= 0x40) + ((n) >= 0x80) + \
+ ((n) >= 0x100) + ((n) >= 0x200) + ((n) >= 0x400) + ((n) >= 0x800) + \
+ ((n) >= 0x1000) + ((n) >= 0x2000) + ((n) >= 0x4000) + ((n) >= 0x8000))
+
#if TUNE_PROGRAM_BUILD
#define NPOWS \
((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)))
@@ -54,9 +52,12 @@ see https://www.gnu.org/licenses/. */
mp_size_t
mpn_binvert_itch (mp_size_t n)
{
- mp_size_t itch_local = mpn_mulmod_bnm1_next_size (n);
- mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local, n, (n + 1) >> 1);
- return itch_local + itch_out;
+#if WANT_FFT
+ if (ABOVE_THRESHOLD (n, 2 * MUL_FFT_MODF_THRESHOLD))
+ return mpn_fft_next_size (n, mpn_fft_best_k (n, 0));
+ else
+#endif
+ return 3 * (n - (n >> 1));
}
void
@@ -75,28 +76,42 @@ mpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
xp = scratch;
- /* Compute a base value of rn limbs. */
+ /* Compute a base value using a low-overhead O(n^2) algorithm. FIXME: We
+ should call some divide-and-conquer lsb division function here for an
+ operand subrange. */
MPN_ZERO (xp, rn);
xp[0] = 1;
binvert_limb (di, up[0]);
if (BELOW_THRESHOLD (rn, DC_BDIV_Q_THRESHOLD))
- mpn_sbpi1_bdiv_q (rp, xp, rn, up, rn, -di);
+ mpn_sb_bdiv_q (rp, xp, rn, up, rn, -di);
else
- mpn_dcpi1_bdiv_q (rp, xp, rn, up, rn, -di);
+ mpn_dc_bdiv_q (rp, xp, rn, up, rn, -di);
/* Use Newton iterations to get the desired precision. */
for (; rn < n; rn = newrn)
{
- mp_size_t m;
newrn = *--sizp;
- /* X <- UR. */
- m = mpn_mulmod_bnm1_next_size (newrn);
- mpn_mulmod_bnm1 (xp, m, up, newrn, rp, rn, xp + m);
- mpn_sub_1 (xp + m, xp, rn - (m - newrn), 1);
-
- /* R = R(X/B^rn) */
- mpn_mullo_n (rp + rn, rp, xp + rn, newrn - rn);
- mpn_neg (rp + rn, rp + rn, newrn - rn);
+#if WANT_FFT
+ if (ABOVE_THRESHOLD (newrn, 2 * MUL_FFT_MODF_THRESHOLD))
+ {
+ int k;
+ mp_size_t m, i;
+
+ k = mpn_fft_best_k (newrn, 0);
+ m = mpn_fft_next_size (newrn, k);
+ mpn_mul_fft (xp, m, up, newrn, rp, rn, k);
+ for (i = rn - 1; i >= 0; i--)
+ if (xp[i] > (i == 0))
+ {
+ mpn_add_1 (xp + rn, xp + rn, newrn - rn, 1);
+ break;
+ }
+ }
+ else
+#endif
+ mpn_mul (xp, up, newrn, rp, rn);
+ mpn_mullow_n (rp + rn, rp, xp + rn, newrn - rn);
+ mpn_neg_n (rp + rn, rp + rn, newrn - rn);
}
}
diff --git a/gmp/mpn/generic/broot.c b/gmp/mpn/generic/broot.c
deleted file mode 100644
index 6974ac8b9e..0000000000
--- a/gmp/mpn/generic/broot.c
+++ /dev/null
@@ -1,196 +0,0 @@
-/* mpn_broot -- Compute hensel sqrt
-
- Contributed to the GNU project by Niels Möller
-
- THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-
-Copyright 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/* Computes a^e (mod B). Uses right-to-left binary algorithm, since
- typical use will have e small. */
-static mp_limb_t
-powlimb (mp_limb_t a, mp_limb_t e)
-{
- mp_limb_t r = 1;
- mp_limb_t s = a;
-
- for (r = 1, s = a; e > 0; e >>= 1, s *= s)
- if (e & 1)
- r *= s;
-
- return r;
-}
-
-/* Computes a^{1/k - 1} (mod B^n). Both a and k must be odd.
-
- Iterates
-
- r' <-- r - r * (a^{k-1} r^k - 1) / n
-
- If
-
- a^{k-1} r^k = 1 (mod 2^m),
-
- then
-
- a^{k-1} r'^k = 1 (mod 2^{2m}),
-
- Compute the update term as
-
- r' = r - (a^{k-1} r^{k+1} - r) / k
-
- where we still have cancellation of low limbs.
-
- */
-void
-mpn_broot_invm1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
-{
- mp_size_t sizes[GMP_LIMB_BITS * 2];
- mp_ptr akm1, tp, rnp, ep;
- mp_limb_t a0, r0, km1, kp1h, kinv;
- mp_size_t rn;
- unsigned i;
-
- TMP_DECL;
-
- ASSERT (n > 0);
- ASSERT (ap[0] & 1);
- ASSERT (k & 1);
- ASSERT (k >= 3);
-
- TMP_MARK;
-
- akm1 = TMP_ALLOC_LIMBS (4*n);
- tp = akm1 + n;
-
- km1 = k-1;
- /* FIXME: Could arrange the iteration so we don't need to compute
- this up front, computing a^{k-1} * r^k as (a r)^{k-1} * r. Note
- that we can use wraparound also for a*r, since the low half is
- unchanged from the previous iteration. Or possibly mulmid. Also,
- a r = a^{1/k}, so we get that value too, for free? */
- mpn_powlo (akm1, ap, &km1, 1, n, tp); /* 3 n scratch space */
-
- a0 = ap[0];
- binvert_limb (kinv, k);
-
- /* 4 bits: a^{1/k - 1} (mod 16):
-
- a % 8
- 1 3 5 7
- k%4 +-------
- 1 |1 1 1 1
- 3 |1 9 9 1
- */
- r0 = 1 + (((k << 2) & ((a0 << 1) ^ (a0 << 2))) & 8);
- r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7f)); /* 8 bits */
- r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7fff)); /* 16 bits */
- r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k)); /* 32 bits */
-#if GMP_NUMB_BITS > 32
- {
- unsigned prec = 32;
- do
- {
- r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k));
- prec *= 2;
- }
- while (prec < GMP_NUMB_BITS);
- }
-#endif
-
- rp[0] = r0;
- if (n == 1)
- {
- TMP_FREE;
- return;
- }
-
- /* For odd k, (k+1)/2 = k/2+1, and the latter avoids overflow. */
- kp1h = k/2 + 1;
-
- /* FIXME: Special case for two limb iteration. */
- rnp = TMP_ALLOC_LIMBS (2*n + 1);
- ep = rnp + n;
-
- /* FIXME: Possible to this on the fly with some bit fiddling. */
- for (i = 0; n > 1; n = (n + 1)/2)
- sizes[i++] = n;
-
- rn = 1;
-
- while (i-- > 0)
- {
- /* Compute x^{k+1}. */
- mpn_sqr (ep, rp, rn); /* For odd n, writes n+1 limbs in the
- final iteration. */
- mpn_powlo (rnp, ep, &kp1h, 1, sizes[i], tp);
-
- /* Multiply by a^{k-1}. Can use wraparound; low part equals r. */
-
- mpn_mullo_n (ep, rnp, akm1, sizes[i]);
- ASSERT (mpn_cmp (ep, rp, rn) == 0);
-
- ASSERT (sizes[i] <= 2*rn);
- mpn_pi1_bdiv_q_1 (rp + rn, ep + rn, sizes[i] - rn, k, kinv, 0);
- mpn_neg (rp + rn, rp + rn, sizes[i] - rn);
- rn = sizes[i];
- }
- TMP_FREE;
-}
-
-/* Computes a^{1/k} (mod B^n). Both a and k must be odd. */
-void
-mpn_broot (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
-{
- mp_ptr tp;
- TMP_DECL;
-
- ASSERT (n > 0);
- ASSERT (ap[0] & 1);
- ASSERT (k & 1);
-
- if (k == 1)
- {
- MPN_COPY (rp, ap, n);
- return;
- }
-
- TMP_MARK;
- tp = TMP_ALLOC_LIMBS (n);
-
- mpn_broot_invm1 (tp, ap, n, k);
- mpn_mullo_n (rp, tp, ap, n);
-
- TMP_FREE;
-}
diff --git a/gmp/mpn/generic/brootinv.c b/gmp/mpn/generic/brootinv.c
deleted file mode 100644
index b96c97f1d3..0000000000
--- a/gmp/mpn/generic/brootinv.c
+++ /dev/null
@@ -1,140 +0,0 @@
-/* mpn_brootinv, compute r such that r^k * y = 1 (mod 2^b).
-
- Contributed to the GNU project by Martin Boij (as part of perfpow.c).
-
-Copyright 2009, 2010, 2012, 2013 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/* Computes a^e (mod B). Uses right-to-left binary algorithm, since
- typical use will have e small. */
-static mp_limb_t
-powlimb (mp_limb_t a, mp_limb_t e)
-{
- mp_limb_t r;
-
- for (r = 1; e > 0; e >>= 1, a *= a)
- if (e & 1)
- r *= a;
-
- return r;
-}
-
-/* Compute r such that r^k * y = 1 (mod B^n).
-
- Iterates
- r' <-- k^{-1} ((k+1) r - r^{k+1} y) (mod 2^b)
- using Hensel lifting, each time doubling the number of known bits in r.
-
- Works just for odd k. Else the Hensel lifting degenerates.
-
- FIXME:
-
- (1) Make it work for k == GMP_LIMB_MAX (k+1 below overflows).
-
- (2) Rewrite iteration as
- r' <-- r - k^{-1} r (r^k y - 1)
- and take advantage of the zero low part of r^k y - 1.
-
- (3) Use wrap-around trick.
-
- (4) Use a small table to get starting value.
-
- Scratch need: 5*bn, where bn = ceil (bnb / GMP_NUMB_BITS).
-*/
-
-void
-mpn_brootinv (mp_ptr rp, mp_srcptr yp, mp_size_t bn, mp_limb_t k, mp_ptr tp)
-{
- mp_ptr tp2, tp3;
- mp_limb_t kinv, k2, r0, y0;
- mp_size_t order[GMP_LIMB_BITS + 1];
- int i, d;
-
- ASSERT (bn > 0);
- ASSERT ((k & 1) != 0);
-
- tp2 = tp + bn;
- tp3 = tp + 2 * bn;
- k2 = k + 1;
-
- binvert_limb (kinv, k);
-
- /* 4-bit initial approximation:
-
- y%16 | 1 3 5 7 9 11 13 15,
- k%4 +-------------------------+k2%4
- 1 | 1 11 13 7 9 3 5 15 | 2
- 3 | 1 3 5 7 9 11 13 15 | 0
-
- */
- y0 = yp[0];
-
- r0 = y0 ^ (((y0 << 1) ^ (y0 << 2)) & (k2 << 2) & 8); /* 4 bits */
- r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2 & 0x7f)); /* 8 bits */
- r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2 & 0x7fff)); /* 16 bits */
-#if GMP_NUMB_BITS > 16
- {
- unsigned prec = 16;
- do
- {
- r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2));
- prec *= 2;
- }
- while (prec < GMP_NUMB_BITS);
- }
-#endif
-
- rp[0] = r0;
- if (bn == 1)
- return;
-
- /* This initialization doesn't matter for the result (any garbage is
- cancelled in the iteration), but proper initialization makes
- valgrind happier. */
- MPN_ZERO (rp+1, bn-1);
-
- d = 0;
- for (; bn > 1; bn = (bn + 1) >> 1)
- order[d++] = bn;
-
- for (i = d - 1; i >= 0; i--)
- {
- bn = order[i];
-
- mpn_mul_1 (tp, rp, bn, k2);
-
- mpn_powlo (tp2, rp, &k2, 1, bn, tp3);
- mpn_mullo_n (rp, yp, tp2, bn);
-
- mpn_sub_n (tp2, tp, rp, bn);
- mpn_pi1_bdiv_q_1 (rp, tp2, bn, k, kinv, 0);
- }
-}
diff --git a/gmp/mpn/generic/bsqrt.c b/gmp/mpn/generic/bsqrt.c
deleted file mode 100644
index 18ba26f440..0000000000
--- a/gmp/mpn/generic/bsqrt.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/* mpn_bsqrt, a^{1/2} (mod 2^n).
-
-Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-void
-mpn_bsqrt (mp_ptr rp, mp_srcptr ap, mp_bitcnt_t nb, mp_ptr tp)
-{
- mp_ptr sp;
- mp_size_t n;
-
- ASSERT (nb > 0);
-
- n = nb / GMP_NUMB_BITS;
- sp = tp + n;
-
- mpn_bsqrtinv (sp, ap, nb, tp);
- mpn_mullo_n (rp, sp, ap, n);
-}
diff --git a/gmp/mpn/generic/bsqrtinv.c b/gmp/mpn/generic/bsqrtinv.c
deleted file mode 100644
index 33df6a3c15..0000000000
--- a/gmp/mpn/generic/bsqrtinv.c
+++ /dev/null
@@ -1,105 +0,0 @@
-/* mpn_bsqrtinv, compute r such that r^2 * y = 1 (mod 2^{b+1}).
-
- Contributed to the GNU project by Martin Boij (as part of perfpow.c).
-
-Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/* Compute r such that r^2 * y = 1 (mod 2^{b+1}).
- Return non-zero if such an integer r exists.
-
- Iterates
- r' <-- (3r - r^3 y) / 2
- using Hensel lifting. Since we divide by two, the Hensel lifting is
- somewhat degenerates. Therefore, we lift from 2^b to 2^{b+1}-1.
-
- FIXME:
- (1) Simplify to do precision book-keeping in limbs rather than bits.
-
- (2) Rewrite iteration as
- r' <-- r - r (r^2 y - 1) / 2
- and take advantage of zero low part of r^2 y - 1.
-
- (3) Use wrap-around trick.
-
- (4) Use a small table to get starting value.
-*/
-int
-mpn_bsqrtinv (mp_ptr rp, mp_srcptr yp, mp_bitcnt_t bnb, mp_ptr tp)
-{
- mp_ptr tp2, tp3;
- mp_limb_t k;
- mp_size_t bn, order[GMP_LIMB_BITS + 1];
- int i, d;
-
- ASSERT (bnb > 0);
-
- bn = 1 + bnb / GMP_LIMB_BITS;
-
- tp2 = tp + bn;
- tp3 = tp + 2 * bn;
- k = 3;
-
- rp[0] = 1;
- if (bnb == 1)
- {
- if ((yp[0] & 3) != 1)
- return 0;
- }
- else
- {
- if ((yp[0] & 7) != 1)
- return 0;
-
- d = 0;
- for (; bnb != 2; bnb = (bnb + 2) >> 1)
- order[d++] = bnb;
-
- for (i = d - 1; i >= 0; i--)
- {
- bnb = order[i];
- bn = 1 + bnb / GMP_LIMB_BITS;
-
- mpn_mul_1 (tp, rp, bn, k);
-
- mpn_powlo (tp2, rp, &k, 1, bn, tp3);
- mpn_mullo_n (rp, yp, tp2, bn);
-
-#if HAVE_NATIVE_mpn_rsh1sub_n
- mpn_rsh1sub_n (rp, tp, rp, bn);
-#else
- mpn_sub_n (tp2, tp, rp, bn);
- mpn_rshift (rp, tp2, bn, 1);
-#endif
- }
- }
- return 1;
-}
diff --git a/gmp/mpn/generic/cmp.c b/gmp/mpn/generic/cmp.c
index 18c7b42844..d352076599 100644
--- a/gmp/mpn/generic/cmp.c
+++ b/gmp/mpn/generic/cmp.c
@@ -5,28 +5,17 @@ Copyright 1991, 1993, 1994, 1996, 2000, 2001 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#define __GMP_FORCE_mpn_cmp 1
diff --git a/gmp/mpn/generic/cnd_add_n.c b/gmp/mpn/generic/cnd_add_n.c
deleted file mode 100644
index 443f9858da..0000000000
--- a/gmp/mpn/generic/cnd_add_n.c
+++ /dev/null
@@ -1,70 +0,0 @@
-/* mpn_cnd_add_n -- Compute R = U + V if CND != 0 or R = U if CND == 0.
- Both cases should take the same time and perform the exact same memory
- accesses, since this function is intended to be used where side-channel
- attack resilience is relevant.
-
-Copyright 1992-1994, 1996, 2000, 2002, 2008, 2009, 2011, 2013 Free Software
-Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-mp_limb_t
-mpn_cnd_add_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
-{
- mp_limb_t ul, vl, sl, rl, cy, cy1, cy2, mask;
-
- ASSERT (n >= 1);
- ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
- ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
-
- mask = -(mp_limb_t) (cnd != 0);
- cy = 0;
- do
- {
- ul = *up++;
- vl = *vp++ & mask;
-#if GMP_NAIL_BITS == 0
- sl = ul + vl;
- cy1 = sl < ul;
- rl = sl + cy;
- cy2 = rl < sl;
- cy = cy1 | cy2;
- *rp++ = rl;
-#else
- rl = ul + vl;
- rl += cy;
- cy = rl >> GMP_NUMB_BITS;
- *rp++ = rl & GMP_NUMB_MASK;
-#endif
- }
- while (--n != 0);
-
- return cy;
-}
diff --git a/gmp/mpn/generic/cnd_sub_n.c b/gmp/mpn/generic/cnd_sub_n.c
deleted file mode 100644
index bd8e029a36..0000000000
--- a/gmp/mpn/generic/cnd_sub_n.c
+++ /dev/null
@@ -1,70 +0,0 @@
-/* mpn_cnd_sub_n -- Compute R = U - V if CND != 0 or R = U if CND == 0.
- Both cases should take the same time and perform the exact same memory
- accesses, since this function is intended to be used where side-channel
- attack resilience is relevant.
-
-Copyright 1992-1994, 1996, 2000, 2002, 2008, 2009, 2011, 2013 Free Software
-Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-mp_limb_t
-mpn_cnd_sub_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
-{
- mp_limb_t ul, vl, sl, rl, cy, cy1, cy2, mask;
-
- ASSERT (n >= 1);
- ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
- ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
-
- mask = -(mp_limb_t) (cnd != 0);
- cy = 0;
- do
- {
- ul = *up++;
- vl = *vp++ & mask;
-#if GMP_NAIL_BITS == 0
- sl = ul - vl;
- cy1 = sl > ul;
- rl = sl - cy;
- cy2 = rl > sl;
- cy = cy1 | cy2;
- *rp++ = rl;
-#else
- rl = ul - vl;
- rl -= cy;
- cy = rl >> (GMP_LIMB_BITS - 1);
- *rp++ = rl & GMP_NUMB_MASK;
-#endif
- }
- while (--n != 0);
-
- return cy;
-}
diff --git a/gmp/mpn/generic/com.c b/gmp/mpn/generic/com.c
deleted file mode 100644
index cd8551df5b..0000000000
--- a/gmp/mpn/generic/com.c
+++ /dev/null
@@ -1,45 +0,0 @@
-/* mpn_com - complement an mpn.
-
-Copyright 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#undef mpn_com
-#define mpn_com __MPN(com)
-
-void
-mpn_com (mp_ptr rp, mp_srcptr up, mp_size_t n)
-{
- mp_limb_t ul;
- do {
- ul = *up++;
- *rp++ = ~ul & GMP_NUMB_MASK;
- } while (--n != 0);
-}
diff --git a/gmp/mpn/generic/comb_tables.c b/gmp/mpn/generic/comb_tables.c
deleted file mode 100644
index 41bcb5f879..0000000000
--- a/gmp/mpn/generic/comb_tables.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/* Const tables shared among combinatoric functions.
-
- THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
- BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.
-
-Copyright 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/* Entry i contains (i!/2^t) where t is chosen such that the parenthesis
- is an odd integer. */
-const mp_limb_t __gmp_oddfac_table[] = { ONE_LIMB_ODD_FACTORIAL_TABLE, ONE_LIMB_ODD_FACTORIAL_EXTTABLE };
-
-/* Entry i contains ((2i+1)!!/2^t) where t is chosen such that the parenthesis
- is an odd integer. */
-const mp_limb_t __gmp_odd2fac_table[] = { ONE_LIMB_ODD_DOUBLEFACTORIAL_TABLE };
-
-/* Entry i contains 2i-popc(2i). */
-const unsigned char __gmp_fac2cnt_table[] = { TABLE_2N_MINUS_POPC_2N };
-
-const mp_limb_t __gmp_limbroots_table[] = { NTH_ROOT_NUMB_MASK_TABLE };
diff --git a/gmp/mpn/generic/copyd.c b/gmp/mpn/generic/copyd.c
deleted file mode 100644
index ba3380a82b..0000000000
--- a/gmp/mpn/generic/copyd.c
+++ /dev/null
@@ -1,41 +0,0 @@
-/* mpn_copyd
-
-Copyright 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-void
-mpn_copyd (mp_ptr rp, mp_srcptr up, mp_size_t n)
-{
- mp_size_t i;
-
- for (i = n - 1; i >= 0; i--)
- rp[i] = up[i];
-}
diff --git a/gmp/mpn/generic/copyi.c b/gmp/mpn/generic/copyi.c
deleted file mode 100644
index 0c39b4534b..0000000000
--- a/gmp/mpn/generic/copyi.c
+++ /dev/null
@@ -1,43 +0,0 @@
-/* mpn_copyi
-
-Copyright 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-void
-mpn_copyi (mp_ptr rp, mp_srcptr up, mp_size_t n)
-{
- mp_size_t i;
-
- up += n;
- rp += n;
- for (i = -n; i != 0; i++)
- rp[i] = up[i];
-}
diff --git a/gmp/mpn/generic/dc_bdiv_q.c b/gmp/mpn/generic/dc_bdiv_q.c
new file mode 100644
index 0000000000..9a43d18b56
--- /dev/null
+++ b/gmp/mpn/generic/dc_bdiv_q.c
@@ -0,0 +1,137 @@
+/* mpn_dc_bdiv_q -- divide-and-conquer Hensel division with precomputed
+ inverse, returning quotient.
+
+ Contributed to the GNU project by Niels Möller and Torbjörn Granlund.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE. IT IS
+ ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
+ ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
+ RELEASE.
+
+Copyright 2006, 2007 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Computes Q = N / D mod B^n, destroys N. */
+
+mp_size_t
+mpn_dc_bdiv_q_n_itch (mp_size_t n)
+{
+ /* NOTE: Depends om mullow_n interface */
+ return n;
+}
+
+void
+mpn_dc_bdiv_q_n (mp_ptr qp,
+ mp_ptr np, mp_srcptr dp, mp_size_t n,
+ mp_limb_t dinv, mp_ptr tp)
+{
+ while (ABOVE_THRESHOLD (n, DC_BDIV_Q_THRESHOLD))
+ {
+ mp_limb_t l, h;
+ mp_limb_t cy;
+
+ l = n >> 1;
+ h = n - l;
+
+ cy = mpn_dc_bdiv_qr_n (qp, np, dp, l, dinv, tp);
+
+ mpn_mullow_n (tp, qp, dp + h, l);
+ mpn_sub_n (np + h, np + h, tp, l);
+
+ if (l < h)
+ {
+ cy += mpn_submul_1 (np + l, qp, l, dp[l]);
+ np[n - 1] -= cy;
+ }
+ qp += l;
+ np += l;
+ n -= l;
+ }
+ mpn_sb_bdiv_q (qp, np, n, dp, n, dinv);
+}
+
+void
+mpn_dc_bdiv_q (mp_ptr qp,
+ mp_ptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn,
+ mp_limb_t dinv)
+{
+ mp_size_t qn;
+ mp_limb_t cy;
+ mp_ptr tp;
+ TMP_DECL;
+
+ TMP_MARK;
+
+ tp = TMP_SALLOC_LIMBS (dn);
+
+ qn = nn;
+
+ if (qn > dn)
+ {
+ /* Reduce qn mod dn in a super-efficient manner. */
+ do
+ qn -= dn;
+ while (qn > dn);
+
+ /* Perform the typically smaller block first. */
+ if (BELOW_THRESHOLD (qn, DC_BDIV_QR_THRESHOLD))
+ cy = mpn_sb_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv);
+ else
+ cy = mpn_dc_bdiv_qr_n (qp, np, dp, qn, dinv, tp);
+
+ if (qn != dn)
+ {
+ if (qn > dn - qn)
+ mpn_mul (tp, qp, qn, dp + qn, dn - qn);
+ else
+ mpn_mul (tp, dp + qn, dn - qn, qp, qn);
+ mpn_incr_u (tp + qn, cy);
+
+ mpn_sub (np + qn, np + qn, nn - qn, tp, dn);
+ cy = 0;
+ }
+
+ np += qn;
+ qp += qn;
+
+ qn = nn - qn;
+ while (qn > dn)
+ {
+ mpn_sub_1 (np + dn, np + dn, qn, cy);
+ cy = mpn_dc_bdiv_qr_n (qp, np, dp, dn, dinv, tp);
+ qp += dn;
+ np += dn;
+ qn -= dn;
+ }
+ mpn_sub_1 (np + dn, np + dn, qn, cy);
+ mpn_dc_bdiv_q_n (qp, np, dp, dn, dinv, tp);
+ TMP_FREE;
+ return;
+ }
+
+ if (BELOW_THRESHOLD (qn, DC_BDIV_Q_THRESHOLD))
+ mpn_sb_bdiv_q (qp, np, 2 * qn, dp, qn, dinv);
+ else
+ mpn_dc_bdiv_q_n (qp, np, dp, qn, dinv, tp);
+
+ TMP_FREE;
+}
diff --git a/gmp/mpn/generic/dcpi1_bdiv_qr.c b/gmp/mpn/generic/dc_bdiv_qr.c
index 8a251f8d9d..8b59bbd860 100644
--- a/gmp/mpn/generic/dcpi1_bdiv_qr.c
+++ b/gmp/mpn/generic/dc_bdiv_qr.c
@@ -1,39 +1,29 @@
-/* mpn_dcpi1_bdiv_qr -- divide-and-conquer Hensel division with precomputed
+/* mpn_dc_bdiv_qr -- divide-and-conquer Hensel division with precomputed
inverse, returning quotient and remainder.
- Contributed to the GNU project by Niels Möller and Torbjorn Granlund.
+ Contributed to the GNU project by Niels Möller and Torbjörn Granlund.
- THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE. IT IS
+ ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
+ ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
+ RELEASE.
-Copyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
+Copyright 2006, 2007 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
@@ -53,14 +43,14 @@ see https://www.gnu.org/licenses/. */
d must be odd. dinv is (-d)^-1 mod 2^GMP_NUMB_BITS. */
mp_size_t
-mpn_dcpi1_bdiv_qr_n_itch (mp_size_t n)
+mpn_dc_bdiv_qr_n_itch (mp_size_t n)
{
return n;
}
mp_limb_t
-mpn_dcpi1_bdiv_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
- mp_limb_t dinv, mp_ptr tp)
+mpn_dc_bdiv_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
+ mp_limb_t dinv, mp_ptr tp)
{
mp_size_t lo, hi;
mp_limb_t cy;
@@ -70,9 +60,9 @@ mpn_dcpi1_bdiv_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
hi = n - lo; /* ceil(n/2) */
if (BELOW_THRESHOLD (lo, DC_BDIV_QR_THRESHOLD))
- cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * lo, dp, lo, dinv);
+ cy = mpn_sb_bdiv_qr (qp, np, 2 * lo, dp, lo, dinv);
else
- cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, lo, dinv, tp);
+ cy = mpn_dc_bdiv_qr_n (qp, np, dp, lo, dinv, tp);
mpn_mul (tp, dp + lo, hi, qp, lo);
@@ -80,9 +70,9 @@ mpn_dcpi1_bdiv_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
rh = mpn_sub (np + lo, np + lo, n + hi, tp, n);
if (BELOW_THRESHOLD (hi, DC_BDIV_QR_THRESHOLD))
- cy = mpn_sbpi1_bdiv_qr (qp + lo, np + lo, 2 * hi, dp, hi, dinv);
+ cy = mpn_sb_bdiv_qr (qp + lo, np + lo, 2 * hi, dp, hi, dinv);
else
- cy = mpn_dcpi1_bdiv_qr_n (qp + lo, np + lo, dp, hi, dinv, tp);
+ cy = mpn_dc_bdiv_qr_n (qp + lo, np + lo, dp, hi, dinv, tp);
mpn_mul (tp, qp + lo, hi, dp + hi, lo);
@@ -93,8 +83,8 @@ mpn_dcpi1_bdiv_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
}
mp_limb_t
-mpn_dcpi1_bdiv_qr (mp_ptr qp, mp_ptr np, mp_size_t nn,
- mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)
+mpn_dc_bdiv_qr (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn,
+ mp_limb_t dinv)
{
mp_size_t qn;
mp_limb_t rr, cy;
@@ -103,10 +93,6 @@ mpn_dcpi1_bdiv_qr (mp_ptr qp, mp_ptr np, mp_size_t nn,
TMP_MARK;
- ASSERT (dn >= 2); /* to adhere to mpn_sbpi1_div_qr's limits */
- ASSERT (nn - dn >= 1); /* to adhere to mpn_sbpi1_div_qr's limits */
- ASSERT (dp[0] & 1);
-
tp = TMP_SALLOC_LIMBS (dn);
qn = nn - dn;
@@ -120,9 +106,9 @@ mpn_dcpi1_bdiv_qr (mp_ptr qp, mp_ptr np, mp_size_t nn,
/* Perform the typically smaller block first. */
if (BELOW_THRESHOLD (qn, DC_BDIV_QR_THRESHOLD))
- cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv);
+ cy = mpn_sb_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv);
else
- cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, qn, dinv, tp);
+ cy = mpn_dc_bdiv_qr_n (qp, np, dp, qn, dinv, tp);
rr = 0;
if (qn != dn)
@@ -144,7 +130,7 @@ mpn_dcpi1_bdiv_qr (mp_ptr qp, mp_ptr np, mp_size_t nn,
do
{
rr += mpn_sub_1 (np + dn, np + dn, qn, cy);
- cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, dn, dinv, tp);
+ cy = mpn_dc_bdiv_qr_n (qp, np, dp, dn, dinv, tp);
qp += dn;
np += dn;
qn -= dn;
@@ -155,9 +141,9 @@ mpn_dcpi1_bdiv_qr (mp_ptr qp, mp_ptr np, mp_size_t nn,
}
if (BELOW_THRESHOLD (qn, DC_BDIV_QR_THRESHOLD))
- cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv);
+ cy = mpn_sb_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv);
else
- cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, qn, dinv, tp);
+ cy = mpn_dc_bdiv_qr_n (qp, np, dp, qn, dinv, tp);
rr = 0;
if (qn != dn)
diff --git a/gmp/mpn/generic/dc_div_q.c b/gmp/mpn/generic/dc_div_q.c
new file mode 100644
index 0000000000..276ae4fba6
--- /dev/null
+++ b/gmp/mpn/generic/dc_div_q.c
@@ -0,0 +1,57 @@
+/* mpn_dc_div_q -- divide-and-conquer division, returning exact quotient only.
+
+ Contributed to the GNU project by Torbjörn Granlund.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE. IT IS
+ ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
+ ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
+ RELEASE.
+
+Copyright 2006, 2007 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+mp_limb_t
+mpn_dc_div_q (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
+{
+ mp_ptr tp, wp;
+ mp_limb_t qh;
+ mp_size_t qn;
+ TMP_DECL;
+
+ TMP_MARK;
+
+ tp = TMP_SALLOC_LIMBS (nn + 1);
+ MPN_COPY (tp + 1, np, nn);
+ tp[0] = 0;
+
+ qn = nn - dn;
+ wp = TMP_SALLOC_LIMBS (qn + 1);
+
+ qh = mpn_dc_divappr_q (wp, tp, nn + 1, dp, dn);
+
+ if (wp[0] == 0)
+ /* FIXME: Should multiply and subtract here, not recompute from scratch. */
+ qh = mpn_dc_div_qr (qp, np, nn, dp, dn);
+ else
+ MPN_COPY (qp, wp + 1, qn);
+
+ return qh;
+}
diff --git a/gmp/mpn/generic/dc_div_qr.c b/gmp/mpn/generic/dc_div_qr.c
new file mode 100644
index 0000000000..41a46f1516
--- /dev/null
+++ b/gmp/mpn/generic/dc_div_qr.c
@@ -0,0 +1,203 @@
+/* mpn_dc_div_qr -- recursive divide-and-conquer division for arbitrary size
+ operands.
+
+ Contributed to the GNU project by Torbjörn Granlund.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE. IT IS
+ ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
+ ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
+ RELEASE.
+
+Copyright 2006, 2007 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+mp_limb_t
+mpn_dc_div_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
+ mp_srcptr dip, mp_ptr tp)
+{
+ mp_size_t lo, hi;
+ mp_limb_t cy, qh, ql;
+
+ lo = n >> 1; /* floor(n/2) */
+ hi = n - lo; /* ceil(n/2) */
+
+ if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))
+ qh = mpn_sb_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dip);
+ else
+ qh = mpn_dc_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dip, tp);
+
+ mpn_mul (tp, qp + lo, hi, dp, lo);
+
+ cy = mpn_sub_n (np + lo, np + lo, tp, n);
+ if (qh != 0)
+ cy += mpn_sub_n (np + n, np + n, dp, lo);
+
+ while (cy != 0)
+ {
+ qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
+ cy -= mpn_add_n (np + lo, np + lo, dp, n);
+ }
+
+ if (BELOW_THRESHOLD (lo, DC_DIV_QR_THRESHOLD))
+ ql = mpn_sb_div_qr (qp, np + hi, 2 * lo, dp + hi, lo, dip);
+ else
+ ql = mpn_dc_div_qr_n (qp, np + hi, dp + hi, lo, dip, tp);
+
+ mpn_mul (tp, dp, hi, qp, lo);
+
+ cy = mpn_sub_n (np, np, tp, n);
+ if (ql != 0)
+ cy += mpn_sub_n (np + lo, np + lo, dp, hi);
+
+ while (cy != 0)
+ {
+ mpn_sub_1 (qp, qp, lo, 1);
+ cy -= mpn_add_n (np, np, dp, n);
+ }
+
+ return qh;
+}
+
+mp_limb_t
+mpn_preinv_dc_div_qr (mp_ptr qp,
+ mp_ptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn,
+ mp_srcptr dip)
+{
+ mp_size_t qn;
+ mp_limb_t qh, cy;
+ mp_ptr tp;
+ TMP_DECL;
+
+ TMP_MARK;
+
+ tp = TMP_SALLOC_LIMBS (dn);
+
+ qn = nn - dn;
+ qp += qn;
+ np += nn;
+ dp += dn;
+
+ if (qn > dn)
+ {
+ /* Reduce qn mod dn without division, optimizing small operations. */
+ do
+ qn -= dn;
+ while (qn > dn);
+
+ qp -= qn; /* point at low limb of next quotient block */
+ np -= qn; /* point in the middle of partial remainder */
+
+ /* Perform the typically smaller block first. */
+ if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
+ qh = mpn_sb_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dip);
+ else
+ qh = mpn_dc_div_qr_n (qp, np - qn, dp - qn, qn, dip, tp);
+
+ if (qn != dn)
+ {
+ if (qn > dn - qn)
+ mpn_mul (tp, qp, qn, dp - dn, dn - qn);
+ else
+ mpn_mul (tp, dp - dn, dn - qn, qp, qn);
+
+ cy = mpn_sub_n (np - dn, np - dn, tp, dn);
+ if (qh != 0)
+ cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
+
+ while (cy != 0)
+ {
+ qh -= mpn_sub_1 (qp, qp, qn, 1);
+ cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
+ }
+ }
+
+ qn = nn - dn - qn;
+ do
+ {
+ qp -= dn;
+ np -= dn;
+ mpn_dc_div_qr_n (qp, np - dn, dp - dn, dn, dip, tp);
+ qn -= dn;
+ }
+ while (qn > 0);
+ }
+ else
+ {
+ if (qn == 0)
+ {
+ qh = mpn_cmp (np - dn, dp - dn, dn) >= 0;
+ if (qh)
+ mpn_sub_n (np - dn, np - dn, dp - dn, dn);
+ TMP_FREE;
+ return qh;
+ }
+
+ qp -= qn; /* point at low limb of next quotient block */
+ np -= qn; /* point in the middle of partial remainder */
+
+ if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
+ qh = mpn_sb_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dip);
+ else
+ qh = mpn_dc_div_qr_n (qp, np - qn, dp - qn, qn, dip, tp);
+
+ if (qn != dn)
+ {
+ if (qn > dn - qn)
+ mpn_mul (tp, qp, qn, dp - dn, dn - qn);
+ else
+ mpn_mul (tp, dp - dn, dn - qn, qp, qn);
+
+ cy = mpn_sub_n (np - dn, np - dn, tp, dn);
+ if (qh != 0)
+ cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
+
+ while (cy != 0)
+ {
+ qh -= mpn_sub_1 (qp, qp, qn, 1);
+ cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
+ }
+ }
+ }
+
+ TMP_FREE;
+ return qh;
+}
+
+mp_limb_t
+mpn_dc_div_qr (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
+{
+ mp_limb_t cy;
+ mp_limb_t xp[2], dip[2];
+
+ ASSERT (dn >= 2);
+
+ cy = mpn_add_1 (xp, dp + dn - 2, 2, 1);
+ if (cy != 0)
+ dip[0] = dip[1] = 0;
+ else
+ {
+ mp_limb_t scratch[10]; /* FIXME */
+ mpn_invert (dip, xp, 2, scratch);
+ }
+
+ return mpn_preinv_dc_div_qr (qp, np, nn, dp, dn, dip);
+}
diff --git a/gmp/mpn/generic/dc_divappr_q.c b/gmp/mpn/generic/dc_divappr_q.c
new file mode 100644
index 0000000000..4474872388
--- /dev/null
+++ b/gmp/mpn/generic/dc_divappr_q.c
@@ -0,0 +1,196 @@
+/* mpn_dc_divappr_q -- divide-and-conquer division, returning only approximate
+ quotient. The quotient retuened is either correct, or unity too large.
+
+ Contributed to the GNU project by Torbjörn Granlund.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE. IT IS
+ ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
+ ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
+ RELEASE.
+
+Copyright 2006, 2007 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+mp_limb_t
+mpn_dc_divappr_q_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
+ mp_srcptr dip, mp_ptr tp)
+{
+ mp_size_t lo, hi;
+ mp_limb_t cy, qh, ql;
+
+ lo = n >> 1; /* floor(n/2) */
+ hi = n - lo; /* ceil(n/2) */
+
+ if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))
+ qh = mpn_sb_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dip);
+ else
+ qh = mpn_dc_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dip, tp);
+
+ mpn_mul (tp, qp + lo, hi, dp, lo);
+
+ cy = mpn_sub_n (np + lo, np + lo, tp, n);
+ if (qh != 0)
+ cy += mpn_sub_n (np + n, np + n, dp, lo);
+
+ while (cy != 0)
+ {
+ qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
+ cy -= mpn_add_n (np + lo, np + lo, dp, n);
+ }
+
+ if (BELOW_THRESHOLD (lo, DC_DIVAPPR_Q_THRESHOLD))
+ ql = mpn_sb_divappr_q (qp, np + hi, 2 * lo, dp + hi, lo, dip);
+ else
+ ql = mpn_dc_divappr_q_n (qp, np + hi, dp + hi, lo, dip, tp);
+
+ if (UNLIKELY (ql != 0))
+ {
+ mp_size_t i;
+ for (i = 0; i < lo; i++)
+ qp[i] = GMP_NUMB_MASK;
+ }
+
+ return qh;
+}
+
+mp_limb_t
+mpn_preinv_dc_divappr_q (mp_ptr qp,
+ mp_ptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn,
+ mp_srcptr dip)
+{
+ mp_size_t qn;
+ mp_limb_t qh, cy, qsave;
+ mp_ptr tp;
+ TMP_DECL;
+
+ TMP_MARK;
+
+ tp = TMP_SALLOC_LIMBS (dn+1);
+
+ qn = nn - dn;
+ qp += qn;
+ np += nn;
+ dp += dn;
+
+ if (qn > dn)
+ {
+ qn++; /* pretend we'll need an extra limb */
+ /* Reduce qn mod dn without division, optimizing small operations. */
+ do
+ qn -= dn;
+ while (qn > dn);
+
+ qp -= qn; /* point at low limb of next quotient block */
+ np -= qn; /* point in the middle of partial remainder */
+
+ /* Perform the typically smaller block first. */
+ if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
+ qh = mpn_sb_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dip);
+ else
+ qh = mpn_dc_div_qr_n (qp, np - qn, dp - qn, qn, dip, tp);
+
+ if (qn != dn)
+ {
+ if (qn > dn - qn)
+ mpn_mul (tp, qp, qn, dp - dn, dn - qn);
+ else
+ mpn_mul (tp, dp - dn, dn - qn, qp, qn);
+
+ cy = mpn_sub_n (np - dn, np - dn, tp, dn);
+ if (qh != 0)
+ cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
+
+ while (cy != 0)
+ {
+ qh -= mpn_sub_1 (qp, qp, qn, 1);
+ cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
+ }
+ }
+
+ qn = nn - dn - qn + 1;
+ while (qn > dn)
+ {
+ qp -= dn;
+ np -= dn;
+ mpn_dc_div_qr_n (qp, np - dn, dp - dn, dn, dip, tp);
+ qn -= dn;
+ }
+
+ /* Since we pretended we'd need an extra quotient limb before, we now
+ have made sure the code above left just dn-1=qn quotient limbs to
+ develop. Develop that plus a guard limb. */
+ qn--;
+ qp -= qn;
+ np -= dn;
+ qsave = qp[qn];
+ mpn_dc_divappr_q_n (qp, np - dn, dp - dn, dn, dip, tp);
+ MPN_COPY_INCR (qp, qp + 1, qn);
+ qp[qn] = qsave;
+ }
+ else
+ {
+ if (qn == 0)
+ {
+ qh = mpn_cmp (np - dn, dp - dn, dn) >= 0;
+ if (qh)
+ mpn_sub_n (np - dn, np - dn, dp - dn, dn);
+ TMP_FREE;
+ return qh;
+ }
+
+ qp -= qn; /* point at low limb of next quotient block */
+ np -= qn; /* point in the middle of partial remainder */
+
+ if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD))
+ /* Full precision. Optimal? */
+ qh = mpn_sb_divappr_q (qp, np - dn, nn, dp - dn, dn, dip);
+ else
+ {
+ /* Put quotient in tp, use qp as temporary, since qp lacks a limb. */
+ qh = mpn_dc_divappr_q_n (tp, np - qn - 2, dp - (qn + 1), qn + 1, dip, qp);
+ MPN_COPY (qp, tp + 1, qn);
+ }
+ }
+
+ TMP_FREE;
+ return qh;
+}
+
+mp_limb_t
+mpn_dc_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
+{
+ mp_limb_t cy;
+ mp_limb_t xp[2], dip[2];
+
+ ASSERT (dn >= 2);
+
+ cy = mpn_add_1 (xp, dp + dn - 2, 2, 1);
+ if (cy != 0)
+ dip[0] = dip[1] = 0;
+ else
+ {
+ mp_limb_t scratch[10]; /* FIXME */
+ mpn_invert (dip, xp, 2, scratch);
+ }
+
+ return mpn_preinv_dc_divappr_q (qp, np, nn, dp, dn, dip);
+}
diff --git a/gmp/mpn/generic/dc_divrem_n.c b/gmp/mpn/generic/dc_divrem_n.c
new file mode 100644
index 0000000000..61ddde72c3
--- /dev/null
+++ b/gmp/mpn/generic/dc_divrem_n.c
@@ -0,0 +1,121 @@
+/* mpn_dc_divrem_n and auxiliary routines.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE
+ INTERFACES. IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
+ IN FACT, IT IS ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A
+ FUTURE GNU MP RELEASE.
+
+
+Copyright 2000, 2001, 2002, 2004, 2005 Free Software Foundation, Inc.
+Contributed by Paul Zimmermann.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/*
+[1] Fast Recursive Division, by Christoph Burnikel and Joachim Ziegler,
+   Technical report MPI-I-98-1-022, October 1998.
+ http://www.mpi-sb.mpg.de/~ziegler/TechRep.ps.gz
+*/
+
+static mp_limb_t mpn_dc_div_3_by_2
+ __GMP_PROTO ((mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n, mp_ptr scratch));
+static mp_limb_t mpn_dc_div_2_by_1
+ __GMP_PROTO ((mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n, mp_ptr scratch));
+
+/* mpn_dc_divrem_n - Implements algorithm of page 8 in [1]: divides (np,2n)
+ by (dp,n) and puts the quotient in (qp,n), the remainder in (np,n).
+ Returns most significant limb of the quotient, which is 0 or 1.
+ Requires that the most significant bit of the divisor is set. */
+
+mp_limb_t
+mpn_dc_divrem_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n)
+{
+ mp_limb_t ret;
+ mp_ptr scratch;
+ TMP_DECL;
+ TMP_MARK;
+
+ scratch = TMP_ALLOC_LIMBS (n);
+ ret = mpn_dc_div_2_by_1 (qp, np, dp, n, scratch);
+
+ TMP_FREE;
+ return ret;
+}
+
+static mp_limb_t
+mpn_dc_div_2_by_1 (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
+{
+ mp_limb_t qhl, cc;
+ mp_size_t n2 = n/2;
+
+ if (n % 2 != 0)
+ {
+ mp_ptr qp1 = qp + 1;
+ qhl = mpn_dc_div_3_by_2 (qp1 + n2, np + 2 + n2, dp + 1, n2, scratch);
+ qhl += mpn_add_1 (qp1 + n2, qp1 + n2, n2,
+ mpn_dc_div_3_by_2 (qp1, np + 2, dp + 1, n2, scratch));
+
+ cc = mpn_submul_1 (np + 1, qp1, n - 1, dp[0]);
+ cc = mpn_sub_1 (np + n, np + n, 1, cc);
+ if (qhl != 0)
+ cc += mpn_sub_1 (np + n, np + n, 1, dp[0]);
+ while (cc != 0)
+ {
+ qhl -= mpn_sub_1 (qp1, qp1, n - 1, (mp_limb_t) 1);
+ cc -= mpn_add_n (np + 1, np + 1, dp, n);
+ }
+ qhl += mpn_add_1 (qp1, qp1, n - 1,
+ mpn_sb_divrem_mn (qp, np, n + 1, dp, n));
+ }
+ else
+ {
+ qhl = mpn_dc_div_3_by_2 (qp + n2, np + n2, dp, n2, scratch);
+ qhl += mpn_add_1 (qp + n2, qp + n2, n2,
+ mpn_dc_div_3_by_2 (qp, np, dp, n2, scratch));
+ }
+ return qhl;
+}
+
+
+/* divides (np, 3n) by (dp, 2n) and puts the quotient in (qp, n),
+ the remainder in (np, 2n) */
+
+static mp_limb_t
+mpn_dc_div_3_by_2 (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
+{
+ mp_size_t twon = n + n;
+ mp_limb_t qhl, cc;
+
+ if (n < DIV_DC_THRESHOLD)
+ qhl = mpn_sb_divrem_mn (qp, np + n, twon, dp + n, n);
+ else
+ qhl = mpn_dc_div_2_by_1 (qp, np + n, dp + n, n, scratch);
+
+ mpn_mul_n (scratch, qp, dp, n);
+ cc = mpn_sub_n (np, np, scratch, twon);
+
+ if (qhl != 0)
+ cc += mpn_sub_n (np + n, np + n, dp, n);
+ while (cc != 0)
+ {
+ qhl -= mpn_sub_1 (qp, qp, n, (mp_limb_t) 1);
+ cc -= mpn_add_n (np, np, dp, twon);
+ }
+ return qhl;
+}
diff --git a/gmp/mpn/generic/dcpi1_bdiv_q.c b/gmp/mpn/generic/dcpi1_bdiv_q.c
deleted file mode 100644
index a7b86c96d4..0000000000
--- a/gmp/mpn/generic/dcpi1_bdiv_q.c
+++ /dev/null
@@ -1,160 +0,0 @@
-/* mpn_dcpi1_bdiv_q -- divide-and-conquer Hensel division with precomputed
- inverse, returning quotient.
-
- Contributed to the GNU project by Niels Möller and Torbjorn Granlund.
-
- THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-
-Copyright 2006, 2007, 2009-2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-mp_size_t
-mpn_dcpi1_bdiv_q_n_itch (mp_size_t n)
-{
- /* NOTE: Depends on mullo_n interface */
- return n;
-}
-
-/* Computes Q = N / D mod B^n, destroys N.
-
- N = {np,n}
- D = {dp,n}
-*/
-
-void
-mpn_dcpi1_bdiv_q_n (mp_ptr qp,
- mp_ptr np, mp_srcptr dp, mp_size_t n,
- mp_limb_t dinv, mp_ptr tp)
-{
- while (ABOVE_THRESHOLD (n, DC_BDIV_Q_THRESHOLD))
- {
- mp_size_t lo, hi;
- mp_limb_t cy;
-
- lo = n >> 1; /* floor(n/2) */
- hi = n - lo; /* ceil(n/2) */
-
- cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, lo, dinv, tp);
-
- mpn_mullo_n (tp, qp, dp + hi, lo);
- mpn_sub_n (np + hi, np + hi, tp, lo);
-
- if (lo < hi)
- {
- cy += mpn_submul_1 (np + lo, qp, lo, dp[lo]);
- np[n - 1] -= cy;
- }
- qp += lo;
- np += lo;
- n -= lo;
- }
- mpn_sbpi1_bdiv_q (qp, np, n, dp, n, dinv);
-}
-
-/* Computes Q = N / D mod B^nn, destroys N.
-
- N = {np,nn}
- D = {dp,dn}
-*/
-
-void
-mpn_dcpi1_bdiv_q (mp_ptr qp,
- mp_ptr np, mp_size_t nn,
- mp_srcptr dp, mp_size_t dn,
- mp_limb_t dinv)
-{
- mp_size_t qn;
- mp_limb_t cy;
- mp_ptr tp;
- TMP_DECL;
-
- TMP_MARK;
-
- ASSERT (dn >= 2);
- ASSERT (nn - dn >= 0);
- ASSERT (dp[0] & 1);
-
- tp = TMP_SALLOC_LIMBS (dn);
-
- qn = nn;
-
- if (qn > dn)
- {
- /* Reduce qn mod dn in a super-efficient manner. */
- do
- qn -= dn;
- while (qn > dn);
-
- /* Perform the typically smaller block first. */
- if (BELOW_THRESHOLD (qn, DC_BDIV_QR_THRESHOLD))
- cy = mpn_sbpi1_bdiv_qr (qp, np, 2 * qn, dp, qn, dinv);
- else
- cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, qn, dinv, tp);
-
- if (qn != dn)
- {
- if (qn > dn - qn)
- mpn_mul (tp, qp, qn, dp + qn, dn - qn);
- else
- mpn_mul (tp, dp + qn, dn - qn, qp, qn);
- mpn_incr_u (tp + qn, cy);
-
- mpn_sub (np + qn, np + qn, nn - qn, tp, dn);
- cy = 0;
- }
-
- np += qn;
- qp += qn;
-
- qn = nn - qn;
- while (qn > dn)
- {
- mpn_sub_1 (np + dn, np + dn, qn - dn, cy);
- cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, dn, dinv, tp);
- qp += dn;
- np += dn;
- qn -= dn;
- }
- mpn_dcpi1_bdiv_q_n (qp, np, dp, dn, dinv, tp);
- }
- else
- {
- if (BELOW_THRESHOLD (qn, DC_BDIV_Q_THRESHOLD))
- mpn_sbpi1_bdiv_q (qp, np, qn, dp, qn, dinv);
- else
- mpn_dcpi1_bdiv_q_n (qp, np, dp, qn, dinv, tp);
- }
-
- TMP_FREE;
-}
diff --git a/gmp/mpn/generic/dcpi1_div_q.c b/gmp/mpn/generic/dcpi1_div_q.c
deleted file mode 100644
index 32d74c31a9..0000000000
--- a/gmp/mpn/generic/dcpi1_div_q.c
+++ /dev/null
@@ -1,87 +0,0 @@
-/* mpn_dc_div_q -- divide-and-conquer division, returning exact quotient
- only.
-
- Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-
-Copyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-mp_limb_t
-mpn_dcpi1_div_q (mp_ptr qp, mp_ptr np, mp_size_t nn,
- mp_srcptr dp, mp_size_t dn, gmp_pi1_t *dinv)
-{
- mp_ptr tp, wp;
- mp_limb_t qh;
- mp_size_t qn;
- TMP_DECL;
-
- TMP_MARK;
-
- ASSERT (dn >= 6);
- ASSERT (nn - dn >= 3);
- ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
-
- tp = TMP_SALLOC_LIMBS (nn + 1);
- MPN_COPY (tp + 1, np, nn);
- tp[0] = 0;
-
- qn = nn - dn;
- wp = TMP_SALLOC_LIMBS (qn + 1);
-
- qh = mpn_dcpi1_divappr_q (wp, tp, nn + 1, dp, dn, dinv);
-
- if (wp[0] == 0)
- {
- mp_limb_t cy;
-
- if (qn > dn)
- mpn_mul (tp, wp + 1, qn, dp, dn);
- else
- mpn_mul (tp, dp, dn, wp + 1, qn);
-
- cy = (qh != 0) ? mpn_add_n (tp + qn, tp + qn, dp, dn) : 0;
-
- if (cy || mpn_cmp (tp, np, nn) > 0) /* At most is wrong by one, no cycle. */
- qh -= mpn_sub_1 (qp, wp + 1, qn, 1);
- else /* Same as below */
- MPN_COPY (qp, wp + 1, qn);
- }
- else
- MPN_COPY (qp, wp + 1, qn);
-
- TMP_FREE;
- return qh;
-}
diff --git a/gmp/mpn/generic/dcpi1_div_qr.c b/gmp/mpn/generic/dcpi1_div_qr.c
deleted file mode 100644
index 4d80c7b769..0000000000
--- a/gmp/mpn/generic/dcpi1_div_qr.c
+++ /dev/null
@@ -1,249 +0,0 @@
-/* mpn_dcpi1_div_qr_n -- recursive divide-and-conquer division for arbitrary
- size operands.
-
- Contributed to the GNU project by Torbjorn Granlund.
-
- THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-
-Copyright 2006, 2007, 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-
-mp_limb_t
-mpn_dcpi1_div_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
- gmp_pi1_t *dinv, mp_ptr tp)
-{
- mp_size_t lo, hi;
- mp_limb_t cy, qh, ql;
-
- lo = n >> 1; /* floor(n/2) */
- hi = n - lo; /* ceil(n/2) */
-
- if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))
- qh = mpn_sbpi1_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv->inv32);
- else
- qh = mpn_dcpi1_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp);
-
- mpn_mul (tp, qp + lo, hi, dp, lo);
-
- cy = mpn_sub_n (np + lo, np + lo, tp, n);
- if (qh != 0)
- cy += mpn_sub_n (np + n, np + n, dp, lo);
-
- while (cy != 0)
- {
- qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
- cy -= mpn_add_n (np + lo, np + lo, dp, n);
- }
-
- if (BELOW_THRESHOLD (lo, DC_DIV_QR_THRESHOLD))
- ql = mpn_sbpi1_div_qr (qp, np + hi, 2 * lo, dp + hi, lo, dinv->inv32);
- else
- ql = mpn_dcpi1_div_qr_n (qp, np + hi, dp + hi, lo, dinv, tp);
-
- mpn_mul (tp, dp, hi, qp, lo);
-
- cy = mpn_sub_n (np, np, tp, n);
- if (ql != 0)
- cy += mpn_sub_n (np + lo, np + lo, dp, hi);
-
- while (cy != 0)
- {
- mpn_sub_1 (qp, qp, lo, 1);
- cy -= mpn_add_n (np, np, dp, n);
- }
-
- return qh;
-}
-
-mp_limb_t
-mpn_dcpi1_div_qr (mp_ptr qp,
- mp_ptr np, mp_size_t nn,
- mp_srcptr dp, mp_size_t dn,
- gmp_pi1_t *dinv)
-{
- mp_size_t qn;
- mp_limb_t qh, cy;
- mp_ptr tp;
- TMP_DECL;
-
- TMP_MARK;
-
- ASSERT (dn >= 6); /* to adhere to mpn_sbpi1_div_qr's limits */
- ASSERT (nn - dn >= 3); /* to adhere to mpn_sbpi1_div_qr's limits */
- ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
-
- tp = TMP_SALLOC_LIMBS (dn);
-
- qn = nn - dn;
- qp += qn;
- np += nn;
- dp += dn;
-
- if (qn > dn)
- {
- /* Reduce qn mod dn without division, optimizing small operations. */
- do
- qn -= dn;
- while (qn > dn);
-
- qp -= qn; /* point at low limb of next quotient block */
- np -= qn; /* point in the middle of partial remainder */
-
- /* Perform the typically smaller block first. */
- if (qn == 1)
- {
- mp_limb_t q, n2, n1, n0, d1, d0;
-
- /* Handle qh up front, for simplicity. */
- qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;
- if (qh)
- ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));
-
- /* A single iteration of schoolbook: One 3/2 division,
- followed by the bignum update and adjustment. */
- n2 = np[0];
- n1 = np[-1];
- n0 = np[-2];
- d1 = dp[-1];
- d0 = dp[-2];
-
- ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));
-
- if (UNLIKELY (n2 == d1) && n1 == d0)
- {
- q = GMP_NUMB_MASK;
- cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
- ASSERT (cy == n2);
- }
- else
- {
- udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32);
-
- if (dn > 2)
- {
- mp_limb_t cy, cy1;
- cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);
-
- cy1 = n0 < cy;
- n0 = (n0 - cy) & GMP_NUMB_MASK;
- cy = n1 < cy1;
- n1 = (n1 - cy1) & GMP_NUMB_MASK;
- np[-2] = n0;
-
- if (UNLIKELY (cy != 0))
- {
- n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);
- qh -= (q == 0);
- q = (q - 1) & GMP_NUMB_MASK;
- }
- }
- else
- np[-2] = n0;
-
- np[-1] = n1;
- }
- qp[0] = q;
- }
- else
- {
- /* Do a 2qn / qn division */
- if (qn == 2)
- qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2); /* FIXME: obsolete function. Use 5/3 division? */
- else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
- qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
- else
- qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
-
- if (qn != dn)
- {
- if (qn > dn - qn)
- mpn_mul (tp, qp, qn, dp - dn, dn - qn);
- else
- mpn_mul (tp, dp - dn, dn - qn, qp, qn);
-
- cy = mpn_sub_n (np - dn, np - dn, tp, dn);
- if (qh != 0)
- cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
-
- while (cy != 0)
- {
- qh -= mpn_sub_1 (qp, qp, qn, 1);
- cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
- }
- }
- }
-
- qn = nn - dn - qn;
- do
- {
- qp -= dn;
- np -= dn;
- mpn_dcpi1_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp);
- qn -= dn;
- }
- while (qn > 0);
- }
- else
- {
- qp -= qn; /* point at low limb of next quotient block */
- np -= qn; /* point in the middle of partial remainder */
-
- if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
- qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
- else
- qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
-
- if (qn != dn)
- {
- if (qn > dn - qn)
- mpn_mul (tp, qp, qn, dp - dn, dn - qn);
- else
- mpn_mul (tp, dp - dn, dn - qn, qp, qn);
-
- cy = mpn_sub_n (np - dn, np - dn, tp, dn);
- if (qh != 0)
- cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
-
- while (cy != 0)
- {
- qh -= mpn_sub_1 (qp, qp, qn, 1);
- cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
- }
- }
- }
-
- TMP_FREE;
- return qh;
-}
diff --git a/gmp/mpn/generic/dcpi1_divappr_q.c b/gmp/mpn/generic/dcpi1_divappr_q.c
deleted file mode 100644
index c7b03c7f49..0000000000
--- a/gmp/mpn/generic/dcpi1_divappr_q.c
+++ /dev/null
@@ -1,257 +0,0 @@
-/* mpn_dcpi1_divappr_q -- divide-and-conquer division, returning approximate
- quotient. The quotient returned is either correct, or one too large.
-
- Contributed to the GNU project by Torbjorn Granlund.
-
- THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-
-Copyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-
-mp_limb_t
-mpn_dcpi1_divappr_q_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
- gmp_pi1_t *dinv, mp_ptr tp)
-{
- mp_size_t lo, hi;
- mp_limb_t cy, qh, ql;
-
- lo = n >> 1; /* floor(n/2) */
- hi = n - lo; /* ceil(n/2) */
-
- if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))
- qh = mpn_sbpi1_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv->inv32);
- else
- qh = mpn_dcpi1_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp);
-
- mpn_mul (tp, qp + lo, hi, dp, lo);
-
- cy = mpn_sub_n (np + lo, np + lo, tp, n);
- if (qh != 0)
- cy += mpn_sub_n (np + n, np + n, dp, lo);
-
- while (cy != 0)
- {
- qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
- cy -= mpn_add_n (np + lo, np + lo, dp, n);
- }
-
- if (BELOW_THRESHOLD (lo, DC_DIVAPPR_Q_THRESHOLD))
- ql = mpn_sbpi1_divappr_q (qp, np + hi, 2 * lo, dp + hi, lo, dinv->inv32);
- else
- ql = mpn_dcpi1_divappr_q_n (qp, np + hi, dp + hi, lo, dinv, tp);
-
- if (UNLIKELY (ql != 0))
- {
- mp_size_t i;
- for (i = 0; i < lo; i++)
- qp[i] = GMP_NUMB_MASK;
- }
-
- return qh;
-}
-
-mp_limb_t
-mpn_dcpi1_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn,
- mp_srcptr dp, mp_size_t dn, gmp_pi1_t *dinv)
-{
- mp_size_t qn;
- mp_limb_t qh, cy, qsave;
- mp_ptr tp;
- TMP_DECL;
-
- TMP_MARK;
-
- ASSERT (dn >= 6);
- ASSERT (nn > dn);
- ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
-
- qn = nn - dn;
- qp += qn;
- np += nn;
- dp += dn;
-
- if (qn >= dn)
- {
- qn++; /* pretend we'll need an extra limb */
- /* Reduce qn mod dn without division, optimizing small operations. */
- do
- qn -= dn;
- while (qn > dn);
-
- qp -= qn; /* point at low limb of next quotient block */
- np -= qn; /* point in the middle of partial remainder */
-
- tp = TMP_SALLOC_LIMBS (dn);
-
- /* Perform the typically smaller block first. */
- if (qn == 1)
- {
- mp_limb_t q, n2, n1, n0, d1, d0;
-
- /* Handle qh up front, for simplicity. */
- qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;
- if (qh)
- ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));
-
- /* A single iteration of schoolbook: One 3/2 division,
- followed by the bignum update and adjustment. */
- n2 = np[0];
- n1 = np[-1];
- n0 = np[-2];
- d1 = dp[-1];
- d0 = dp[-2];
-
- ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));
-
- if (UNLIKELY (n2 == d1) && n1 == d0)
- {
- q = GMP_NUMB_MASK;
- cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
- ASSERT (cy == n2);
- }
- else
- {
- udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32);
-
- if (dn > 2)
- {
- mp_limb_t cy, cy1;
- cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);
-
- cy1 = n0 < cy;
- n0 = (n0 - cy) & GMP_NUMB_MASK;
- cy = n1 < cy1;
- n1 = (n1 - cy1) & GMP_NUMB_MASK;
- np[-2] = n0;
-
- if (UNLIKELY (cy != 0))
- {
- n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);
- qh -= (q == 0);
- q = (q - 1) & GMP_NUMB_MASK;
- }
- }
- else
- np[-2] = n0;
-
- np[-1] = n1;
- }
- qp[0] = q;
- }
- else
- {
- if (qn == 2)
- qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2);
- else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
- qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
- else
- qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
-
- if (qn != dn)
- {
- if (qn > dn - qn)
- mpn_mul (tp, qp, qn, dp - dn, dn - qn);
- else
- mpn_mul (tp, dp - dn, dn - qn, qp, qn);
-
- cy = mpn_sub_n (np - dn, np - dn, tp, dn);
- if (qh != 0)
- cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
-
- while (cy != 0)
- {
- qh -= mpn_sub_1 (qp, qp, qn, 1);
- cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
- }
- }
- }
- qn = nn - dn - qn + 1;
- while (qn > dn)
- {
- qp -= dn;
- np -= dn;
- mpn_dcpi1_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp);
- qn -= dn;
- }
-
- /* Since we pretended we'd need an extra quotient limb before, we now
- have made sure the code above left just dn-1=qn quotient limbs to
- develop. Develop that plus a guard limb. */
- qn--;
- qp -= qn;
- np -= dn;
- qsave = qp[qn];
- mpn_dcpi1_divappr_q_n (qp, np - dn, dp - dn, dn, dinv, tp);
- MPN_COPY_INCR (qp, qp + 1, qn);
- qp[qn] = qsave;
- }
- else /* (qn < dn) */
- {
- mp_ptr q2p;
-#if 0 /* not possible since we demand nn > dn */
- if (qn == 0)
- {
- qh = mpn_cmp (np - dn, dp - dn, dn) >= 0;
- if (qh)
- mpn_sub_n (np - dn, np - dn, dp - dn, dn);
- TMP_FREE;
- return qh;
- }
-#endif
-
- qp -= qn; /* point at low limb of next quotient block */
- np -= qn; /* point in the middle of partial remainder */
-
- q2p = TMP_SALLOC_LIMBS (qn + 1);
- /* Should we at all check DC_DIVAPPR_Q_THRESHOLD here, or reply on
- callers not to be silly? */
- if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD))
- {
- qh = mpn_sbpi1_divappr_q (q2p, np - qn - 2, 2 * (qn + 1),
- dp - (qn + 1), qn + 1, dinv->inv32);
- }
- else
- {
- /* It is tempting to use qp for recursive scratch and put quotient in
- tp, but the recursive scratch needs one limb too many. */
- tp = TMP_SALLOC_LIMBS (qn + 1);
- qh = mpn_dcpi1_divappr_q_n (q2p, np - qn - 2, dp - (qn + 1), qn + 1, dinv, tp);
- }
- MPN_COPY (qp, q2p + 1, qn);
- }
-
- TMP_FREE;
- return qh;
-}
diff --git a/gmp/mpn/generic/div_q.c b/gmp/mpn/generic/div_q.c
deleted file mode 100644
index aabcef0825..0000000000
--- a/gmp/mpn/generic/div_q.c
+++ /dev/null
@@ -1,323 +0,0 @@
-/* mpn_div_q -- division for arbitrary size operands.
-
- Contributed to the GNU project by Torbjorn Granlund.
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-
-Copyright 2009, 2010 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-
-/* Compute Q = N/D with truncation.
- N = {np,nn}
- D = {dp,dn}
- Q = {qp,nn-dn+1}
- T = {scratch,nn+1} is scratch space
- N and D are both untouched by the computation.
- N and T may overlap; pass the same space if N is irrelevant after the call,
- but note that tp needs an extra limb.
-
- Operand requirements:
- N >= D > 0
- dp[dn-1] != 0
- No overlap between the N, D, and Q areas.
-
- This division function does not clobber its input operands, since it is
- intended to support average-O(qn) division, and for that to be effective, it
- cannot put requirements on callers to copy a O(nn) operand.
-
- If a caller does not care about the value of {np,nn+1} after calling this
- function, it should pass np also for the scratch argument. This function
- will then save some time and space by avoiding allocation and copying.
- (FIXME: Is this a good design? We only really save any copying for
- already-normalised divisors, which should be rare. It also prevents us from
- reasonably asking for all scratch space we need.)
-
- We write nn-dn+1 limbs for the quotient, but return void. Why not return
- the most significant quotient limb? Look at the 4 main code blocks below
- (consisting of an outer if-else where each arm contains an if-else). It is
- tricky for the first code block, since the mpn_*_div_q calls will typically
- generate all nn-dn+1 and return 0 or 1. I don't see how to fix that unless
- we generate the most significant quotient limb here, before calling
- mpn_*_div_q, or put the quotient in a temporary area. Since this is a
- critical division case (the SB sub-case in particular) copying is not a good
- idea.
-
- It might make sense to split the if-else parts of the (qn + FUDGE
- >= dn) blocks into separate functions, since we could promise quite
- different things to callers in these two cases. The 'then' case
- benefits from np=scratch, and it could perhaps even tolerate qp=np,
- saving some headache for many callers.
-
- FIXME: Scratch allocation leaves a lot to be desired. E.g., for the MU size
- operands, we do not reuse the huge scratch for adjustments. This can be a
- serious waste of memory for the largest operands.
-*/
-
-/* FUDGE determines when to try getting an approximate quotient from the upper
- parts of the dividend and divisor, then adjust. N.B. FUDGE must be >= 2
- for the code to be correct. */
-#define FUDGE 5 /* FIXME: tune this */
-
-#define DC_DIV_Q_THRESHOLD DC_DIVAPPR_Q_THRESHOLD
-#define MU_DIV_Q_THRESHOLD MU_DIVAPPR_Q_THRESHOLD
-#define MUPI_DIV_Q_THRESHOLD MUPI_DIVAPPR_Q_THRESHOLD
-#ifndef MUPI_DIVAPPR_Q_THRESHOLD
-#define MUPI_DIVAPPR_Q_THRESHOLD MUPI_DIV_QR_THRESHOLD
-#endif
-
-void
-mpn_div_q (mp_ptr qp,
- mp_srcptr np, mp_size_t nn,
- mp_srcptr dp, mp_size_t dn, mp_ptr scratch)
-{
- mp_ptr new_dp, new_np, tp, rp;
- mp_limb_t cy, dh, qh;
- mp_size_t new_nn, qn;
- gmp_pi1_t dinv;
- int cnt;
- TMP_DECL;
- TMP_MARK;
-
- ASSERT (nn >= dn);
- ASSERT (dn > 0);
- ASSERT (dp[dn - 1] != 0);
- ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, np, nn));
- ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, dp, dn));
- ASSERT (MPN_SAME_OR_SEPARATE_P (np, scratch, nn));
-
- ASSERT_ALWAYS (FUDGE >= 2);
-
- if (dn == 1)
- {
- mpn_divrem_1 (qp, 0L, np, nn, dp[dn - 1]);
- return;
- }
-
- qn = nn - dn + 1; /* Quotient size, high limb might be zero */
-
- if (qn + FUDGE >= dn)
- {
- /* |________________________|
- |_______| */
- new_np = scratch;
-
- dh = dp[dn - 1];
- if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
- {
- count_leading_zeros (cnt, dh);
-
- cy = mpn_lshift (new_np, np, nn, cnt);
- new_np[nn] = cy;
- new_nn = nn + (cy != 0);
-
- new_dp = TMP_ALLOC_LIMBS (dn);
- mpn_lshift (new_dp, dp, dn, cnt);
-
- if (dn == 2)
- {
- qh = mpn_divrem_2 (qp, 0L, new_np, new_nn, new_dp);
- }
- else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
- BELOW_THRESHOLD (new_nn - dn, DC_DIV_Q_THRESHOLD))
- {
- invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
- qh = mpn_sbpi1_div_q (qp, new_np, new_nn, new_dp, dn, dinv.inv32);
- }
- else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) || /* fast condition */
- BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
- (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
- + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn) /* ...condition */
- {
- invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]);
- qh = mpn_dcpi1_div_q (qp, new_np, new_nn, new_dp, dn, &dinv);
- }
- else
- {
- mp_size_t itch = mpn_mu_div_q_itch (new_nn, dn, 0);
- mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
- qh = mpn_mu_div_q (qp, new_np, new_nn, new_dp, dn, scratch);
- }
- if (cy == 0)
- qp[qn - 1] = qh;
- else if (UNLIKELY (qh != 0))
- {
- /* This happens only when the quotient is close to B^n and
- mpn_*_divappr_q returned B^n. */
- mp_size_t i, n;
- n = new_nn - dn;
- for (i = 0; i < n; i++)
- qp[i] = GMP_NUMB_MAX;
- qh = 0; /* currently ignored */
- }
- }
- else /* divisor is already normalised */
- {
- if (new_np != np)
- MPN_COPY (new_np, np, nn);
-
- if (dn == 2)
- {
- qh = mpn_divrem_2 (qp, 0L, new_np, nn, dp);
- }
- else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) ||
- BELOW_THRESHOLD (nn - dn, DC_DIV_Q_THRESHOLD))
- {
- invert_pi1 (dinv, dh, dp[dn - 2]);
- qh = mpn_sbpi1_div_q (qp, new_np, nn, dp, dn, dinv.inv32);
- }
- else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) || /* fast condition */
- BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */
- (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... */
- + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn) /* ...condition */
- {
- invert_pi1 (dinv, dh, dp[dn - 2]);
- qh = mpn_dcpi1_div_q (qp, new_np, nn, dp, dn, &dinv);
- }
- else
- {
- mp_size_t itch = mpn_mu_div_q_itch (nn, dn, 0);
- mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
- qh = mpn_mu_div_q (qp, np, nn, dp, dn, scratch);
- }
- qp[nn - dn] = qh;
- }
- }
- else
- {
- /* |________________________|
- |_________________| */
- tp = TMP_ALLOC_LIMBS (qn + 1);
-
- new_np = scratch;
- new_nn = 2 * qn + 1;
- if (new_np == np)
- /* We need {np,nn} to remain untouched until the final adjustment, so
- we need to allocate separate space for new_np. */
- new_np = TMP_ALLOC_LIMBS (new_nn + 1);
-
-
- dh = dp[dn - 1];
- if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0))
- {
- count_leading_zeros (cnt, dh);
-
- cy = mpn_lshift (new_np, np + nn - new_nn, new_nn, cnt);
- new_np[new_nn] = cy;
-
- new_nn += (cy != 0);
-
- new_dp = TMP_ALLOC_LIMBS (qn + 1);
- mpn_lshift (new_dp, dp + dn - (qn + 1), qn + 1, cnt);
- new_dp[0] |= dp[dn - (qn + 1) - 1] >> (GMP_NUMB_BITS - cnt);
-
- if (qn + 1 == 2)
- {
- qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
- }
- else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
- {
- invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
- qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
- }
- else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
- {
- invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]);
- qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
- }
- else
- {
- mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
- mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
- qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
- }
- if (cy == 0)
- tp[qn] = qh;
- else if (UNLIKELY (qh != 0))
- {
- /* This happens only when the quotient is close to B^n and
- mpn_*_divappr_q returned B^n. */
- mp_size_t i, n;
- n = new_nn - (qn + 1);
- for (i = 0; i < n; i++)
- tp[i] = GMP_NUMB_MAX;
- qh = 0; /* currently ignored */
- }
- }
- else /* divisor is already normalised */
- {
- MPN_COPY (new_np, np + nn - new_nn, new_nn); /* pointless of MU will be used */
-
- new_dp = (mp_ptr) dp + dn - (qn + 1);
-
- if (qn == 2 - 1)
- {
- qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp);
- }
- else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1))
- {
- invert_pi1 (dinv, dh, new_dp[qn - 1]);
- qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32);
- }
- else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1))
- {
- invert_pi1 (dinv, dh, new_dp[qn - 1]);
- qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv);
- }
- else
- {
- mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0);
- mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
- qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch);
- }
- tp[qn] = qh;
- }
-
- MPN_COPY (qp, tp + 1, qn);
- if (tp[0] <= 4)
- {
- mp_size_t rn;
-
- rp = TMP_ALLOC_LIMBS (dn + qn);
- mpn_mul (rp, dp, dn, tp + 1, qn);
- rn = dn + qn;
- rn -= rp[rn - 1] == 0;
-
- if (rn > nn || mpn_cmp (np, rp, nn) < 0)
- mpn_decr_u (qp, 1);
- }
- }
-
- TMP_FREE;
-}
diff --git a/gmp/mpn/generic/div_qr_1.c b/gmp/mpn/generic/div_qr_1.c
deleted file mode 100644
index 09401ac535..0000000000
--- a/gmp/mpn/generic/div_qr_1.c
+++ /dev/null
@@ -1,126 +0,0 @@
-/* mpn_div_qr_1 -- mpn by limb division.
-
- Contributed to the GNU project by Niels Möller and Torbjörn Granlund
-
-Copyright 1991, 1993, 1994, 1996, 1998-2000, 2002, 2003, 2013 Free Software
-Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-#ifndef DIV_QR_1_NORM_THRESHOLD
-#define DIV_QR_1_NORM_THRESHOLD 3
-#endif
-#ifndef DIV_QR_1_UNNORM_THRESHOLD
-#define DIV_QR_1_UNNORM_THRESHOLD 3
-#endif
-
-#if GMP_NAIL_BITS > 0
-#error Nail bits not supported
-#endif
-
-/* Divides {up, n} by d. Writes the n-1 low quotient limbs at {qp,
- * n-1}, and the high quote limb at *qh. Returns remainder. */
-mp_limb_t
-mpn_div_qr_1 (mp_ptr qp, mp_limb_t *qh, mp_srcptr up, mp_size_t n,
- mp_limb_t d)
-{
- unsigned cnt;
- mp_limb_t uh;
-
- ASSERT (n > 0);
- ASSERT (d > 0);
-
- if (d & GMP_NUMB_HIGHBIT)
- {
- /* Normalized case */
- mp_limb_t dinv, q;
-
- uh = up[--n];
-
- q = (uh >= d);
- *qh = q;
- uh -= (-q) & d;
-
- if (BELOW_THRESHOLD (n, DIV_QR_1_NORM_THRESHOLD))
- {
- cnt = 0;
- plain:
- while (n > 0)
- {
- mp_limb_t ul = up[--n];
- udiv_qrnnd (qp[n], uh, uh, ul, d);
- }
- return uh >> cnt;
- }
- invert_limb (dinv, d);
- return mpn_div_qr_1n_pi1 (qp, up, n, uh, d, dinv);
- }
- else
- {
- /* Unnormalized case */
- mp_limb_t dinv, ul;
-
- if (! UDIV_NEEDS_NORMALIZATION
- && BELOW_THRESHOLD (n, DIV_QR_1_UNNORM_THRESHOLD))
- {
- uh = up[--n];
- udiv_qrnnd (*qh, uh, CNST_LIMB(0), uh, d);
- cnt = 0;
- goto plain;
- }
-
- count_leading_zeros (cnt, d);
- d <<= cnt;
-
-#if HAVE_NATIVE_div_qr_1u_pi1
- /* FIXME: Call loop doing on-the-fly normalization */
-#endif
-
- /* Shift up front, use qp area for shifted copy. A bit messy,
- since we have only n-1 limbs available, and shift the high
- limb manually. */
- uh = up[--n];
- ul = (uh << cnt) | mpn_lshift (qp, up, n, cnt);
- uh >>= (GMP_LIMB_BITS - cnt);
-
- if (UDIV_NEEDS_NORMALIZATION
- && BELOW_THRESHOLD (n, DIV_QR_1_UNNORM_THRESHOLD))
- {
- udiv_qrnnd (*qh, uh, uh, ul, d);
- up = qp;
- goto plain;
- }
- invert_limb (dinv, d);
-
- udiv_qrnnd_preinv (*qh, uh, uh, ul, d, dinv);
- return mpn_div_qr_1n_pi1 (qp, qp, n, uh, d, dinv) >> cnt;
- }
-}
diff --git a/gmp/mpn/generic/div_qr_1n_pi1.c b/gmp/mpn/generic/div_qr_1n_pi1.c
deleted file mode 100644
index 229ee091a4..0000000000
--- a/gmp/mpn/generic/div_qr_1n_pi1.c
+++ /dev/null
@@ -1,277 +0,0 @@
-/* mpn_div_qr_1n_pi1
-
- Contributed to the GNU project by Niels Möller
-
- THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-
-Copyright 2013 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-#if GMP_NAIL_BITS > 0
-#error Nail bits not supported
-#endif
-
-#ifndef DIV_QR_1N_METHOD
-#define DIV_QR_1N_METHOD 2
-#endif
-
-/* FIXME: Duplicated in mod_1_1.c. Move to gmp-impl.h */
-
-#if defined (__GNUC__)
-
-#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
-#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
- __asm__ ( "add %6, %k2\n\t" \
- "adc %4, %k1\n\t" \
- "sbb %k0, %k0" \
- : "=r" (m), "=r" (s1), "=&r" (s0) \
- : "1" ((USItype)(a1)), "g" ((USItype)(b1)), \
- "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
-#endif
-
-#if HAVE_HOST_CPU_FAMILY_x86_64 && W_TYPE_SIZE == 64
-#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
- __asm__ ( "add %6, %q2\n\t" \
- "adc %4, %q1\n\t" \
- "sbb %q0, %q0" \
- : "=r" (m), "=r" (s1), "=&r" (s0) \
- : "1" ((UDItype)(a1)), "rme" ((UDItype)(b1)), \
- "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
-#endif
-
-#if defined (__sparc__) && W_TYPE_SIZE == 32
-#define add_mssaaaa(m, sh, sl, ah, al, bh, bl) \
- __asm__ ( "addcc %r5, %6, %2\n\t" \
- "addxcc %r3, %4, %1\n\t" \
- "subx %%g0, %%g0, %0" \
- : "=r" (m), "=r" (sh), "=&r" (sl) \
- : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl) \
- __CLOBBER_CC)
-#endif
-
-#if defined (__sparc__) && W_TYPE_SIZE == 64
-#define add_mssaaaa(m, sh, sl, ah, al, bh, bl) \
- __asm__ ( "addcc %r5, %6, %2\n\t" \
- "addccc %r7, %8, %%g0\n\t" \
- "addccc %r3, %4, %1\n\t" \
- "clr %0\n\t" \
- "movcs %%xcc, -1, %0" \
- : "=r" (m), "=r" (sh), "=&r" (sl) \
- : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl), \
- "rJ" ((al) >> 32), "rI" ((bl) >> 32) \
- __CLOBBER_CC)
-#if __VIS__ >= 0x300
-#undef add_mssaaaa
-#define add_mssaaaa(m, sh, sl, ah, al, bh, bl) \
- __asm__ ( "addcc %r5, %6, %2\n\t" \
- "addxccc %r3, %4, %1\n\t" \
- "clr %0\n\t" \
- "movcs %%xcc, -1, %0" \
- : "=r" (m), "=r" (sh), "=&r" (sl) \
- : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl) \
- __CLOBBER_CC)
-#endif
-#endif
-
-#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
-/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
- processor running in 32-bit mode, since the carry flag then gets the 32-bit
- carry. */
-#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
- __asm__ ( "add%I6c %2, %5, %6\n\t" \
- "adde %1, %3, %4\n\t" \
- "subfe %0, %0, %0\n\t" \
- "nor %0, %0, %0" \
- : "=r" (m), "=r" (s1), "=&r" (s0) \
- : "r" (a1), "r" (b1), "%r" (a0), "rI" (b0))
-#endif
-
-#if defined (__s390x__) && W_TYPE_SIZE == 64
-#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
- __asm__ ( "algr %2, %6\n\t" \
- "alcgr %1, %4\n\t" \
- "lghi %0, 0\n\t" \
- "alcgr %0, %0\n\t" \
- "lcgr %0, %0" \
- : "=r" (m), "=r" (s1), "=&r" (s0) \
- : "1" ((UDItype)(a1)), "r" ((UDItype)(b1)), \
- "%2" ((UDItype)(a0)), "r" ((UDItype)(b0)) __CLOBBER_CC)
-#endif
-
-#if defined (__arm__) && W_TYPE_SIZE == 32
-#define add_mssaaaa(m, sh, sl, ah, al, bh, bl) \
- __asm__ ( "adds %2, %5, %6\n\t" \
- "adcs %1, %3, %4\n\t" \
- "movcc %0, #0\n\t" \
- "movcs %0, #-1" \
- : "=r" (m), "=r" (sh), "=&r" (sl) \
- : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
-#endif
-#endif /* defined (__GNUC__) */
-
-#ifndef add_mssaaaa
-#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
- do { \
- UWtype __s0, __s1, __c0, __c1; \
- __s0 = (a0) + (b0); \
- __s1 = (a1) + (b1); \
- __c0 = __s0 < (a0); \
- __c1 = __s1 < (a1); \
- (s0) = __s0; \
- __s1 = __s1 + __c0; \
- (s1) = __s1; \
- (m) = - (__c1 + (__s1 < __c0)); \
- } while (0)
-#endif
-
-#if DIV_QR_1N_METHOD == 1
-
-/* Divides (uh B^n + {up, n}) by d, storing the quotient at {qp, n}.
- Requires that uh < d. */
-mp_limb_t
-mpn_div_qr_1n_pi1 (mp_ptr qp, mp_srcptr up, mp_size_t n, mp_limb_t uh,
- mp_limb_t d, mp_limb_t dinv)
-{
- ASSERT (n > 0);
- ASSERT (uh < d);
- ASSERT (d & GMP_NUMB_HIGHBIT);
- ASSERT (MPN_SAME_OR_SEPARATE_P (qp, up, n));
-
- do
- {
- mp_limb_t q, ul;
-
- ul = up[--n];
- udiv_qrnnd_preinv (q, uh, uh, ul, d, dinv);
- qp[n] = q;
- }
- while (n > 0);
-
- return uh;
-}
-
-#elif DIV_QR_1N_METHOD == 2
-
-mp_limb_t
-mpn_div_qr_1n_pi1 (mp_ptr qp, mp_srcptr up, mp_size_t n, mp_limb_t u1,
- mp_limb_t d, mp_limb_t dinv)
-{
- mp_limb_t B2;
- mp_limb_t u0, u2;
- mp_limb_t q0, q1;
- mp_limb_t p0, p1;
- mp_limb_t t;
- mp_size_t j;
-
- ASSERT (d & GMP_LIMB_HIGHBIT);
- ASSERT (n > 0);
- ASSERT (u1 < d);
-
- if (n == 1)
- {
- udiv_qrnnd_preinv (qp[0], u1, u1, up[0], d, dinv);
- return u1;
- }
-
- /* FIXME: Could be precomputed */
- B2 = -d*dinv;
-
- umul_ppmm (q1, q0, dinv, u1);
- umul_ppmm (p1, p0, B2, u1);
- q1 += u1;
- ASSERT (q1 >= u1);
- u0 = up[n-1]; /* Early read, to allow qp == up. */
- qp[n-1] = q1;
-
- add_mssaaaa (u2, u1, u0, u0, up[n-2], p1, p0);
-
- /* FIXME: Keep q1 in a variable between iterations, to reduce number
- of memory accesses. */
- for (j = n-2; j-- > 0; )
- {
- mp_limb_t q2, cy;
-
- /* Additions for the q update:
- * +-------+
- * |u1 * v |
- * +---+---+
- * | u1|
- * +---+---+
- * | 1 | v | (conditional on u2)
- * +---+---+
- * | 1 | (conditional on u0 + u2 B2 carry)
- * +---+
- * + | q0|
- * -+---+---+---+
- * | q2| q1| q0|
- * +---+---+---+
- */
- umul_ppmm (p1, t, u1, dinv);
- add_ssaaaa (q2, q1, -u2, u2 & dinv, CNST_LIMB(0), u1);
- add_ssaaaa (q2, q1, q2, q1, CNST_LIMB(0), p1);
- add_ssaaaa (q2, q1, q2, q1, CNST_LIMB(0), q0);
- q0 = t;
-
- umul_ppmm (p1, p0, u1, B2);
- ADDC_LIMB (cy, u0, u0, u2 & B2);
- u0 -= (-cy) & d;
-
- /* Final q update */
- add_ssaaaa (q2, q1, q2, q1, CNST_LIMB(0), cy);
- qp[j+1] = q1;
- MPN_INCR_U (qp+j+2, n-j-2, q2);
-
- add_mssaaaa (u2, u1, u0, u0, up[j], p1, p0);
- }
-
- q1 = (u2 > 0);
- u1 -= (-q1) & d;
-
- t = (u1 >= d);
- q1 += t;
- u1 -= (-t) & d;
-
- udiv_qrnnd_preinv (t, u0, u1, u0, d, dinv);
- add_ssaaaa (q1, q0, q1, q0, CNST_LIMB(0), t);
-
- MPN_INCR_U (qp+1, n-1, q1);
-
- qp[0] = q0;
- return u0;
-}
-
-#else
-#error Unknown DIV_QR_1N_METHOD
-#endif
diff --git a/gmp/mpn/generic/div_qr_1n_pi2.c b/gmp/mpn/generic/div_qr_1n_pi2.c
deleted file mode 100644
index 7ea3410cb6..0000000000
--- a/gmp/mpn/generic/div_qr_1n_pi2.c
+++ /dev/null
@@ -1,195 +0,0 @@
-/* mpn_div_qr_1u_pi2.
-
- THIS FILE CONTAINS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS
- ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2013 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-/* ISSUES:
-
- * Can we really use the high pi2 inverse limb for udiv_qrnnd_preinv?
-
- * Are there any problems with generating n quotient limbs in the q area? It
- surely simplifies things.
-
- * Not yet adequately tested.
-*/
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-/* Define some longlong.h-style macros, but for wider operations.
- * add_sssaaaa is like longlong.h's add_ssaaaa but propagating
- carry-out into an additional sum operand.
-*/
-#if defined (__GNUC__) && ! defined (__INTEL_COMPILER)
-
-#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
-#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
- __asm__ ("add\t%7, %k2\n\tadc\t%5, %k1\n\tadc\t$0, %k0" \
- : "=r" (s2), "=&r" (s1), "=&r" (s0) \
- : "0" ((USItype)(s2)), \
- "1" ((USItype)(a1)), "g" ((USItype)(b1)), \
- "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
-#endif
-
-#if defined (__amd64__) && W_TYPE_SIZE == 64
-#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
- __asm__ ("add\t%7, %q2\n\tadc\t%5, %q1\n\tadc\t$0, %q0" \
- : "=r" (s2), "=&r" (s1), "=&r" (s0) \
- : "0" ((UDItype)(s2)), \
- "1" ((UDItype)(a1)), "rme" ((UDItype)(b1)), \
- "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
-#endif
-
-#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
-/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
- processor running in 32-bit mode, since the carry flag then gets the 32-bit
- carry. */
-#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
- __asm__ ("add%I7c\t%2,%6,%7\n\tadde\t%1,%4,%5\n\taddze\t%0,%0" \
- : "=r" (s2), "=&r" (s1), "=&r" (s0) \
- : "r" (s2), "r" (a1), "r" (b1), "%r" (a0), "rI" (b0))
-#endif
-
-#endif /* __GNUC__ */
-
-#ifndef add_sssaaaa
-#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
- do { \
- UWtype __s0, __s1, __c0, __c1; \
- __s0 = (a0) + (b0); \
- __s1 = (a1) + (b1); \
- __c0 = __s0 < (a0); \
- __c1 = __s1 < (a1); \
- (s0) = __s0; \
- __s1 = __s1 + __c0; \
- (s1) = __s1; \
- (s2) += __c1 + (__s1 < __c0); \
- } while (0)
-#endif
-
-struct precomp_div_1_pi2
-{
- mp_limb_t dip[2];
- mp_limb_t d;
- int norm_cnt;
-};
-
-mp_limb_t
-mpn_div_qr_1n_pi2 (mp_ptr qp,
- mp_srcptr up, mp_size_t un,
- struct precomp_div_1_pi2 *pd)
-{
- mp_limb_t most_significant_q_limb;
- mp_size_t i;
- mp_limb_t r, u2, u1, u0;
- mp_limb_t d0, di1, di0;
- mp_limb_t q3a, q2a, q2b, q1b, q2c, q1c, q1d, q0d;
- mp_limb_t cnd;
-
- ASSERT (un >= 2);
- ASSERT ((pd->d & GMP_NUMB_HIGHBIT) != 0);
- ASSERT (! MPN_OVERLAP_P (qp, un-2, up, un) || qp+2 >= up);
- ASSERT_MPN (up, un);
-
-#define q3 q3a
-#define q2 q2b
-#define q1 q1b
-
- up += un - 3;
- r = up[2];
- d0 = pd->d;
-
- most_significant_q_limb = (r >= d0);
- r -= d0 & -most_significant_q_limb;
-
- qp += un - 3;
- qp[2] = most_significant_q_limb;
-
- di1 = pd->dip[1];
- di0 = pd->dip[0];
-
- for (i = un - 3; i >= 0; i -= 2)
- {
- u2 = r;
- u1 = up[1];
- u0 = up[0];
-
- /* Dividend in {r,u1,u0} */
-
- umul_ppmm (q1d,q0d, u1, di0);
- umul_ppmm (q2b,q1b, u1, di1);
- q2b++; /* cannot spill */
- add_sssaaaa (r,q2b,q1b, q2b,q1b, u1,u0);
-
- umul_ppmm (q2c,q1c, u2, di0);
- add_sssaaaa (r,q2b,q1b, q2b,q1b, q2c,q1c);
- umul_ppmm (q3a,q2a, u2, di1);
-
- add_sssaaaa (r,q2b,q1b, q2b,q1b, q2a,q1d);
-
- q3 += r;
-
- r = u0 - q2 * d0;
-
- cnd = (r >= q1);
- r += d0 & -cnd;
- sub_ddmmss (q3,q2, q3,q2, 0,cnd);
-
- if (UNLIKELY (r >= d0))
- {
- r -= d0;
- add_ssaaaa (q3,q2, q3,q2, 0,1);
- }
-
- qp[0] = q2;
- qp[1] = q3;
-
- up -= 2;
- qp -= 2;
- }
-
- if ((un & 1) == 0)
- {
- u2 = r;
- u1 = up[1];
-
- udiv_qrnnd_preinv (q3, r, u2, u1, d0, di1);
- qp[1] = q3;
- }
-
- return r;
-
-#undef q3
-#undef q2
-#undef q1
-}
diff --git a/gmp/mpn/generic/div_qr_1u_pi2.c b/gmp/mpn/generic/div_qr_1u_pi2.c
deleted file mode 100644
index 83d66ef29e..0000000000
--- a/gmp/mpn/generic/div_qr_1u_pi2.c
+++ /dev/null
@@ -1,228 +0,0 @@
-/* mpn_div_qr_1u_pi2.
-
- THIS FILE CONTAINS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS
- ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2013 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-/* ISSUES:
-
- * Can we really use the high pi2 inverse limb for udiv_qrnnd_preinv?
-
- * Are there any problems with generating n quotient limbs in the q area? It
- surely simplifies things.
-
- * Not yet adequately tested.
-*/
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-/* Define some longlong.h-style macros, but for wider operations.
- * add_sssaaaa is like longlong.h's add_ssaaaa but propagating
- carry-out into an additional sum operand.
-*/
-#if defined (__GNUC__) && ! defined (__INTEL_COMPILER)
-
-#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
-#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
- __asm__ ("add\t%7, %k2\n\tadc\t%5, %k1\n\tadc\t$0, %k0" \
- : "=r" (s2), "=&r" (s1), "=&r" (s0) \
- : "0" ((USItype)(s2)), \
- "1" ((USItype)(a1)), "g" ((USItype)(b1)), \
- "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
-#endif
-
-#if defined (__amd64__) && W_TYPE_SIZE == 64
-#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
- __asm__ ("add\t%7, %q2\n\tadc\t%5, %q1\n\tadc\t$0, %q0" \
- : "=r" (s2), "=&r" (s1), "=&r" (s0) \
- : "0" ((UDItype)(s2)), \
- "1" ((UDItype)(a1)), "rme" ((UDItype)(b1)), \
- "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
-#endif
-
-#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
-/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
- processor running in 32-bit mode, since the carry flag then gets the 32-bit
- carry. */
-#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
- __asm__ ("add%I7c\t%2,%6,%7\n\tadde\t%1,%4,%5\n\taddze\t%0,%0" \
- : "=r" (s2), "=&r" (s1), "=&r" (s0) \
- : "r" (s2), "r" (a1), "r" (b1), "%r" (a0), "rI" (b0))
-#endif
-
-#endif /* __GNUC__ */
-
-#ifndef add_sssaaaa
-#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
- do { \
- UWtype __s0, __s1, __c0, __c1; \
- __s0 = (a0) + (b0); \
- __s1 = (a1) + (b1); \
- __c0 = __s0 < (a0); \
- __c1 = __s1 < (a1); \
- (s0) = __s0; \
- __s1 = __s1 + __c0; \
- (s1) = __s1; \
- (s2) += __c1 + (__s1 < __c0); \
- } while (0)
-#endif
-
-struct precomp_div_1_pi2
-{
- mp_limb_t dip[2];
- mp_limb_t d;
- int norm_cnt;
-};
-
-mp_limb_t
-mpn_div_qr_1u_pi2 (mp_ptr qp,
- mp_srcptr up, mp_size_t un,
- struct precomp_div_1_pi2 *pd)
-{
- mp_size_t i;
- mp_limb_t r, u2, u1, u0;
- mp_limb_t d0, di1, di0;
- mp_limb_t q3a, q2a, q2b, q1b, q2c, q1c, q1d, q0d;
- mp_limb_t cnd;
- int cnt;
-
- ASSERT (un >= 2);
- ASSERT ((pd->d & GMP_NUMB_HIGHBIT) == 0);
- ASSERT (! MPN_OVERLAP_P (qp, un-2, up, un) || qp+2 >= up);
- ASSERT_MPN (up, un);
-
-#define q3 q3a
-#define q2 q2b
-#define q1 q1b
-
- up += un - 3;
- cnt = pd->norm_cnt;
- r = up[2] >> (GMP_NUMB_BITS - cnt);
- d0 = pd->d << cnt;
-
- qp += un - 2;
-
- di1 = pd->dip[1];
- di0 = pd->dip[0];
-
- for (i = un - 3; i >= 0; i -= 2)
- {
- u2 = r;
- u1 = (up[2] << cnt) | (up[1] >> (GMP_NUMB_BITS - cnt));
- u0 = (up[1] << cnt) | (up[0] >> (GMP_NUMB_BITS - cnt));
-
- /* Dividend in {r,u1,u0} */
-
- umul_ppmm (q1d,q0d, u1, di0);
- umul_ppmm (q2b,q1b, u1, di1);
- q2b++; /* cannot spill */
- add_sssaaaa (r,q2b,q1b, q2b,q1b, u1,u0);
-
- umul_ppmm (q2c,q1c, u2, di0);
- add_sssaaaa (r,q2b,q1b, q2b,q1b, q2c,q1c);
- umul_ppmm (q3a,q2a, u2, di1);
-
- add_sssaaaa (r,q2b,q1b, q2b,q1b, q2a,q1d);
-
- q3 += r;
-
- r = u0 - q2 * d0;
-
- cnd = (r >= q1);
- r += d0 & -cnd;
- sub_ddmmss (q3,q2, q3,q2, 0,cnd);
-
- if (UNLIKELY (r >= d0))
- {
- r -= d0;
- add_ssaaaa (q3,q2, q3,q2, 0,1);
- }
-
- qp[0] = q2;
- qp[1] = q3;
-
- up -= 2;
- qp -= 2;
- }
-
- if ((un & 1) != 0)
- {
- u2 = r;
- u1 = (up[2] << cnt);
-
- udiv_qrnnd_preinv (q3, r, u2, u1, d0, di1);
- qp[1] = q3;
- }
- else
- {
- u2 = r;
- u1 = (up[2] << cnt) | (up[1] >> (GMP_NUMB_BITS - cnt));
- u0 = (up[1] << cnt);
-
- /* Dividend in {r,u1,u0} */
-
- umul_ppmm (q1d,q0d, u1, di0);
- umul_ppmm (q2b,q1b, u1, di1);
- q2b++; /* cannot spill */
- add_sssaaaa (r,q2b,q1b, q2b,q1b, u1,u0);
-
- umul_ppmm (q2c,q1c, u2, di0);
- add_sssaaaa (r,q2b,q1b, q2b,q1b, q2c,q1c);
- umul_ppmm (q3a,q2a, u2, di1);
-
- add_sssaaaa (r,q2b,q1b, q2b,q1b, q2a,q1d);
-
- q3 += r;
-
- r = u0 - q2 * d0;
-
- cnd = (r >= q1);
- r += d0 & -cnd;
- sub_ddmmss (q3,q2, q3,q2, 0,cnd);
-
- if (UNLIKELY (r >= d0))
- {
- r -= d0;
- add_ssaaaa (q3,q2, q3,q2, 0,1);
- }
-
- qp[0] = q2;
- qp[1] = q3;
- }
-
- return r >> cnt;
-
-#undef q3
-#undef q2
-#undef q1
-}
diff --git a/gmp/mpn/generic/div_qr_2.c b/gmp/mpn/generic/div_qr_2.c
deleted file mode 100644
index cb07e0e3b4..0000000000
--- a/gmp/mpn/generic/div_qr_2.c
+++ /dev/null
@@ -1,332 +0,0 @@
-/* mpn_div_qr_2 -- Divide natural numbers, producing both remainder and
- quotient. The divisor is two limbs.
-
- Contributed to the GNU project by Torbjorn Granlund and Niels Möller
-
- THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-
-Copyright 1993-1996, 1999-2002, 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-#ifndef DIV_QR_2_PI2_THRESHOLD
-/* Disabled unless explicitly tuned. */
-#define DIV_QR_2_PI2_THRESHOLD MP_LIMB_T_MAX
-#endif
-
-#ifndef SANITY_CHECK
-#define SANITY_CHECK 0
-#endif
-
-/* Define some longlong.h-style macros, but for wider operations.
- * add_sssaaaa is like longlong.h's add_ssaaaa but the propagating
- carry-out into an additional sum operand.
- * add_csaac accepts two addends and a carry in, and generates a sum
- and a carry out. A little like a "full adder".
-*/
-#if defined (__GNUC__) && ! defined (__INTEL_COMPILER)
-
-#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
-#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
- __asm__ ("add\t%7, %k2\n\tadc\t%5, %k1\n\tadc\t$0, %k0" \
- : "=r" (s2), "=&r" (s1), "=&r" (s0) \
- : "0" ((USItype)(s2)), \
- "1" ((USItype)(a1)), "g" ((USItype)(b1)), \
- "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
-#define add_csaac(co, s, a, b, ci) \
- __asm__ ("bt\t$0, %2\n\tadc\t%5, %k1\n\tadc\t%k0, %k0" \
- : "=r" (co), "=r" (s) \
- : "rm" ((USItype)(ci)), "0" (CNST_LIMB(0)), \
- "%1" ((USItype)(a)), "g" ((USItype)(b)))
-#endif
-
-#if defined (__amd64__) && W_TYPE_SIZE == 64
-#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
- __asm__ ("add\t%7, %q2\n\tadc\t%5, %q1\n\tadc\t$0, %q0" \
- : "=r" (s2), "=&r" (s1), "=&r" (s0) \
- : "0" ((UDItype)(s2)), \
- "1" ((UDItype)(a1)), "rme" ((UDItype)(b1)), \
- "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
-#define add_csaac(co, s, a, b, ci) \
- __asm__ ("bt\t$0, %2\n\tadc\t%5, %q1\n\tadc\t%q0, %q0" \
- : "=r" (co), "=r" (s) \
- : "rm" ((UDItype)(ci)), "0" (CNST_LIMB(0)), \
- "%1" ((UDItype)(a)), "g" ((UDItype)(b)))
-#endif
-
-#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
-/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
- processor running in 32-bit mode, since the carry flag then gets the 32-bit
- carry. */
-#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
- __asm__ ("add%I7c\t%2,%6,%7\n\tadde\t%1,%4,%5\n\taddze\t%0,%0" \
- : "=r" (s2), "=&r" (s1), "=&r" (s0) \
- : "r" (s2), "r" (a1), "r" (b1), "%r" (a0), "rI" (b0))
-#endif
-
-#endif /* __GNUC__ */
-
-#ifndef add_sssaaaa
-#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
- do { \
- UWtype __s0, __s1, __c0, __c1; \
- __s0 = (a0) + (b0); \
- __s1 = (a1) + (b1); \
- __c0 = __s0 < (a0); \
- __c1 = __s1 < (a1); \
- (s0) = __s0; \
- __s1 = __s1 + __c0; \
- (s1) = __s1; \
- (s2) += __c1 + (__s1 < __c0); \
- } while (0)
-#endif
-
-#ifndef add_csaac
-#define add_csaac(co, s, a, b, ci) \
- do { \
- UWtype __s, __c; \
- __s = (a) + (b); \
- __c = __s < (a); \
- __s = __s + (ci); \
- (s) = __s; \
- (co) = __c + (__s < (ci)); \
- } while (0)
-#endif
-
-/* Typically used with r1, r0 same as n3, n2. Other types of overlap
- between inputs and outputs are not supported. */
-#define udiv_qr_4by2(q1,q0, r1,r0, n3,n2,n1,n0, d1,d0, di1,di0) \
- do { \
- mp_limb_t _q3, _q2a, _q2, _q1, _q2c, _q1c, _q1d, _q0; \
- mp_limb_t _t1, _t0; \
- mp_limb_t _c, _mask; \
- \
- umul_ppmm (_q3,_q2a, n3, di1); \
- umul_ppmm (_q2,_q1, n2, di1); \
- umul_ppmm (_q2c,_q1c, n3, di0); \
- add_sssaaaa (_q3,_q2,_q1, _q2,_q1, _q2c,_q1c); \
- umul_ppmm (_q1d,_q0, n2, di0); \
- add_sssaaaa (_q3,_q2,_q1, _q2,_q1, _q2a,_q1d); \
- \
- add_ssaaaa (r1, r0, n3, n2, CNST_LIMB(0), CNST_LIMB(1)); \
- \
- /* [q3,q2,q1,q0] += [n3,n3,n1,n0] */ \
- add_csaac (_c, _q0, _q0, n0, CNST_LIMB(0)); \
- add_csaac (_c, _q1, _q1, n1, _c); \
- add_csaac (_c, _q2, _q2, r0, _c); \
- _q3 = _q3 + r1 + _c; \
- \
- umul_ppmm (_t1,_t0, _q2, d0); \
- _t1 += _q2 * d1 + _q3 * d0; \
- \
- sub_ddmmss (r1, r0, n1, n0, _t1, _t0); \
- \
- _mask = -(mp_limb_t) (r1 >= _q1 & (r1 > _q1 | r0 >= _q0)); /* (r1,r0) >= (q1,q0) */ \
- add_ssaaaa (r1, r0, r1, r0, d1 & _mask, d0 & _mask); \
- sub_ddmmss (_q3, _q2, _q3, _q2, CNST_LIMB(0), -_mask); \
- \
- if (UNLIKELY (r1 >= d1)) \
- { \
- if (r1 > d1 || r0 >= d0) \
- { \
- sub_ddmmss (r1, r0, r1, r0, d1, d0); \
- add_ssaaaa (_q3, _q2, _q3, _q2, CNST_LIMB(0), CNST_LIMB(1));\
- } \
- } \
- (q1) = _q3; \
- (q0) = _q2; \
- } while (0)
-
-static void
-invert_4by2 (mp_ptr di, mp_limb_t d1, mp_limb_t d0)
-{
- mp_limb_t v1, v0, p1, t1, t0, p0, mask;
- invert_limb (v1, d1);
- p1 = d1 * v1;
- /* <1, v1> * d1 = <B-1, p1> */
- p1 += d0;
- if (p1 < d0)
- {
- v1--;
- mask = -(mp_limb_t) (p1 >= d1);
- p1 -= d1;
- v1 += mask;
- p1 -= mask & d1;
- }
- /* <1, v1> * d1 + d0 = <B-1, p1> */
- umul_ppmm (t1, p0, d0, v1);
- p1 += t1;
- if (p1 < t1)
- {
- if (UNLIKELY (p1 >= d1))
- {
- if (p1 > d1 || p0 >= d0)
- {
- sub_ddmmss (p1, p0, p1, p0, d1, d0);
- v1--;
- }
- }
- sub_ddmmss (p1, p0, p1, p0, d1, d0);
- v1--;
- }
- /* Now v1 is the 3/2 inverse, <1, v1> * <d1, d0> = <B-1, p1, p0>,
- * with <p1, p0> + <d1, d0> >= B^2.
- *
- * The 4/2 inverse is (B^4 - 1) / <d1, d0> = <1, v1, v0>. The
- * partial remainder after <1, v1> is
- *
- * B^4 - 1 - B <1, v1> <d1, d0> = <B-1, B-1, B-1, B-1> - <B-1, p1, p0, 0>
- * = <~p1, ~p0, B-1>
- */
- udiv_qr_3by2 (v0, t1, t0, ~p1, ~p0, MP_LIMB_T_MAX, d1, d0, v1);
- di[0] = v0;
- di[1] = v1;
-
-#if SANITY_CHECK
- {
- mp_limb_t tp[4];
- mp_limb_t dp[2];
- dp[0] = d0;
- dp[1] = d1;
- mpn_mul_n (tp, dp, di, 2);
- ASSERT_ALWAYS (mpn_add_n (tp+2, tp+2, dp, 2) == 0);
- ASSERT_ALWAYS (tp[2] == MP_LIMB_T_MAX);
- ASSERT_ALWAYS (tp[3] == MP_LIMB_T_MAX);
- ASSERT_ALWAYS (mpn_add_n (tp, tp, dp, 2) == 1);
- }
-#endif
-}
-
-static mp_limb_t
-mpn_div_qr_2n_pi2 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
- mp_limb_t d1, mp_limb_t d0, mp_limb_t di1, mp_limb_t di0)
-{
- mp_limb_t qh;
- mp_size_t i;
- mp_limb_t r1, r0;
-
- ASSERT (nn >= 2);
- ASSERT (d1 & GMP_NUMB_HIGHBIT);
-
- r1 = np[nn-1];
- r0 = np[nn-2];
-
- qh = 0;
- if (r1 >= d1 && (r1 > d1 || r0 >= d0))
- {
-#if GMP_NAIL_BITS == 0
- sub_ddmmss (r1, r0, r1, r0, d1, d0);
-#else
- r0 = r0 - d0;
- r1 = r1 - d1 - (r0 >> GMP_LIMB_BITS - 1);
- r0 &= GMP_NUMB_MASK;
-#endif
- qh = 1;
- }
-
- for (i = nn - 2; i >= 2; i -= 2)
- {
- mp_limb_t n1, n0, q1, q0;
- n1 = np[i-1];
- n0 = np[i-2];
- udiv_qr_4by2 (q1, q0, r1, r0, r1, r0, n1, n0, d1, d0, di1, di0);
- qp[i-1] = q1;
- qp[i-2] = q0;
- }
-
- if (i > 0)
- {
- mp_limb_t q;
- udiv_qr_3by2 (q, r1, r0, r1, r0, np[0], d1, d0, di1);
- qp[0] = q;
- }
- rp[1] = r1;
- rp[0] = r0;
-
- return qh;
-}
-
-
-/* Divide num {np,nn} by den {dp,2} and write the nn-2 least
- significant quotient limbs at qp and the 2 long remainder at np.
- Return the most significant limb of the quotient.
-
- Preconditions:
- 1. qp must either not overlap with the input operands at all, or
- qp >= np + 2 must hold true. (This means that it's possible to put
- the quotient in the high part of {np,nn}, right above the remainder.
- 2. nn >= 2. */
-
-mp_limb_t
-mpn_div_qr_2 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
- mp_srcptr dp)
-{
- mp_limb_t d1;
- mp_limb_t d0;
- gmp_pi1_t dinv;
-
- ASSERT (nn >= 2);
- ASSERT (! MPN_OVERLAP_P (qp, nn-2, np, nn) || qp >= np + 2);
- ASSERT_MPN (np, nn);
- ASSERT_MPN (dp, 2);
-
- d1 = dp[1]; d0 = dp[0];
-
- ASSERT (d1 > 0);
-
- if (UNLIKELY (d1 & GMP_NUMB_HIGHBIT))
- {
- if (BELOW_THRESHOLD (nn, DIV_QR_2_PI2_THRESHOLD))
- {
- gmp_pi1_t dinv;
- invert_pi1 (dinv, d1, d0);
- return mpn_div_qr_2n_pi1 (qp, rp, np, nn, d1, d0, dinv.inv32);
- }
- else
- {
- mp_limb_t di[2];
- invert_4by2 (di, d1, d0);
- return mpn_div_qr_2n_pi2 (qp, rp, np, nn, d1, d0, di[1], di[0]);
- }
- }
- else
- {
- int shift;
- count_leading_zeros (shift, d1);
- d1 = (d1 << shift) | (d0 >> (GMP_LIMB_BITS - shift));
- d0 <<= shift;
- invert_pi1 (dinv, d1, d0);
- return mpn_div_qr_2u_pi1 (qp, rp, np, nn, d1, d0, shift, dinv.inv32);
- }
-}
diff --git a/gmp/mpn/generic/div_qr_2n_pi1.c b/gmp/mpn/generic/div_qr_2n_pi1.c
deleted file mode 100644
index da500e2170..0000000000
--- a/gmp/mpn/generic/div_qr_2n_pi1.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/* mpn_div_qr_2n_pi1
-
- Contributed to the GNU project by Torbjorn Granlund and Niels Möller
-
- THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-
-Copyright 1993-1996, 1999-2002, 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-
-/* 3/2 loop, for normalized divisor */
-mp_limb_t
-mpn_div_qr_2n_pi1 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
- mp_limb_t d1, mp_limb_t d0, mp_limb_t di)
-{
- mp_limb_t qh;
- mp_size_t i;
- mp_limb_t r1, r0;
-
- ASSERT (nn >= 2);
- ASSERT (d1 & GMP_NUMB_HIGHBIT);
-
- np += nn - 2;
- r1 = np[1];
- r0 = np[0];
-
- qh = 0;
- if (r1 >= d1 && (r1 > d1 || r0 >= d0))
- {
-#if GMP_NAIL_BITS == 0
- sub_ddmmss (r1, r0, r1, r0, d1, d0);
-#else
- r0 = r0 - d0;
- r1 = r1 - d1 - (r0 >> GMP_LIMB_BITS - 1);
- r0 &= GMP_NUMB_MASK;
-#endif
- qh = 1;
- }
-
- for (i = nn - 2 - 1; i >= 0; i--)
- {
- mp_limb_t n0, q;
- n0 = np[-1];
- udiv_qr_3by2 (q, r1, r0, r1, r0, n0, d1, d0, di);
- np--;
- qp[i] = q;
- }
-
- rp[1] = r1;
- rp[0] = r0;
-
- return qh;
-}
diff --git a/gmp/mpn/generic/div_qr_2u_pi1.c b/gmp/mpn/generic/div_qr_2u_pi1.c
deleted file mode 100644
index 0b9ddf5753..0000000000
--- a/gmp/mpn/generic/div_qr_2u_pi1.c
+++ /dev/null
@@ -1,77 +0,0 @@
-/* mpn_div_qr_2u_pi1
-
- Contributed to the GNU project by Niels Möller
-
- THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-
-Copyright 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-
-/* 3/2 loop, for unnormalized divisor. Caller must pass shifted d1 and
- d0, while {np,nn} is shifted on the fly. */
-mp_limb_t
-mpn_div_qr_2u_pi1 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
- mp_limb_t d1, mp_limb_t d0, int shift, mp_limb_t di)
-{
- mp_limb_t qh;
- mp_limb_t r2, r1, r0;
- mp_size_t i;
-
- ASSERT (nn >= 2);
- ASSERT (d1 & GMP_NUMB_HIGHBIT);
- ASSERT (shift > 0);
-
- r2 = np[nn-1] >> (GMP_LIMB_BITS - shift);
- r1 = (np[nn-1] << shift) | (np[nn-2] >> (GMP_LIMB_BITS - shift));
- r0 = np[nn-2] << shift;
-
- udiv_qr_3by2 (qh, r2, r1, r2, r1, r0, d1, d0, di);
-
- for (i = nn - 2 - 1; i >= 0; i--)
- {
- mp_limb_t q;
- r0 = np[i];
- r1 |= r0 >> (GMP_LIMB_BITS - shift);
- r0 <<= shift;
- udiv_qr_3by2 (q, r2, r1, r2, r1, r0, d1, d0, di);
- qp[i] = q;
- }
-
- rp[0] = (r1 >> shift) | (r2 << (GMP_LIMB_BITS - shift));
- rp[1] = r2 >> shift;
-
- return qh;
-}
diff --git a/gmp/mpn/generic/dive_1.c b/gmp/mpn/generic/dive_1.c
index 1c0a4e894d..27df57b80e 100644
--- a/gmp/mpn/generic/dive_1.c
+++ b/gmp/mpn/generic/dive_1.c
@@ -4,33 +4,22 @@
CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
FUTURE GNU MP RELEASES.
-Copyright 2000-2003, 2005, 2013 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
@@ -41,7 +30,7 @@ see https://www.gnu.org/licenses/. */
/* Divide a={src,size} by d=divisor and store the quotient in q={dst,size}.
q will only be correct if d divides a exactly.
- A separate loop is used for shift==0 because n<<GMP_LIMB_BITS doesn't
+ A separate loop is used for shift==0 because n<<BITS_PER_MP_LIMB doesn't
give zero on all CPUs (for instance it doesn't on the x86s). This
separate loop might run faster too, helping odd divisors.
@@ -61,7 +50,7 @@ see https://www.gnu.org/licenses/. */
faster on some CPUs and would mean just the shift==0 style loop would be
needed.
- If n<<GMP_LIMB_BITS gives zero on a particular CPU then the separate
+ If n<<BITS_PER_MP_LIMB gives zero on a particular CPU then the separate
shift==0 loop is unnecessary, and could be eliminated if there's no great
speed difference.
@@ -87,6 +76,14 @@ mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor)
ASSERT_MPN (src, size);
ASSERT_LIMB (divisor);
+ s = src[0];
+
+ if (size == 1)
+ {
+ dst[0] = s / divisor;
+ return;
+ }
+
if ((divisor & 1) == 0)
{
count_trailing_zeros (shift, divisor);
@@ -101,39 +98,40 @@ mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor)
if (shift != 0)
{
c = 0;
+ i = 0;
+ size--;
- s = src[0];
-
- for (i = 1; i < size; i++)
+ do
{
- s_next = src[i];
+ s_next = src[i+1];
ls = ((s >> shift) | (s_next << (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;
s = s_next;
SUBC_LIMB (c, l, ls, c);
l = (l * inverse) & GMP_NUMB_MASK;
- dst[i - 1] = l;
+ dst[i] = l;
umul_ppmm (h, dummy, l, divisor);
c += h;
+
+ i++;
}
while (i < size);
ls = s >> shift;
l = ls - c;
l = (l * inverse) & GMP_NUMB_MASK;
- dst[size - 1] = l;
+ dst[i] = l;
}
else
{
- s = src[0];
-
l = (s * inverse) & GMP_NUMB_MASK;
dst[0] = l;
+ i = 1;
c = 0;
- for (i = 1; i < size; i++)
+ do
{
umul_ppmm (h, dummy, l, divisor);
c += h;
@@ -143,6 +141,8 @@ mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor)
l = (l * inverse) & GMP_NUMB_MASK;
dst[i] = l;
+ i++;
}
+ while (i < size);
}
}
diff --git a/gmp/mpn/generic/diveby3.c b/gmp/mpn/generic/diveby3.c
index 2ffd9fe777..6293f65a89 100644
--- a/gmp/mpn/generic/diveby3.c
+++ b/gmp/mpn/generic/diveby3.c
@@ -1,32 +1,21 @@
/* mpn_divexact_by3c -- mpn exact division by 3.
-Copyright 2000-2003, 2008 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002, 2003, 2008 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
diff --git a/gmp/mpn/generic/divexact.c b/gmp/mpn/generic/divexact.c
index 47a47e3d80..a0e439cbee 100644
--- a/gmp/mpn/generic/divexact.c
+++ b/gmp/mpn/generic/divexact.c
@@ -4,104 +4,28 @@
Contributed to the GNU project by Torbjorn Granlund.
- THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE. IT IS
+ ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
+ ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
+ RELEASE.
-Copyright 2006, 2007, 2009 Free Software Foundation, Inc.
+Copyright 2006, 2007 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-#if 1
-void
-mpn_divexact (mp_ptr qp,
- mp_srcptr np, mp_size_t nn,
- mp_srcptr dp, mp_size_t dn)
-{
- unsigned shift;
- mp_size_t qn;
- mp_ptr tp;
- TMP_DECL;
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
- ASSERT (dn > 0);
- ASSERT (nn >= dn);
- ASSERT (dp[dn-1] > 0);
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
- while (dp[0] == 0)
- {
- ASSERT (np[0] == 0);
- dp++;
- np++;
- dn--;
- nn--;
- }
-
- if (dn == 1)
- {
- MPN_DIVREM_OR_DIVEXACT_1 (qp, np, nn, dp[0]);
- return;
- }
-
- TMP_MARK;
-
- qn = nn + 1 - dn;
- count_trailing_zeros (shift, dp[0]);
-
- if (shift > 0)
- {
- mp_ptr wp;
- mp_size_t ss;
- ss = (dn > qn) ? qn + 1 : dn;
-
- tp = TMP_ALLOC_LIMBS (ss);
- mpn_rshift (tp, dp, ss, shift);
- dp = tp;
-
- /* Since we have excluded dn == 1, we have nn > qn, and we need
- to shift one limb beyond qn. */
- wp = TMP_ALLOC_LIMBS (qn + 1);
- mpn_rshift (wp, np, qn + 1, shift);
- np = wp;
- }
-
- if (dn > qn)
- dn = qn;
-
- tp = TMP_ALLOC_LIMBS (mpn_bdiv_q_itch (qn, dn));
- mpn_bdiv_q (qp, np, qn, dp, dn, tp);
- TMP_FREE;
-}
-
-#else
/* We use the Jebelean's bidirectional exact division algorithm. This is
somewhat naively implemented, with equal quotient parts done by 2-adic
@@ -120,8 +44,17 @@ mpn_divexact (mp_ptr qp,
* It makes the msb part 1 or 2 limbs larger than the lsb part, in spite of
that the latter is faster. We should at least reverse this, but perhaps
we should make the lsb part considerably larger. (How do we tune this?)
+
+ Perhaps we could somehow use 2-adic division for both parts, not as now
+ truncating division for the upper part and 2-adic for the lower part.
*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
mp_size_t
mpn_divexact_itch (mp_size_t nn, mp_size_t dn)
{
@@ -143,8 +76,7 @@ mpn_divexact (mp_ptr qp,
int cnt;
mp_ptr xdp;
mp_limb_t di;
- mp_limb_t cy;
- gmp_pi1_t dinv;
+ mp_limb_t dip[2], xp[2], cy;
TMP_DECL;
TMP_MARK;
@@ -158,7 +90,7 @@ mpn_divexact (mp_ptr qp,
MPN_COPY (tp, np, qn);
binvert_limb (di, dp[0]); di = -di;
dn = MIN (dn, qn);
- mpn_sbpi1_bdiv_q (qp, tp, qn, dp, dn, di);
+ mpn_sb_bdiv_q (qp, tp, qn, dp, dn, di);
TMP_FREE;
return;
}
@@ -175,14 +107,14 @@ mpn_divexact (mp_ptr qp,
MPN_COPY (tp, np, qn);
binvert_limb (di, dp[0]); di = -di;
dn = MIN (dn, qn);
- mpn_sbpi1_bdiv_q (qp, tp, qn, dp, dn, di);
+ mpn_sb_bdiv_q (qp, tp, qn, dp, dn, di);
}
else if (BELOW_THRESHOLD (dn, MU_BDIV_Q_THRESHOLD))
{
tp = scratch;
MPN_COPY (tp, np, qn);
binvert_limb (di, dp[0]); di = -di;
- mpn_dcpi1_bdiv_q (qp, tp, qn, dp, dn, di);
+ mpn_dc_bdiv_q (qp, tp, qn, dp, dn, di);
}
else
{
@@ -248,14 +180,23 @@ mpn_divexact (mp_ptr qp,
MPN_COPY (tp, np + nn - nn1, nn1);
}
- invert_pi1 (dinv, xdp[qn1 - 1], xdp[qn1 - 2]);
if (BELOW_THRESHOLD (qn1, DC_DIVAPPR_Q_THRESHOLD))
{
- qp[qn0 - 1 + nn1 - qn1] = mpn_sbpi1_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, dinv.inv32);
+ /* Compute divisor inverse. */
+ cy = mpn_add_1 (xp, xdp + qn1 - 2, 2, 1);
+ if (cy != 0)
+ dip[0] = dip[1] = 0;
+ else
+ {
+ mp_limb_t scratch[10]; /* FIXME */
+ mpn_invert (dip, xp, 2, scratch);
+ }
+
+ qp[qn0 - 1 + nn1 - qn1] = mpn_sb_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, dip);
}
else if (BELOW_THRESHOLD (qn1, MU_DIVAPPR_Q_THRESHOLD))
{
- qp[qn0 - 1 + nn1 - qn1] = mpn_dcpi1_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1, &dinv);
+ qp[qn0 - 1 + nn1 - qn1] = mpn_dc_divappr_q (qp + qn0 - 1, tp, nn1, xdp, qn1);
}
else
{
@@ -274,12 +215,12 @@ mpn_divexact (mp_ptr qp,
if (BELOW_THRESHOLD (qn0, DC_BDIV_Q_THRESHOLD))
{
MPN_COPY (tp, np, qn0);
- mpn_sbpi1_bdiv_q (qp, tp, qn0, dp, qn0, di);
+ mpn_sb_bdiv_q (qp, tp, qn0, dp, qn0, di);
}
else if (BELOW_THRESHOLD (qn0, MU_BDIV_Q_THRESHOLD))
{
MPN_COPY (tp, np, qn0);
- mpn_dcpi1_bdiv_q (qp, tp, qn0, dp, qn0, di);
+ mpn_dc_bdiv_q (qp, tp, qn0, dp, qn0, di);
}
else
{
@@ -291,4 +232,3 @@ mpn_divexact (mp_ptr qp,
TMP_FREE;
}
-#endif
diff --git a/gmp/mpn/generic/divis.c b/gmp/mpn/generic/divis.c
index 9e162e60d2..b05ecd8a78 100644
--- a/gmp/mpn/generic/divis.c
+++ b/gmp/mpn/generic/divis.c
@@ -4,80 +4,86 @@
CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
FUTURE GNU MP RELEASES.
-Copyright 2001, 2002, 2005, 2009 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2005 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
-/* Determine whether A={ap,an} is divisible by D={dp,dn}. Must have both
- operands normalized, meaning high limbs non-zero, except that an==0 is
+/* Determine whether {ap,asize} is divisible by {dp,dsize}. Must have both
+ operands normalized, meaning high limbs non-zero, except that asize==0 is
allowed.
- There usually won't be many low zero bits on D, but the checks for this
+ There usually won't be many low zero bits on d, but the checks for this
are fast and might pick up a few operand combinations, in particular they
- might reduce D to fit the single-limb mod_1/modexact_1 code.
+ might reduce d to fit the single-limb mod_1/modexact_1 code.
Future:
+ This is currently not much faster than the user doing an mpz_tdiv_r
+ and testing for a zero remainder, but hopefully it can be improved.
+
+ mpn_bdivmod is one possibility, but it only trades udiv_qrnnd's for
+ multiplies, it won't save crossproducts the way it can in mpz_divexact.
+ Definitely worthwhile on small operands for most processors, but a
+ sub-quadratic version will be wanted before it can be used on all sizes.
+
Getting the remainder limb by limb would make an early exit possible on
finding a non-zero. This would probably have to be bdivmod style so
there's no addback, but it would need a multi-precision inverse and so
might be slower than the plain method (on small sizes at least).
- When D must be normalized (shifted to low bit set), it's possible to
- suppress the bit-shifting of A down, as long as it's already been checked
- that A has at least as many trailing zero bits as D. */
+ When d must be normalized (shifted to high bit set), it's possible to
+ just append a low zero limb to "a" rather than bit-shifting as
+ mpn_tdiv_qr does internally, so long as it's already been checked that a
+ has at least as many trailing zeros bits as d. Or equivalently, pass
+ qxn==1 to mpn_tdiv_qr, if/when it accepts that.
+
+ When called from mpz_congruent_p, {ap,asize} is a temporary which can be
+ destroyed. Maybe it'd be possible to get into mpn_tdiv_qr at a lower
+ level to save copying it, or maybe that function could accept rp==ap.
+
+ Could use __attribute__ ((regparm (2))) on i386, so the parameters
+ wouldn't need extra stack when called from mpz_divisible_p, but a
+ pre-release gcc 3 didn't generate particularly good register juggling in
+ that case, so this isn't done for now. */
int
-mpn_divisible_p (mp_srcptr ap, mp_size_t an,
- mp_srcptr dp, mp_size_t dn)
+mpn_divisible_p (mp_srcptr ap, mp_size_t asize,
+ mp_srcptr dp, mp_size_t dsize)
{
mp_limb_t alow, dlow, dmask;
- mp_ptr qp, rp, tp;
+ mp_ptr qp, rp;
mp_size_t i;
- mp_limb_t di;
- unsigned twos;
TMP_DECL;
- ASSERT (an >= 0);
- ASSERT (an == 0 || ap[an-1] != 0);
- ASSERT (dn >= 1);
- ASSERT (dp[dn-1] != 0);
- ASSERT_MPN (ap, an);
- ASSERT_MPN (dp, dn);
+ ASSERT (asize >= 0);
+ ASSERT (asize == 0 || ap[asize-1] != 0);
+ ASSERT (dsize >= 1);
+ ASSERT (dp[dsize-1] != 0);
+ ASSERT_MPN (ap, asize);
+ ASSERT_MPN (dp, dsize);
/* When a<d only a==0 is divisible.
- Notice this test covers all cases of an==0. */
- if (an < dn)
- return (an == 0);
+ Notice this test covers all cases of asize==0. */
+ if (asize < dsize)
+ return (asize == 0);
/* Strip low zero limbs from d, requiring a==0 on those. */
for (;;)
@@ -91,9 +97,9 @@ mpn_divisible_p (mp_srcptr ap, mp_size_t an,
if (alow != 0)
return 0; /* a has fewer low zero limbs than d, so not divisible */
- /* a!=0 and d!=0 so won't get to n==0 */
- an--; ASSERT (an >= 1);
- dn--; ASSERT (dn >= 1);
+ /* a!=0 and d!=0 so won't get to size==0 */
+ asize--; ASSERT (asize >= 1);
+ dsize--; ASSERT (dsize >= 1);
ap++;
dp++;
}
@@ -103,88 +109,41 @@ mpn_divisible_p (mp_srcptr ap, mp_size_t an,
if ((alow & dmask) != 0)
return 0;
- if (dn == 1)
+ if (dsize == 1)
{
- if (ABOVE_THRESHOLD (an, BMOD_1_TO_MOD_1_THRESHOLD))
- return mpn_mod_1 (ap, an, dlow) == 0;
+ if (BELOW_THRESHOLD (asize, MODEXACT_1_ODD_THRESHOLD))
+ return mpn_mod_1 (ap, asize, dlow) == 0;
- count_trailing_zeros (twos, dlow);
- dlow >>= twos;
- return mpn_modexact_1_odd (ap, an, dlow) == 0;
+ if ((dlow & 1) == 0)
+ {
+ unsigned twos;
+ count_trailing_zeros (twos, dlow);
+ dlow >>= twos;
+ }
+ return mpn_modexact_1_odd (ap, asize, dlow) == 0;
}
- if (dn == 2)
+ if (dsize == 2)
{
mp_limb_t dsecond = dp[1];
if (dsecond <= dmask)
{
+ unsigned twos;
count_trailing_zeros (twos, dlow);
dlow = (dlow >> twos) | (dsecond << (GMP_NUMB_BITS-twos));
ASSERT_LIMB (dlow);
- return MPN_MOD_OR_MODEXACT_1_ODD (ap, an, dlow) == 0;
+ return MPN_MOD_OR_MODEXACT_1_ODD (ap, asize, dlow) == 0;
}
}
- /* Should we compute Q = A * D^(-1) mod B^k,
- R = A - Q * D mod B^k
- here, for some small values of k? Then check if R = 0 (mod B^k). */
-
- /* We could also compute A' = A mod T and D' = D mod P, for some
- P = 3 * 5 * 7 * 11 ..., and then check if any prime factor from P
- dividing D' also divides A'. */
-
TMP_MARK;
- rp = TMP_ALLOC_LIMBS (an + 1);
- qp = TMP_ALLOC_LIMBS (an - dn + 1); /* FIXME: Could we avoid this? */
-
- count_trailing_zeros (twos, dp[0]);
-
- if (twos != 0)
- {
- tp = TMP_ALLOC_LIMBS (dn);
- ASSERT_NOCARRY (mpn_rshift (tp, dp, dn, twos));
- dp = tp;
+ rp = TMP_ALLOC_LIMBS (asize+1);
+ qp = rp + dsize;
- ASSERT_NOCARRY (mpn_rshift (rp, ap, an, twos));
- }
- else
- {
- MPN_COPY (rp, ap, an);
- }
- if (rp[an - 1] >= dp[dn - 1])
- {
- rp[an] = 0;
- an++;
- }
- else if (an == dn)
- {
- TMP_FREE;
- return 0;
- }
-
- ASSERT (an > dn); /* requirement of functions below */
-
- if (BELOW_THRESHOLD (dn, DC_BDIV_QR_THRESHOLD) ||
- BELOW_THRESHOLD (an - dn, DC_BDIV_QR_THRESHOLD))
- {
- binvert_limb (di, dp[0]);
- mpn_sbpi1_bdiv_qr (qp, rp, an, dp, dn, -di);
- rp += an - dn;
- }
- else if (BELOW_THRESHOLD (dn, MU_BDIV_QR_THRESHOLD))
- {
- binvert_limb (di, dp[0]);
- mpn_dcpi1_bdiv_qr (qp, rp, an, dp, dn, -di);
- rp += an - dn;
- }
- else
- {
- tp = TMP_ALLOC_LIMBS (mpn_mu_bdiv_qr_itch (an, dn));
- mpn_mu_bdiv_qr (qp, rp, rp, an, dp, dn, tp);
- }
+ mpn_tdiv_qr (qp, rp, (mp_size_t) 0, ap, asize, dp, dsize);
- /* test for {rp,dn} zero or non-zero */
+ /* test for {rp,dsize} zero or non-zero */
i = 0;
do
{
@@ -194,7 +153,7 @@ mpn_divisible_p (mp_srcptr ap, mp_size_t an,
return 0;
}
}
- while (++i < dn);
+ while (++i < dsize);
TMP_FREE;
return 1;
diff --git a/gmp/mpn/generic/divrem.c b/gmp/mpn/generic/divrem.c
index f420992746..999ffdd347 100644
--- a/gmp/mpn/generic/divrem.c
+++ b/gmp/mpn/generic/divrem.c
@@ -1,33 +1,24 @@
/* mpn_divrem -- Divide natural numbers, producing both remainder and
- quotient. This is now just a middle layer calling mpn_tdiv_qr.
+ quotient. This is now just a middle layer for calling the new
+ internal mpn_tdiv_qr.
-Copyright 1993-1997, 1999-2002, 2005 Free Software Foundation, Inc.
+Copyright 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2005 Free
+Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
@@ -56,7 +47,7 @@ mpn_divrem (mp_ptr qp, mp_size_t qxn,
TMP_DECL;
TMP_MARK;
- q2p = TMP_ALLOC_LIMBS (nn + qxn);
+ q2p = (mp_ptr) TMP_ALLOC ((nn + qxn) * BYTES_PER_MP_LIMB);
np[0] = mpn_divrem_1 (q2p, qxn, np, nn, dp[0]);
qn = nn + qxn - 1;
@@ -81,11 +72,11 @@ mpn_divrem (mp_ptr qp, mp_size_t qxn,
if (UNLIKELY (qxn != 0))
{
mp_ptr n2p;
- n2p = TMP_ALLOC_LIMBS (nn + qxn);
+ n2p = (mp_ptr) TMP_ALLOC ((nn + qxn) * BYTES_PER_MP_LIMB);
MPN_ZERO (n2p, qxn);
MPN_COPY (n2p + qxn, np, nn);
- q2p = TMP_ALLOC_LIMBS (nn - dn + qxn + 1);
- rp = TMP_ALLOC_LIMBS (dn);
+ q2p = (mp_ptr) TMP_ALLOC ((nn - dn + qxn + 1) * BYTES_PER_MP_LIMB);
+ rp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
mpn_tdiv_qr (q2p, rp, 0L, n2p, nn + qxn, dp, dn);
MPN_COPY (np, rp, dn);
qn = nn - dn + qxn;
@@ -94,8 +85,8 @@ mpn_divrem (mp_ptr qp, mp_size_t qxn,
}
else
{
- q2p = TMP_ALLOC_LIMBS (nn - dn + 1);
- rp = TMP_ALLOC_LIMBS (dn);
+ q2p = (mp_ptr) TMP_ALLOC ((nn - dn + 1) * BYTES_PER_MP_LIMB);
+ rp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
mpn_tdiv_qr (q2p, rp, 0L, np, nn, dp, dn);
MPN_COPY (np, rp, dn); /* overwrite np area with remainder */
qn = nn - dn;
diff --git a/gmp/mpn/generic/divrem_1.c b/gmp/mpn/generic/divrem_1.c
index 9157b5735e..c416946294 100644
--- a/gmp/mpn/generic/divrem_1.c
+++ b/gmp/mpn/generic/divrem_1.c
@@ -1,33 +1,22 @@
/* mpn_divrem_1 -- mpn by limb division.
-Copyright 1991, 1993, 1994, 1996, 1998-2000, 2002, 2003 Free Software
+Copyright 1991, 1993, 1994, 1996, 1998, 1999, 2000, 2002, 2003 Free Software
Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
@@ -167,7 +156,7 @@ mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
else
{
/* Most significant bit of divisor == 0. */
- int cnt;
+ int norm;
/* Skip a division if high < divisor (high quotient 0). Testing here
before normalizing will still skip as often as possible. */
@@ -189,28 +178,28 @@ mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
&& BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
goto plain;
- count_leading_zeros (cnt, d);
- d <<= cnt;
- r <<= cnt;
+ count_leading_zeros (norm, d);
+ d <<= norm;
+ r <<= norm;
if (UDIV_NEEDS_NORMALIZATION
&& BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
{
- mp_limb_t nshift;
if (un != 0)
{
n1 = up[un - 1] << GMP_NAIL_BITS;
- r |= (n1 >> (GMP_LIMB_BITS - cnt));
+ r |= (n1 >> (GMP_LIMB_BITS - norm));
for (i = un - 2; i >= 0; i--)
{
n0 = up[i] << GMP_NAIL_BITS;
- nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
- udiv_qrnnd (*qp, r, r, nshift, d);
+ udiv_qrnnd (*qp, r, r,
+ (n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm)),
+ d);
r >>= GMP_NAIL_BITS;
qp--;
n1 = n0;
}
- udiv_qrnnd (*qp, r, r, n1 << cnt, d);
+ udiv_qrnnd (*qp, r, r, n1 << norm, d);
r >>= GMP_NAIL_BITS;
qp--;
}
@@ -220,26 +209,27 @@ mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
r >>= GMP_NAIL_BITS;
qp--;
}
- return r >> cnt;
+ return r >> norm;
}
else
{
- mp_limb_t dinv, nshift;
+ mp_limb_t dinv;
invert_limb (dinv, d);
if (un != 0)
{
n1 = up[un - 1] << GMP_NAIL_BITS;
- r |= (n1 >> (GMP_LIMB_BITS - cnt));
+ r |= (n1 >> (GMP_LIMB_BITS - norm));
for (i = un - 2; i >= 0; i--)
{
n0 = up[i] << GMP_NAIL_BITS;
- nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
- udiv_qrnnd_preinv (*qp, r, r, nshift, d, dinv);
+ udiv_qrnnd_preinv (*qp, r, r,
+ ((n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm))),
+ d, dinv);
r >>= GMP_NAIL_BITS;
qp--;
n1 = n0;
}
- udiv_qrnnd_preinv (*qp, r, r, n1 << cnt, d, dinv);
+ udiv_qrnnd_preinv (*qp, r, r, n1 << norm, d, dinv);
r >>= GMP_NAIL_BITS;
qp--;
}
@@ -249,7 +239,7 @@ mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
r >>= GMP_NAIL_BITS;
qp--;
}
- return r >> cnt;
+ return r >> norm;
}
}
}
diff --git a/gmp/mpn/generic/divrem_2.c b/gmp/mpn/generic/divrem_2.c
index 30d24bb102..ba761dc36c 100644
--- a/gmp/mpn/generic/divrem_2.c
+++ b/gmp/mpn/generic/divrem_2.c
@@ -1,119 +1,179 @@
/* mpn_divrem_2 -- Divide natural numbers, producing both remainder and
quotient. The divisor is two limbs.
- THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+ THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES. IT IS
+ ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
+ ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP
+ RELEASE.
-Copyright 1993-1996, 1999-2002 Free Software Foundation, Inc.
+Copyright 1993, 1994, 1995, 1996, 1999, 2000, 2001, 2002 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
-/* Divide num {np,nn} by den {dp,2} and write the nn-2 least significant
- quotient limbs at qp and the 2 long remainder at np. If qxn is non-zero,
- generate that many fraction bits and append them after the other quotient
- limbs. Return the most significant limb of the quotient, this is always 0
- or 1.
+/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
+ meaning the quotient size where that should happen, the quotient size
+ being how many udiv divisions will be done.
+
+ The default is to use preinv always, CPUs where this doesn't suit have
+ tuned thresholds. Note in particular that preinv should certainly be
+ used if that's the only division available (USE_PREINV_ALWAYS). */
+
+#ifndef DIVREM_2_THRESHOLD
+#define DIVREM_2_THRESHOLD 0
+#endif
+
+
+/* Divide num (NP/NSIZE) by den (DP/2) and write
+ the NSIZE-2 least significant quotient limbs at QP
+ and the 2 long remainder at NP. If QEXTRA_LIMBS is
+ non-zero, generate that many fraction bits and append them after the
+ other quotient limbs.
+ Return the most significant limb of the quotient, this is always 0 or 1.
Preconditions:
+ 0. NSIZE >= 2.
1. The most significant bit of the divisor must be set.
- 2. qp must either not overlap with the input operands at all, or
- qp >= np + 2 must hold true. (This means that it's possible to put
- the quotient in the high part of {np,nn}, right above the remainder.
- 3. nn >= 2, even if qxn is non-zero. */
+ 2. QP must either not overlap with the input operands at all, or
+ QP + 2 >= NP must hold true. (This means that it's
+ possible to put the quotient in the high part of NUM, right after the
+ remainder in NUM.
+ 3. NSIZE >= 2, even if QEXTRA_LIMBS is non-zero. */
mp_limb_t
mpn_divrem_2 (mp_ptr qp, mp_size_t qxn,
mp_ptr np, mp_size_t nn,
mp_srcptr dp)
{
- mp_limb_t most_significant_q_limb;
+ mp_limb_t most_significant_q_limb = 0;
mp_size_t i;
- mp_limb_t r1, r0, d1, d0;
- gmp_pi1_t di;
+ mp_limb_t n1, n0, n2;
+ mp_limb_t d1, d0;
+ mp_limb_t d1inv;
+ int use_preinv;
ASSERT (nn >= 2);
ASSERT (qxn >= 0);
ASSERT (dp[1] & GMP_NUMB_HIGHBIT);
- ASSERT (! MPN_OVERLAP_P (qp, nn-2+qxn, np, nn) || qp >= np+2);
+ ASSERT (! MPN_OVERLAP_P (qp, nn-2+qxn, np, nn) || qp+2 >= np);
ASSERT_MPN (np, nn);
ASSERT_MPN (dp, 2);
np += nn - 2;
d1 = dp[1];
d0 = dp[0];
- r1 = np[1];
- r0 = np[0];
+ n1 = np[1];
+ n0 = np[0];
- most_significant_q_limb = 0;
- if (r1 >= d1 && (r1 > d1 || r0 >= d0))
+ if (n1 >= d1 && (n1 > d1 || n0 >= d0))
{
#if GMP_NAIL_BITS == 0
- sub_ddmmss (r1, r0, r1, r0, d1, d0);
+ sub_ddmmss (n1, n0, n1, n0, d1, d0);
#else
- r0 = r0 - d0;
- r1 = r1 - d1 - (r0 >> GMP_LIMB_BITS - 1);
- r0 &= GMP_NUMB_MASK;
+ n0 = n0 - d0;
+ n1 = n1 - d1 - (n0 >> GMP_LIMB_BITS - 1);
+ n0 &= GMP_NUMB_MASK;
#endif
most_significant_q_limb = 1;
}
- invert_pi1 (di, d1, d0);
+ use_preinv = ABOVE_THRESHOLD (qxn + nn - 2, DIVREM_2_THRESHOLD);
+ if (use_preinv)
+ invert_limb (d1inv, d1);
- qp += qxn;
-
- for (i = nn - 2 - 1; i >= 0; i--)
+ for (i = qxn + nn - 2 - 1; i >= 0; i--)
{
- mp_limb_t n0, q;
- n0 = np[-1];
- udiv_qr_3by2 (q, r1, r0, r1, r0, n0, d1, d0, di.inv32);
- np--;
- qp[i] = q;
- }
+ mp_limb_t q;
+ mp_limb_t r;
- if (UNLIKELY (qxn != 0))
- {
- qp -= qxn;
- for (i = qxn - 1; i >= 0; i--)
+ if (i >= qxn)
+ np--;
+ else
+ np[0] = 0;
+
+ if (n1 == d1)
+ {
+ /* Q should be either 111..111 or 111..110. Need special handling
+ of this rare case as normal division would give overflow. */
+ q = GMP_NUMB_MASK;
+
+ r = (n0 + d1) & GMP_NUMB_MASK;
+ if (r < d1) /* Carry in the addition? */
+ {
+#if GMP_NAIL_BITS == 0
+ add_ssaaaa (n1, n0, r - d0, np[0], 0, d0);
+#else
+ n0 = np[0] + d0;
+ n1 = (r - d0 + (n0 >> GMP_NUMB_BITS)) & GMP_NUMB_MASK;
+ n0 &= GMP_NUMB_MASK;
+#endif
+ qp[i] = q;
+ continue;
+ }
+ n1 = d0 - (d0 != 0);
+ n0 = -d0 & GMP_NUMB_MASK;
+ }
+ else
{
- mp_limb_t q;
- udiv_qr_3by2 (q, r1, r0, r1, r0, CNST_LIMB(0), d1, d0, di.inv32);
- qp[i] = q;
+ if (use_preinv)
+ udiv_qrnnd_preinv (q, r, n1, n0, d1, d1inv);
+ else
+ udiv_qrnnd (q, r, n1, n0 << GMP_NAIL_BITS, d1 << GMP_NAIL_BITS);
+ r >>= GMP_NAIL_BITS;
+ umul_ppmm (n1, n0, d0, q << GMP_NAIL_BITS);
+ n0 >>= GMP_NAIL_BITS;
}
- }
- np[1] = r1;
- np[0] = r0;
+ n2 = np[0];
+
+ q_test:
+ if (n1 > r || (n1 == r && n0 > n2))
+ {
+ /* The estimated Q was too large. */
+ q--;
+
+#if GMP_NAIL_BITS == 0
+ sub_ddmmss (n1, n0, n1, n0, 0, d0);
+#else
+ n0 = n0 - d0;
+ n1 = n1 - (n0 >> GMP_LIMB_BITS - 1);
+ n0 &= GMP_NUMB_MASK;
+#endif
+ r += d1;
+ if (r >= d1) /* If not carry, test Q again. */
+ goto q_test;
+ }
+
+ qp[i] = q;
+#if GMP_NAIL_BITS == 0
+ sub_ddmmss (n1, n0, r, n2, n1, n0);
+#else
+ n0 = n2 - n0;
+ n1 = r - n1 - (n0 >> GMP_LIMB_BITS - 1);
+ n0 &= GMP_NUMB_MASK;
+#endif
+ }
+ np[1] = n1;
+ np[0] = n0;
return most_significant_q_limb;
}
diff --git a/gmp/mpn/generic/dump.c b/gmp/mpn/generic/dump.c
index 3a73fe49e3..38309996cc 100644
--- a/gmp/mpn/generic/dump.c
+++ b/gmp/mpn/generic/dump.c
@@ -3,33 +3,22 @@
FUNCTION WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 1996, 2000-2002, 2005 Free Software Foundation, Inc.
+Copyright 1996, 2000, 2001, 2002, 2005 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include "gmp.h"
diff --git a/gmp/mpn/generic/fib2_ui.c b/gmp/mpn/generic/fib2_ui.c
index eb6e56e736..a39d538262 100644
--- a/gmp/mpn/generic/fib2_ui.c
+++ b/gmp/mpn/generic/fib2_ui.c
@@ -4,37 +4,28 @@
CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
FUTURE GNU MP RELEASES.
-Copyright 2001, 2002, 2005, 2009 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2005 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include "gmp.h"
#include "gmp-impl.h"
+#include "longlong.h"
+
/* change this to "#define TRACE(x) x" for diagnostics */
#define TRACE(x)
@@ -61,13 +52,20 @@ see https://www.gnu.org/licenses/. */
This property of F[4m+3] can be verified by induction on F[4m+3] =
7*F[4m-1] - F[4m-5], that formula being a standard lucas sequence
identity U[i+j] = U[i]*V[j] - U[i-j]*Q^j.
-*/
+
+ Enhancements:
+
+ If there was an mpn_addlshift, it'd be possible to eliminate the yp
+ temporary, using xp=F[k]^2, fp=F[k-1]^2, f1p=xp+fp, fp+=4*fp, fp-=f1p,
+ fp+=2*(-1)^n, etc. */
mp_size_t
mpn_fib2_ui (mp_ptr fp, mp_ptr f1p, unsigned long int n)
{
+ mp_ptr xp, yp;
mp_size_t size;
unsigned long nfirst, mask;
+ TMP_DECL;
TRACE (printf ("mpn_fib2_ui n=%lu\n", n));
@@ -87,15 +85,15 @@ mpn_fib2_ui (mp_ptr fp, mp_ptr f1p, unsigned long int n)
if (mask != 1)
{
mp_size_t alloc;
- mp_ptr xp;
- TMP_DECL;
TMP_MARK;
alloc = MPN_FIB2_SIZE (n);
- xp = TMP_ALLOC_LIMBS (alloc);
+ TMP_ALLOC_LIMBS_2 (xp,alloc, yp,alloc);
do
{
+ mp_limb_t c;
+
/* Here fp==F[k] and f1p==F[k-1], with k being the bits of n from
n&mask upwards.
@@ -116,65 +114,45 @@ mpn_fib2_ui (mp_ptr fp, mp_ptr f1p, unsigned long int n)
/* f1p[size-1] might be zero, but this occurs rarely, so it's not
worth bothering checking for it */
ASSERT (alloc >= 2*size);
- mpn_sqr (xp, fp, size);
- mpn_sqr (fp, f1p, size);
+ mpn_sqr_n (xp, fp, size);
+ mpn_sqr_n (yp, f1p, size);
size *= 2;
/* Shrink if possible. Since fp was normalized there'll be at
most one high zero on xp (and if there is then there's one on
yp too). */
- ASSERT (xp[size-1] != 0 || fp[size-1] == 0);
+ ASSERT (xp[size-1] != 0 || yp[size-1] == 0);
size -= (xp[size-1] == 0);
ASSERT (xp[size-1] != 0); /* only one xp high zero */
- /* Calculate F[2k-1] = F[k]^2 + F[k-1]^2. */
- f1p[size] = mpn_add_n (f1p, xp, fp, size);
-
/* Calculate F[2k+1] = 4*F[k]^2 - F[k-1]^2 + 2*(-1)^k.
n&mask is the low bit of our implied k. */
-#if HAVE_NATIVE_mpn_rsblsh2_n || HAVE_NATIVE_mpn_rsblsh_n
-#if HAVE_NATIVE_mpn_rsblsh2_n
- fp[size] = mpn_rsblsh2_n (fp, fp, xp, size);
-#else /* HAVE_NATIVE_mpn_rsblsh_n */
- fp[size] = mpn_rsblsh_n (fp, fp, xp, size, 2);
-#endif
- if ((n & mask) == 0)
- MPN_INCR_U(fp, size + 1, 2); /* possible +2 */
- else
- {
- ASSERT (fp[0] >= 2);
- fp[0] -= 2; /* possible -2 */
- }
-#else
- {
- mp_limb_t c;
-
- c = mpn_lshift (xp, xp, size, 2);
- xp[0] |= (n & mask ? 0 : 2); /* possible +2 */
- c -= mpn_sub_n (fp, xp, fp, size);
- ASSERT (n & mask ? fp[0] != 0 && fp[0] != 1 : 1);
- fp[0] -= (n & mask ? 2 : 0); /* possible -2 */
- fp[size] = c;
- }
-#endif
+ c = mpn_lshift (fp, xp, size, 2);
+ fp[0] |= (n & mask ? 0 : 2); /* possible +2 */
+ c -= mpn_sub_n (fp, fp, yp, size);
+ ASSERT (n & (mask << 1) ? fp[0] != 0 && fp[0] != 1 : 1);
+ fp[0] -= (n & mask ? 2 : 0); /* possible -2 */
ASSERT (alloc >= size+1);
- size += (fp[size] != 0);
+ xp[size] = 0;
+ yp[size] = 0;
+ fp[size] = c;
+ size += (c != 0);
+
+ /* Calculate F[2k-1] = F[k]^2 + F[k-1]^2.
+ F[2k-1]<F[2k+1] so no carry out of "size" limbs. */
+ ASSERT_NOCARRY (mpn_add_n (f1p, xp, yp, size));
/* now n&mask is the new bit of n being considered */
mask >>= 1;
/* Calculate F[2k] = F[2k+1] - F[2k-1], replacing the unwanted one of
F[2k+1] and F[2k-1]. */
- if (n & mask)
- ASSERT_NOCARRY (mpn_sub_n (f1p, fp, f1p, size));
- else {
- ASSERT_NOCARRY (mpn_sub_n ( fp, fp, f1p, size));
-
- /* Can have a high zero after replacing F[2k+1] with F[2k].
- f1p will have a high zero if fp does. */
- ASSERT (fp[size-1] != 0 || f1p[size-1] == 0);
- size -= (fp[size-1] == 0);
- }
+ ASSERT_NOCARRY (mpn_sub_n ((n & mask ? f1p : fp), fp, f1p, size));
+
+ /* Can have a high zero after replacing F[2k+1] with F[2k].
+ f1p will have a high zero if fp does. */
+ ASSERT (fp[size-1] != 0 || f1p[size-1] == 0);
+ size -= (fp[size-1] == 0);
}
while (mask != 1);
diff --git a/gmp/mpn/generic/gcd.c b/gmp/mpn/generic/gcd.c
index b14e1ad888..542e0fe7b8 100644
--- a/gmp/mpn/generic/gcd.c
+++ b/gmp/mpn/generic/gcd.c
@@ -1,33 +1,22 @@
/* mpn/gcd.c: mpn_gcd for gcd of two odd integers.
-Copyright 1991, 1993-1998, 2000-2005, 2008, 2010, 2012 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2003,
+2004, 2005, 2008 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
@@ -35,7 +24,7 @@ see https://www.gnu.org/licenses/. */
/* Uses the HGCD operation described in
- N. Möller, On Schönhage's algorithm and subquadratic integer gcd
+ N. Möller, On Schönhage's algorithm and subquadratic integer gcd
computation, Math. Comp. 77 (2008), 589-607.
to reduce inputs until they are of size below GCD_DC_THRESHOLD, and
@@ -62,76 +51,6 @@ mp_size_t p_table[P_TABLE_SIZE];
#define CHOOSE_P(n) (2*(n) / 3)
#endif
-struct gcd_ctx
-{
- mp_ptr gp;
- mp_size_t gn;
-};
-
-static void
-gcd_hook (void *p, mp_srcptr gp, mp_size_t gn,
- mp_srcptr qp, mp_size_t qn, int d)
-{
- struct gcd_ctx *ctx = (struct gcd_ctx *) p;
- MPN_COPY (ctx->gp, gp, gn);
- ctx->gn = gn;
-}
-
-#if GMP_NAIL_BITS > 0
-/* Nail supports should be easy, replacing the sub_ddmmss with nails
- * logic. */
-#error Nails not supported.
-#endif
-
-/* Use binary algorithm to compute G <-- GCD (U, V) for usize, vsize == 2.
- Both U and V must be odd. */
-static inline mp_size_t
-gcd_2 (mp_ptr gp, mp_srcptr up, mp_srcptr vp)
-{
- mp_limb_t u0, u1, v0, v1;
- mp_size_t gn;
-
- u0 = up[0];
- u1 = up[1];
- v0 = vp[0];
- v1 = vp[1];
-
- ASSERT (u0 & 1);
- ASSERT (v0 & 1);
-
- /* Check for u0 != v0 needed to ensure that argument to
- * count_trailing_zeros is non-zero. */
- while (u1 != v1 && u0 != v0)
- {
- unsigned long int r;
- if (u1 > v1)
- {
- sub_ddmmss (u1, u0, u1, u0, v1, v0);
- count_trailing_zeros (r, u0);
- u0 = ((u1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (u0 >> r);
- u1 >>= r;
- }
- else /* u1 < v1. */
- {
- sub_ddmmss (v1, v0, v1, v0, u1, u0);
- count_trailing_zeros (r, v0);
- v0 = ((v1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (v0 >> r);
- v1 >>= r;
- }
- }
-
- gp[0] = u0, gp[1] = u1, gn = 1 + (u1 != 0);
-
- /* If U == V == GCD, done. Otherwise, compute GCD (V, |U - V|). */
- if (u1 == v1 && u0 == v0)
- return gn;
-
- v0 = (u0 == v0) ? ((u1 > v1) ? u1-v1 : v1-u1) : ((u0 > v0) ? u0-v0 : v0-u0);
- gp[0] = mpn_gcd_1 (gp, gn, v0);
-
- return 1;
-}
-
mp_size_t
mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
{
@@ -139,17 +58,13 @@ mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
mp_size_t scratch;
mp_size_t matrix_scratch;
- struct gcd_ctx ctx;
+ mp_size_t gn;
mp_ptr tp;
TMP_DECL;
- ASSERT (usize >= n);
- ASSERT (n > 0);
- ASSERT (vp[n-1] > 0);
-
/* FIXME: Check for small sizes first, before setting up temporary
storage etc. */
- talloc = MPN_GCD_SUBDIV_STEP_ITCH(n);
+ talloc = MPN_GCD_LEHMER_N_ITCH(n);
/* For initial division */
scratch = usize - n + 1;
@@ -192,13 +107,11 @@ mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
if (mpn_zero_p (up, n))
{
MPN_COPY (gp, vp, n);
- ctx.gn = n;
- goto done;
+ TMP_FREE;
+ return n;
}
}
- ctx.gp = gp;
-
#if TUNE_GCD_P
while (CHOOSE_P (n) > 0)
#else
@@ -221,90 +134,153 @@ mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
else
{
/* Temporary storage n */
- n = mpn_gcd_subdiv_step (up, vp, n, 0, gcd_hook, &ctx, tp);
+ n = mpn_gcd_subdiv_step (gp, &gn, up, vp, n, tp);
if (n == 0)
- goto done;
+ {
+ TMP_FREE;
+ return gn;
+ }
}
}
- while (n > 2)
- {
- struct hgcd_matrix1 M;
- mp_limb_t uh, ul, vh, vl;
- mp_limb_t mask;
+ gn = mpn_gcd_lehmer_n (gp, up, vp, n, tp);
+ TMP_FREE;
+ return gn;
+}
- mask = up[n-1] | vp[n-1];
- ASSERT (mask > 0);
+#ifdef TUNE_GCD_P
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include "speed.h"
- if (mask & GMP_NUMB_HIGHBIT)
- {
- uh = up[n-1]; ul = up[n-2];
- vh = vp[n-1]; vl = vp[n-2];
- }
- else
- {
- int shift;
+static int
+compare_double(const void *ap, const void *bp)
+{
+ double a = * (const double *) ap;
+ double b = * (const double *) bp;
+
+ if (a < b)
+ return -1;
+ else if (a > b)
+ return 1;
+ else
+ return 0;
+}
- count_leading_zeros (shift, mask);
- uh = MPN_EXTRACT_NUMB (shift, up[n-1], up[n-2]);
- ul = MPN_EXTRACT_NUMB (shift, up[n-2], up[n-3]);
- vh = MPN_EXTRACT_NUMB (shift, vp[n-1], vp[n-2]);
- vl = MPN_EXTRACT_NUMB (shift, vp[n-2], vp[n-3]);
- }
+static double
+median (double *v, size_t n)
+{
+ qsort(v, n, sizeof(*v), compare_double);
- /* Try an mpn_hgcd2 step */
- if (mpn_hgcd2 (uh, ul, vh, vl, &M))
- {
- n = mpn_matrix22_mul1_inverse_vector (&M, tp, up, vp, n);
- MP_PTR_SWAP (up, tp);
- }
- else
- {
- /* mpn_hgcd2 has failed. Then either one of a or b is very
- small, or the difference is very small. Perform one
- subtraction followed by one division. */
+ return v[n/2];
+}
- /* Temporary storage n */
- n = mpn_gcd_subdiv_step (up, vp, n, 0, &gcd_hook, &ctx, tp);
- if (n == 0)
- goto done;
- }
- }
+#define TIME(res, code) do { \
+ double time_measurement[5]; \
+ unsigned time_i; \
+ \
+ for (time_i = 0; time_i < 5; time_i++) \
+ { \
+ speed_starttime(); \
+ code; \
+ time_measurement[time_i] = speed_endtime(); \
+ } \
+ res = median(time_measurement, 5); \
+} while (0)
+
+int
+main(int argc, char *argv)
+{
+ gmp_randstate_t rands;
+ mp_size_t n;
+ mp_ptr ap;
+ mp_ptr bp;
+ mp_ptr up;
+ mp_ptr vp;
+ mp_ptr gp;
+ mp_ptr tp;
+ TMP_DECL;
- ASSERT(up[n-1] | vp[n-1]);
+ /* Unbuffered so if output is redirected to a file it isn't lost if the
+ program is killed part way through. */
+ setbuf (stdout, NULL);
+ setbuf (stderr, NULL);
- if (n == 1)
- {
- *gp = mpn_gcd_1(up, 1, vp[0]);
- ctx.gn = 1;
- goto done;
- }
+ gmp_randinit_default (rands);
- /* Due to the calling convention for mpn_gcd, at most one can be
- even. */
+ TMP_MARK;
- if (! (up[0] & 1))
- MP_PTR_SWAP (up, vp);
+ ap = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
+ bp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
+ up = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
+ vp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
+ gp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
+ tp = TMP_ALLOC_LIMBS (MPN_GCD_LEHMER_N_ITCH (P_TABLE_SIZE));
- ASSERT (up[0] & 1);
+ mpn_random (ap, P_TABLE_SIZE);
+ mpn_random (bp, P_TABLE_SIZE);
- if (vp[0] == 0)
- {
- *gp = mpn_gcd_1 (up, 2, vp[1]);
- ctx.gn = 1;
- goto done;
- }
- else if (! (vp[0] & 1))
+ memset (p_table, 0, sizeof(p_table));
+
+ for (n = 100; n++; n < P_TABLE_SIZE)
{
- int r;
- count_trailing_zeros (r, vp[0]);
- vp[0] = ((vp[1] << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (vp[0] >> r);
- vp[1] >>= r;
- }
+ mp_size_t p;
+ mp_size_t best_p;
+ double best_time;
+ double lehmer_time;
+
+ if (ap[n-1] == 0)
+ ap[n-1] = 1;
+
+ if (bp[n-1] == 0)
+ bp[n-1] = 1;
+
+ p_table[n] = 0;
+ TIME(lehmer_time, {
+ MPN_COPY (up, ap, n);
+ MPN_COPY (vp, bp, n);
+ mpn_gcd_lehmer_n (gp, up, vp, n, tp);
+ });
- ctx.gn = gcd_2(gp, up, vp);
+ best_time = lehmer_time;
+ best_p = 0;
-done:
+ for (p = n * 0.48; p < n * 0.77; p++)
+ {
+ double t;
+
+ p_table[n] = p;
+
+ TIME(t, {
+ MPN_COPY (up, ap, n);
+ MPN_COPY (vp, bp, n);
+ mpn_gcd (gp, up, n, vp, n);
+ });
+
+ if (t < best_time)
+ {
+ best_time = t;
+ best_p = p;
+ }
+ }
+ printf("%6d %6d %5.3g", n, best_p, (double) best_p / n);
+ if (best_p > 0)
+ {
+ double speedup = 100 * (lehmer_time - best_time) / lehmer_time;
+ printf(" %5.3g%%", speedup);
+ if (speedup < 1.0)
+ {
+ printf(" (ignored)");
+ best_p = 0;
+ }
+ }
+ printf("\n");
+
+ p_table[n] = best_p;
+ }
TMP_FREE;
- return ctx.gn;
+ gmp_randclear(rands);
+ return 0;
}
+#endif /* TUNE_GCD_P */
diff --git a/gmp/mpn/generic/gcd_1.c b/gmp/mpn/generic/gcd_1.c
index f6dcb4a2eb..73be15134c 100644
--- a/gmp/mpn/generic/gcd_1.c
+++ b/gmp/mpn/generic/gcd_1.c
@@ -1,54 +1,26 @@
/* mpn_gcd_1 -- mpn and limb greatest common divisor.
-Copyright 1994, 1996, 2000, 2001, 2009, 2012 Free Software Foundation, Inc.
+Copyright 1994, 1996, 2000, 2001 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
-#ifndef GCD_1_METHOD
-#define GCD_1_METHOD 2
-#endif
-
-#define USE_ZEROTAB 0
-
-#if USE_ZEROTAB
-#define MAXSHIFT 4
-#define MASK ((1 << MAXSHIFT) - 1)
-static const unsigned char zerotab[1 << MAXSHIFT] =
-{
-#if MAXSHIFT > 4
- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-#endif
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
-};
-#endif
/* Does not work for U == 0 or V == 0. It would be tough to make it work for
V == 0 since gcd(x,0) = x, and U does not generally fit in an mp_limb_t.
@@ -109,10 +81,6 @@ mpn_gcd_1 (mp_srcptr up, mp_size_t size, mp_limb_t vlimb)
goto strip_u_maybe;
}
- ASSERT (ulimb & 1);
- ASSERT (vlimb & 1);
-
-#if GCD_1_METHOD == 1
while (ulimb != vlimb)
{
ASSERT (ulimb & 1);
@@ -141,58 +109,6 @@ mpn_gcd_1 (mp_srcptr up, mp_size_t size, mp_limb_t vlimb)
while ((vlimb & 1) == 0);
}
}
-#else
-# if GCD_1_METHOD == 2
-
- ulimb >>= 1;
- vlimb >>= 1;
-
- while (ulimb != vlimb)
- {
- int c;
- mp_limb_t t;
- mp_limb_t vgtu;
-
- t = ulimb - vlimb;
- vgtu = LIMB_HIGHBIT_TO_MASK (t);
-
- /* v <-- min (u, v) */
- vlimb += (vgtu & t);
-
- /* u <-- |u - v| */
- ulimb = (t ^ vgtu) - vgtu;
-
-#if USE_ZEROTAB
- /* Number of trailing zeros is the same no matter if we look at
- * t or ulimb, but using t gives more parallelism. */
- c = zerotab[t & MASK];
-
- while (UNLIKELY (c == MAXSHIFT))
- {
- ulimb >>= MAXSHIFT;
- if (0)
- strip_u_maybe:
- vlimb >>= 1;
-
- c = zerotab[ulimb & MASK];
- }
-#else
- if (0)
- {
- strip_u_maybe:
- vlimb >>= 1;
- t = ulimb;
- }
- count_trailing_zeros (c, t);
-#endif
- ulimb >>= (c + 1);
- }
-
- vlimb = (vlimb << 1) | 1;
-# else
-# error Unknown GCD_1_METHOD
-# endif
-#endif
done:
return vlimb << zero_bits;
diff --git a/gmp/mpn/generic/gcd_lehmer.c b/gmp/mpn/generic/gcd_lehmer.c
new file mode 100644
index 0000000000..37fd3c590d
--- /dev/null
+++ b/gmp/mpn/generic/gcd_lehmer.c
@@ -0,0 +1,160 @@
+/* gcd_lehmer.c.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003, 2004, 2005, 2008 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Use binary algorithm to compute G <-- GCD (U, V) for usize, vsize == 2.
+ Both U and V must be odd. */
+static inline mp_size_t
+gcd_2 (mp_ptr gp, mp_srcptr up, mp_srcptr vp)
+{
+ mp_limb_t u0, u1, v0, v1;
+ mp_size_t gn;
+
+ u0 = up[0];
+ u1 = up[1];
+ v0 = vp[0];
+ v1 = vp[1];
+
+ ASSERT (u0 & 1);
+ ASSERT (v0 & 1);
+
+ /* Check for u0 != v0 needed to ensure that argument to
+ * count_trailing_zeros is non-zero. */
+ while (u1 != v1 && u0 != v0)
+ {
+ unsigned long int r;
+ if (u1 > v1)
+ {
+ u1 -= v1 + (u0 < v0);
+ u0 = (u0 - v0) & GMP_NUMB_MASK;
+ count_trailing_zeros (r, u0);
+ u0 = ((u1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (u0 >> r);
+ u1 >>= r;
+ }
+ else /* u1 < v1. */
+ {
+ v1 -= u1 + (v0 < u0);
+ v0 = (v0 - u0) & GMP_NUMB_MASK;
+ count_trailing_zeros (r, v0);
+ v0 = ((v1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (v0 >> r);
+ v1 >>= r;
+ }
+ }
+
+ gp[0] = u0, gp[1] = u1, gn = 1 + (u1 != 0);
+
+ /* If U == V == GCD, done. Otherwise, compute GCD (V, |U - V|). */
+ if (u1 == v1 && u0 == v0)
+ return gn;
+
+ v0 = (u0 == v0) ? ((u1 > v1) ? u1-v1 : v1-u1) : ((u0 > v0) ? u0-v0 : v0-u0);
+ gp[0] = mpn_gcd_1 (gp, gn, v0);
+
+ return 1;
+}
+
+/* Temporary storage: n */
+mp_size_t
+mpn_gcd_lehmer_n (mp_ptr gp, mp_ptr ap, mp_ptr bp, mp_size_t n, mp_ptr tp)
+{
+ /* Relax this requirement, and normalize at the start? Must disallow
+ A = B = 0, though. */
+ ASSERT(ap[n-1] > 0 || bp[n-1] > 0);
+
+ while (n > 2)
+ {
+ struct hgcd_matrix1 M;
+ mp_limb_t ah, al, bh, bl;
+ mp_limb_t mask;
+
+ mask = ap[n-1] | bp[n-1];
+ ASSERT (mask > 0);
+
+ if (mask & GMP_NUMB_HIGHBIT)
+ {
+ ah = ap[n-1]; al = ap[n-2];
+ bh = bp[n-1]; bl = bp[n-2];
+ }
+ else
+ {
+ int shift;
+
+ count_leading_zeros (shift, mask);
+ ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
+ al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
+ bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
+ bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
+ }
+
+ /* Try an mpn_nhgcd2 step */
+ if (mpn_hgcd2 (ah, al, bh, bl, &M))
+ {
+ n = mpn_hgcd_mul_matrix1_inverse_vector (&M, tp, ap, bp, n);
+ MP_PTR_SWAP (ap, tp);
+ }
+ else
+ {
+ /* mpn_hgcd2 has failed. Then either one of a or b is very
+ small, or the difference is very small. Perform one
+ subtraction followed by one division. */
+ mp_size_t gn;
+
+ /* Temporary storage n */
+ n = mpn_gcd_subdiv_step (gp, &gn, ap, bp, n, tp);
+ if (n == 0)
+ return gn;
+ }
+ }
+
+ if (n == 1)
+ {
+ *gp = mpn_gcd_1(ap, 1, bp[0]);
+ return 1;
+ }
+
+ /* Due to the calling convention for mpn_gcd, at most one can be
+ even. */
+
+ if (! (ap[0] & 1))
+ MP_PTR_SWAP (ap, bp);
+
+ ASSERT (ap[0] & 1);
+
+ if (bp[0] == 0)
+ {
+ *gp = mpn_gcd_1 (ap, 2, bp[1]);
+ return 1;
+ }
+ else if (! (bp[0] & 1))
+ {
+ int r;
+ count_trailing_zeros (r, bp[0]);
+ bp[0] = ((bp[1] << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (bp[0] >> r);
+ bp[1] >>= r;
+ }
+
+ return gcd_2(gp, ap, bp);
+}
diff --git a/gmp/mpn/generic/gcd_subdiv_step.c b/gmp/mpn/generic/gcd_subdiv_step.c
index 18634bec9f..47c0c26c86 100644
--- a/gmp/mpn/generic/gcd_subdiv_step.c
+++ b/gmp/mpn/generic/gcd_subdiv_step.c
@@ -4,35 +4,22 @@
SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2003-2005, 2008, 2010, 2011 Free Software Foundation, Inc.
+Copyright 2003, 2004, 2005, 2008 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-#include <stdlib.h> /* for NULL */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
@@ -40,47 +27,17 @@ see https://www.gnu.org/licenses/. */
/* Used when mpn_hgcd or mpn_hgcd2 has failed. Then either one of a or
b is small, or the difference is small. Perform one subtraction
- followed by one division. The normal case is to compute the reduced
- a and b, and return the new size.
-
- If s == 0 (used for gcd and gcdext), returns zero if the gcd is
- found.
-
- If s > 0, don't reduce to size <= s, and return zero if no
- reduction is possible (if either a, b or |a-b| is of size <= s). */
-
-/* The hook function is called as
-
- hook(ctx, gp, gn, qp, qn, d)
-
- in the following cases:
-
- + If A = B at the start, G is the gcd, Q is NULL, d = -1.
-
- + If one input is zero at the start, G is the gcd, Q is NULL,
- d = 0 if A = G and d = 1 if B = G.
-
- Otherwise, if d = 0 we have just subtracted a multiple of A from B,
- and if d = 1 we have subtracted a multiple of B from A.
-
- + If A = B after subtraction, G is the gcd, Q is NULL.
-
- + If we get a zero remainder after division, G is the gcd, Q is the
- quotient.
-
- + Otherwise, G is NULL, Q is the quotient (often 1).
-
- */
+ followed by one division. If the gcd is found, stores it in gp and
+ *gn, and returns zero. Otherwise, compute the reduced a and b, and
+ return the new size. */
+/* FIXME: Check when the smaller number is a single limb, and invoke
+ * mpn_gcd_1. */
mp_size_t
-mpn_gcd_subdiv_step (mp_ptr ap, mp_ptr bp, mp_size_t n, mp_size_t s,
- gcd_subdiv_step_hook *hook, void *ctx,
- mp_ptr tp)
+mpn_gcd_subdiv_step (mp_ptr gp, mp_size_t *gn,
+ mp_ptr ap, mp_ptr bp, mp_size_t n, mp_ptr tp)
{
- static const mp_limb_t one = CNST_LIMB(1);
- mp_size_t an, bn, qn;
-
- int swapped;
+ mp_size_t an, bn;
ASSERT (n > 0);
ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
@@ -89,117 +46,59 @@ mpn_gcd_subdiv_step (mp_ptr ap, mp_ptr bp, mp_size_t n, mp_size_t s,
MPN_NORMALIZE (ap, an);
MPN_NORMALIZE (bp, bn);
- swapped = 0;
-
- /* Arrange so that a < b, subtract b -= a, and maintain
- normalization. */
- if (an == bn)
+ if (UNLIKELY (an == 0))
{
- int c;
- MPN_CMP (c, ap, bp, an);
- if (UNLIKELY (c == 0))
- {
- /* For gcdext, return the smallest of the two cofactors, so
- pass d = -1. */
- if (s == 0)
- hook (ctx, ap, an, NULL, 0, -1);
- return 0;
- }
- else if (c > 0)
- {
- MP_PTR_SWAP (ap, bp);
- swapped ^= 1;
- }
- }
- else
- {
- if (an > bn)
- {
- MPN_PTR_SWAP (ap, an, bp, bn);
- swapped ^= 1;
- }
- }
- if (an <= s)
- {
- if (s == 0)
- hook (ctx, bp, bn, NULL, 0, swapped ^ 1);
+ return_b:
+ MPN_COPY (gp, bp, bn);
+ *gn = bn;
return 0;
}
-
- ASSERT_NOCARRY (mpn_sub (bp, bp, bn, ap, an));
- MPN_NORMALIZE (bp, bn);
- ASSERT (bn > 0);
-
- if (bn <= s)
+ else if (UNLIKELY (bn == 0))
{
- /* Undo subtraction. */
- mp_limb_t cy = mpn_add (bp, ap, an, bp, bn);
- if (cy > 0)
- bp[an] = cy;
+ return_a:
+ MPN_COPY (gp, ap, an);
+ *gn = an;
return 0;
}
- /* Arrange so that a < b */
- if (an == bn)
+ /* Arrange so that a > b, subtract an -= bn, and maintain
+ normalization. */
+ if (an < bn)
+ MPN_PTR_SWAP (ap, an, bp, bn);
+ else if (an == bn)
{
int c;
MPN_CMP (c, ap, bp, an);
if (UNLIKELY (c == 0))
- {
- if (s > 0)
- /* Just record subtraction and return */
- hook (ctx, NULL, 0, &one, 1, swapped);
- else
- /* Found gcd. */
- hook (ctx, bp, bn, NULL, 0, swapped);
- return 0;
- }
-
- hook (ctx, NULL, 0, &one, 1, swapped);
-
- if (c > 0)
- {
- MP_PTR_SWAP (ap, bp);
- swapped ^= 1;
- }
+ goto return_a;
+ else if (c < 0)
+ MP_PTR_SWAP (ap, bp);
}
- else
- {
- hook (ctx, NULL, 0, &one, 1, swapped);
- if (an > bn)
- {
- MPN_PTR_SWAP (ap, an, bp, bn);
- swapped ^= 1;
- }
+ ASSERT_NOCARRY (mpn_sub (ap, ap, an, bp, bn));
+ MPN_NORMALIZE (ap, an);
+ ASSERT (an > 0);
+
+ /* Arrange so that a > b, and divide a = q b + r */
+ /* FIXME: an < bn happens when we have cancellation. If that is the
+ common case, then we could reverse the roles of a and b to avoid
+ the swap. */
+ if (an < bn)
+ MPN_PTR_SWAP (ap, an, bp, bn);
+ else if (an == bn)
+ {
+ int c;
+ MPN_CMP (c, ap, bp, an);
+ if (UNLIKELY (c == 0))
+ goto return_a;
+ else if (c < 0)
+ MP_PTR_SWAP (ap, bp);
}
- mpn_tdiv_qr (tp, bp, 0, bp, bn, ap, an);
- qn = bn - an + 1;
- bn = an;
- MPN_NORMALIZE (bp, bn);
+ mpn_tdiv_qr (tp, ap, 0, ap, an, bp, bn);
- if (UNLIKELY (bn <= s))
- {
- if (s == 0)
- {
- hook (ctx, ap, an, tp, qn, swapped);
- return 0;
- }
-
- /* Quotient is one too large, so decrement it and add back A. */
- if (bn > 0)
- {
- mp_limb_t cy = mpn_add (bp, ap, an, bp, bn);
- if (cy)
- bp[an++] = cy;
- }
- else
- MPN_COPY (bp, ap, an);
-
- MPN_DECR_U (tp, qn, 1);
- }
+ if (mpn_zero_p (ap, bn))
+ goto return_b;
- hook (ctx, NULL, 0, tp, qn, swapped);
- return an;
+ return bn;
}
diff --git a/gmp/mpn/generic/gcdext.c b/gmp/mpn/generic/gcdext.c
index 1c4ff75aab..38487ae66d 100644
--- a/gmp/mpn/generic/gcdext.c
+++ b/gmp/mpn/generic/gcdext.c
@@ -1,33 +1,22 @@
/* mpn_gcdext -- Extended Greatest Common Divisor.
-Copyright 1996, 1998, 2000-2005, 2008, 2009, 2012 Free Software Foundation,
-Inc.
+Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
@@ -96,10 +85,10 @@ hgcd_mul_matrix_vector (struct hgcd_matrix *M,
return n;
}
-#define COMPUTE_V_ITCH(n) (2*(n))
+#define COMPUTE_V_ITCH(n) (2*(n) + 1)
/* Computes |v| = |(g - u a)| / b, where u may be positive or
- negative, and v is of the opposite sign. max(a, b) is of size n, u and
+ negative, and v is of the opposite sign. a, b are of size n, u and
v at most size n, and v must have space for n+1 limbs. */
static mp_size_t
compute_v (mp_ptr vp,
@@ -119,11 +108,9 @@ compute_v (mp_ptr vp,
size = ABS (usize);
ASSERT (size <= n);
- ASSERT (up[size-1] > 0);
an = n;
MPN_NORMALIZE (ap, an);
- ASSERT (gn <= an);
if (an >= size)
mpn_mul (tp, ap, an, up, size);
@@ -132,6 +119,8 @@ compute_v (mp_ptr vp,
size += an;
+ ASSERT (gn <= size);
+
if (usize > 0)
{
/* |v| = -v = (u a - g) / b */
@@ -142,11 +131,11 @@ compute_v (mp_ptr vp,
return 0;
}
else
- { /* |v| = v = (g - u a) / b = (g + |u| a) / b. Since g <= a,
- (g + |u| a) always fits in (|usize| + an) limbs. */
-
- ASSERT_NOCARRY (mpn_add (tp, tp, size, gp, gn));
- size -= (tp[size - 1] == 0);
+ { /* usize < 0 */
+ /* |v| = v = (c - u a) / b = (c + |u| a) / b */
+ mp_limb_t cy = mpn_add (tp, tp, size, gp, gn);
+ if (cy)
+ tp[size++] = cy;
}
/* Now divide t / b. There must be no remainder */
@@ -157,9 +146,21 @@ compute_v (mp_ptr vp,
vn = size + 1 - bn;
ASSERT (vn <= n + 1);
- mpn_divexact (vp, tp, size, bp, bn);
+ /* FIXME: Use divexact. Or do the entire calculation mod 2^{n *
+ GMP_NUMB_BITS}. */
+ mpn_tdiv_qr (vp, tp, 0, tp, size, bp, bn);
vn -= (vp[vn-1] == 0);
+ /* Remainder must be zero */
+#if WANT_ASSERT
+ {
+ mp_size_t i;
+ for (i = 0; i < bn; i++)
+ {
+ ASSERT (tp[i] == 0);
+ }
+ }
+#endif
return vn;
}
@@ -180,8 +181,7 @@ compute_v (mp_ptr vp,
For the lehmer call after the loop, Let T denote
GCDEXT_DC_THRESHOLD. For the gcdext_lehmer call, we need T each for
u, a and b, and 4T+3 scratch space. Next, for compute_v, we need T
- for u, T+1 for v and 2T scratch space. In all, 7T + 3 is
- sufficient for both operations.
+ + 1 for v and 2T + 1 scratch space. In all, 7T + 3 is sufficient.
*/
@@ -204,7 +204,6 @@ mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
mp_size_t matrix_scratch;
mp_size_t ualloc = n + 1;
- struct gcdext_ctx ctx;
mp_size_t un;
mp_ptr u0;
mp_ptr u1;
@@ -215,7 +214,6 @@ mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
ASSERT (an >= n);
ASSERT (n > 0);
- ASSERT (bp[n-1] > 0);
TMP_MARK;
@@ -284,10 +282,6 @@ mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
u0 = tp; tp += ualloc;
u1 = tp; tp += ualloc;
- ctx.gp = gp;
- ctx.up = up;
- ctx.usize = usizep;
-
{
/* For the first hgcd call, there are no u updates, and it makes
some sense to use a different choice for p. */
@@ -321,22 +315,21 @@ mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
/* mpn_hgcd has failed. Then either one of a or b is very
small, or the difference is very small. Perform one
subtraction followed by one division. */
- u1[0] = 1;
+ mp_size_t gn;
+ mp_size_t updated_un = 1;
- ctx.u0 = u0;
- ctx.u1 = u1;
- ctx.tp = tp + n; /* ualloc */
- ctx.un = 1;
+ u1[0] = 1;
- /* Temporary storage n */
- n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
+ /* Temporary storage 2n + 1 */
+ n = mpn_gcdext_subdiv_step (gp, &gn, up, usizep, ap, bp, n,
+ u0, u1, &updated_un, tp, tp + n);
if (n == 0)
{
TMP_FREE;
- return ctx.gn;
+ return gn;
}
- un = ctx.un;
+ un = updated_un;
ASSERT (un < ualloc);
}
}
@@ -378,45 +371,22 @@ mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
/* mpn_hgcd has failed. Then either one of a or b is very
small, or the difference is very small. Perform one
subtraction followed by one division. */
- ctx.u0 = u0;
- ctx.u1 = u1;
- ctx.tp = tp + n; /* ualloc */
- ctx.un = un;
+ mp_size_t gn;
+ mp_size_t updated_un = un;
- /* Temporary storage n */
- n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
+ /* Temporary storage 2n + 1 */
+ n = mpn_gcdext_subdiv_step (gp, &gn, up, usizep, ap, bp, n,
+ u0, u1, &updated_un, tp, tp + n);
if (n == 0)
{
TMP_FREE;
- return ctx.gn;
+ return gn;
}
- un = ctx.un;
+ un = updated_un;
ASSERT (un < ualloc);
}
}
- /* We have A = ... a + ... b
- B = u0 a + u1 b
-
- a = u1 A + ... B
- b = -u0 A + ... B
-
- with bounds
-
- |u0|, |u1| <= B / min(a, b)
-
- We always have u1 > 0, and u0 == 0 is possible only if u1 == 1,
- in which case the only reduction done so far is a = A - k B for
- some k.
-
- Compute g = u a + v b = (u u1 - v u0) A + (...) B
- Here, u, v are bounded by
-
- |u| <= b,
- |v| <= a
- */
-
- ASSERT ( (ap[n-1] | bp[n-1]) > 0);
if (UNLIKELY (mpn_cmp (ap, bp, n) == 0))
{
@@ -426,10 +396,7 @@ mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
MPN_COPY (gp, ap, n);
MPN_CMP (c, u0, u1, un);
- /* c == 0 can happen only when A = (2k+1) G, B = 2 G. And in
- this case we choose the cofactor + 1, corresponding to G = A
- - k B, rather than -1, corresponding to G = - A + (k+1) B. */
- ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
+ ASSERT (c != 0);
if (c < 0)
{
MPN_NORMALIZE (u0, un);
@@ -446,9 +413,10 @@ mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
TMP_FREE;
return n;
}
- else if (UNLIKELY (u0[0] == 0) && un == 1)
+ else if (mpn_zero_p (u0, un))
{
mp_size_t gn;
+ ASSERT (un == 1);
ASSERT (u1[0] == 1);
/* g = u a + v b = (u u1 - v u0) A + (...) B = u A + (...) B */
@@ -459,6 +427,23 @@ mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
}
else
{
+ /* We have A = ... a + ... b
+ B = u0 a + u1 b
+
+ a = u1 A + ... B
+ b = -u0 A + ... B
+
+ with bounds
+
+ |u0|, |u1| <= B / min(a, b)
+
+ Compute g = u a + v b = (u u1 - v u0) A + (...) B
+ Here, u, v are bounded by
+
+ |u| <= b,
+ |v| <= a
+ */
+
mp_size_t u0n;
mp_size_t u1n;
mp_size_t lehmer_un;
@@ -478,8 +463,6 @@ mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
u0n = un;
MPN_NORMALIZE (u0, u0n);
- ASSERT (u0n > 0);
-
if (lehmer_un == 0)
{
/* u == 0 ==> v = g / b == 1 ==> g = - u0 A + (...) B */
@@ -505,12 +488,25 @@ mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
u1n = un;
MPN_NORMALIZE (u1, u1n);
- ASSERT (u1n > 0);
+
+ /* It's possible that u0 = 1, u1 = 0 */
+ if (u1n == 0)
+ {
+ ASSERT (un == 1);
+ ASSERT (u0[0] == 1);
+
+ /* u1 == 0 ==> u u1 + v u0 = v */
+ MPN_COPY (up, lehmer_vp, lehmer_vn);
+ *usizep = negate ? lehmer_vn : - lehmer_vn;
+
+ TMP_FREE;
+ return gn;
+ }
ASSERT (lehmer_un + u1n <= ualloc);
ASSERT (lehmer_vn + u0n <= ualloc);
- /* We may still have v == 0 */
+ /* Now u0, u1, u are non-zero. We may still have v == 0 */
/* Compute u u0 */
if (lehmer_un <= u1n)
diff --git a/gmp/mpn/generic/gcdext_1.c b/gmp/mpn/generic/gcdext_1.c
index ea46cceb72..f1dd9ee963 100644
--- a/gmp/mpn/generic/gcdext_1.c
+++ b/gmp/mpn/generic/gcdext_1.c
@@ -1,273 +1,27 @@
/* mpn_gcdext -- Extended Greatest Common Divisor.
-Copyright 1996, 1998, 2000-2005, 2008, 2009 Free Software Foundation, Inc.
+Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
-#ifndef GCDEXT_1_USE_BINARY
-#define GCDEXT_1_USE_BINARY 0
-#endif
-
-#ifndef GCDEXT_1_BINARY_METHOD
-#define GCDEXT_1_BINARY_METHOD 2
-#endif
-
-#ifndef USE_ZEROTAB
-#define USE_ZEROTAB 1
-#endif
-
-#if GCDEXT_1_USE_BINARY
-
-#if USE_ZEROTAB
-static unsigned char zerotab[0x40] = {
- 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
- 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
-};
-#endif
-
-mp_limb_t
-mpn_gcdext_1 (mp_limb_signed_t *sp, mp_limb_signed_t *tp,
- mp_limb_t u, mp_limb_t v)
-{
- /* Maintain
-
- U = t1 u + t0 v
- V = s1 u + s0 v
-
- where U, V are the inputs (without any shared power of two),
- and the matrix has determinant ± 2^{shift}.
- */
- mp_limb_t s0 = 1;
- mp_limb_t t0 = 0;
- mp_limb_t s1 = 0;
- mp_limb_t t1 = 1;
- mp_limb_t ug;
- mp_limb_t vg;
- mp_limb_t ugh;
- mp_limb_t vgh;
- unsigned zero_bits;
- unsigned shift;
- unsigned i;
-#if GCDEXT_1_BINARY_METHOD == 2
- mp_limb_t det_sign;
-#endif
-
- ASSERT (u > 0);
- ASSERT (v > 0);
-
- count_trailing_zeros (zero_bits, u | v);
- u >>= zero_bits;
- v >>= zero_bits;
-
- if ((u & 1) == 0)
- {
- count_trailing_zeros (shift, u);
- u >>= shift;
- t1 <<= shift;
- }
- else if ((v & 1) == 0)
- {
- count_trailing_zeros (shift, v);
- v >>= shift;
- s0 <<= shift;
- }
- else
- shift = 0;
-
-#if GCDEXT_1_BINARY_METHOD == 1
- while (u != v)
- {
- unsigned count;
- if (u > v)
- {
- u -= v;
-#if USE_ZEROTAB
- count = zerotab [u & 0x3f];
- u >>= count;
- if (UNLIKELY (count == 6))
- {
- unsigned c;
- do
- {
- c = zerotab[u & 0x3f];
- u >>= c;
- count += c;
- }
- while (c == 6);
- }
-#else
- count_trailing_zeros (count, u);
- u >>= count;
-#endif
- t0 += t1; t1 <<= count;
- s0 += s1; s1 <<= count;
- }
- else
- {
- v -= u;
-#if USE_ZEROTAB
- count = zerotab [v & 0x3f];
- v >>= count;
- if (UNLIKELY (count == 6))
- {
- unsigned c;
- do
- {
- c = zerotab[v & 0x3f];
- v >>= c;
- count += c;
- }
- while (c == 6);
- }
-#else
- count_trailing_zeros (count, v);
- v >>= count;
-#endif
- t1 += t0; t0 <<= count;
- s1 += s0; s0 <<= count;
- }
- shift += count;
- }
-#else
-# if GCDEXT_1_BINARY_METHOD == 2
- u >>= 1;
- v >>= 1;
-
- det_sign = 0;
-
- while (u != v)
- {
- unsigned count;
- mp_limb_t d = u - v;
- mp_limb_t vgtu = LIMB_HIGHBIT_TO_MASK (d);
- mp_limb_t sx;
- mp_limb_t tx;
-
- /* When v <= u (vgtu == 0), the updates are:
-
- (u; v) <-- ( (u - v) >> count; v) (det = +(1<<count) for corr. M factor)
- (t1, t0) <-- (t1 << count, t0 + t1)
-
- and when v > 0, the updates are
-
- (u; v) <-- ( (v - u) >> count; u) (det = -(1<<count))
- (t1, t0) <-- (t0 << count, t0 + t1)
-
- and similarly for s1, s0
- */
-
- /* v <-- min (u, v) */
- v += (vgtu & d);
-
- /* u <-- |u - v| */
- u = (d ^ vgtu) - vgtu;
-
- /* Number of trailing zeros is the same no matter if we look at
- * d or u, but using d gives more parallelism. */
-#if USE_ZEROTAB
- count = zerotab[d & 0x3f];
- if (UNLIKELY (count == 6))
- {
- unsigned c = 6;
- do
- {
- d >>= c;
- c = zerotab[d & 0x3f];
- count += c;
- }
- while (c == 6);
- }
-#else
- count_trailing_zeros (count, d);
-#endif
- det_sign ^= vgtu;
-
- tx = vgtu & (t0 - t1);
- sx = vgtu & (s0 - s1);
- t0 += t1;
- s0 += s1;
- t1 += tx;
- s1 += sx;
-
- count++;
- u >>= count;
- t1 <<= count;
- s1 <<= count;
- shift += count;
- }
- u = (u << 1) + 1;
-# else /* GCDEXT_1_BINARY_METHOD == 2 */
-# error Unknown GCDEXT_1_BINARY_METHOD
-# endif
-#endif
-
- /* Now u = v = g = gcd (u,v). Compute U/g and V/g */
- ug = t0 + t1;
- vg = s0 + s1;
-
- ugh = ug/2 + (ug & 1);
- vgh = vg/2 + (vg & 1);
-
- /* Now ±2^{shift} g = s0 U - t0 V. Get rid of the power of two, using
- s0 U - t0 V = (s0 + V/g) U - (t0 + U/g) V. */
- for (i = 0; i < shift; i++)
- {
- mp_limb_t mask = - ( (s0 | t0) & 1);
-
- s0 /= 2;
- t0 /= 2;
- s0 += mask & vgh;
- t0 += mask & ugh;
- }
- /* FIXME: Try simplifying this condition. */
- if ( (s0 > 1 && 2*s0 >= vg) || (t0 > 1 && 2*t0 >= ug) )
- {
- s0 -= vg;
- t0 -= ug;
- }
-#if GCDEXT_1_BINARY_METHOD == 2
- /* Conditional negation. */
- s0 = (s0 ^ det_sign) - det_sign;
- t0 = (t0 ^ det_sign) - det_sign;
-#endif
- *sp = s0;
- *tp = -t0;
-
- return u << zero_bits;
-}
-
-#else /* !GCDEXT_1_USE_BINARY */
-
/* FIXME: Takes two single-word limbs. It could be extended to a
* function that accepts a bignum for the first input, and only
@@ -325,4 +79,3 @@ mpn_gcdext_1 (mp_limb_signed_t *up, mp_limb_signed_t *vp,
v1 -= q * v0;
}
}
-#endif /* !GCDEXT_1_USE_BINARY */
diff --git a/gmp/mpn/generic/gcdext_lehmer.c b/gmp/mpn/generic/gcdext_lehmer.c
index 547f69a409..8599a4f554 100644
--- a/gmp/mpn/generic/gcdext_lehmer.c
+++ b/gmp/mpn/generic/gcdext_lehmer.c
@@ -1,146 +1,31 @@
/* mpn_gcdext -- Extended Greatest Common Divisor.
-Copyright 1996, 1998, 2000-2005, 2008, 2009, 2012 Free Software Foundation,
-Inc.
+Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
-/* Here, d is the index of the cofactor to update. FIXME: Could use qn
- = 0 for the common case q = 1. */
-void
-mpn_gcdext_hook (void *p, mp_srcptr gp, mp_size_t gn,
- mp_srcptr qp, mp_size_t qn, int d)
-{
- struct gcdext_ctx *ctx = (struct gcdext_ctx *) p;
- mp_size_t un = ctx->un;
-
- if (gp)
- {
- mp_srcptr up;
-
- ASSERT (gn > 0);
- ASSERT (gp[gn-1] > 0);
-
- MPN_COPY (ctx->gp, gp, gn);
- ctx->gn = gn;
-
- if (d < 0)
- {
- int c;
-
- /* Must return the smallest cofactor, +u1 or -u0 */
- MPN_CMP (c, ctx->u0, ctx->u1, un);
- ASSERT (c != 0 || (un == 1 && ctx->u0[0] == 1 && ctx->u1[0] == 1));
-
- d = c < 0;
- }
-
- up = d ? ctx->u0 : ctx->u1;
-
- MPN_NORMALIZE (up, un);
- MPN_COPY (ctx->up, up, un);
-
- *ctx->usize = d ? -un : un;
- }
- else
- {
- mp_limb_t cy;
- mp_ptr u0 = ctx->u0;
- mp_ptr u1 = ctx->u1;
-
- ASSERT (d >= 0);
-
- if (d)
- MP_PTR_SWAP (u0, u1);
-
- qn -= (qp[qn-1] == 0);
-
- /* Update u0 += q * u1 */
- if (qn == 1)
- {
- mp_limb_t q = qp[0];
-
- if (q == 1)
- /* A common case. */
- cy = mpn_add_n (u0, u0, u1, un);
- else
- cy = mpn_addmul_1 (u0, u1, un, q);
- }
- else
- {
- mp_size_t u1n;
- mp_ptr tp;
-
- u1n = un;
- MPN_NORMALIZE (u1, u1n);
-
- if (u1n == 0)
- return;
-
- /* Should always have u1n == un here, and u1 >= u0. The
- reason is that we alternate adding u0 to u1 and u1 to u0
- (corresponding to subtractions a - b and b - a), and we
- can get a large quotient only just after a switch, which
- means that we'll add (a multiple of) the larger u to the
- smaller. */
-
- tp = ctx->tp;
-
- if (qn > u1n)
- mpn_mul (tp, qp, qn, u1, u1n);
- else
- mpn_mul (tp, u1, u1n, qp, qn);
-
- u1n += qn;
- u1n -= tp[u1n-1] == 0;
-
- if (u1n >= un)
- {
- cy = mpn_add (u0, tp, u1n, u0, un);
- un = u1n;
- }
- else
- /* Note: Unlikely case, maybe never happens? */
- cy = mpn_add (u0, u0, un, tp, u1n);
-
- }
- u0[un] = cy;
- ctx->un = un + (cy > 0);
- }
-}
-
-/* Temporary storage: 3*(n+1) for u. If hgcd2 succeeds, we need n for
- the matrix-vector multiplication adjusting a, b. If hgcd fails, we
- need at most n for the quotient and n+1 for the u update (reusing
- the extra u). In all, 4n + 3. */
+/* Temporary storage: 3*(n+1) for u. n+1 for the matrix-vector
+ multiplications (if hgcd2 succeeds). If hgcd fails, n+1 limbs are
+ needed for the division, with most n for the quotient, and n+1 for
+ the product q u0. In all, 4n + 3. */
mp_size_t
mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
@@ -156,16 +41,8 @@ mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
* which correspond to the first column of the inverse
*
* M^{-1} = (u1, -v1; -u0, v0)
- *
- * This implies that
- *
- * a = u1 A (mod B)
- * b = -u0 A (mod B)
- *
- * where A, B denotes the input values.
*/
- struct gcdext_ctx ctx;
mp_size_t un;
mp_ptr u0;
mp_ptr u1;
@@ -178,10 +55,6 @@ mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
u1[0] = 1; un = 1;
- ctx.gp = gp;
- ctx.up = up;
- ctx.usize = usize;
-
/* FIXME: Handle n == 2 differently, after the loop? */
while (n >= 2)
{
@@ -223,7 +96,7 @@ mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
/* Try an mpn_nhgcd2 step */
if (mpn_hgcd2 (ah, al, bh, bl, &M))
{
- n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
+ n = mpn_hgcd_mul_matrix1_inverse_vector (&M, tp, ap, bp, n);
MP_PTR_SWAP (ap, tp);
un = mpn_hgcd_mul_matrix1_vector(&M, u2, u0, u1, un);
MP_PTR_SWAP (u0, u2);
@@ -233,18 +106,17 @@ mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
/* mpn_hgcd2 has failed. Then either one of a or b is very
small, or the difference is very small. Perform one
subtraction followed by one division. */
- ctx.u0 = u0;
- ctx.u1 = u1;
- ctx.tp = u2;
- ctx.un = un;
+ mp_size_t gn;
+ mp_size_t updated_un = un;
/* Temporary storage n for the quotient and ualloc for the
new cofactor. */
- n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
+ n = mpn_gcdext_subdiv_step (gp, &gn, up, usize, ap, bp, n,
+ u0, u1, &updated_un, tp, u2);
if (n == 0)
- return ctx.gn;
+ return gn;
- un = ctx.un;
+ un = updated_un;
}
}
ASSERT_ALWAYS (ap[0] > 0);
diff --git a/gmp/mpn/generic/gcdext_subdiv_step.c b/gmp/mpn/generic/gcdext_subdiv_step.c
new file mode 100644
index 0000000000..d54b3bdee1
--- /dev/null
+++ b/gmp/mpn/generic/gcdext_subdiv_step.c
@@ -0,0 +1,197 @@
+/* gcdext_subdiv_step.c.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003, 2004, 2005, 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Used when mpn_hgcd or mpn_hgcd2 has failed. Then either one of a or
+ b is small, or the difference is small. Perform one subtraction
+ followed by one division. If the gcd is found, stores it in gp and
+ *gn, and returns zero. Otherwise, compute the reduced a and b,
+ return the new size, and cofactors. */
+
+/* Temporary storage: Needs n limbs for the quotient, at qp. tp must
+ point to an area large enough for the resulting cofactor, plus one
+ limb extra. All in all, 2N + 1 if N is a bound for both inputs and
+ outputs. */
+mp_size_t
+mpn_gcdext_subdiv_step (mp_ptr gp, mp_size_t *gn, mp_ptr up, mp_size_t *usizep,
+ mp_ptr ap, mp_ptr bp, mp_size_t n,
+ mp_ptr u0, mp_ptr u1, mp_size_t *unp,
+ mp_ptr qp, mp_ptr tp)
+{
+ mp_size_t an, bn, un;
+ mp_size_t qn;
+ mp_size_t u0n;
+
+ int swapped;
+
+ an = bn = n;
+
+ ASSERT (an > 0);
+ ASSERT (ap[an-1] > 0 || bp[an-1] > 0);
+
+ MPN_NORMALIZE (ap, an);
+ MPN_NORMALIZE (bp, bn);
+
+ un = *unp;
+
+ swapped = 0;
+
+ if (UNLIKELY (an == 0))
+ {
+ return_b:
+ MPN_COPY (gp, bp, bn);
+ *gn = bn;
+
+ MPN_NORMALIZE (u0, un);
+ MPN_COPY (up, u0, un);
+
+ *usizep = swapped ? un : -un;
+
+ return 0;
+ }
+ else if (UNLIKELY (bn == 0))
+ {
+ MPN_COPY (gp, ap, an);
+ *gn = an;
+
+ MPN_NORMALIZE (u1, un);
+ MPN_COPY (up, u1, un);
+
+ *usizep = swapped ? -un : un;
+
+ return 0;
+ }
+
+ /* Arrange so that a > b, subtract an -= bn, and maintain
+ normalization. */
+ if (an < bn)
+ {
+ MPN_PTR_SWAP (ap, an, bp, bn);
+ MP_PTR_SWAP (u0, u1);
+ swapped ^= 1;
+ }
+ else if (an == bn)
+ {
+ int c;
+ MPN_CMP (c, ap, bp, an);
+ if (UNLIKELY (c == 0))
+ {
+ MPN_COPY (gp, ap, an);
+ *gn = an;
+
+ /* Must return the smallest cofactor, +u1 or -u0 */
+ MPN_CMP (c, u0, u1, un);
+ ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
+
+ if (c < 0)
+ {
+ MPN_NORMALIZE (u0, un);
+ MPN_COPY (up, u0, un);
+ swapped ^= 1;
+ }
+ else
+ {
+ MPN_NORMALIZE_NOT_ZERO (u1, un);
+ MPN_COPY (up, u1, un);
+ }
+
+ *usizep = swapped ? -un : un;
+ return 0;
+ }
+ else if (c < 0)
+ {
+ MP_PTR_SWAP (ap, bp);
+ MP_PTR_SWAP (u0, u1);
+ swapped ^= 1;
+ }
+ }
+ /* Reduce a -= b, u1 += u0 */
+ ASSERT_NOCARRY (mpn_sub (ap, ap, an, bp, bn));
+ MPN_NORMALIZE (ap, an);
+ ASSERT (an > 0);
+
+ u1[un] = mpn_add_n (u1, u1, u0, un);
+ un += (u1[un] > 0);
+
+ /* Arrange so that a > b, and divide a = q b + r */
+ if (an < bn)
+ {
+ MPN_PTR_SWAP (ap, an, bp, bn);
+ MP_PTR_SWAP (u0, u1);
+ swapped ^= 1;
+ }
+ else if (an == bn)
+ {
+ int c;
+ MPN_CMP (c, ap, bp, an);
+ if (UNLIKELY (c == 0))
+ goto return_b;
+ else if (c < 0)
+ {
+ MP_PTR_SWAP (ap, bp);
+ MP_PTR_SWAP (u0, u1);
+ swapped ^= 1;
+ }
+ }
+
+ /* Reduce a -= q b, u1 += q u0 */
+ qn = an - bn + 1;
+ mpn_tdiv_qr (qp, ap, 0, ap, an, bp, bn);
+
+ if (mpn_zero_p (ap, bn))
+ goto return_b;
+
+ n = bn;
+
+ /* Update u1 += q u0 */
+ u0n = un;
+ MPN_NORMALIZE (u0, u0n);
+
+ if (u0n > 0)
+ {
+ qn -= (qp[qn - 1] == 0);
+
+ if (qn > u0n)
+ mpn_mul (tp, qp, qn, u0, u0n);
+ else
+ mpn_mul (tp, u0, u0n, qp, qn);
+
+ if (qn + u0n > un)
+ {
+ ASSERT_NOCARRY (mpn_add (u1, tp, qn + u0n, u1, un));
+ un = qn + u0n;
+ un -= (u1[un-1] == 0);
+ }
+ else
+ {
+ u1[un] = mpn_add (u1, u1, un, tp, qn + u0n);
+ un += (u1[un] > 0);
+ }
+ }
+
+ *unp = un;
+ return n;
+}
diff --git a/gmp/mpn/generic/get_d.c b/gmp/mpn/generic/get_d.c
index d73d314856..cf4ae86efc 100644
--- a/gmp/mpn/generic/get_d.c
+++ b/gmp/mpn/generic/get_d.c
@@ -4,33 +4,22 @@
CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
FUTURE GNU MP RELEASES.
-Copyright 2003, 2004, 2007, 2009, 2010, 2012 Free Software Foundation, Inc.
+Copyright 2003, 2004 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
@@ -40,20 +29,33 @@ see https://www.gnu.org/licenses/. */
#define _GMP_IEEE_FLOATS 0
#endif
+#if ! _GMP_IEEE_FLOATS
+/* dummy definition, just to let dead code compile */
+union ieee_double_extract {
+ struct {
+ int manh, manl, sig, exp;
+ } s;
+ double d;
+};
+#endif
+
/* To force use of the generic C code for testing, put
"#define _GMP_IEEE_FLOATS 0" at this point. */
+
/* In alpha gcc prior to 3.4, signed DI comparisons involving constants are
rearranged from "x < n" to "x+(-n) < 0", which is of course hopelessly
wrong if that addition overflows.
- The workaround here avoids this bug by ensuring n is not a literal constant.
- Note that this is alpha specific. The offending transformation is/was in
- alpha.c alpha_emit_conditional_branch() under "We want to use cmpcc/bcc".
+ The workaround here avoids this bug by ensuring n is not a literal
+ constant. Note that this is alpha specific. The offending transformation
+ is/was in alpha.c alpha_emit_conditional_branch() under "We want to use
+ cmpcc/bcc".
- Bizarrely, this happens also with Cray cc on alphaev5-cray-unicosmk2.0.6.X,
- and has the same solution. Don't know why or how. */
+ Bizarrely, it turns out this happens also with Cray cc on
+ alphaev5-cray-unicosmk2.0.6.X, and has the same solution. Don't know why
+ or how. */
#if HAVE_HOST_CPU_FAMILY_alpha \
&& ((defined (__GNUC__) && ! __GMP_GNUC_PREREQ(3,4)) \
@@ -68,73 +70,69 @@ static volatile const long CONST_NEG_1022_SUB_53 = -1022 - 53;
#endif
-/* Return the value {ptr,size}*2^exp, and negative if sign<0. Must have
- size>=1, and a non-zero high limb ptr[size-1].
- When we know the fp format, the result is truncated towards zero. This is
- consistent with other gmp conversions, like mpz_set_f or mpz_set_q, and is
- easy to implement and test.
+/* Return the value {ptr,size}*2^exp, and negative if sign<0.
+ Must have size>=1, and a non-zero high limb ptr[size-1].
- When we do not know the format, such truncation seems much harder. One
- would need to defeat any rounding mode, including round-up.
+ {ptr,size} is truncated towards zero. This is consistent with other gmp
+ conversions, like mpz_set_f or mpz_set_q, and is easy to implement and
+ test.
+
+ In the past conversions had attempted (imperfectly) to let the hardware
+ float rounding mode take effect, but that gets tricky since multiple
+ roundings need to be avoided, or taken into account, and denorms mean the
+ effective precision of the mantissa is not constant. (For reference,
+ mpz_get_d on IEEE systems was ok, except it operated on the absolute
+ value. mpf_get_d and mpq_get_d suffered from multiple roundings and from
+ not always using enough bits to get the rounding right.)
It's felt that GMP is not primarily concerned with hardware floats, and
really isn't enhanced by getting involved with hardware rounding modes
- (which could even be some weird unknown style), so something unambiguous and
- straightforward is best.
+ (which could even be some weird unknown style), so something unambiguous
+ and straightforward is best.
The IEEE code below is the usual case, it knows either a 32-bit or 64-bit
limb and is done with shifts and masks. The 64-bit case in particular
should come out nice and compact.
- The generic code used to work one bit at a time, which was not only slow,
- but implicitly relied upon denorms for intermediates, since the lowest bits'
- weight of a perfectly valid fp number underflows in non-denorm. Therefore,
- the generic code now works limb-per-limb, initially creating a number x such
- that 1 <= x <= BASE. (BASE is reached only as result of rounding.) Then
- x's exponent is scaled with explicit code (not ldexp to avoid libm
- dependency). It is a tap-dance to avoid underflow or overflow, beware!
+ The generic code works one bit at a time, which will be quite slow, but
+ should support any binary-based "double" and be safe against any rounding
+ mode. Note in particular it works on IEEE systems too.
Traps:
- Hardware traps for overflow to infinity, underflow to zero, or unsupported
- denorms may or may not be taken. The IEEE code works bitwise and so
- probably won't trigger them, the generic code works by float operations and
- so probably will. This difference might be thought less than ideal, but
- again its felt straightforward code is better than trying to get intimate
- with hardware exceptions (of perhaps unknown nature).
+ Hardware traps for overflow to infinity, underflow to zero, or
+ unsupported denorms may or may not be taken. The IEEE code works bitwise
+ and so probably won't trigger them, the generic code works by float
+ operations and so probably will. This difference might be thought less
+ than ideal, but again its felt straightforward code is better than trying
+ to get intimate with hardware exceptions (of perhaps unknown nature).
Not done:
- mpz_get_d in the past handled size==1 with a cast limb->double. This might
- still be worthwhile there (for up to the mantissa many bits), but for
- mpn_get_d here, the cost of applying "exp" to the resulting exponent would
- probably use up any benefit a cast may have over bit twiddling. Also, if
- the exponent is pushed into denorm range then bit twiddling is the only
- option, to ensure the desired truncation is obtained.
+ mpz_get_d in the past handled size==1 with a cast limb->double. This
+ might still be worthwhile there (for up to the mantissa many bits), but
+ for mpn_get_d here, the cost of applying "exp" to the resulting exponent
+ would probably use up any benefit a cast may have over bit twiddling.
+ Also, if the exponent is pushed into denorm range then bit twiddling is
+ the only option, to ensure the desired truncation is obtained.
Other:
For reference, note that HPPA 8000, 8200, 8500 and 8600 trap FCNV,UDW,DBL
- to the kernel for values >= 2^63. This makes it slow, and worse the kernel
- Linux (what versions?) apparently uses untested code in its trap handling
- routines, and gets the sign wrong. We don't use such a limb-to-double
- cast, neither in the IEEE or generic code. */
-
+ to the kernel for values >= 2^63. This makes it slow, and worse the
+ Linux kernel (what versions?) apparently uses untested code in its trap
+ handling routines, and gets the sign wrong. We don't use such a limb to
+ double cast, neither in the IEEE or generic code. */
-#undef FORMAT_RECOGNIZED
-
double
mpn_get_d (mp_srcptr up, mp_size_t size, mp_size_t sign, long exp)
{
- int lshift, nbits;
- mp_limb_t x, mhi, mlo;
-
ASSERT (size >= 0);
ASSERT_MPN (up, size);
ASSERT (size == 0 || up[size-1] != 0);
@@ -146,11 +144,10 @@ mpn_get_d (mp_srcptr up, mp_size_t size, mp_size_t sign, long exp)
overflow. After this exp can of course be reduced to anywhere within
the {up,size} region without underflow. */
if (UNLIKELY ((unsigned long) (GMP_NUMB_BITS * size)
- > ((unsigned long) LONG_MAX - exp)))
+ > (unsigned long) (LONG_MAX - exp)))
{
-#if _GMP_IEEE_FLOATS
- goto ieee_infinity;
-#endif
+ if (_GMP_IEEE_FLOATS)
+ goto ieee_infinity;
/* generic */
exp = LONG_MAX;
@@ -160,253 +157,334 @@ mpn_get_d (mp_srcptr up, mp_size_t size, mp_size_t sign, long exp)
exp += GMP_NUMB_BITS * size;
}
-#if _GMP_IEEE_FLOATS
- {
- union ieee_double_extract u;
-
- up += size;
+#if 1
+{
+ int lshift, nbits;
+ union ieee_double_extract u;
+ mp_limb_t x, mhi, mlo;
#if GMP_LIMB_BITS == 64
- mlo = up[-1];
- count_leading_zeros (lshift, mlo);
+ mp_limb_t m;
+ up += size;
+ m = *--up;
+ count_leading_zeros (lshift, m);
- exp -= (lshift - GMP_NAIL_BITS) + 1;
- mlo <<= lshift;
+ exp -= (lshift - GMP_NAIL_BITS) + 1;
+ m <<= lshift;
- nbits = GMP_LIMB_BITS - lshift;
+ nbits = GMP_LIMB_BITS - lshift;
- if (nbits < 53 && size > 1)
+ if (nbits < 53 && size > 1)
+ {
+ x = *--up;
+ x <<= GMP_NAIL_BITS;
+ x >>= nbits;
+ m |= x;
+ nbits += GMP_NUMB_BITS;
+
+ if (LIMBS_PER_DOUBLE >= 3 && nbits < 53 && size > 2)
{
- x = up[-2];
+ x = *--up;
x <<= GMP_NAIL_BITS;
x >>= nbits;
- mlo |= x;
+ m |= x;
nbits += GMP_NUMB_BITS;
-
- if (LIMBS_PER_DOUBLE >= 3 && nbits < 53 && size > 2)
- {
- x = up[-3];
- x <<= GMP_NAIL_BITS;
- x >>= nbits;
- mlo |= x;
- nbits += GMP_NUMB_BITS;
- }
}
- mhi = mlo >> (32 + 11);
- mlo = mlo >> 11; /* later implicitly truncated to 32 bits */
+ }
+ mhi = m >> (32 + 11);
+ mlo = m >> 11;
#endif
#if GMP_LIMB_BITS == 32
- x = *--up;
- count_leading_zeros (lshift, x);
+ up += size;
+ x = *--up, size--;
+ count_leading_zeros (lshift, x);
- exp -= (lshift - GMP_NAIL_BITS) + 1;
- x <<= lshift;
- mhi = x >> 11;
+ exp -= (lshift - GMP_NAIL_BITS) + 1;
+ x <<= lshift;
+ mhi = x >> 11;
- if (lshift < 11) /* FIXME: never true if NUMB < 20 bits */
+ if (lshift < 11) /* FIXME: never true if NUMB < 20 bits */
+ {
+ /* All 20 bits in mhi */
+ mlo = x << 21;
+ /* >= 1 bit in mlo */
+ nbits = GMP_LIMB_BITS - lshift - 21;
+ }
+ else
+ {
+ if (size != 0)
{
- /* All 20 bits in mhi */
- mlo = x << 21;
- /* >= 1 bit in mlo */
- nbits = GMP_LIMB_BITS - lshift - 21;
+ nbits = GMP_LIMB_BITS - lshift;
+
+ x = *--up, size--;
+ x <<= GMP_NAIL_BITS;
+ mhi |= x >> nbits >> 11;
+
+ mlo = x << GMP_LIMB_BITS - nbits - 11;
+ nbits = nbits + 11 - GMP_NAIL_BITS;
}
else
{
- if (size > 1)
- {
- nbits = GMP_LIMB_BITS - lshift;
-
- x = *--up, size--;
- x <<= GMP_NAIL_BITS;
- mhi |= x >> nbits >> 11;
-
- mlo = x << GMP_LIMB_BITS - nbits - 11;
- nbits = nbits + 11 - GMP_NAIL_BITS;
- }
- else
- {
- mlo = 0;
- goto done;
- }
+ mlo = 0;
+ goto done;
}
+ }
- /* Now all needed bits in mhi have been accumulated. Add bits to mlo. */
+ if (LIMBS_PER_DOUBLE >= 2 && nbits < 32 && size != 0)
+ {
+ x = *--up, size--;
+ x <<= GMP_NAIL_BITS;
+ x >>= nbits;
+ mlo |= x;
+ nbits += GMP_NUMB_BITS;
- if (LIMBS_PER_DOUBLE >= 2 && nbits < 32 && size > 1)
+ if (LIMBS_PER_DOUBLE >= 3 && nbits < 32 && size != 0)
{
- x = up[-1];
+ x = *--up, size--;
x <<= GMP_NAIL_BITS;
x >>= nbits;
mlo |= x;
nbits += GMP_NUMB_BITS;
- if (LIMBS_PER_DOUBLE >= 3 && nbits < 32 && size > 2)
+ if (LIMBS_PER_DOUBLE >= 4 && nbits < 32 && size != 0)
{
- x = up[-2];
+ x = *--up;
x <<= GMP_NAIL_BITS;
x >>= nbits;
mlo |= x;
nbits += GMP_NUMB_BITS;
-
- if (LIMBS_PER_DOUBLE >= 4 && nbits < 32 && size > 3)
- {
- x = up[-3];
- x <<= GMP_NAIL_BITS;
- x >>= nbits;
- mlo |= x;
- nbits += GMP_NUMB_BITS;
- }
}
}
+ }
- done:;
+ done:;
#endif
+ {
+ if (UNLIKELY (exp >= CONST_1024))
+ {
+ /* overflow, return infinity */
+ ieee_infinity:
+ mhi = 0;
+ mlo = 0;
+ exp = 1024;
+ }
+ else if (UNLIKELY (exp <= CONST_NEG_1023))
+ {
+ int rshift;
+
+ if (LIKELY (exp <= CONST_NEG_1022_SUB_53))
+ return 0.0; /* denorm underflows to zero */
+
+ rshift = -1022 - exp;
+ ASSERT (rshift > 0 && rshift < 53);
+#if GMP_LIMB_BITS > 53
+ mlo >>= rshift;
+ mhi = mlo >> 32;
+#else
+ if (rshift >= 32)
+ {
+ mlo = mhi;
+ mhi = 0;
+ rshift -= 32;
+ }
+ lshift = GMP_LIMB_BITS - rshift;
+ mlo = (mlo >> rshift) | (rshift == 0 ? 0 : mhi << lshift);
+ mhi >>= rshift;
+#endif
+ exp = -1023;
+ }
+ }
+ u.s.manh = mhi;
+ u.s.manl = mlo;
+ u.s.exp = exp + 1023;
+ u.s.sig = (sign < 0);
+ return u.d;
+}
+#else
+
+
+#define ONE_LIMB (GMP_LIMB_BITS == 64 && 2*GMP_NUMB_BITS >= 53)
+#define TWO_LIMBS (GMP_LIMB_BITS == 32 && 3*GMP_NUMB_BITS >= 53)
+
+ if (_GMP_IEEE_FLOATS && (ONE_LIMB || TWO_LIMBS))
+ {
+ union ieee_double_extract u;
+ mp_limb_t m0, m1, m2, rmask;
+ int lshift, rshift;
+
+ m0 = up[size-1]; /* high limb */
+ m1 = (size >= 2 ? up[size-2] : 0); /* second highest limb */
+ count_leading_zeros (lshift, m0);
+
+ /* relative to just under high non-zero bit */
+ exp -= (lshift - GMP_NAIL_BITS) + 1;
+
+ if (ONE_LIMB)
+ {
+ /* lshift to have high of m0 non-zero, and collapse nails */
+ rshift = GMP_LIMB_BITS - lshift;
+ m1 <<= GMP_NAIL_BITS;
+ rmask = GMP_NAIL_BITS == 0 && lshift == 0 ? 0 : MP_LIMB_T_MAX;
+ m0 = (m0 << lshift) | ((m1 >> rshift) & rmask);
+
+ /* rshift back to have bit 53 of m0 the high non-zero */
+ m0 >>= 11;
+ }
+ else /* TWO_LIMBS */
+ {
+ m2 = (size >= 3 ? up[size-3] : 0); /* third highest limb */
+
+ /* collapse nails from m1 and m2 */
+#if GMP_NAIL_BITS != 0
+ m1 = (m1 << GMP_NAIL_BITS) | (m2 >> (GMP_NUMB_BITS-GMP_NAIL_BITS));
+ m2 <<= 2*GMP_NAIL_BITS;
+#endif
+
+ /* lshift to have high of m0:m1 non-zero, collapse nails from m0 */
+ rshift = GMP_LIMB_BITS - lshift;
+ rmask = (GMP_NAIL_BITS == 0 && lshift == 0 ? 0 : MP_LIMB_T_MAX);
+ m0 = (m0 << lshift) | ((m1 >> rshift) & rmask);
+ m1 = (m1 << lshift) | ((m2 >> rshift) & rmask);
+
+ /* rshift back to have bit 53 of m0:m1 the high non-zero */
+ m1 = (m1 >> 11) | (m0 << (GMP_LIMB_BITS-11));
+ m0 >>= 11;
+ }
+
if (UNLIKELY (exp >= CONST_1024))
{
/* overflow, return infinity */
ieee_infinity:
- mhi = 0;
- mlo = 0;
+ m0 = 0;
+ m1 = 0;
exp = 1024;
}
else if (UNLIKELY (exp <= CONST_NEG_1023))
{
- int rshift;
-
if (LIKELY (exp <= CONST_NEG_1022_SUB_53))
return 0.0; /* denorm underflows to zero */
rshift = -1022 - exp;
ASSERT (rshift > 0 && rshift < 53);
-#if GMP_LIMB_BITS > 53
- mlo >>= rshift;
- mhi = mlo >> 32;
-#else
- if (rshift >= 32)
+ if (ONE_LIMB)
{
- mlo = mhi;
- mhi = 0;
- rshift -= 32;
+ m0 >>= rshift;
}
- lshift = GMP_LIMB_BITS - rshift;
- mlo = (mlo >> rshift) | (rshift == 0 ? 0 : mhi << lshift);
- mhi >>= rshift;
-#endif
- exp = -1023;
- }
- u.s.manh = mhi;
- u.s.manl = mlo;
- u.s.exp = exp + 1023;
- u.s.sig = (sign < 0);
- return u.d;
- }
-#define FORMAT_RECOGNIZED 1
-#endif
-
-#if HAVE_DOUBLE_VAX_D
- {
- union double_extract u;
-
- up += size;
-
- mhi = up[-1];
-
- count_leading_zeros (lshift, mhi);
- exp -= lshift;
- mhi <<= lshift;
-
- mlo = 0;
- if (size > 1)
- {
- mlo = up[-2];
- if (lshift != 0)
- mhi += mlo >> (GMP_LIMB_BITS - lshift);
- mlo <<= lshift;
-
- if (size > 2 && lshift > 8)
+ else /* TWO_LIMBS */
{
- x = up[-3];
- mlo += x >> (GMP_LIMB_BITS - lshift);
+ if (rshift >= 32)
+ {
+ m1 = m0;
+ m0 = 0;
+ rshift -= 32;
+ }
+ lshift = GMP_LIMB_BITS - rshift;
+ m1 = (m1 >> rshift) | (rshift == 0 ? 0 : m0 << lshift);
+ m0 >>= rshift;
}
+ exp = -1023;
}
- if (UNLIKELY (exp >= 128))
+ if (ONE_LIMB)
{
- /* overflow, return maximum number */
- mhi = 0xffffffff;
- mlo = 0xffffffff;
- exp = 127;
+#if GMP_LIMB_BITS > 32 /* avoid compiler warning about big shift */
+ u.s.manh = m0 >> 32;
+#endif
+ u.s.manl = m0;
}
- else if (UNLIKELY (exp < -128))
+ else /* TWO_LIMBS */
{
- return 0.0; /* underflows to zero */
+ u.s.manh = m0;
+ u.s.manl = m1;
}
- u.s.man3 = mhi >> 24; /* drop msb, since implicit */
- u.s.man2 = mhi >> 8;
- u.s.man1 = (mhi << 8) + (mlo >> 24);
- u.s.man0 = mlo >> 8;
- u.s.exp = exp + 128;
- u.s.sig = sign < 0;
+ u.s.exp = exp + 1023;
+ u.s.sig = (sign < 0);
return u.d;
}
-#define FORMAT_RECOGNIZED 1
-#endif
-
-#if ! FORMAT_RECOGNIZED
- { /* Non-IEEE or strange limb size, do something generic. */
- mp_size_t i;
- double d, weight;
- unsigned long uexp;
-
- /* First generate an fp number disregarding exp, instead keeping things
- within the numb base factor from 1, which should prevent overflow and
- underflow even for the most exponent limited fp formats. The
- termination criteria should be refined, since we now include too many
- limbs. */
- weight = 1/MP_BASE_AS_DOUBLE;
- d = up[size - 1];
- for (i = size - 2; i >= 0; i--)
+ else
+ {
+ /* Non-IEEE or strange limb size, do something generic. */
+
+ mp_size_t i;
+ mp_limb_t limb, bit;
+ int shift;
+ double base, factor, prev_factor, d, new_d, diff;
+
+ /* "limb" is "up[i]" the limb being examined, "bit" is a mask for the
+ bit being examined, initially the highest non-zero bit. */
+ i = size-1;
+ limb = up[i];
+ count_leading_zeros (shift, limb);
+ bit = GMP_LIMB_HIGHBIT >> shift;
+
+ /* relative to just under high non-zero bit */
+ exp -= (shift - GMP_NAIL_BITS) + 1;
+
+ /* Power up "factor" to 2^exp, being the value of the "bit" in "limb"
+ being examined. */
+ base = (exp >= 0 ? 2.0 : 0.5);
+ exp = ABS (exp);
+ factor = 1.0;
+ for (;;)
{
- d += up[i] * weight;
- weight /= MP_BASE_AS_DOUBLE;
- if (weight == 0)
+ if (exp & 1)
+ {
+ prev_factor = factor;
+ factor *= base;
+ FORCE_DOUBLE (factor);
+ if (factor == 0.0)
+ return 0.0; /* underflow */
+ if (factor == prev_factor)
+ {
+ d = factor; /* overflow, apparent infinity */
+ goto generic_done;
+ }
+ }
+ exp >>= 1;
+ if (exp == 0)
break;
+ base *= base;
}
- /* Now apply exp. */
- exp -= GMP_NUMB_BITS;
- if (exp > 0)
- {
- weight = 2.0;
- uexp = exp;
- }
- else
- {
- weight = 0.5;
- uexp = 1 - (unsigned long) (exp + 1);
- }
-#if 1
- /* Square-and-multiply exponentiation. */
- if (uexp & 1)
- d *= weight;
- while (uexp >>= 1)
- {
- weight *= weight;
- if (uexp & 1)
- d *= weight;
- }
-#else
- /* Plain exponentiation. */
- while (uexp > 0)
+ /* Add a "factor" for each non-zero bit, working from high to low.
+ Stop if any rounding occurs, hence implementing a truncation.
+
+ Note no attention is paid to DBL_MANT_DIG, since the effective
+ number of bits in the mantissa isn't constant when in denorm range.
+ We also encountered an ARM system with apparently somewhat doubtful
+ software floats where DBL_MANT_DIG claimed 53 bits but only 32
+ actually worked. */
+
+ d = factor; /* high bit */
+ for (;;)
{
- d *= weight;
- uexp--;
+ factor *= 0.5; /* next bit */
+ bit >>= 1;
+ if (bit == 0)
+ {
+ /* next limb, if any */
+ i--;
+ if (i < 0)
+ break;
+ limb = up[i];
+ bit = GMP_NUMB_HIGHBIT;
+ }
+
+ if (bit & limb)
+ {
+ new_d = d + factor;
+ FORCE_DOUBLE (new_d);
+ diff = new_d - d;
+ if (diff != factor)
+ break; /* rounding occured, stop now */
+ d = new_d;
+ }
}
-#endif
- return sign >= 0 ? d : -d;
+ generic_done:
+ return (sign >= 0 ? d : -d);
}
#endif
}
diff --git a/gmp/mpn/generic/get_str.c b/gmp/mpn/generic/get_str.c
index 42e93c9cee..df007578cc 100644
--- a/gmp/mpn/generic/get_str.c
+++ b/gmp/mpn/generic/get_str.c
@@ -7,34 +7,23 @@
FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE
GNU MP RELEASE.
-Copyright 1991-1994, 1996, 2000-2002, 2004, 2006-2008, 2011, 2012 Free Software
-Foundation, Inc.
+Copyright 1991, 1992, 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2006, 2007,
+2008 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
@@ -361,8 +350,7 @@ mpn_dc_get_str (unsigned char *str, size_t len,
/* There are no leading zeros on the digits generated at str, but that's not
- currently a documented feature. The current mpz_out_str and mpz_get_str
- rely on it. */
+ currently a documented feature. */
size_t
mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un)
@@ -394,7 +382,7 @@ mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un)
int bit_pos;
mp_size_t i;
unsigned char *s = str;
- mp_bitcnt_t bits;
+ unsigned long bits;
n1 = up[un - 1];
count_leading_zeros (cnt, n1);
@@ -403,11 +391,11 @@ mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un)
R + bits_per_digit * n when input ends in nth least significant
nibble. */
- bits = (mp_bitcnt_t) GMP_NUMB_BITS * un - cnt + GMP_NAIL_BITS;
+ bits = GMP_NUMB_BITS * un - cnt + GMP_NAIL_BITS;
cnt = bits % bits_per_digit;
if (cnt != 0)
bits += bits_per_digit - cnt;
- bit_pos = bits - (mp_bitcnt_t) (un - 1) * GMP_NUMB_BITS;
+ bit_pos = bits - (un - 1) * GMP_NUMB_BITS;
/* Fast loop for bit output. */
i = un - 1;
@@ -451,12 +439,9 @@ mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un)
mp_size_t n_pows, xn, pn, exptab[GMP_LIMB_BITS], bexp;
mp_limb_t cy;
mp_size_t shift;
- size_t ndig;
-
- DIGITS_IN_BASE_PER_LIMB (ndig, un, base);
- xn = 1 + ndig / mp_bases[base].chars_per_limb; /* FIXME: scalar integer division */
n_pows = 0;
+ xn = 1 + un*(mp_bases[base].chars_per_bit_exactly*GMP_NUMB_BITS)/mp_bases[base].chars_per_limb;
for (pn = xn; pn != 1; pn = (pn + 1) >> 1)
{
exptab[n_pows] = pn;
@@ -488,7 +473,7 @@ mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un)
ASSERT_ALWAYS (powtab_mem_ptr < powtab_mem + mpn_dc_get_str_powtab_alloc (un));
- mpn_sqr (t, p, n);
+ mpn_sqr_n (t, p, n);
digits_in_base *= 2;
n *= 2; n -= t[n - 1] == 0;
@@ -546,7 +531,7 @@ mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un)
/* Using our precomputed powers, now in powtab[], convert our number. */
tmp = TMP_BALLOC_LIMBS (mpn_dc_get_str_itch (un));
- out_len = mpn_dc_get_str (str, 0, up, un, powtab + (pi - 1), tmp) - str;
+ out_len = mpn_dc_get_str (str, 0, up, un, powtab - 1 + pi, tmp) - str;
TMP_FREE;
return out_len;
diff --git a/gmp/mpn/generic/gmp-mparam.h b/gmp/mpn/generic/gmp-mparam.h
index 7dc057aa0c..b22b96ef67 100644
--- a/gmp/mpn/generic/gmp-mparam.h
+++ b/gmp/mpn/generic/gmp-mparam.h
@@ -5,29 +5,18 @@ Copyright 1991, 1993, 1994, 2000 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-/* Values for GMP_LIMB_BITS etc will be determined by ./configure and put
+/* Values for BITS_PER_MP_LIMB etc will be determined by ./configure and put
in config.h. */
diff --git a/gmp/mpn/generic/hgcd.c b/gmp/mpn/generic/hgcd.c
index e27a9bdd82..5fc650bbd9 100644
--- a/gmp/mpn/generic/hgcd.c
+++ b/gmp/mpn/generic/hgcd.c
@@ -4,38 +4,497 @@
SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2003-2005, 2008, 2011, 2012 Free Software Foundation, Inc.
+Copyright 2003, 2004, 2005, 2008 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
+/* For input of size n, matrix elements are of size at most ceil(n/2)
+ - 1, but we need two limbs extra. */
+void
+mpn_hgcd_matrix_init (struct hgcd_matrix *M, mp_size_t n, mp_ptr p)
+{
+ mp_size_t s = (n+1)/2 + 1;
+ M->alloc = s;
+ M->n = 1;
+ MPN_ZERO (p, 4 * s);
+ M->p[0][0] = p;
+ M->p[0][1] = p + s;
+ M->p[1][0] = p + 2 * s;
+ M->p[1][1] = p + 3 * s;
+
+ M->p[0][0][0] = M->p[1][1][0] = 1;
+}
+
+/* Updated column COL, adding in column (1-COL). */
+static void
+hgcd_matrix_update_1 (struct hgcd_matrix *M, unsigned col)
+{
+ mp_limb_t c0, c1;
+ ASSERT (col < 2);
+
+ c0 = mpn_add_n (M->p[0][col], M->p[0][0], M->p[0][1], M->n);
+ c1 = mpn_add_n (M->p[1][col], M->p[1][0], M->p[1][1], M->n);
+
+ M->p[0][col][M->n] = c0;
+ M->p[1][col][M->n] = c1;
+
+ M->n += (c0 | c1) != 0;
+ ASSERT (M->n < M->alloc);
+}
+
+/* Updated column COL, adding in column Q * (1-COL). Temporary
+ * storage: qn + n <= M->alloc, where n is the size of the largest
+ * element in column 1 - COL. */
+static void
+hgcd_matrix_update_q (struct hgcd_matrix *M, mp_srcptr qp, mp_size_t qn,
+ unsigned col, mp_ptr tp)
+{
+ ASSERT (col < 2);
+
+ if (qn == 1)
+ {
+ mp_limb_t q = qp[0];
+ mp_limb_t c0, c1;
+
+ c0 = mpn_addmul_1 (M->p[0][col], M->p[0][1-col], M->n, q);
+ c1 = mpn_addmul_1 (M->p[1][col], M->p[1][1-col], M->n, q);
+
+ M->p[0][col][M->n] = c0;
+ M->p[1][col][M->n] = c1;
+
+ M->n += (c0 | c1) != 0;
+ }
+ else
+ {
+ unsigned row;
+
+ /* Carries for the unlikely case that we get both high words
+ from the multiplication and carries from the addition. */
+ mp_limb_t c[2];
+ mp_size_t n;
+
+ /* The matrix will not necessarily grow in size by qn, so we
+ need normalization in order not to overflow M. */
+
+ for (n = M->n; n + qn > M->n; n--)
+ {
+ ASSERT (n > 0);
+ if (M->p[0][1-col][n-1] > 0 || M->p[1][1-col][n-1] > 0)
+ break;
+ }
+
+ ASSERT (qn + n <= M->alloc);
+
+ for (row = 0; row < 2; row++)
+ {
+ if (qn <= n)
+ mpn_mul (tp, M->p[row][1-col], n, qp, qn);
+ else
+ mpn_mul (tp, qp, qn, M->p[row][1-col], n);
+
+ ASSERT (n + qn >= M->n);
+ c[row] = mpn_add (M->p[row][col], tp, n + qn, M->p[row][col], M->n);
+ }
+ if (c[0] | c[1])
+ {
+ M->n = n + qn + 1;
+ M->p[0][col][n-1] = c[0];
+ M->p[1][col][n-1] = c[1];
+ }
+ else
+ {
+ n += qn;
+ n -= (M->p[0][col][n-1] | M->p[1][col][n-1]) == 0;
+ if (n > M->n)
+ M->n = n;
+ }
+ }
+
+ ASSERT (M->n < M->alloc);
+}
+
+/* Multiply M by M1 from the right. Since the M1 elements fit in
+ GMP_NUMB_BITS - 1 bits, M grows by at most one limb. Needs
+ temporary space M->n */
+static void
+hgcd_matrix_mul_1 (struct hgcd_matrix *M, const struct hgcd_matrix1 *M1,
+ mp_ptr tp)
+{
+ mp_size_t n0, n1;
+
+ /* Could avoid copy by some swapping of pointers. */
+ MPN_COPY (tp, M->p[0][0], M->n);
+ n0 = mpn_hgcd_mul_matrix1_vector (M1, M->p[0][0], tp, M->p[0][1], M->n);
+ MPN_COPY (tp, M->p[1][0], M->n);
+ n1 = mpn_hgcd_mul_matrix1_vector (M1, M->p[1][0], tp, M->p[1][1], M->n);
+
+ /* Depends on zero initialization */
+ M->n = MAX(n0, n1);
+ ASSERT (M->n < M->alloc);
+}
+
+/* Perform a few steps, using some of mpn_hgcd2, subtraction and
+ division. Reduces the size by almost one limb or more, but never
+ below the given size s. Return new size for a and b, or 0 if no
+ more steps are possible.
+
+ If hgcd2 succeds, needs temporary space for hgcd_matrix_mul_1, M->n
+ limbs, and hgcd_mul_matrix1_inverse_vector, n limbs. If hgcd2
+ fails, needs space for the quotient, qn <= n - s + 1 limbs, for and
+ hgcd_matrix_update_q, qn + (size of the appropriate column of M) <=
+ resulting size of $.
+
+ If N is the input size to the calling hgcd, then s = floor(N/2) +
+ 1, M->n < N, qn + matrix size <= n - s + 1 + n - s = 2 (n - s) + 1
+ < N, so N is sufficient.
+*/
+
+static mp_size_t
+hgcd_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s,
+ struct hgcd_matrix *M, mp_ptr tp)
+{
+ struct hgcd_matrix1 M1;
+ mp_limb_t mask;
+ mp_limb_t ah, al, bh, bl;
+ mp_size_t an, bn, qn;
+ int col;
+
+ ASSERT (n > s);
+
+ mask = ap[n-1] | bp[n-1];
+ ASSERT (mask > 0);
+
+ if (n == s + 1)
+ {
+ if (mask < 4)
+ goto subtract;
+
+ ah = ap[n-1]; al = ap[n-2];
+ bh = bp[n-1]; bl = bp[n-2];
+ }
+ else if (mask & GMP_NUMB_HIGHBIT)
+ {
+ ah = ap[n-1]; al = ap[n-2];
+ bh = bp[n-1]; bl = bp[n-2];
+ }
+ else
+ {
+ int shift;
+
+ count_leading_zeros (shift, mask);
+ ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
+ al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
+ bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
+ bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
+ }
+
+ /* Try an mpn_hgcd2 step */
+ if (mpn_hgcd2 (ah, al, bh, bl, &M1))
+ {
+ /* Multiply M <- M * M1 */
+ hgcd_matrix_mul_1 (M, &M1, tp);
+
+ /* Can't swap inputs, so we need to copy. */
+ MPN_COPY (tp, ap, n);
+ /* Multiply M1^{-1} (a;b) */
+ return mpn_hgcd_mul_matrix1_inverse_vector (&M1, ap, tp, bp, n);
+ }
+
+ subtract:
+ /* There are two ways in which mpn_hgcd2 can fail. Either one of ah and
+ bh was too small, or ah, bh were (almost) equal. Perform one
+ subtraction step (for possible cancellation of high limbs),
+ followed by one division. */
+
+ /* Since we must ensure that #(a-b) > s, we handle cancellation of
+ high limbs explicitly up front. (FIXME: Or is it better to just
+ subtract, normalize, and use an addition to undo if it turns out
+ the the difference is too small?) */
+ for (an = n; an > s; an--)
+ if (ap[an-1] != bp[an-1])
+ break;
+
+ if (an == s)
+ return 0;
+
+ /* Maintain a > b. When needed, swap a and b, and let col keep track
+ of how to update M. */
+ if (ap[an-1] > bp[an-1])
+ {
+ /* a is largest. In the subtraction step, we need to update
+ column 1 of M */
+ col = 1;
+ }
+ else
+ {
+ MP_PTR_SWAP (ap, bp);
+ col = 0;
+ }
+
+ bn = n;
+ MPN_NORMALIZE (bp, bn);
+ if (bn <= s)
+ return 0;
+
+ /* We have #a, #b > s. When is it possible that #(a-b) < s? For
+ cancellation to happen, the numbers must be of the form
+
+ a = x + 1, 0, ..., 0, al
+ b = x , GMP_NUMB_MAX, ..., GMP_NUMB_MAX, bl
+
+ where al, bl denotes the least significant k limbs. If al < bl,
+ then #(a-b) < k, and if also high(al) != 0, high(bl) != GMP_NUMB_MAX,
+ then #(a-b) = k. If al >= bl, then #(a-b) = k + 1. */
+
+ if (ap[an-1] == bp[an-1] + 1)
+ {
+ mp_size_t k;
+ int c;
+ for (k = an-1; k > s; k--)
+ if (ap[k-1] != 0 || bp[k-1] != GMP_NUMB_MAX)
+ break;
+
+ MPN_CMP (c, ap, bp, k);
+ if (c < 0)
+ {
+ mp_limb_t cy;
+
+ /* The limbs from k and up are cancelled. */
+ if (k == s)
+ return 0;
+ cy = mpn_sub_n (ap, ap, bp, k);
+ ASSERT (cy == 1);
+ an = k;
+ }
+ else
+ {
+ ASSERT_NOCARRY (mpn_sub_n (ap, ap, bp, k));
+ ap[k] = 1;
+ an = k + 1;
+ }
+ }
+ else
+ ASSERT_NOCARRY (mpn_sub_n (ap, ap, bp, an));
+
+ ASSERT (an > s);
+ ASSERT (ap[an-1] > 0);
+ ASSERT (bn > s);
+ ASSERT (bp[bn-1] > 0);
+
+ hgcd_matrix_update_1 (M, col);
+
+ if (an < bn)
+ {
+ MPN_PTR_SWAP (ap, an, bp, bn);
+ col ^= 1;
+ }
+ else if (an == bn)
+ {
+ int c;
+ MPN_CMP (c, ap, bp, an);
+ if (c < 0)
+ {
+ MP_PTR_SWAP (ap, bp);
+ col ^= 1;
+ }
+ }
+
+ /* Divide a / b. */
+ qn = an + 1 - bn;
+
+ /* FIXME: We could use an approximate division, that may return a
+ too small quotient, and only guarantee that the size of r is
+ almost the size of b. FIXME: Let ap and remainder overlap. */
+ mpn_tdiv_qr (tp, ap, 0, ap, an, bp, bn);
+ qn -= (tp[qn -1] == 0);
+
+ /* Normalize remainder */
+ an = bn;
+ for ( ; an > s; an--)
+ if (ap[an-1] > 0)
+ break;
+
+ if (an <= s)
+ {
+ /* Quotient is too large */
+ mp_limb_t cy;
+
+ cy = mpn_add (ap, bp, bn, ap, an);
+
+ if (cy > 0)
+ {
+ ASSERT (bn < n);
+ ap[bn] = cy;
+ bp[bn] = 0;
+ bn++;
+ }
+
+ MPN_DECR_U (tp, qn, 1);
+ qn -= (tp[qn-1] == 0);
+ }
+
+ if (qn > 0)
+ hgcd_matrix_update_q (M, tp, qn, col, tp + qn);
+
+ return bn;
+}
+
+/* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M
+ with elements of size at most (n+1)/2 - 1. Returns new size of a,
+ b, or zero if no reduction is possible. */
+mp_size_t
+mpn_hgcd_lehmer (mp_ptr ap, mp_ptr bp, mp_size_t n,
+ struct hgcd_matrix *M, mp_ptr tp)
+{
+ mp_size_t s = n/2 + 1;
+ mp_size_t nn;
+
+ ASSERT (n > s);
+ ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
+
+ nn = hgcd_step (n, ap, bp, s, M, tp);
+ if (!nn)
+ return 0;
+
+ for (;;)
+ {
+ n = nn;
+ ASSERT (n > s);
+ nn = hgcd_step (n, ap, bp, s, M, tp);
+ if (!nn )
+ return n;
+ }
+}
+
+/* Multiply M by M1 from the right. Needs 4*(M->n + M1->n) + 5 limbs
+ of temporary storage (see mpn_matrix22_mul_itch). */
+void
+mpn_hgcd_matrix_mul (struct hgcd_matrix *M, const struct hgcd_matrix *M1,
+ mp_ptr tp)
+{
+ mp_size_t n;
+
+  /* About the new size of M's elements. Since M1's diagonal elements
+ are > 0, no element can decrease. The new elements are of size
+ M->n + M1->n, one limb more or less. The computation of the
+ matrix product produces elements of size M->n + M1->n + 1. But
+ the true size, after normalization, may be three limbs smaller. */
+
+ /* FIXME: Strassen multiplication gives only a small speedup. In FFT
+ multiplication range, this function could be sped up quite a lot
+ using invariance. */
+ ASSERT (M->n + M1->n < M->alloc);
+
+ ASSERT ((M->p[0][0][M->n-1] | M->p[0][1][M->n-1]
+ | M->p[1][0][M->n-1] | M->p[1][1][M->n-1]) > 0);
+
+ ASSERT ((M1->p[0][0][M1->n-1] | M1->p[0][1][M1->n-1]
+ | M1->p[1][0][M1->n-1] | M1->p[1][1][M1->n-1]) > 0);
+
+ mpn_matrix22_mul (M->p[0][0], M->p[0][1],
+ M->p[1][0], M->p[1][1], M->n,
+ M1->p[0][0], M1->p[0][1],
+ M1->p[1][0], M1->p[1][1], M1->n, tp);
+
+ /* Index of last potentially non-zero limb, size is one greater. */
+ n = M->n + M1->n;
+
+ n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
+ n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
+ n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
+
+ ASSERT ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) > 0);
+
+ M->n = n + 1;
+}
+
+/* Multiplies the least significant p limbs of (a;b) by M^-1.
+ Temporary space needed: 2 * (p + M->n)*/
+mp_size_t
+mpn_hgcd_matrix_adjust (struct hgcd_matrix *M,
+ mp_size_t n, mp_ptr ap, mp_ptr bp,
+ mp_size_t p, mp_ptr tp)
+{
+ /* M^-1 (a;b) = (r11, -r01; -r10, r00) (a ; b)
+     = (r11 a - r01 b; - r10 a + r00 b) */
+
+ mp_ptr t0 = tp;
+ mp_ptr t1 = tp + p + M->n;
+ mp_limb_t ah, bh;
+ mp_limb_t cy;
+
+ ASSERT (p + M->n < n);
+
+ /* First compute the two values depending on a, before overwriting a */
+
+ if (M->n >= p)
+ {
+ mpn_mul (t0, M->p[1][1], M->n, ap, p);
+ mpn_mul (t1, M->p[1][0], M->n, ap, p);
+ }
+ else
+ {
+ mpn_mul (t0, ap, p, M->p[1][1], M->n);
+ mpn_mul (t1, ap, p, M->p[1][0], M->n);
+ }
+
+ /* Update a */
+ MPN_COPY (ap, t0, p);
+ ah = mpn_add (ap + p, ap + p, n - p, t0 + p, M->n);
+
+ if (M->n >= p)
+ mpn_mul (t0, M->p[0][1], M->n, bp, p);
+ else
+ mpn_mul (t0, bp, p, M->p[0][1], M->n);
+
+ cy = mpn_sub (ap, ap, n, t0, p + M->n);
+ ASSERT (cy <= ah);
+ ah -= cy;
+
+ /* Update b */
+ if (M->n >= p)
+ mpn_mul (t0, M->p[0][0], M->n, bp, p);
+ else
+ mpn_mul (t0, bp, p, M->p[0][0], M->n);
+
+ MPN_COPY (bp, t0, p);
+ bh = mpn_add (bp + p, bp + p, n - p, t0 + p, M->n);
+ cy = mpn_sub (bp, bp, n, t1, p + M->n);
+ ASSERT (cy <= bh);
+ bh -= cy;
+
+ if (ah > 0 || bh > 0)
+ {
+ ap[n] = ah;
+ bp[n] = bh;
+ n++;
+ }
+ else
+ {
+ /* The subtraction can reduce the size by at most one limb. */
+ if (ap[n-1] == 0 && bp[n-1] == 0)
+ n--;
+ }
+ ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
+ return n;
+}
/* Size analysis for hgcd:
@@ -46,15 +505,16 @@ see https://www.gnu.org/licenses/. */
Let S(r) denote the required storage. For M1 we need 4 * (ceil(n1/2) + 1)
= 4 * (ceil(n/4) + 1), for the hgcd_matrix_adjust call, we need n + 2,
- and for the hgcd_matrix_mul, we may need 3 ceil(n/2) + 8. In total,
- 4 * ceil(n/4) + 3 ceil(n/2) + 12 <= 10 ceil(n/4) + 12.
+ and for the hgcd_matrix_mul, we may need 4 ceil(n/2) + 1. In total,
+ 4 * ceil(n/4) + 4 ceil(n/2) + 5 <= 12 ceil(n/4) + 5.
For the recursive call, we need S(n1) = S(ceil(n/2)).
- S(n) <= 10*ceil(n/4) + 12 + S(ceil(n/2))
- <= 10*(ceil(n/4) + ... + ceil(n/2^(1+k))) + 12k + S(ceil(n/2^k))
- <= 10*(2 ceil(n/4) + k) + 12k + S(ceil(n/2^k))
- <= 20 ceil(n/4) + 22k + S(ceil(n/2^k))
+ S(n) <= 12*ceil(n/4) + 5 + S(ceil(n/2))
+ <= 12*(ceil(n/4) + ... + ceil(n/2^(1+k))) + 5k + S(ceil(n/2^k))
+ <= 12*(2 ceil(n/4) + k) + 5k + S(n/2^k)
+ <= 24 ceil(n/4) + 17k + S(n/2^k)
+
*/
mp_size_t
@@ -65,14 +525,15 @@ mpn_hgcd_itch (mp_size_t n)
mp_size_t nscaled;
if (BELOW_THRESHOLD (n, HGCD_THRESHOLD))
- return n;
+ return MPN_HGCD_LEHMER_ITCH (n);
/* Get the recursion depth. */
nscaled = (n - 1) / (HGCD_THRESHOLD - 1);
count_leading_zeros (count, nscaled);
k = GMP_LIMB_BITS - count;
- return 20 * ((n+3) / 4) + 22 * k + HGCD_THRESHOLD;
+ return 24 * ((n+3) / 4) + 17 * k
+ + MPN_HGCD_LEHMER_ITCH (HGCD_THRESHOLD);
}
/* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M
@@ -84,8 +545,9 @@ mpn_hgcd (mp_ptr ap, mp_ptr bp, mp_size_t n,
struct hgcd_matrix *M, mp_ptr tp)
{
mp_size_t s = n/2 + 1;
+ mp_size_t n2 = (3*n)/4 + 1;
- mp_size_t nn;
+ mp_size_t p, nn;
int success = 0;
if (n <= s)
@@ -97,83 +559,65 @@ mpn_hgcd (mp_ptr ap, mp_ptr bp, mp_size_t n,
ASSERT ((n+1)/2 - 1 < M->alloc);
- if (ABOVE_THRESHOLD (n, HGCD_THRESHOLD))
- {
- mp_size_t n2 = (3*n)/4 + 1;
- mp_size_t p = n/2;
+ if (BELOW_THRESHOLD (n, HGCD_THRESHOLD))
+ return mpn_hgcd_lehmer (ap, bp, n, M, tp);
- nn = mpn_hgcd_reduce (M, ap, bp, n, p, tp);
- if (nn)
- {
- n = nn;
- success = 1;
- }
+ p = n/2;
+ nn = mpn_hgcd (ap + p, bp + p, n - p, M, tp);
+ if (nn > 0)
+ {
+ /* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1)
+ = 2 (n - 1) */
+ n = mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp);
+ success = 1;
+ }
+ while (n > n2)
+ {
+ /* Needs n + 1 storage */
+ nn = hgcd_step (n, ap, bp, s, M, tp);
+ if (!nn)
+ return success ? n : 0;
+ n = nn;
+ success = 1;
+ }
- /* NOTE: It appears this loop never runs more than once (at
- least when not recursing to hgcd_appr). */
- while (n > n2)
- {
- /* Needs n + 1 storage */
- nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
- if (!nn)
- return success ? n : 0;
+ if (n > s + 2)
+ {
+ struct hgcd_matrix M1;
+ mp_size_t scratch;
- n = nn;
- success = 1;
- }
+ p = 2*s - n + 1;
+ scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
- if (n > s + 2)
+ mpn_hgcd_matrix_init(&M1, n - p, tp);
+ nn = mpn_hgcd (ap + p, bp + p, n - p, &M1, tp + scratch);
+ if (nn > 0)
{
- struct hgcd_matrix M1;
- mp_size_t scratch;
-
- p = 2*s - n + 1;
- scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
-
- mpn_hgcd_matrix_init(&M1, n - p, tp);
-
- /* FIXME: Should use hgcd_reduce, but that may require more
- scratch space, which requires review. */
-
- nn = mpn_hgcd (ap + p, bp + p, n - p, &M1, tp + scratch);
- if (nn > 0)
- {
- /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
- ASSERT (M->n + 2 >= M1.n);
-
- /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
- then either q or q + 1 is a correct quotient, and M1 will
- start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
- rules out the case that the size of M * M1 is much
- smaller than the expected M->n + M1->n. */
-
- ASSERT (M->n + M1.n < M->alloc);
-
- /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1)
- = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */
- n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch);
-
- /* We need a bound for of M->n + M1.n. Let n be the original
- input size. Then
-
- ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
-
- and it follows that
-
- M.n + M1.n <= ceil(n/2) + 1
-
- Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
- amount of needed scratch space. */
- mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
- success = 1;
- }
+ /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
+ ASSERT (M->n + 2 >= M1.n);
+
+ /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
+ then either q or q + 1 is a correct quotient, and M1 will
+ start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
+ rules out the case that the size of M * M1 is much
+ smaller than the expected M->n + M1->n. */
+
+ ASSERT (M->n + M1.n < M->alloc);
+
+ /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1)
+ = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */
+ n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch);
+ /* Needs 4 ceil(n/2) + 1 */
+ mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
+ success = 1;
}
}
+ /* This really is the base case */
for (;;)
{
/* Needs s+3 < n */
- nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
+ nn = hgcd_step (n, ap, bp, s, M, tp);
if (!nn)
return success ? n : 0;
diff --git a/gmp/mpn/generic/hgcd2.c b/gmp/mpn/generic/hgcd2.c
index 129637063f..ffc8c44f67 100644
--- a/gmp/mpn/generic/hgcd2.c
+++ b/gmp/mpn/generic/hgcd2.c
@@ -4,33 +4,23 @@
SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 1996, 1998, 2000-2004, 2008, 2012 Free Software Foundation, Inc.
+Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2008 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
@@ -209,7 +199,7 @@ div2 (mp_ptr rp,
/* Reduces a,b until |a-b| (almost) fits in one limb + 1 bit. Constructs
matrix M. Returns 1 if we make progress, i.e. can perform at least
- one subtraction. Otherwise returns zero. */
+   one subtraction. Otherwise returns zero. */
/* FIXME: Possible optimizations:
@@ -348,6 +338,8 @@ mpn_hgcd2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
for (;;)
{
ASSERT (ah >= bh);
+ if (ah == bh)
+ break;
ah -= bh;
if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
@@ -377,6 +369,8 @@ mpn_hgcd2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
}
subtract_a1:
ASSERT (bh >= ah);
+ if (ah == bh)
+ break;
bh -= ah;
if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
@@ -445,3 +439,31 @@ mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *M,
n += (ah | bh) > 0;
return n;
}
+
+/* Sets (r;b) = M^{-1}(a;b), with M^{-1} = (u11, -u01; -u10, u00) from
+ the left. Uses three buffers, to avoid a copy. */
+mp_size_t
+mpn_hgcd_mul_matrix1_inverse_vector (const struct hgcd_matrix1 *M,
+ mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)
+{
+ mp_limb_t h0, h1;
+
+ /* Compute (r;b) <-- (u11 a - u01 b; -u10 a + u00 b) as
+
+ r = u11 * a
+ r -= u01 * b
+ b *= u00
+ b -= u10 * a
+ */
+
+ h0 = mpn_mul_1 (rp, ap, n, M->u[1][1]);
+ h1 = mpn_submul_1 (rp, bp, n, M->u[0][1]);
+ ASSERT (h0 == h1);
+
+ h0 = mpn_mul_1 (bp, bp, n, M->u[0][0]);
+ h1 = mpn_submul_1 (bp, ap, n, M->u[1][0]);
+ ASSERT (h0 == h1);
+
+ n -= (rp[n-1] | bp[n-1]) == 0;
+ return n;
+}
diff --git a/gmp/mpn/generic/hgcd2_jacobi.c b/gmp/mpn/generic/hgcd2_jacobi.c
deleted file mode 100644
index e59c32a341..0000000000
--- a/gmp/mpn/generic/hgcd2_jacobi.c
+++ /dev/null
@@ -1,366 +0,0 @@
-/* hgcd2_jacobi.c
-
- THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 1996, 1998, 2000-2004, 2008, 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-#if GMP_NAIL_BITS > 0
-#error Nails not supported.
-#endif
-
-/* FIXME: Duplicated in hgcd2.c. Should move to gmp-impl.h, and
- possibly be renamed. */
-static inline mp_limb_t
-div1 (mp_ptr rp,
- mp_limb_t n0,
- mp_limb_t d0)
-{
- mp_limb_t q = 0;
-
- if ((mp_limb_signed_t) n0 < 0)
- {
- int cnt;
- for (cnt = 1; (mp_limb_signed_t) d0 >= 0; cnt++)
- {
- d0 = d0 << 1;
- }
-
- q = 0;
- while (cnt)
- {
- q <<= 1;
- if (n0 >= d0)
- {
- n0 = n0 - d0;
- q |= 1;
- }
- d0 = d0 >> 1;
- cnt--;
- }
- }
- else
- {
- int cnt;
- for (cnt = 0; n0 >= d0; cnt++)
- {
- d0 = d0 << 1;
- }
-
- q = 0;
- while (cnt)
- {
- d0 = d0 >> 1;
- q <<= 1;
- if (n0 >= d0)
- {
- n0 = n0 - d0;
- q |= 1;
- }
- cnt--;
- }
- }
- *rp = n0;
- return q;
-}
-
-/* Two-limb division optimized for small quotients. */
-static inline mp_limb_t
-div2 (mp_ptr rp,
- mp_limb_t nh, mp_limb_t nl,
- mp_limb_t dh, mp_limb_t dl)
-{
- mp_limb_t q = 0;
-
- if ((mp_limb_signed_t) nh < 0)
- {
- int cnt;
- for (cnt = 1; (mp_limb_signed_t) dh >= 0; cnt++)
- {
- dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
- dl = dl << 1;
- }
-
- while (cnt)
- {
- q <<= 1;
- if (nh > dh || (nh == dh && nl >= dl))
- {
- sub_ddmmss (nh, nl, nh, nl, dh, dl);
- q |= 1;
- }
- dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
- dh = dh >> 1;
- cnt--;
- }
- }
- else
- {
- int cnt;
- for (cnt = 0; nh > dh || (nh == dh && nl >= dl); cnt++)
- {
- dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
- dl = dl << 1;
- }
-
- while (cnt)
- {
- dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
- dh = dh >> 1;
- q <<= 1;
- if (nh > dh || (nh == dh && nl >= dl))
- {
- sub_ddmmss (nh, nl, nh, nl, dh, dl);
- q |= 1;
- }
- cnt--;
- }
- }
-
- rp[0] = nl;
- rp[1] = nh;
-
- return q;
-}
-
-int
-mpn_hgcd2_jacobi (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
- struct hgcd_matrix1 *M, unsigned *bitsp)
-{
- mp_limb_t u00, u01, u10, u11;
- unsigned bits = *bitsp;
-
- if (ah < 2 || bh < 2)
- return 0;
-
- if (ah > bh || (ah == bh && al > bl))
- {
- sub_ddmmss (ah, al, ah, al, bh, bl);
- if (ah < 2)
- return 0;
-
- u00 = u01 = u11 = 1;
- u10 = 0;
- bits = mpn_jacobi_update (bits, 1, 1);
- }
- else
- {
- sub_ddmmss (bh, bl, bh, bl, ah, al);
- if (bh < 2)
- return 0;
-
- u00 = u10 = u11 = 1;
- u01 = 0;
- bits = mpn_jacobi_update (bits, 0, 1);
- }
-
- if (ah < bh)
- goto subtract_a;
-
- for (;;)
- {
- ASSERT (ah >= bh);
- if (ah == bh)
- goto done;
-
- if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
- {
- ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
- bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
-
- break;
- }
-
- /* Subtract a -= q b, and multiply M from the right by (1 q ; 0
- 1), affecting the second column of M. */
- ASSERT (ah > bh);
- sub_ddmmss (ah, al, ah, al, bh, bl);
-
- if (ah < 2)
- goto done;
-
- if (ah <= bh)
- {
- /* Use q = 1 */
- u01 += u00;
- u11 += u10;
- bits = mpn_jacobi_update (bits, 1, 1);
- }
- else
- {
- mp_limb_t r[2];
- mp_limb_t q = div2 (r, ah, al, bh, bl);
- al = r[0]; ah = r[1];
- if (ah < 2)
- {
- /* A is too small, but q is correct. */
- u01 += q * u00;
- u11 += q * u10;
- bits = mpn_jacobi_update (bits, 1, q & 3);
- goto done;
- }
- q++;
- u01 += q * u00;
- u11 += q * u10;
- bits = mpn_jacobi_update (bits, 1, q & 3);
- }
- subtract_a:
- ASSERT (bh >= ah);
- if (ah == bh)
- goto done;
-
- if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
- {
- ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
- bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
-
- goto subtract_a1;
- }
-
- /* Subtract b -= q a, and multiply M from the right by (1 0 ; q
- 1), affecting the first column of M. */
- sub_ddmmss (bh, bl, bh, bl, ah, al);
-
- if (bh < 2)
- goto done;
-
- if (bh <= ah)
- {
- /* Use q = 1 */
- u00 += u01;
- u10 += u11;
- bits = mpn_jacobi_update (bits, 0, 1);
- }
- else
- {
- mp_limb_t r[2];
- mp_limb_t q = div2 (r, bh, bl, ah, al);
- bl = r[0]; bh = r[1];
- if (bh < 2)
- {
- /* B is too small, but q is correct. */
- u00 += q * u01;
- u10 += q * u11;
- bits = mpn_jacobi_update (bits, 0, q & 3);
- goto done;
- }
- q++;
- u00 += q * u01;
- u10 += q * u11;
- bits = mpn_jacobi_update (bits, 0, q & 3);
- }
- }
-
- /* NOTE: Since we discard the least significant half limb, we don't
- get a truly maximal M (corresponding to |a - b| <
- 2^{GMP_LIMB_BITS +1}). */
- /* Single precision loop */
- for (;;)
- {
- ASSERT (ah >= bh);
- if (ah == bh)
- break;
-
- ah -= bh;
- if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
- break;
-
- if (ah <= bh)
- {
- /* Use q = 1 */
- u01 += u00;
- u11 += u10;
- bits = mpn_jacobi_update (bits, 1, 1);
- }
- else
- {
- mp_limb_t r;
- mp_limb_t q = div1 (&r, ah, bh);
- ah = r;
- if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
- {
- /* A is too small, but q is correct. */
- u01 += q * u00;
- u11 += q * u10;
- bits = mpn_jacobi_update (bits, 1, q & 3);
- break;
- }
- q++;
- u01 += q * u00;
- u11 += q * u10;
- bits = mpn_jacobi_update (bits, 1, q & 3);
- }
- subtract_a1:
- ASSERT (bh >= ah);
- if (ah == bh)
- break;
-
- bh -= ah;
- if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
- break;
-
- if (bh <= ah)
- {
- /* Use q = 1 */
- u00 += u01;
- u10 += u11;
- bits = mpn_jacobi_update (bits, 0, 1);
- }
- else
- {
- mp_limb_t r;
- mp_limb_t q = div1 (&r, bh, ah);
- bh = r;
- if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
- {
- /* B is too small, but q is correct. */
- u00 += q * u01;
- u10 += q * u11;
- bits = mpn_jacobi_update (bits, 0, q & 3);
- break;
- }
- q++;
- u00 += q * u01;
- u10 += q * u11;
- bits = mpn_jacobi_update (bits, 0, q & 3);
- }
- }
-
- done:
- M->u[0][0] = u00; M->u[0][1] = u01;
- M->u[1][0] = u10; M->u[1][1] = u11;
- *bitsp = bits;
-
- return 1;
-}
diff --git a/gmp/mpn/generic/hgcd_appr.c b/gmp/mpn/generic/hgcd_appr.c
deleted file mode 100644
index 660219372f..0000000000
--- a/gmp/mpn/generic/hgcd_appr.c
+++ /dev/null
@@ -1,268 +0,0 @@
-/* hgcd_appr.c.
-
- THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2011, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-/* Identical to mpn_hgcd_itch. FIXME: Do we really need to add
- HGCD_THRESHOLD at the end? */
-mp_size_t
-mpn_hgcd_appr_itch (mp_size_t n)
-{
- if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD))
- return n;
- else
- {
- unsigned k;
- int count;
- mp_size_t nscaled;
-
- /* Get the recursion depth. */
- nscaled = (n - 1) / (HGCD_APPR_THRESHOLD - 1);
- count_leading_zeros (count, nscaled);
- k = GMP_LIMB_BITS - count;
-
- return 20 * ((n+3) / 4) + 22 * k + HGCD_THRESHOLD;
- }
-}
-
-/* Destroys inputs. */
-int
-mpn_hgcd_appr (mp_ptr ap, mp_ptr bp, mp_size_t n,
- struct hgcd_matrix *M, mp_ptr tp)
-{
- mp_size_t s;
- int success = 0;
-
- ASSERT (n > 0);
-
- ASSERT ((ap[n-1] | bp[n-1]) != 0);
-
- if (n <= 2)
- /* Implies s = n. A fairly uninteresting case but exercised by the
- random inputs of the testsuite. */
- return 0;
-
- ASSERT ((n+1)/2 - 1 < M->alloc);
-
- /* We aim for reduction of to GMP_NUMB_BITS * s bits. But each time
- we discard some of the least significant limbs, we must keep one
- additional bit to account for the truncation error. We maintain
- the GMP_NUMB_BITS * s - extra_bits as the current target size. */
-
- s = n/2 + 1;
- if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD))
- {
- unsigned extra_bits = 0;
-
- while (n > 2)
- {
- mp_size_t nn;
-
- ASSERT (n > s);
- ASSERT (n <= 2*s);
-
- nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
- if (!nn)
- break;
-
- n = nn;
- success = 1;
-
- /* We can truncate and discard the lower p bits whenever nbits <=
- 2*sbits - p. To account for the truncation error, we must
- adjust
-
- sbits <-- sbits + 1 - p,
-
- rather than just sbits <-- sbits - p. This adjustment makes
- the produced matrix slightly smaller than it could be. */
-
- if (GMP_NUMB_BITS * (n + 1) + 2 * extra_bits <= 2*GMP_NUMB_BITS * s)
- {
- mp_size_t p = (GMP_NUMB_BITS * (2*s - n) - 2*extra_bits) / GMP_NUMB_BITS;
-
- if (extra_bits == 0)
- {
- /* We cross a limb boundary and bump s. We can't do that
- if the result is that it makes makes min(U, V)
- smaller than 2^{GMP_NUMB_BITS} s. */
- if (s + 1 == n
- || mpn_zero_p (ap + s + 1, n - s - 1)
- || mpn_zero_p (bp + s + 1, n - s - 1))
- continue;
-
- extra_bits = GMP_NUMB_BITS - 1;
- s++;
- }
- else
- {
- extra_bits--;
- }
-
- /* Drop the p least significant limbs */
- ap += p; bp += p; n -= p; s -= p;
- }
- }
-
- ASSERT (s > 0);
-
- if (extra_bits > 0)
- {
- /* We can get here only of we have dropped at least one of the least
- significant bits, so we can decrement ap and bp. We can then shift
- left extra bits using mpn_rshift. */
- /* NOTE: In the unlikely case that n is large, it would be preferable
- to do an initial subdiv step to reduce the size before shifting,
- but that would mean duplicating mpn_gcd_subdiv_step with a bit
- count rather than a limb count. */
- ap--; bp--;
- ap[0] = mpn_rshift (ap+1, ap+1, n, GMP_NUMB_BITS - extra_bits);
- bp[0] = mpn_rshift (bp+1, bp+1, n, GMP_NUMB_BITS - extra_bits);
- n += (ap[n] | bp[n]) > 0;
-
- ASSERT (success);
-
- while (n > 2)
- {
- mp_size_t nn;
-
- ASSERT (n > s);
- ASSERT (n <= 2*s);
-
- nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
-
- if (!nn)
- return 1;
-
- n = nn;
- }
- }
-
- if (n == 2)
- {
- struct hgcd_matrix1 M1;
- ASSERT (s == 1);
-
- if (mpn_hgcd2 (ap[1], ap[0], bp[1], bp[0], &M1))
- {
- /* Multiply M <- M * M1 */
- mpn_hgcd_matrix_mul_1 (M, &M1, tp);
- success = 1;
- }
- }
- return success;
- }
- else
- {
- mp_size_t n2 = (3*n)/4 + 1;
- mp_size_t p = n/2;
- mp_size_t nn;
-
- nn = mpn_hgcd_reduce (M, ap, bp, n, p, tp);
- if (nn)
- {
- n = nn;
- /* FIXME: Discard some of the low limbs immediately? */
- success = 1;
- }
-
- while (n > n2)
- {
- mp_size_t nn;
-
- /* Needs n + 1 storage */
- nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
- if (!nn)
- return success;
-
- n = nn;
- success = 1;
- }
- if (n > s + 2)
- {
- struct hgcd_matrix M1;
- mp_size_t scratch;
-
- p = 2*s - n + 1;
- scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
-
- mpn_hgcd_matrix_init(&M1, n - p, tp);
- if (mpn_hgcd_appr (ap + p, bp + p, n - p, &M1, tp + scratch))
- {
- /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
- ASSERT (M->n + 2 >= M1.n);
-
- /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
- then either q or q + 1 is a correct quotient, and M1 will
- start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
- rules out the case that the size of M * M1 is much
- smaller than the expected M->n + M1->n. */
-
- ASSERT (M->n + M1.n < M->alloc);
-
- /* We need a bound for of M->n + M1.n. Let n be the original
- input size. Then
-
- ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
-
- and it follows that
-
- M.n + M1.n <= ceil(n/2) + 1
-
- Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
- amount of needed scratch space. */
- mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
- return 1;
- }
- }
-
- for(;;)
- {
- mp_size_t nn;
-
- ASSERT (n > s);
- ASSERT (n <= 2*s);
-
- nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
-
- if (!nn)
- return success;
-
- n = nn;
- success = 1;
- }
- }
-}
diff --git a/gmp/mpn/generic/hgcd_jacobi.c b/gmp/mpn/generic/hgcd_jacobi.c
deleted file mode 100644
index 0a49e5b3a7..0000000000
--- a/gmp/mpn/generic/hgcd_jacobi.c
+++ /dev/null
@@ -1,244 +0,0 @@
-/* hgcd_jacobi.c.
-
- THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2003-2005, 2008, 2011, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-/* This file is almost a copy of hgcd.c, with some added calls to
- mpn_jacobi_update */
-
-struct hgcd_jacobi_ctx
-{
- struct hgcd_matrix *M;
- unsigned *bitsp;
-};
-
-static void
-hgcd_jacobi_hook (void *p, mp_srcptr gp, mp_size_t gn,
- mp_srcptr qp, mp_size_t qn, int d)
-{
- ASSERT (!gp);
- ASSERT (d >= 0);
-
- MPN_NORMALIZE (qp, qn);
- if (qn > 0)
- {
- struct hgcd_jacobi_ctx *ctx = (struct hgcd_jacobi_ctx *) p;
- /* NOTES: This is a bit ugly. A tp area is passed to
- gcd_subdiv_step, which stores q at the start of that area. We
- now use the rest. */
- mp_ptr tp = (mp_ptr) qp + qn;
-
- mpn_hgcd_matrix_update_q (ctx->M, qp, qn, d, tp);
- *ctx->bitsp = mpn_jacobi_update (*ctx->bitsp, d, qp[0] & 3);
- }
-}
-
-/* Perform a few steps, using some of mpn_hgcd2, subtraction and
- division. Reduces the size by almost one limb or more, but never
- below the given size s. Return new size for a and b, or 0 if no
- more steps are possible.
-
- If hgcd2 succeeds, needs temporary space for hgcd_matrix_mul_1, M->n
- limbs, and hgcd_mul_matrix1_inverse_vector, n limbs. If hgcd2
- fails, needs space for the quotient, qn <= n - s + 1 limbs, for and
- hgcd_matrix_update_q, qn + (size of the appropriate column of M) <=
- resulting size of M.
-
- If N is the input size to the calling hgcd, then s = floor(N/2) +
- 1, M->n < N, qn + matrix size <= n - s + 1 + n - s = 2 (n - s) + 1
- < N, so N is sufficient.
-*/
-
-static mp_size_t
-hgcd_jacobi_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s,
- struct hgcd_matrix *M, unsigned *bitsp, mp_ptr tp)
-{
- struct hgcd_matrix1 M1;
- mp_limb_t mask;
- mp_limb_t ah, al, bh, bl;
-
- ASSERT (n > s);
-
- mask = ap[n-1] | bp[n-1];
- ASSERT (mask > 0);
-
- if (n == s + 1)
- {
- if (mask < 4)
- goto subtract;
-
- ah = ap[n-1]; al = ap[n-2];
- bh = bp[n-1]; bl = bp[n-2];
- }
- else if (mask & GMP_NUMB_HIGHBIT)
- {
- ah = ap[n-1]; al = ap[n-2];
- bh = bp[n-1]; bl = bp[n-2];
- }
- else
- {
- int shift;
-
- count_leading_zeros (shift, mask);
- ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
- al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
- bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
- bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
- }
-
- /* Try an mpn_hgcd2 step */
- if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M1, bitsp))
- {
- /* Multiply M <- M * M1 */
- mpn_hgcd_matrix_mul_1 (M, &M1, tp);
-
- /* Can't swap inputs, so we need to copy. */
- MPN_COPY (tp, ap, n);
- /* Multiply M1^{-1} (a;b) */
- return mpn_matrix22_mul1_inverse_vector (&M1, ap, tp, bp, n);
- }
-
- subtract:
- {
- struct hgcd_jacobi_ctx ctx;
- ctx.M = M;
- ctx.bitsp = bitsp;
-
- return mpn_gcd_subdiv_step (ap, bp, n, s, hgcd_jacobi_hook, &ctx, tp);
- }
-}
-
-/* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M
- with elements of size at most (n+1)/2 - 1. Returns new size of a,
- b, or zero if no reduction is possible. */
-
-/* Same scratch requirements as for mpn_hgcd. */
-mp_size_t
-mpn_hgcd_jacobi (mp_ptr ap, mp_ptr bp, mp_size_t n,
- struct hgcd_matrix *M, unsigned *bitsp, mp_ptr tp)
-{
- mp_size_t s = n/2 + 1;
-
- mp_size_t nn;
- int success = 0;
-
- if (n <= s)
- /* Happens when n <= 2, a fairly uninteresting case but exercised
- by the random inputs of the testsuite. */
- return 0;
-
- ASSERT ((ap[n-1] | bp[n-1]) > 0);
-
- ASSERT ((n+1)/2 - 1 < M->alloc);
-
- if (ABOVE_THRESHOLD (n, HGCD_THRESHOLD))
- {
- mp_size_t n2 = (3*n)/4 + 1;
- mp_size_t p = n/2;
-
- nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, M, bitsp, tp);
- if (nn > 0)
- {
- /* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1)
- = 2 (n - 1) */
- n = mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp);
- success = 1;
- }
- while (n > n2)
- {
- /* Needs n + 1 storage */
- nn = hgcd_jacobi_step (n, ap, bp, s, M, bitsp, tp);
- if (!nn)
- return success ? n : 0;
- n = nn;
- success = 1;
- }
-
- if (n > s + 2)
- {
- struct hgcd_matrix M1;
- mp_size_t scratch;
-
- p = 2*s - n + 1;
- scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
-
- mpn_hgcd_matrix_init(&M1, n - p, tp);
- nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M1, bitsp, tp + scratch);
- if (nn > 0)
- {
- /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
- ASSERT (M->n + 2 >= M1.n);
-
- /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
- then either q or q + 1 is a correct quotient, and M1 will
- start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
- rules out the case that the size of M * M1 is much
- smaller than the expected M->n + M1->n. */
-
- ASSERT (M->n + M1.n < M->alloc);
-
- /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1)
- = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */
- n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch);
-
- /* We need a bound for of M->n + M1.n. Let n be the original
- input size. Then
-
- ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
-
- and it follows that
-
- M.n + M1.n <= ceil(n/2) + 1
-
- Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
- amount of needed scratch space. */
- mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
- success = 1;
- }
- }
- }
-
- for (;;)
- {
- /* Needs s+3 < n */
- nn = hgcd_jacobi_step (n, ap, bp, s, M, bitsp, tp);
- if (!nn)
- return success ? n : 0;
-
- n = nn;
- success = 1;
- }
-}
diff --git a/gmp/mpn/generic/hgcd_matrix.c b/gmp/mpn/generic/hgcd_matrix.c
deleted file mode 100644
index d9db331603..0000000000
--- a/gmp/mpn/generic/hgcd_matrix.c
+++ /dev/null
@@ -1,266 +0,0 @@
-/* hgcd_matrix.c.
-
- THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2003-2005, 2008, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-/* For input of size n, matrix elements are of size at most ceil(n/2)
- - 1, but we need two limbs extra. */
-void
-mpn_hgcd_matrix_init (struct hgcd_matrix *M, mp_size_t n, mp_ptr p)
-{
- mp_size_t s = (n+1)/2 + 1;
- M->alloc = s;
- M->n = 1;
- MPN_ZERO (p, 4 * s);
- M->p[0][0] = p;
- M->p[0][1] = p + s;
- M->p[1][0] = p + 2 * s;
- M->p[1][1] = p + 3 * s;
-
- M->p[0][0][0] = M->p[1][1][0] = 1;
-}
-
-/* Update column COL, adding in Q * column (1-COL). Temporary storage:
- * qn + n <= M->alloc, where n is the size of the largest element in
- * column 1 - COL. */
-void
-mpn_hgcd_matrix_update_q (struct hgcd_matrix *M, mp_srcptr qp, mp_size_t qn,
- unsigned col, mp_ptr tp)
-{
- ASSERT (col < 2);
-
- if (qn == 1)
- {
- mp_limb_t q = qp[0];
- mp_limb_t c0, c1;
-
- c0 = mpn_addmul_1 (M->p[0][col], M->p[0][1-col], M->n, q);
- c1 = mpn_addmul_1 (M->p[1][col], M->p[1][1-col], M->n, q);
-
- M->p[0][col][M->n] = c0;
- M->p[1][col][M->n] = c1;
-
- M->n += (c0 | c1) != 0;
- }
- else
- {
- unsigned row;
-
- /* Carries for the unlikely case that we get both high words
- from the multiplication and carries from the addition. */
- mp_limb_t c[2];
- mp_size_t n;
-
- /* The matrix will not necessarily grow in size by qn, so we
- need normalization in order not to overflow M. */
-
- for (n = M->n; n + qn > M->n; n--)
- {
- ASSERT (n > 0);
- if (M->p[0][1-col][n-1] > 0 || M->p[1][1-col][n-1] > 0)
- break;
- }
-
- ASSERT (qn + n <= M->alloc);
-
- for (row = 0; row < 2; row++)
- {
- if (qn <= n)
- mpn_mul (tp, M->p[row][1-col], n, qp, qn);
- else
- mpn_mul (tp, qp, qn, M->p[row][1-col], n);
-
- ASSERT (n + qn >= M->n);
- c[row] = mpn_add (M->p[row][col], tp, n + qn, M->p[row][col], M->n);
- }
-
- n += qn;
-
- if (c[0] | c[1])
- {
- M->p[0][col][n] = c[0];
- M->p[1][col][n] = c[1];
- n++;
- }
- else
- {
- n -= (M->p[0][col][n-1] | M->p[1][col][n-1]) == 0;
- ASSERT (n >= M->n);
- }
- M->n = n;
- }
-
- ASSERT (M->n < M->alloc);
-}
-
-/* Multiply M by M1 from the right. Since the M1 elements fit in
- GMP_NUMB_BITS - 1 bits, M grows by at most one limb. Needs
- temporary space M->n */
-void
-mpn_hgcd_matrix_mul_1 (struct hgcd_matrix *M, const struct hgcd_matrix1 *M1,
- mp_ptr tp)
-{
- mp_size_t n0, n1;
-
- /* Could avoid copy by some swapping of pointers. */
- MPN_COPY (tp, M->p[0][0], M->n);
- n0 = mpn_hgcd_mul_matrix1_vector (M1, M->p[0][0], tp, M->p[0][1], M->n);
- MPN_COPY (tp, M->p[1][0], M->n);
- n1 = mpn_hgcd_mul_matrix1_vector (M1, M->p[1][0], tp, M->p[1][1], M->n);
-
- /* Depends on zero initialization */
- M->n = MAX(n0, n1);
- ASSERT (M->n < M->alloc);
-}
-
-/* Multiply M by M1 from the right. Needs 3*(M->n + M1->n) + 5 limbs
- of temporary storage (see mpn_matrix22_mul_itch). */
-void
-mpn_hgcd_matrix_mul (struct hgcd_matrix *M, const struct hgcd_matrix *M1,
- mp_ptr tp)
-{
- mp_size_t n;
-
- /* About the new size of M:s elements. Since M1's diagonal elements
- are > 0, no element can decrease. The new elements are of size
- M->n + M1->n, one limb more or less. The computation of the
- matrix product produces elements of size M->n + M1->n + 1. But
- the true size, after normalization, may be three limbs smaller.
-
- The reason that the product has normalized size >= M->n + M1->n -
- 2 is subtle. It depends on the fact that M and M1 can be factored
- as products of (1,1; 0,1) and (1,0; 1,1), and that we can't have
- M ending with a large power and M1 starting with a large power of
- the same matrix. */
-
- /* FIXME: Strassen multiplication gives only a small speedup. In FFT
- multiplication range, this function could be sped up quite a lot
- using invariance. */
- ASSERT (M->n + M1->n < M->alloc);
-
- ASSERT ((M->p[0][0][M->n-1] | M->p[0][1][M->n-1]
- | M->p[1][0][M->n-1] | M->p[1][1][M->n-1]) > 0);
-
- ASSERT ((M1->p[0][0][M1->n-1] | M1->p[0][1][M1->n-1]
- | M1->p[1][0][M1->n-1] | M1->p[1][1][M1->n-1]) > 0);
-
- mpn_matrix22_mul (M->p[0][0], M->p[0][1],
- M->p[1][0], M->p[1][1], M->n,
- M1->p[0][0], M1->p[0][1],
- M1->p[1][0], M1->p[1][1], M1->n, tp);
-
- /* Index of last potentially non-zero limb, size is one greater. */
- n = M->n + M1->n;
-
- n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
- n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
- n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
-
- ASSERT ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) > 0);
-
- M->n = n + 1;
-}
-
-/* Multiplies the least significant p limbs of (a;b) by M^-1.
- Temporary space needed: 2 * (p + M->n)*/
-mp_size_t
-mpn_hgcd_matrix_adjust (const struct hgcd_matrix *M,
- mp_size_t n, mp_ptr ap, mp_ptr bp,
- mp_size_t p, mp_ptr tp)
-{
- /* M^-1 (a;b) = (r11, -r01; -r10, r00) (a ; b)
- = (r11 a - r01 b; - r10 a + r00 b */
-
- mp_ptr t0 = tp;
- mp_ptr t1 = tp + p + M->n;
- mp_limb_t ah, bh;
- mp_limb_t cy;
-
- ASSERT (p + M->n < n);
-
- /* First compute the two values depending on a, before overwriting a */
-
- if (M->n >= p)
- {
- mpn_mul (t0, M->p[1][1], M->n, ap, p);
- mpn_mul (t1, M->p[1][0], M->n, ap, p);
- }
- else
- {
- mpn_mul (t0, ap, p, M->p[1][1], M->n);
- mpn_mul (t1, ap, p, M->p[1][0], M->n);
- }
-
- /* Update a */
- MPN_COPY (ap, t0, p);
- ah = mpn_add (ap + p, ap + p, n - p, t0 + p, M->n);
-
- if (M->n >= p)
- mpn_mul (t0, M->p[0][1], M->n, bp, p);
- else
- mpn_mul (t0, bp, p, M->p[0][1], M->n);
-
- cy = mpn_sub (ap, ap, n, t0, p + M->n);
- ASSERT (cy <= ah);
- ah -= cy;
-
- /* Update b */
- if (M->n >= p)
- mpn_mul (t0, M->p[0][0], M->n, bp, p);
- else
- mpn_mul (t0, bp, p, M->p[0][0], M->n);
-
- MPN_COPY (bp, t0, p);
- bh = mpn_add (bp + p, bp + p, n - p, t0 + p, M->n);
- cy = mpn_sub (bp, bp, n, t1, p + M->n);
- ASSERT (cy <= bh);
- bh -= cy;
-
- if (ah > 0 || bh > 0)
- {
- ap[n] = ah;
- bp[n] = bh;
- n++;
- }
- else
- {
- /* The subtraction can reduce the size by at most one limb. */
- if (ap[n-1] == 0 && bp[n-1] == 0)
- n--;
- }
- ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
- return n;
-}
diff --git a/gmp/mpn/generic/hgcd_reduce.c b/gmp/mpn/generic/hgcd_reduce.c
deleted file mode 100644
index 6f3d61ecea..0000000000
--- a/gmp/mpn/generic/hgcd_reduce.c
+++ /dev/null
@@ -1,247 +0,0 @@
-/* hgcd_reduce.c.
-
- THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2011, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-/* Computes R -= A * B. Result must be non-negative. Normalized down
- to size an, and resulting size is returned. */
-static mp_size_t
-submul (mp_ptr rp, mp_size_t rn,
- mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
-{
- mp_ptr tp;
- TMP_DECL;
-
- ASSERT (bn > 0);
- ASSERT (an >= bn);
- ASSERT (rn >= an);
- ASSERT (an + bn <= rn + 1);
-
- TMP_MARK;
- tp = TMP_ALLOC_LIMBS (an + bn);
-
- mpn_mul (tp, ap, an, bp, bn);
- if (an + bn > rn)
- {
- ASSERT (tp[rn] == 0);
- bn--;
- }
- ASSERT_NOCARRY (mpn_sub (rp, rp, rn, tp, an + bn));
- TMP_FREE;
-
- while (rn > an && (rp[rn-1] == 0))
- rn--;
-
- return rn;
-}
-
-/* Computes (a, b) <-- M^{-1} (a; b) */
-/* FIXME:
- x Take scratch parameter, and figure out scratch need.
-
- x Use some fallback for small M->n?
-*/
-static mp_size_t
-hgcd_matrix_apply (const struct hgcd_matrix *M,
- mp_ptr ap, mp_ptr bp,
- mp_size_t n)
-{
- mp_size_t an, bn, un, vn, nn;
- mp_size_t mn[2][2];
- mp_size_t modn;
- mp_ptr tp, sp, scratch;
- mp_limb_t cy;
- unsigned i, j;
-
- TMP_DECL;
-
- ASSERT ( (ap[n-1] | bp[n-1]) > 0);
-
- an = n;
- MPN_NORMALIZE (ap, an);
- bn = n;
- MPN_NORMALIZE (bp, bn);
-
- for (i = 0; i < 2; i++)
- for (j = 0; j < 2; j++)
- {
- mp_size_t k;
- k = M->n;
- MPN_NORMALIZE (M->p[i][j], k);
- mn[i][j] = k;
- }
-
- ASSERT (mn[0][0] > 0);
- ASSERT (mn[1][1] > 0);
- ASSERT ( (mn[0][1] | mn[1][0]) > 0);
-
- TMP_MARK;
-
- if (mn[0][1] == 0)
- {
- /* A unchanged, M = (1, 0; q, 1) */
- ASSERT (mn[0][0] == 1);
- ASSERT (M->p[0][0][0] == 1);
- ASSERT (mn[1][1] == 1);
- ASSERT (M->p[1][1][0] == 1);
-
- /* Put B <-- B - q A */
- nn = submul (bp, bn, ap, an, M->p[1][0], mn[1][0]);
- }
- else if (mn[1][0] == 0)
- {
- /* B unchanged, M = (1, q; 0, 1) */
- ASSERT (mn[0][0] == 1);
- ASSERT (M->p[0][0][0] == 1);
- ASSERT (mn[1][1] == 1);
- ASSERT (M->p[1][1][0] == 1);
-
- /* Put A <-- A - q * B */
- nn = submul (ap, an, bp, bn, M->p[0][1], mn[0][1]);
- }
- else
- {
- /* A = m00 a + m01 b ==> a <= A / m00, b <= A / m01.
- B = m10 a + m11 b ==> a <= B / m10, b <= B / m11. */
- un = MIN (an - mn[0][0], bn - mn[1][0]) + 1;
- vn = MIN (an - mn[0][1], bn - mn[1][1]) + 1;
-
- nn = MAX (un, vn);
- /* In the range of interest, mulmod_bnm1 should always beat mullo. */
- modn = mpn_mulmod_bnm1_next_size (nn + 1);
-
- scratch = TMP_ALLOC_LIMBS (mpn_mulmod_bnm1_itch (modn, modn, M->n));
- tp = TMP_ALLOC_LIMBS (modn);
- sp = TMP_ALLOC_LIMBS (modn);
-
- ASSERT (n <= 2*modn);
-
- if (n > modn)
- {
- cy = mpn_add (ap, ap, modn, ap + modn, n - modn);
- MPN_INCR_U (ap, modn, cy);
-
- cy = mpn_add (bp, bp, modn, bp + modn, n - modn);
- MPN_INCR_U (bp, modn, cy);
-
- n = modn;
- }
-
- mpn_mulmod_bnm1 (tp, modn, ap, n, M->p[1][1], mn[1][1], scratch);
- mpn_mulmod_bnm1 (sp, modn, bp, n, M->p[0][1], mn[0][1], scratch);
-
- /* FIXME: Handle the small n case in some better way. */
- if (n + mn[1][1] < modn)
- MPN_ZERO (tp + n + mn[1][1], modn - n - mn[1][1]);
- if (n + mn[0][1] < modn)
- MPN_ZERO (sp + n + mn[0][1], modn - n - mn[0][1]);
-
- cy = mpn_sub_n (tp, tp, sp, modn);
- MPN_DECR_U (tp, modn, cy);
-
- ASSERT (mpn_zero_p (tp + nn, modn - nn));
-
- mpn_mulmod_bnm1 (sp, modn, ap, n, M->p[1][0], mn[1][0], scratch);
- MPN_COPY (ap, tp, nn);
- mpn_mulmod_bnm1 (tp, modn, bp, n, M->p[0][0], mn[0][0], scratch);
-
- if (n + mn[1][0] < modn)
- MPN_ZERO (sp + n + mn[1][0], modn - n - mn[1][0]);
- if (n + mn[0][0] < modn)
- MPN_ZERO (tp + n + mn[0][0], modn - n - mn[0][0]);
-
- cy = mpn_sub_n (tp, tp, sp, modn);
- MPN_DECR_U (tp, modn, cy);
-
- ASSERT (mpn_zero_p (tp + nn, modn - nn));
- MPN_COPY (bp, tp, nn);
-
- while ( (ap[nn-1] | bp[nn-1]) == 0)
- {
- nn--;
- ASSERT (nn > 0);
- }
- }
- TMP_FREE;
-
- return nn;
-}
-
-mp_size_t
-mpn_hgcd_reduce_itch (mp_size_t n, mp_size_t p)
-{
- mp_size_t itch;
- if (BELOW_THRESHOLD (n, HGCD_REDUCE_THRESHOLD))
- {
- itch = mpn_hgcd_itch (n-p);
-
- /* For arbitrary p, the storage for _adjust is 2*(p + M->n) = 2 *
- (p + ceil((n-p)/2) - 1 <= n + p - 1 */
- if (itch < n + p - 1)
- itch = n + p - 1;
- }
- else
- {
- itch = 2*(n-p) + mpn_hgcd_itch (n-p);
- /* Currently, hgcd_matrix_apply allocates its own storage. */
- }
- return itch;
-}
-
-/* FIXME: Document storage need. */
-mp_size_t
-mpn_hgcd_reduce (struct hgcd_matrix *M,
- mp_ptr ap, mp_ptr bp, mp_size_t n, mp_size_t p,
- mp_ptr tp)
-{
- mp_size_t nn;
- if (BELOW_THRESHOLD (n, HGCD_REDUCE_THRESHOLD))
- {
- nn = mpn_hgcd (ap + p, bp + p, n - p, M, tp);
- if (nn > 0)
- /* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1)
- = 2 (n - 1) */
- return mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp);
- }
- else
- {
- MPN_COPY (tp, ap + p, n - p);
- MPN_COPY (tp + n - p, bp + p, n - p);
- if (mpn_hgcd_appr (tp, tp + n - p, n - p, M, tp + 2*(n-p)))
- return hgcd_matrix_apply (M, ap, bp, n);
- }
- return 0;
-}
diff --git a/gmp/mpn/generic/hgcd_step.c b/gmp/mpn/generic/hgcd_step.c
deleted file mode 100644
index e58894ff3b..0000000000
--- a/gmp/mpn/generic/hgcd_step.c
+++ /dev/null
@@ -1,128 +0,0 @@
-/* hgcd_step.c.
-
- THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2003-2005, 2008, 2011, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-
-static void
-hgcd_hook (void *p, mp_srcptr gp, mp_size_t gn,
- mp_srcptr qp, mp_size_t qn, int d)
-{
- ASSERT (!gp);
- ASSERT (d >= 0);
- ASSERT (d <= 1);
-
- MPN_NORMALIZE (qp, qn);
- if (qn > 0)
- {
- struct hgcd_matrix *M = (struct hgcd_matrix *) p;
- /* NOTES: This is a bit ugly. A tp area is passed to
- gcd_subdiv_step, which stores q at the start of that area. We
- now use the rest. */
- mp_ptr tp = (mp_ptr) qp + qn;
- mpn_hgcd_matrix_update_q (M, qp, qn, d, tp);
- }
-}
-
-/* Perform a few steps, using some of mpn_hgcd2, subtraction and
- division. Reduces the size by almost one limb or more, but never
- below the given size s. Return new size for a and b, or 0 if no
- more steps are possible.
-
- If hgcd2 succeeds, needs temporary space for hgcd_matrix_mul_1, M->n
- limbs, and hgcd_mul_matrix1_inverse_vector, n limbs. If hgcd2
- fails, needs space for the quotient, qn <= n - s limbs, for and
- hgcd_matrix_update_q, qn + (size of the appropriate column of M) <=
- (resulting size of M) + 1.
-
- If N is the input size to the calling hgcd, then s = floor(N/2) +
- 1, M->n < N, qn + product size <= n - s + n - s + 1 = 2 (n - s) + 1
- <= N.
-*/
-
-mp_size_t
-mpn_hgcd_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s,
- struct hgcd_matrix *M, mp_ptr tp)
-{
- struct hgcd_matrix1 M1;
- mp_limb_t mask;
- mp_limb_t ah, al, bh, bl;
-
- ASSERT (n > s);
-
- mask = ap[n-1] | bp[n-1];
- ASSERT (mask > 0);
-
- if (n == s + 1)
- {
- if (mask < 4)
- goto subtract;
-
- ah = ap[n-1]; al = ap[n-2];
- bh = bp[n-1]; bl = bp[n-2];
- }
- else if (mask & GMP_NUMB_HIGHBIT)
- {
- ah = ap[n-1]; al = ap[n-2];
- bh = bp[n-1]; bl = bp[n-2];
- }
- else
- {
- int shift;
-
- count_leading_zeros (shift, mask);
- ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
- al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
- bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
- bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
- }
-
- /* Try an mpn_hgcd2 step */
- if (mpn_hgcd2 (ah, al, bh, bl, &M1))
- {
- /* Multiply M <- M * M1 */
- mpn_hgcd_matrix_mul_1 (M, &M1, tp);
-
- /* Can't swap inputs, so we need to copy. */
- MPN_COPY (tp, ap, n);
- /* Multiply M1^{-1} (a;b) */
- return mpn_matrix22_mul1_inverse_vector (&M1, ap, tp, bp, n);
- }
-
- subtract:
-
- return mpn_gcd_subdiv_step (ap, bp, n, s, hgcd_hook, M, tp);
-}
diff --git a/gmp/mpn/generic/invert.c b/gmp/mpn/generic/invert.c
index 4bc459d728..e40d3611e6 100644
--- a/gmp/mpn/generic/invert.c
+++ b/gmp/mpn/generic/invert.c
@@ -1,91 +1,60 @@
-/* invert.c -- Compute floor((B^{2n}-1)/U) - B^n.
+/* Compute {up,n}^(-1).
- Contributed to the GNU project by Marco Bodrato.
-
- THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-
-Copyright (C) 2007, 2009, 2010, 2012 Free Software Foundation, Inc.
+Copyright (C) 2007 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+#include <stdlib.h>
#include "gmp.h"
#include "gmp-impl.h"
-#include "longlong.h"
+
+/* Formulas:
+ z = 2z-(zz)d
+ z = 2z-(zd)z
+ z = z(2-zd)
+ z = z-z*(zd-1)
+ z = z+z*(1-zd)
+*/
+
+mp_size_t
+mpn_invert_itch (mp_size_t n)
+{
+ return 3 * n + 2;
+}
void
mpn_invert (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
{
- ASSERT (n > 0);
- ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
- ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
- ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
- ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));
-
- if (n == 1)
- invert_limb (*ip, *dp);
- else {
- TMP_DECL;
-
- TMP_MARK;
- if (BELOW_THRESHOLD (n, INV_APPR_THRESHOLD))
- {
- /* Maximum scratch needed by this branch: 2*n */
- mp_size_t i;
- mp_ptr xp;
-
- xp = scratch; /* 2 * n limbs */
- for (i = n - 1; i >= 0; i--)
- xp[i] = GMP_NUMB_MAX;
- mpn_com (xp + n, dp, n);
- if (n == 2) {
- mpn_divrem_2 (ip, 0, xp, 4, dp);
- } else {
- gmp_pi1_t inv;
- invert_pi1 (inv, dp[n-1], dp[n-2]);
- /* FIXME: should we use dcpi1_div_q, for big sizes? */
- mpn_sbpi1_div_q (ip, xp, 2 * n, dp, n, inv.inv32);
- }
- }
- else { /* Use approximated inverse; correct the result if needed. */
- mp_limb_t e; /* The possible error in the approximate inverse */
+ mp_ptr np, rp;
+ mp_size_t i;
+ TMP_DECL;
+
+ TMP_MARK;
+ if (scratch == NULL)
+ {
+ scratch = TMP_ALLOC_LIMBS (mpn_invert_itch (n));
+ }
- ASSERT ( mpn_invert_itch (n) >= mpn_invertappr_itch (n) );
- e = mpn_ni_invertappr (ip, dp, n, scratch);
+ np = scratch; /* 2 * n limbs */
+ rp = scratch + 2 * n; /* n + 2 limbs */
+ for (i = n - 1; i >= 0; i--)
+ np[i] = ~CNST_LIMB(0);
+ mpn_com_n (np + n, dp, n);
+ mpn_tdiv_qr (rp, ip, 0L, np, 2 * n, dp, n);
+ MPN_COPY (ip, rp, n);
- if (UNLIKELY (e)) { /* Assume the error can only be "0" (no error) or "1". */
- /* Code to detect and correct the "off by one" approximation. */
- mpn_mul_n (scratch, ip, dp, n);
- ASSERT_NOCARRY (mpn_add_n (scratch + n, scratch + n, dp, n));
- if (! mpn_add (scratch, scratch, 2*n, dp, n))
- MPN_INCR_U (ip, n, 1); /* The value was wrong, correct it. */
- }
- }
- TMP_FREE;
- }
+ TMP_FREE;
}
diff --git a/gmp/mpn/generic/invertappr.c b/gmp/mpn/generic/invertappr.c
deleted file mode 100644
index 12326b8b75..0000000000
--- a/gmp/mpn/generic/invertappr.c
+++ /dev/null
@@ -1,314 +0,0 @@
-/* mpn_invertappr and helper functions. Compute I such that
- floor((B^{2n}-1)/U - 1 <= I + B^n <= floor((B^{2n}-1)/U.
-
- Contributed to the GNU project by Marco Bodrato.
-
- The algorithm used here was inspired by ApproximateReciprocal from "Modern
- Computer Arithmetic", by Richard P. Brent and Paul Zimmermann. Special
- thanks to Paul Zimmermann for his very valuable suggestions on all the
- theoretical aspects during the work on this code.
-
- THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-
-Copyright (C) 2007, 2009, 2010, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-/* FIXME: Remove NULL and TMP_*, as soon as all the callers properly
- allocate and pass the scratch to the function. */
-#include <stdlib.h> /* for NULL */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-/* FIXME: The iterative version splits the operand in two slightly unbalanced
- parts, the use of log_2 (or counting the bits) underestimate the maximum
- number of iterations. */
-
-#if TUNE_PROGRAM_BUILD
-#define NPOWS \
- ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)))
-#define MAYBE_dcpi1_divappr 1
-#else
-#define NPOWS \
- ((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)) - LOG2C (INV_NEWTON_THRESHOLD))
-#define MAYBE_dcpi1_divappr \
- (INV_NEWTON_THRESHOLD < DC_DIVAPPR_Q_THRESHOLD)
-#if (INV_NEWTON_THRESHOLD > INV_MULMOD_BNM1_THRESHOLD) && \
- (INV_APPR_THRESHOLD > INV_MULMOD_BNM1_THRESHOLD)
-#undef INV_MULMOD_BNM1_THRESHOLD
-#define INV_MULMOD_BNM1_THRESHOLD 0 /* always when Newton */
-#endif
-#endif
-
-/* All the three functions mpn{,_bc,_ni}_invertappr (ip, dp, n, scratch), take
- the strictly normalised value {dp,n} (i.e., most significant bit must be set)
- as an input, and compute {ip,n}: the approximate reciprocal of {dp,n}.
-
- Let e = mpn*_invertappr (ip, dp, n, scratch) be the returned value; the
- following conditions are satisfied by the output:
- 0 <= e <= 1;
- {dp,n}*(B^n+{ip,n}) < B^{2n} <= {dp,n}*(B^n+{ip,n}+1+e) .
- I.e. e=0 means that the result {ip,n} equals the one given by mpn_invert.
- e=1 means that the result _may_ be one less than expected.
-
- The _bc version returns e=1 most of the time.
- The _ni version should return e=0 most of the time; only about 1% of
- possible random input should give e=1.
-
- When the strict result is needed, i.e., e=0 in the relation above:
- {dp,n}*(B^n+{ip,n}) < B^{2n} <= {dp,n}*(B^n+{ip,n}+1) ;
- the function mpn_invert (ip, dp, n, scratch) should be used instead. */
-
-/* Maximum scratch needed by this branch (at tp): 3*n + 2 */
-static mp_limb_t
-mpn_bc_invertappr (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr tp)
-{
- mp_ptr xp;
-
- ASSERT (n > 0);
- ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
- ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
- ASSERT (! MPN_OVERLAP_P (ip, n, tp, mpn_invertappr_itch(n)));
- ASSERT (! MPN_OVERLAP_P (dp, n, tp, mpn_invertappr_itch(n)));
-
- /* Compute a base value of r limbs. */
- if (n == 1)
- invert_limb (*ip, *dp);
- else {
- mp_size_t i;
- xp = tp + n + 2; /* 2 * n limbs */
-
- for (i = n - 1; i >= 0; i--)
- xp[i] = GMP_NUMB_MAX;
- mpn_com (xp + n, dp, n);
-
- /* Now xp contains B^2n - {dp,n}*B^n - 1 */
-
- /* FIXME: if mpn_*pi1_divappr_q handles n==2, use it! */
- if (n == 2) {
- mpn_divrem_2 (ip, 0, xp, 4, dp);
- } else {
- gmp_pi1_t inv;
- invert_pi1 (inv, dp[n-1], dp[n-2]);
- if (! MAYBE_dcpi1_divappr
- || BELOW_THRESHOLD (n, DC_DIVAPPR_Q_THRESHOLD))
- mpn_sbpi1_divappr_q (ip, xp, 2 * n, dp, n, inv.inv32);
- else
- mpn_dcpi1_divappr_q (ip, xp, 2 * n, dp, n, &inv);
- MPN_DECR_U(ip, n, 1);
- return 1;
- }
- }
- return 0;
-}
-
-/* mpn_ni_invertappr: computes the approximate reciprocal using Newton's
- iterations (at least one).
-
- Inspired by Algorithm "ApproximateReciprocal", published in "Modern Computer
- Arithmetic" by Richard P. Brent and Paul Zimmermann, algorithm 3.5, page 121
- in version 0.4 of the book.
-
- Some adaptations were introduced, to allow product mod B^m-1 and return the
- value e.
-
- USE_MUL_N = 1 (default) introduces a correction in such a way that "the
- value of B^{n+h}-T computed at step 8 cannot exceed B^n-1" (the book reads
- "2B^n-1"). This correction should not require to modify the proof.
-
- We use a wrapped product modulo B^m-1. NOTE: is there any normalisation
- problem for the [0] class? It shouldn't: we compute 2*|A*X_h - B^{n+h}| <
- B^m-1. We may get [0] if and only if we get AX_h = B^{n+h}. This can
- happen only if A=B^{n}/2, but this implies X_h = B^{h}*2-1 i.e., AX_h =
- B^{n+h} - A, then we get into the "negative" branch, where X_h is not
- incremented (because A < B^n).
-
- FIXME: the scratch for mulmod_bnm1 does not currently fit in the scratch, it
- is allocated apart. */
-
-#define USE_MUL_N 1
-
-mp_limb_t
-mpn_ni_invertappr (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
-{
- mp_limb_t cy;
- mp_ptr xp;
- mp_size_t rn, mn;
- mp_size_t sizes[NPOWS], *sizp;
- mp_ptr tp;
- TMP_DECL;
-#define rp scratch
-
- ASSERT (n > 2);
- ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
- ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
- ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
- ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));
-
- /* Compute the computation precisions from highest to lowest, leaving the
- base case size in 'rn'. */
- sizp = sizes;
- rn = n;
- do {
- *sizp = rn;
- rn = ((rn) >> 1) + 1;
- sizp ++;
- } while (ABOVE_THRESHOLD (rn, INV_NEWTON_THRESHOLD));
-
- /* We search the inverse of 0.{dp,n}, we compute it as 1.{ip,n} */
- dp += n;
- ip += n;
-
- /* Compute a base value of rn limbs. */
- mpn_bc_invertappr (ip - rn, dp - rn, rn, scratch);
-
- TMP_MARK;
-
- if (ABOVE_THRESHOLD (n, INV_MULMOD_BNM1_THRESHOLD))
- {
- mn = mpn_mulmod_bnm1_next_size (n + 1);
- tp = TMP_ALLOC_LIMBS (mpn_mulmod_bnm1_itch (mn, n, (n >> 1) + 1));
- }
- /* Use Newton's iterations to get the desired precision.*/
-
- /* define rp scratch; 2rn + 1 limbs <= 2(n>>1 + 1) + 1 <= n + 3 limbs */
- /* Maximum scratch needed by this branch <= 3*n + 2 */
- xp = scratch + n + 3; /* n + rn limbs */
- while (1) {
- mp_limb_t method;
-
- n = *--sizp;
- /*
- v n v
- +----+--+
- ^ rn ^
- */
-
- /* Compute i_jd . */
- if (BELOW_THRESHOLD (n, INV_MULMOD_BNM1_THRESHOLD)
- || ((mn = mpn_mulmod_bnm1_next_size (n + 1)) > (n + rn))) {
- /* FIXME: We do only need {xp,n+1}*/
- mpn_mul (xp, dp - n, n, ip - rn, rn);
- mpn_add_n (xp + rn, xp + rn, dp - n, n - rn + 1);
- method = 1; /* Remember we used (truncated) product */
- /* We computed cy.{xp,rn+n} <- 1.{ip,rn} * 0.{dp,n} */
- } else { /* Use B^n-1 wraparound */
- mpn_mulmod_bnm1 (xp, mn, dp - n, n, ip - rn, rn, tp);
- /* We computed {xp,mn} <- {ip,rn} * {dp,n} mod (B^mn-1) */
- /* We know that 2*|ip*dp + dp*B^rn - B^{rn+n}| < B^mn-1 */
- /* Add dp*B^rn mod (B^mn-1) */
- ASSERT (n >= mn - rn);
- xp[mn] = 1 + mpn_add_n (xp + rn, xp + rn, dp - n, mn - rn);
- cy = mpn_add_n (xp, xp, dp - (n - (mn - rn)), n - (mn - rn));
- MPN_INCR_U (xp + n - (mn - rn), mn + 1 - n + (mn - rn), cy);
- ASSERT (n + rn >= mn);
- /* Subtract B^{rn+n} */
- MPN_DECR_U (xp + rn + n - mn, 2*mn + 1 - rn - n, 1);
- if (xp[mn])
- MPN_INCR_U (xp, mn, xp[mn] - 1);
- else
- MPN_DECR_U (xp, mn, 1);
- method = 0; /* Remember we are working Mod B^m-1 */
- }
-
- if (xp[n] < 2) { /* "positive" residue class */
- cy = 1;
- while (xp[n] || mpn_cmp (xp, dp - n, n)>0) {
- xp[n] -= mpn_sub_n (xp, xp, dp - n, n);
- cy ++;
- }
- MPN_DECR_U(ip - rn, rn, cy);
- ASSERT (cy <= 4); /* at most 3 cycles for the while above */
- ASSERT_NOCARRY (mpn_sub_n (xp, dp - n, xp, n));
- ASSERT (xp[n] == 0);
- } else { /* "negative" residue class */
- mpn_com (xp, xp, n + 1);
- MPN_INCR_U(xp, n + 1, method);
- ASSERT (xp[n] <= 1);
-#if USE_MUL_N
- if (xp[n]) {
- MPN_INCR_U(ip - rn, rn, 1);
- ASSERT_CARRY (mpn_sub_n (xp, xp, dp - n, n));
- }
-#endif
- }
-
- /* Compute x_ju_j. FIXME:We need {rp+rn,rn}, mulhi? */
-#if USE_MUL_N
- mpn_mul_n (rp, xp + n - rn, ip - rn, rn);
-#else
- rp[2*rn] = 0;
- mpn_mul (rp, xp + n - rn, rn + xp[n], ip - rn, rn);
-#endif
- /* We need _only_ the carry from the next addition */
- /* Anyway 2rn-n <= 2... we don't need to optimise. */
- cy = mpn_add_n (rp + rn, rp + rn, xp + n - rn, 2*rn - n);
- cy = mpn_add_nc (ip - n, rp + 3*rn - n, xp + rn, n - rn, cy);
- MPN_INCR_U (ip - rn, rn, cy + (1-USE_MUL_N)*(rp[2*rn] + xp[n]));
- if (sizp == sizes) { /* Get out of the cycle */
- /* Check for possible carry propagation from below. */
- cy = rp[3*rn - n - 1] > GMP_NUMB_MAX - 7; /* Be conservative. */
-/* cy = mpn_add_1 (rp + rn, rp + rn, 2*rn - n, 4); */
- break;
- }
- rn = n;
- }
- TMP_FREE;
-
- return cy;
-#undef rp
-}
-
-mp_limb_t
-mpn_invertappr (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
-{
- mp_limb_t res;
- TMP_DECL;
-
- TMP_MARK;
-
- if (scratch == NULL)
- scratch = TMP_ALLOC_LIMBS (mpn_invertappr_itch (n));
-
- ASSERT (n > 0);
- ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
- ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
- ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
- ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));
-
- if (BELOW_THRESHOLD (n, INV_NEWTON_THRESHOLD))
- res = mpn_bc_invertappr (ip, dp, n, scratch);
- else
- res = mpn_ni_invertappr (ip, dp, n, scratch);
-
- TMP_FREE;
- return res;
-}
diff --git a/gmp/mpn/generic/jacbase.c b/gmp/mpn/generic/jacbase.c
index cd52bc9513..6972a130d9 100644
--- a/gmp/mpn/generic/jacbase.c
+++ b/gmp/mpn/generic/jacbase.c
@@ -3,33 +3,22 @@
THIS INTERFACE IS PRELIMINARY AND MIGHT DISAPPEAR OR BE SUBJECT TO
INCOMPATIBLE CHANGES IN A FUTURE RELEASE OF GMP.
-Copyright 1999-2002, 2010 Free Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
@@ -83,15 +72,15 @@ see https://www.gnu.org/licenses/. */
#define PROCESS_TWOS_EVEN \
{ \
int two, mask, shift; \
- \
+ \
two = JACOBI_TWO_U_BIT1 (b); \
mask = (~a & 2); \
a >>= 1; \
- \
+ \
shift = (~a & 1); \
a >>= shift; \
result_bit1 ^= two ^ (two & mask); \
- \
+ \
while ((a & 1) == 0) \
{ \
a >>= 1; \
@@ -102,14 +91,14 @@ see https://www.gnu.org/licenses/. */
#define PROCESS_TWOS_ANY \
{ \
int two, mask, shift; \
- \
+ \
two = JACOBI_TWO_U_BIT1 (b); \
shift = (~a & 1); \
a >>= shift; \
- \
+ \
mask = shift << 1; \
result_bit1 ^= (two & mask); \
- \
+ \
while ((a & 1) == 0) \
{ \
a >>= 1; \
@@ -119,9 +108,9 @@ see https://www.gnu.org/licenses/. */
}
#endif
-#if JACOBI_BASE_METHOD < 4
+
/* Calculate the value of the Jacobi symbol (a/b) of two mp_limb_t's, but
- with a restricted range of inputs accepted, namely b>1, b odd.
+ with a restricted range of inputs accepted, namely b>1, b odd, and a<=b.
The initial result_bit1 is taken as a parameter for the convenience of
mpz_kronecker_ui() et al. The sign changes both here and in those
@@ -133,13 +122,17 @@ see https://www.gnu.org/licenses/. */
Duplicating the loop body to avoid the MP_LIMB_T_SWAP(a,b) would be
possible, but a couple of tests suggest it's not a significant speedup,
- and may even be a slowdown, so what's here is good enough for now. */
+ and may even be a slowdown, so what's here is good enough for now.
+
+ Future: The code doesn't demand a<=b actually, so maybe this could be
+ relaxed. All the places this is used currently call with a<=b though. */
int
mpn_jacobi_base (mp_limb_t a, mp_limb_t b, int result_bit1)
{
ASSERT (b & 1); /* b odd */
ASSERT (b != 1);
+ ASSERT (a <= b);
if (a == 0)
return 0;
@@ -148,15 +141,11 @@ mpn_jacobi_base (mp_limb_t a, mp_limb_t b, int result_bit1)
if (a == 1)
goto done;
- if (a >= b)
- goto a_gt_b;
-
for (;;)
{
result_bit1 ^= JACOBI_RECIP_UU_BIT1 (a, b);
MP_LIMB_T_SWAP (a, b);
- a_gt_b:
do
{
/* working on (a/b), a,b odd, a>=b */
@@ -177,67 +166,3 @@ mpn_jacobi_base (mp_limb_t a, mp_limb_t b, int result_bit1)
done:
return JACOBI_BIT1_TO_PN (result_bit1);
}
-#endif
-
-#if JACOBI_BASE_METHOD == 4
-/* Computes (a/b) for odd b > 1 and any a. The initial bit is taken as a
- * parameter. We have no need for the convention that the sign is in
- * bit 1, internally we use bit 0. */
-
-/* FIXME: Could try table-based count_trailing_zeros. */
-int
-mpn_jacobi_base (mp_limb_t a, mp_limb_t b, int bit)
-{
- int c;
-
- ASSERT (b & 1);
- ASSERT (b > 1);
-
- if (a == 0)
- /* This is the only line which depends on b > 1 */
- return 0;
-
- bit >>= 1;
-
- /* Below, we represent a and b shifted right so that the least
- significant one bit is implicit. */
-
- b >>= 1;
-
- count_trailing_zeros (c, a);
- bit ^= c & (b ^ (b >> 1));
-
- /* We may have c==GMP_LIMB_BITS-1, so we can't use a>>c+1. */
- a >>= c;
- a >>= 1;
-
- do
- {
- mp_limb_t t = a - b;
- mp_limb_t bgta = LIMB_HIGHBIT_TO_MASK (t);
-
- if (t == 0)
- return 0;
-
- /* If b > a, invoke reciprocity */
- bit ^= (bgta & a & b);
-
- /* b <-- min (a, b) */
- b += (bgta & t);
-
- /* a <-- |a - b| */
- a = (t ^ bgta) - bgta;
-
- /* Number of trailing zeros is the same no matter if we look at
- * t or a, but using t gives more parallelism. */
- count_trailing_zeros (c, t);
- c ++;
- /* (2/b) = -1 if b = 3 or 5 mod 8 */
- bit ^= c & (b ^ (b >> 1));
- a >>= c;
- }
- while (b > 0);
-
- return 1-2*(bit & 1);
-}
-#endif /* JACOBI_BASE_METHOD == 4 */
diff --git a/gmp/mpn/generic/jacobi.c b/gmp/mpn/generic/jacobi.c
deleted file mode 100644
index bdc3ec67da..0000000000
--- a/gmp/mpn/generic/jacobi.c
+++ /dev/null
@@ -1,295 +0,0 @@
-/* jacobi.c
-
- THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 1996, 1998, 2000-2004, 2008, 2010, 2011 Free Software Foundation,
-Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-#ifndef JACOBI_DC_THRESHOLD
-#define JACOBI_DC_THRESHOLD GCD_DC_THRESHOLD
-#endif
-
-/* Schönhage's rules:
- *
- * Assume r0 = r1 q1 + r2, with r0 odd, and r1 = q2 r2 + r3
- *
- * If r1 is odd, then
- *
- * (r1 | r0) = s(r1, r0) (r0 | r1) = s(r1, r0) (r2, r1)
- *
- * where s(x,y) = (-1)^{(x-1)(y-1)/4} = (-1)^[x = y = 3 (mod 4)].
- *
- * If r1 is even, r2 must be odd. We have
- *
- * (r1 | r0) = (r1 - r0 | r0) = (-1)^(r0-1)/2 (r0 - r1 | r0)
- * = (-1)^(r0-1)/2 s(r0, r0 - r1) (r0 | r0 - r1)
- * = (-1)^(r0-1)/2 s(r0, r0 - r1) (r1 | r0 - r1)
- *
- * Now, if r1 = 0 (mod 4), then the sign factor is +1, and repeating
- * q1 times gives
- *
- * (r1 | r0) = (r1 | r2) = (r3 | r2)
- *
- * On the other hand, if r1 = 2 (mod 4), the sign factor is
- * (-1)^{(r0-1)/2}, and repeating q1 times gives the exponent
- *
- * (r0-1)/2 + (r0-r1-1)/2 + ... + (r0 - (q1-1) r1)/2
- * = q1 (r0-1)/2 + q1 (q1-1)/2
- *
- * and we can summarize the even case as
- *
- * (r1 | r0) = t(r1, r0, q1) (r3 | r2)
- *
- * where t(x,y,q) = (-1)^{[x = 2 (mod 4)] (q(y-1)/2 + y(q-1)/2)}
- *
- * What about termination? The remainder sequence ends with (0|1) = 1
- * (or (0 | r) = 0 if r != 1). What are the possible cases? If r1 is
- * odd, r2 may be zero. If r1 is even, then r2 = r0 - q1 r1 is odd and
- * hence non-zero. We may have r3 = r1 - q2 r2 = 0.
- *
- * Examples: (11|15) = - (15|11) = - (4|11)
- * (4|11) = (4| 3) = (1| 3)
- * (1| 3) = (3|1) = (0|1) = 1
- *
- * (2|7) = (2|1) = (0|1) = 1
- *
- * Detail: (2|7) = (2-7|7) = (-1|7)(5|7) = -(7|5) = -(2|5)
- * (2|5) = (2-5|5) = (-1|5)(3|5) = (5|3) = (2|3)
- * (2|3) = (2-3|3) = (-1|3)(1|3) = -(3|1) = -(2|1)
- *
- */
-
-/* In principle, the state consists of four variables: e (one bit), a,
- b (two bits each), d (one bit). Collected factors are (-1)^e. a and
- b are the least significant bits of the current remainders. d
- (denominator) is 0 if we're currently subtracting multiplies of a
- from b, and 1 if we're subtracting b from a.
-
- e is stored in the least significant bit, while a, b and d are
- coded as only 13 distinct values in bits 1-4, according to the
- following table. For rows not mentioning d, the value is either
- implied, or it doesn't matter. */
-
-#if WANT_ASSERT
-static const struct
-{
- unsigned char a;
- unsigned char b;
-} decode_table[13] = {
- /* 0 */ { 0, 1 },
- /* 1 */ { 0, 3 },
- /* 2 */ { 1, 1 },
- /* 3 */ { 1, 3 },
- /* 4 */ { 2, 1 },
- /* 5 */ { 2, 3 },
- /* 6 */ { 3, 1 },
- /* 7 */ { 3, 3 }, /* d = 1 */
- /* 8 */ { 1, 0 },
- /* 9 */ { 1, 2 },
- /* 10 */ { 3, 0 },
- /* 11 */ { 3, 2 },
- /* 12 */ { 3, 3 }, /* d = 0 */
-};
-#define JACOBI_A(bits) (decode_table[(bits)>>1].a)
-#define JACOBI_B(bits) (decode_table[(bits)>>1].b)
-#endif /* WANT_ASSERT */
-
-const unsigned char jacobi_table[208] = {
-#include "jacobitab.h"
-};
-
-#define BITS_FAIL 31
-
-static void
-jacobi_hook (void *p, mp_srcptr gp, mp_size_t gn,
- mp_srcptr qp, mp_size_t qn, int d)
-{
- unsigned *bitsp = (unsigned *) p;
-
- if (gp)
- {
- ASSERT (gn > 0);
- if (gn != 1 || gp[0] != 1)
- {
- *bitsp = BITS_FAIL;
- return;
- }
- }
-
- if (qp)
- {
- ASSERT (qn > 0);
- ASSERT (d >= 0);
- *bitsp = mpn_jacobi_update (*bitsp, d, qp[0] & 3);
- }
-}
-
-#define CHOOSE_P(n) (2*(n) / 3)
-
-int
-mpn_jacobi_n (mp_ptr ap, mp_ptr bp, mp_size_t n, unsigned bits)
-{
- mp_size_t scratch;
- mp_size_t matrix_scratch;
- mp_ptr tp;
-
- TMP_DECL;
-
- ASSERT (n > 0);
- ASSERT ( (ap[n-1] | bp[n-1]) > 0);
- ASSERT ( (bp[0] | ap[0]) & 1);
-
- /* FIXME: Check for small sizes first, before setting up temporary
- storage etc. */
- scratch = MPN_GCD_SUBDIV_STEP_ITCH(n);
-
- if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
- {
- mp_size_t hgcd_scratch;
- mp_size_t update_scratch;
- mp_size_t p = CHOOSE_P (n);
- mp_size_t dc_scratch;
-
- matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
- hgcd_scratch = mpn_hgcd_itch (n - p);
- update_scratch = p + n - 1;
-
- dc_scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
- if (dc_scratch > scratch)
- scratch = dc_scratch;
- }
-
- TMP_MARK;
- tp = TMP_ALLOC_LIMBS(scratch);
-
- while (ABOVE_THRESHOLD (n, JACOBI_DC_THRESHOLD))
- {
- struct hgcd_matrix M;
- mp_size_t p = 2*n/3;
- mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
- mp_size_t nn;
- mpn_hgcd_matrix_init (&M, n - p, tp);
-
- nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M, &bits,
- tp + matrix_scratch);
- if (nn > 0)
- {
- ASSERT (M.n <= (n - p - 1)/2);
- ASSERT (M.n + p <= (p + n - 1) / 2);
- /* Temporary storage 2 (p + M->n) <= p + n - 1. */
- n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + matrix_scratch);
- }
- else
- {
- /* Temporary storage n */
- n = mpn_gcd_subdiv_step (ap, bp, n, 0, jacobi_hook, &bits, tp);
- if (!n)
- {
- TMP_FREE;
- return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);
- }
- }
- }
-
- while (n > 2)
- {
- struct hgcd_matrix1 M;
- mp_limb_t ah, al, bh, bl;
- mp_limb_t mask;
-
- mask = ap[n-1] | bp[n-1];
- ASSERT (mask > 0);
-
- if (mask & GMP_NUMB_HIGHBIT)
- {
- ah = ap[n-1]; al = ap[n-2];
- bh = bp[n-1]; bl = bp[n-2];
- }
- else
- {
- int shift;
-
- count_leading_zeros (shift, mask);
- ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
- al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
- bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
- bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
- }
-
- /* Try an mpn_nhgcd2 step */
- if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M, &bits))
- {
- n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
- MP_PTR_SWAP (ap, tp);
- }
- else
- {
- /* mpn_hgcd2 has failed. Then either one of a or b is very
- small, or the difference is very small. Perform one
- subtraction followed by one division. */
- n = mpn_gcd_subdiv_step (ap, bp, n, 0, &jacobi_hook, &bits, tp);
- if (!n)
- {
- TMP_FREE;
- return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);
- }
- }
- }
-
- if (bits >= 16)
- MP_PTR_SWAP (ap, bp);
-
- ASSERT (bp[0] & 1);
-
- if (n == 1)
- {
- mp_limb_t al, bl;
- al = ap[0];
- bl = bp[0];
-
- TMP_FREE;
- if (bl == 1)
- return 1 - 2*(bits & 1);
- else
- return mpn_jacobi_base (al, bl, bits << 1);
- }
-
- else
- {
- int res = mpn_jacobi_2 (ap, bp, bits & 1);
- TMP_FREE;
- return res;
- }
-}
diff --git a/gmp/mpn/generic/jacobi_2.c b/gmp/mpn/generic/jacobi_2.c
deleted file mode 100644
index 9f480f7834..0000000000
--- a/gmp/mpn/generic/jacobi_2.c
+++ /dev/null
@@ -1,352 +0,0 @@
-/* jacobi_2.c
-
- THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 1996, 1998, 2000-2004, 2008, 2010 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-#ifndef JACOBI_2_METHOD
-#define JACOBI_2_METHOD 2
-#endif
-
-/* Computes (a / b) where b is odd, and a and b are otherwise arbitrary
- two-limb numbers. */
-#if JACOBI_2_METHOD == 1
-int
-mpn_jacobi_2 (mp_srcptr ap, mp_srcptr bp, unsigned bit)
-{
- mp_limb_t ah, al, bh, bl;
- int c;
-
- al = ap[0];
- ah = ap[1];
- bl = bp[0];
- bh = bp[1];
-
- ASSERT (bl & 1);
-
- bl = ((bh << (GMP_NUMB_BITS - 1)) & GMP_NUMB_MASK) | (bl >> 1);
- bh >>= 1;
-
- if ( (bh | bl) == 0)
- return 1 - 2*(bit & 1);
-
- if ( (ah | al) == 0)
- return 0;
-
- if (al == 0)
- {
- al = ah;
- ah = 0;
- bit ^= GMP_NUMB_BITS & (bl ^ (bl >> 1));
- }
- count_trailing_zeros (c, al);
- bit ^= c & (bl ^ (bl >> 1));
-
- c++;
- if (UNLIKELY (c == GMP_NUMB_BITS))
- {
- al = ah;
- ah = 0;
- }
- else
- {
- al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);
- ah >>= c;
- }
- while ( (ah | bh) > 0)
- {
- mp_limb_t th, tl;
- mp_limb_t bgta;
-
- sub_ddmmss (th, tl, ah, al, bh, bl);
- if ( (tl | th) == 0)
- return 0;
-
- bgta = LIMB_HIGHBIT_TO_MASK (th);
-
- /* If b > a, invoke reciprocity */
- bit ^= (bgta & al & bl);
-
- /* b <-- min (a, b) */
- add_ssaaaa (bh, bl, bh, bl, th & bgta, tl & bgta);
-
- if ( (bh | bl) == 0)
- return 1 - 2*(bit & 1);
-
- /* a <-- |a - b| */
- al = (bgta ^ tl) - bgta;
- ah = (bgta ^ th);
-
- if (UNLIKELY (al == 0))
- {
- /* If b > a, al == 0 implies that we have a carry to
- propagate. */
- al = ah - bgta;
- ah = 0;
- bit ^= GMP_NUMB_BITS & (bl ^ (bl >> 1));
- }
- count_trailing_zeros (c, al);
- c++;
- bit ^= c & (bl ^ (bl >> 1));
-
- if (UNLIKELY (c == GMP_NUMB_BITS))
- {
- al = ah;
- ah = 0;
- }
- else
- {
- al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);
- ah >>= c;
- }
- }
-
- ASSERT (bl > 0);
-
- while ( (al | bl) & GMP_LIMB_HIGHBIT)
- {
- /* Need an extra comparison to get the mask. */
- mp_limb_t t = al - bl;
- mp_limb_t bgta = - (bl > al);
-
- if (t == 0)
- return 0;
-
- /* If b > a, invoke reciprocity */
- bit ^= (bgta & al & bl);
-
- /* b <-- min (a, b) */
- bl += (bgta & t);
-
- /* a <-- |a - b| */
- al = (t ^ bgta) - bgta;
-
- /* Number of trailing zeros is the same no matter if we look at
- * t or a, but using t gives more parallelism. */
- count_trailing_zeros (c, t);
- c ++;
- /* (2/b) = -1 if b = 3 or 5 mod 8 */
- bit ^= c & (bl ^ (bl >> 1));
-
- if (UNLIKELY (c == GMP_NUMB_BITS))
- return 1 - 2*(bit & 1);
-
- al >>= c;
- }
-
- /* Here we have a little impedance mismatch. Better to inline it? */
- return mpn_jacobi_base (2*al+1, 2*bl+1, bit << 1);
-}
-#elif JACOBI_2_METHOD == 2
-int
-mpn_jacobi_2 (mp_srcptr ap, mp_srcptr bp, unsigned bit)
-{
- mp_limb_t ah, al, bh, bl;
- int c;
-
- al = ap[0];
- ah = ap[1];
- bl = bp[0];
- bh = bp[1];
-
- ASSERT (bl & 1);
-
- /* Use bit 1. */
- bit <<= 1;
-
- if (bh == 0 && bl == 1)
- /* (a|1) = 1 */
- return 1 - (bit & 2);
-
- if (al == 0)
- {
- if (ah == 0)
- /* (0|b) = 0, b > 1 */
- return 0;
-
- count_trailing_zeros (c, ah);
- bit ^= ((GMP_NUMB_BITS + c) << 1) & (bl ^ (bl >> 1));
-
- al = bl;
- bl = ah >> c;
-
- if (bl == 1)
- /* (1|b) = 1 */
- return 1 - (bit & 2);
-
- ah = bh;
-
- bit ^= al & bl;
-
- goto b_reduced;
- }
- if ( (al & 1) == 0)
- {
- count_trailing_zeros (c, al);
-
- al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);
- ah >>= c;
- bit ^= (c << 1) & (bl ^ (bl >> 1));
- }
- if (ah == 0)
- {
- if (bh > 0)
- {
- bit ^= al & bl;
- MP_LIMB_T_SWAP (al, bl);
- ah = bh;
- goto b_reduced;
- }
- goto ab_reduced;
- }
-
- while (bh > 0)
- {
- /* Compute (a|b) */
- while (ah > bh)
- {
- sub_ddmmss (ah, al, ah, al, bh, bl);
- if (al == 0)
- {
- count_trailing_zeros (c, ah);
- bit ^= ((GMP_NUMB_BITS + c) << 1) & (bl ^ (bl >> 1));
-
- al = bl;
- bl = ah >> c;
- ah = bh;
-
- bit ^= al & bl;
- goto b_reduced;
- }
- count_trailing_zeros (c, al);
- bit ^= (c << 1) & (bl ^ (bl >> 1));
- al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);
- ah >>= c;
- }
- if (ah == bh)
- goto cancel_hi;
-
- if (ah == 0)
- {
- bit ^= al & bl;
- MP_LIMB_T_SWAP (al, bl);
- ah = bh;
- break;
- }
-
- bit ^= al & bl;
-
- /* Compute (b|a) */
- while (bh > ah)
- {
- sub_ddmmss (bh, bl, bh, bl, ah, al);
- if (bl == 0)
- {
- count_trailing_zeros (c, bh);
- bit ^= ((GMP_NUMB_BITS + c) << 1) & (al ^ (al >> 1));
-
- bl = bh >> c;
- bit ^= al & bl;
- goto b_reduced;
- }
- count_trailing_zeros (c, bl);
- bit ^= (c << 1) & (al ^ (al >> 1));
- bl = ((bh << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (bl >> c);
- bh >>= c;
- }
- bit ^= al & bl;
-
- /* Compute (a|b) */
- if (ah == bh)
- {
- cancel_hi:
- if (al < bl)
- {
- MP_LIMB_T_SWAP (al, bl);
- bit ^= al & bl;
- }
- al -= bl;
- if (al == 0)
- return 0;
-
- count_trailing_zeros (c, al);
- bit ^= (c << 1) & (bl ^ (bl >> 1));
- al >>= c;
-
- if (al == 1)
- return 1 - (bit & 2);
-
- MP_LIMB_T_SWAP (al, bl);
- bit ^= al & bl;
- break;
- }
- }
-
- b_reduced:
- /* Compute (a|b), with b a single limb. */
- ASSERT (bl & 1);
-
- if (bl == 1)
- /* (a|1) = 1 */
- return 1 - (bit & 2);
-
- while (ah > 0)
- {
- ah -= (al < bl);
- al -= bl;
- if (al == 0)
- {
- if (ah == 0)
- return 0;
- count_trailing_zeros (c, ah);
- bit ^= ((GMP_NUMB_BITS + c) << 1) & (bl ^ (bl >> 1));
- al = ah >> c;
- goto ab_reduced;
- }
- count_trailing_zeros (c, al);
-
- al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);
- ah >>= c;
- bit ^= (c << 1) & (bl ^ (bl >> 1));
- }
- ab_reduced:
- ASSERT (bl & 1);
- ASSERT (bl > 1);
-
- return mpn_jacobi_base (al, bl, bit);
-}
-#else
-#error Unsupported value for JACOBI_2_METHOD
-#endif
diff --git a/gmp/mpn/generic/logops_n.c b/gmp/mpn/generic/logops_n.c
deleted file mode 100644
index 1b534ff4ba..0000000000
--- a/gmp/mpn/generic/logops_n.c
+++ /dev/null
@@ -1,78 +0,0 @@
-/* mpn_and_n, mpn_ior_n, etc -- mpn logical operations.
-
-Copyright 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#ifdef OPERATION_and_n
-#define func __MPN(and_n)
-#define call mpn_and_n
-#endif
-
-#ifdef OPERATION_andn_n
-#define func __MPN(andn_n)
-#define call mpn_andn_n
-#endif
-
-#ifdef OPERATION_nand_n
-#define func __MPN(nand_n)
-#define call mpn_nand_n
-#endif
-
-#ifdef OPERATION_ior_n
-#define func __MPN(ior_n)
-#define call mpn_ior_n
-#endif
-
-#ifdef OPERATION_iorn_n
-#define func __MPN(iorn_n)
-#define call mpn_iorn_n
-#endif
-
-#ifdef OPERATION_nior_n
-#define func __MPN(nior_n)
-#define call mpn_nior_n
-#endif
-
-#ifdef OPERATION_xor_n
-#define func __MPN(xor_n)
-#define call mpn_xor_n
-#endif
-
-#ifdef OPERATION_xnor_n
-#define func __MPN(xnor_n)
-#define call mpn_xnor_n
-#endif
-
-void
-func (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
-{
- call (rp, up, vp, n);
-}
diff --git a/gmp/mpn/generic/lshift.c b/gmp/mpn/generic/lshift.c
index 5182632976..fdc7e4423e 100644
--- a/gmp/mpn/generic/lshift.c
+++ b/gmp/mpn/generic/lshift.c
@@ -1,32 +1,22 @@
/* mpn_lshift -- Shift left low level.
-Copyright 1991, 1993, 1994, 1996, 2000-2002 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002 Free Software Foundation,
+Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
diff --git a/gmp/mpn/generic/lshiftc.c b/gmp/mpn/generic/lshiftc.c
deleted file mode 100644
index e8051b7b93..0000000000
--- a/gmp/mpn/generic/lshiftc.c
+++ /dev/null
@@ -1,74 +0,0 @@
-/* mpn_lshiftc -- Shift left low level with complement.
-
-Copyright 1991, 1993, 1994, 1996, 2000-2002, 2009 Free Software Foundation,
-Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/* Shift U (pointed to by up and n limbs long) cnt bits to the left
- and store the n least significant limbs of the result at rp.
- Return the bits shifted out from the most significant limb.
-
- Argument constraints:
- 1. 0 < cnt < GMP_NUMB_BITS.
- 2. If the result is to be written over the input, rp must be >= up.
-*/
-
-mp_limb_t
-mpn_lshiftc (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
-{
- mp_limb_t high_limb, low_limb;
- unsigned int tnc;
- mp_size_t i;
- mp_limb_t retval;
-
- ASSERT (n >= 1);
- ASSERT (cnt >= 1);
- ASSERT (cnt < GMP_NUMB_BITS);
- ASSERT (MPN_SAME_OR_DECR_P (rp, up, n));
-
- up += n;
- rp += n;
-
- tnc = GMP_NUMB_BITS - cnt;
- low_limb = *--up;
- retval = low_limb >> tnc;
- high_limb = (low_limb << cnt);
-
- for (i = n - 1; i != 0; i--)
- {
- low_limb = *--up;
- *--rp = (~(high_limb | (low_limb >> tnc))) & GMP_NUMB_MASK;
- high_limb = low_limb << cnt;
- }
- *--rp = (~high_limb) & GMP_NUMB_MASK;
-
- return retval;
-}
diff --git a/gmp/mpn/generic/matrix22_mul.c b/gmp/mpn/generic/matrix22_mul.c
index 59531eb1b2..f979385d9d 100644
--- a/gmp/mpn/generic/matrix22_mul.c
+++ b/gmp/mpn/generic/matrix22_mul.c
@@ -1,38 +1,25 @@
/* matrix22_mul.c.
- Contributed by Niels Möller and Marco Bodrato.
-
THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2003-2005, 2008, 2009 Free Software Foundation, Inc.
+Copyright 2003, 2004, 2005, 2008 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
@@ -83,198 +70,143 @@ mpn_matrix22_mul_itch (mp_size_t rn, mp_size_t mn)
|| BELOW_THRESHOLD (mn, MATRIX22_STRASSEN_THRESHOLD))
return 3*rn + 2*mn;
else
- return 3*(rn + mn) + 5;
+ return 4*(rn + mn) + 5;
}
/* Algorithm:
/ s0 \ / 1 0 0 0 \ / r0 \
- | s1 | | 0 1 0 1 | | r1 |
- | s2 | | 0 0 -1 1 | | r2 |
- | s3 | = | 0 1 -1 1 | \ r3 /
- | s4 | | -1 1 -1 1 |
- | s5 | | 0 1 0 0 |
- \ s6 / \ 0 0 1 0 /
+ | s1 | | 0 1 0 0 | | r1 |
+ | s2 | | 0 0 1 1 | | r2 |
+ | s3 | = | -1 0 1 1 | \ r3 /
+ | s4 | | 1 0 -1 0 |
+ | s5 | | 1 1 -1 -1 |
+ \ s6 / \ 0 0 0 1 /
/ t0 \ / 1 0 0 0 \ / m0 \
- | t1 | | 0 1 0 1 | | m1 |
- | t2 | | 0 0 -1 1 | | m2 |
- | t3 | = | 0 1 -1 1 | \ m3 /
- | t4 | | -1 1 -1 1 |
- | t5 | | 0 1 0 0 |
- \ t6 / \ 0 0 1 0 /
-
- Note: the two matrices above are the same, but s_i and t_i are used
- in the same product, only for i<4, see "A Strassen-like Matrix
- Multiplication suited for squaring and higher power computation" by
- M. Bodrato, in Proceedings of ISSAC 2010.
-
- / r0 \ / 1 0 0 0 0 1 0 \ / s0*t0 \
- | r1 | = | 0 0 -1 1 -1 1 0 | | s1*t1 |
- | r2 | | 0 1 0 -1 0 -1 -1 | | s2*t2 |
- \ r3 / \ 0 1 1 -1 0 -1 0 / | s3*t3 |
- | s4*t5 |
- | s5*t6 |
- \ s6*t4 /
-
- The scheduling uses two temporaries U0 and U1 to store products, and
- two, S0 and T0, to store combinations of entries of the two
- operands.
+ | t1 | | 0 0 1 0 | | m1 |
+ | t2 | | -1 1 0 0 | | m2 |
+ | t3 | = | 1 -1 0 1 | \ m3 /
+ | t4 | | 0 -1 0 1 |
+ | t5 | | 0 0 0 1 |
+ \ t6 / \ -1 1 1 -1 /
+
+ / r0 \ / 1 1 0 0 0 0 0 \ / s0 * t0 \
+ | r1 | = | 1 0 1 1 0 1 0 | | s1 * t1 |
+ | r2 | | 1 0 0 1 1 0 1 | | s2 * t2 |
+ \ r3 / \ 1 0 1 1 1 0 0 / | s3 * t3 |
+ | s4 * t4 |
+ | s5 * t5 |
+ \ s6 * t6 /
*/
/* Computes R = R * M. Elements are numbers R = (r0, r1; r2, r3).
*
* Resulting elements are of size up to rn + mn + 1.
*
- * Temporary storage: 3 rn + 3 mn + 5. */
+ * Temporary storage: 4 rn + 4 mn + 5. */
void
mpn_matrix22_mul_strassen (mp_ptr r0, mp_ptr r1, mp_ptr r2, mp_ptr r3, mp_size_t rn,
mp_srcptr m0, mp_srcptr m1, mp_srcptr m2, mp_srcptr m3, mp_size_t mn,
mp_ptr tp)
{
- mp_ptr s0, t0, u0, u1;
- int r1s, r3s, s0s, t0s, u1s;
- s0 = tp; tp += rn + 1;
- t0 = tp; tp += mn + 1;
+ mp_ptr s2, s3, t2, t3, u0, u1;
+ int r2s, r3s, s3s, t2s, t3s, u0s, u1s;
+ s2 = tp; tp += rn;
+ s3 = tp; tp += rn + 1;
+ t2 = tp; tp += mn;
+ t3 = tp; tp += mn + 1;
u0 = tp; tp += rn + mn + 1;
u1 = tp; /* rn + mn + 2 */
- MUL (u0, r1, rn, m2, mn); /* u5 = s5 * t6 */
- r3s = abs_sub_n (r3, r3, r2, rn); /* r3 - r2 */
- if (r3s)
- {
- r1s = abs_sub_n (r1, r1, r3, rn);
- r1[rn] = 0;
- }
- else
- {
- r1[rn] = mpn_add_n (r1, r1, r3, rn);
- r1s = 0; /* r1 - r2 + r3 */
- }
- if (r1s)
- {
- s0[rn] = mpn_add_n (s0, r1, r0, rn);
- s0s = 0;
- }
- else if (r1[rn] != 0)
- {
- s0[rn] = r1[rn] - mpn_sub_n (s0, r1, r0, rn);
- s0s = 1; /* s4 = -r0 + r1 - r2 + r3 */
- /* Reverse sign! */
- }
- else
- {
- s0s = abs_sub_n (s0, r0, r1, rn);
- s0[rn] = 0;
- }
- MUL (u1, r0, rn, m0, mn); /* u0 = s0 * t0 */
- r0[rn+mn] = mpn_add_n (r0, u0, u1, rn + mn);
- ASSERT (r0[rn+mn] < 2); /* u0 + u5 */
+ MUL (u0, r0, rn, m0, mn); /* 0 */
+ MUL (u1, r1, rn, m2, mn); /* 1 */
- t0s = abs_sub_n (t0, m3, m2, mn);
- u1s = r3s^t0s^1; /* Reverse sign! */
- MUL (u1, r3, rn, t0, mn); /* u2 = s2 * t2 */
- u1[rn+mn] = 0;
- if (t0s)
- {
- t0s = abs_sub_n (t0, m1, t0, mn);
- t0[mn] = 0;
- }
- else
- {
- t0[mn] = mpn_add_n (t0, t0, m1, mn);
- }
+ MPN_COPY (s2, r3, rn);
- /* FIXME: Could be simplified if we had space for rn + mn + 2 limbs
- at r3. I'd expect that for matrices of random size, the high
- words t0[mn] and r1[rn] are non-zero with a pretty small
- probability. If that can be confirmed this should be done as an
- unconditional rn x (mn+1) followed by an if (UNLIKELY (r1[rn]))
- add_n. */
- if (t0[mn] != 0)
+ r3[rn] = mpn_add_n (r3, r3, r2, rn);
+ r0[rn] = 0;
+ s3s = abs_sub_n (s3, r3, r0, rn + 1);
+ t2s = abs_sub_n (t2, m1, m0, mn);
+ if (t2s)
{
- MUL (r3, r1, rn, t0, mn + 1); /* u3 = s3 * t3 */
- ASSERT (r1[rn] < 2);
- if (r1[rn] != 0)
- mpn_add_n (r3 + rn, r3 + rn, t0, mn + 1);
+ t3[mn] = mpn_add_n (t3, m3, t2, mn);
+ t3s = 0;
}
else
{
- MUL (r3, r1, rn + 1, t0, mn);
+ t3s = abs_sub_n (t3, m3, t2, mn);
+ t3[mn] = 0;
}
- ASSERT (r3[rn+mn] < 4);
+ r2s = abs_sub_n (r2, r0, r2, rn);
+ r0[rn+mn] = mpn_add_n (r0, u0, u1, rn + mn);
- u0[rn+mn] = 0;
- if (r1s^t0s)
+ MUL(u1, s3, rn+1, t3, mn+1); /* 3 */
+ u1s = s3s ^ t3s;
+ ASSERT (u1[rn+mn+1] == 0);
+ ASSERT (u1[rn+mn] < 4);
+
+ if (u1s)
{
- r3s = abs_sub_n (r3, u0, r3, rn + mn + 1);
+ u0[rn+mn] = 0;
+ u0s = abs_sub_n (u0, u0, u1, rn + mn + 1);
}
else
{
- ASSERT_NOCARRY (mpn_add_n (r3, r3, u0, rn + mn + 1));
- r3s = 0; /* u3 + u5 */
+ u0[rn+mn] = u1[rn+mn] + mpn_add_n (u0, u0, u1, rn + mn);
+ u0s = 0;
}
+ MUL(u1, r3, rn + 1, t2, mn); /* 2 */
+ u1s = t2s;
+ ASSERT (u1[rn+mn] < 2);
- if (t0s)
- {
- t0[mn] = mpn_add_n (t0, t0, m0, mn);
- }
- else if (t0[mn] != 0)
- {
- t0[mn] -= mpn_sub_n (t0, t0, m0, mn);
- }
- else
+ u1s = add_signed_n (u1, u0, u0s, u1, u1s, rn + mn + 1);
+
+ t2s = abs_sub_n (t2, m3, m1, mn);
+ if (s3s)
{
- t0s = abs_sub_n (t0, t0, m0, mn);
+ s3[rn] += mpn_add_n (s3, s3, r1, rn);
+ s3s = 0;
}
- MUL (u0, r2, rn, t0, mn + 1); /* u6 = s6 * t4 */
- ASSERT (u0[rn+mn] < 2);
- if (r1s)
+ else if (s3[rn] > 0)
{
- ASSERT_NOCARRY (mpn_sub_n (r1, r2, r1, rn));
+ s3[rn] -= mpn_sub_n (s3, s3, r1, rn);
+ s3s = 1;
}
else
{
- r1[rn] += mpn_add_n (r1, r1, r2, rn);
- }
- rn++;
- t0s = add_signed_n (r2, r3, r3s, u0, t0s, rn + mn);
- /* u3 + u5 + u6 */
- ASSERT (r2[rn+mn-1] < 4);
- r3s = add_signed_n (r3, r3, r3s, u1, u1s, rn + mn);
- /* -u2 + u3 + u5 */
- ASSERT (r3[rn+mn-1] < 3);
- MUL (u0, s0, rn, m1, mn); /* u4 = s4 * t5 */
- ASSERT (u0[rn+mn-1] < 2);
- t0[mn] = mpn_add_n (t0, m3, m1, mn);
- MUL (u1, r1, rn, t0, mn + 1); /* u1 = s1 * t1 */
- mn += rn;
- ASSERT (u1[mn-1] < 4);
- ASSERT (u1[mn] == 0);
- ASSERT_NOCARRY (add_signed_n (r1, r3, r3s, u0, s0s, mn));
- /* -u2 + u3 - u4 + u5 */
- ASSERT (r1[mn-1] < 2);
- if (r3s)
- {
- ASSERT_NOCARRY (mpn_add_n (r3, u1, r3, mn));
+ s3s = abs_sub_n (s3, r1, s3, rn);
}
- else
+ MUL (r1, s3, rn+1, m3, mn); /* 5 */
+ ASSERT_NOCARRY(add_signed_n (r1, r1, s3s, u1, u1s, rn + mn + 1));
+ ASSERT (r1[rn + mn] < 2);
+
+ MUL (r3, r2, rn, t2, mn); /* 4 */
+ r3s = r2s ^ t2s;
+ r3[rn + mn] = 0;
+ u0s = add_signed_n (u0, u0, u0s, r3, r3s, rn + mn + 1);
+ ASSERT_NOCARRY (add_signed_n (r3, r3, r3s, u1, u1s, rn + mn + 1));
+ ASSERT (r3[rn + mn] < 2);
+
+ if (t3s)
{
- ASSERT_NOCARRY (mpn_sub_n (r3, u1, r3, mn));
- /* u1 + u2 - u3 - u5 */
+ t3[mn] += mpn_add_n (t3, m2, t3, mn);
+ t3s = 0;
}
- ASSERT (r3[mn-1] < 2);
- if (t0s)
+ else if (t3[mn] > 0)
{
- ASSERT_NOCARRY (mpn_add_n (r2, u1, r2, mn));
+ t3[mn] -= mpn_sub_n (t3, t3, m2, mn);
+ t3s = 1;
}
else
{
- ASSERT_NOCARRY (mpn_sub_n (r2, u1, r2, mn));
- /* u1 - u3 - u5 - u6 */
+ t3s = abs_sub_n (t3, m2, t3, mn);
}
- ASSERT (r2[mn-1] < 2);
+ MUL (r2, s2, rn, t3, mn + 1); /* 6 */
+
+ ASSERT_NOCARRY (add_signed_n (r2, r2, t3s, u0, u0s, rn + mn + 1));
+ ASSERT (r2[rn + mn] < 2);
}
void
diff --git a/gmp/mpn/generic/matrix22_mul1_inverse_vector.c b/gmp/mpn/generic/matrix22_mul1_inverse_vector.c
deleted file mode 100644
index 83b2fb5134..0000000000
--- a/gmp/mpn/generic/matrix22_mul1_inverse_vector.c
+++ /dev/null
@@ -1,65 +0,0 @@
-/* matrix22_mul1_inverse_vector.c
-
- THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2008, 2010 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-/* Sets (r;b) = M^{-1}(a;b), with M^{-1} = (u11, -u01; -u10, u00) from
- the left. Uses three buffers, to avoid a copy. */
-mp_size_t
-mpn_matrix22_mul1_inverse_vector (const struct hgcd_matrix1 *M,
- mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)
-{
- mp_limb_t h0, h1;
-
- /* Compute (r;b) <-- (u11 a - u01 b; -u10 a + u00 b) as
-
- r = u11 * a
- r -= u01 * b
- b *= u00
- b -= u10 * a
- */
-
- h0 = mpn_mul_1 (rp, ap, n, M->u[1][1]);
- h1 = mpn_submul_1 (rp, bp, n, M->u[0][1]);
- ASSERT (h0 == h1);
-
- h0 = mpn_mul_1 (bp, bp, n, M->u[0][0]);
- h1 = mpn_submul_1 (bp, ap, n, M->u[1][0]);
- ASSERT (h0 == h1);
-
- n -= (rp[n-1] | bp[n-1]) == 0;
- return n;
-}
diff --git a/gmp/mpn/generic/mod_1.c b/gmp/mpn/generic/mod_1.c
index 0212020201..7c892814e1 100644
--- a/gmp/mpn/generic/mod_1.c
+++ b/gmp/mpn/generic/mod_1.c
@@ -3,34 +3,23 @@
Return the single-limb remainder.
There are no constraints on the value of the divisor.
-Copyright 1991, 1993, 1994, 1999, 2000, 2002, 2007-2009, 2012 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2002, 2007, 2008, 2009 Free
+Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
@@ -53,43 +42,18 @@ see https://www.gnu.org/licenses/. */
#define MOD_1_UNNORM_THRESHOLD 0
#endif
-#ifndef MOD_1U_TO_MOD_1_1_THRESHOLD
-#define MOD_1U_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */
+#ifndef MOD_1_1_THRESHOLD
+#define MOD_1_1_THRESHOLD MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */
#endif
-#ifndef MOD_1N_TO_MOD_1_1_THRESHOLD
-#define MOD_1N_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX /* default is not to use mpn_mod_1s */
+#ifndef MOD_1_2_THRESHOLD
+#define MOD_1_2_THRESHOLD 10
#endif
-#ifndef MOD_1_1_TO_MOD_1_2_THRESHOLD
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 10
+#ifndef MOD_1_4_THRESHOLD
+#define MOD_1_4_THRESHOLD 120
#endif
-#ifndef MOD_1_2_TO_MOD_1_4_THRESHOLD
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 20
-#endif
-
-#if TUNE_PROGRAM_BUILD && !HAVE_NATIVE_mpn_mod_1_1p
-/* Duplicates declarations in tune/speed.h */
-mp_limb_t mpn_mod_1_1p_1 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]);
-mp_limb_t mpn_mod_1_1p_2 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]);
-
-void mpn_mod_1_1p_cps_1 (mp_limb_t [4], mp_limb_t);
-void mpn_mod_1_1p_cps_2 (mp_limb_t [4], mp_limb_t);
-
-#undef mpn_mod_1_1p
-#define mpn_mod_1_1p(ap, n, b, pre) \
- (mod_1_1p_method == 1 ? mpn_mod_1_1p_1 (ap, n, b, pre) \
- : (mod_1_1p_method == 2 ? mpn_mod_1_1p_2 (ap, n, b, pre) \
- : __gmpn_mod_1_1p (ap, n, b, pre)))
-
-#undef mpn_mod_1_1p_cps
-#define mpn_mod_1_1p_cps(pre, b) \
- (mod_1_1p_method == 1 ? mpn_mod_1_1p_cps_1 (pre, b) \
- : (mod_1_1p_method == 2 ? mpn_mod_1_1p_cps_2 (pre, b) \
- : __gmpn_mod_1_1p_cps (pre, b)))
-#endif /* TUNE_PROGRAM_BUILD && !HAVE_NATIVE_mpn_mod_1_1p */
-
/* The comments in mpn/generic/divrem_1.c apply here too.
@@ -150,12 +114,12 @@ mpn_mod_1_unnorm (mp_srcptr up, mp_size_t un, mp_limb_t d)
if (UDIV_NEEDS_NORMALIZATION
&& BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))
{
- mp_limb_t nshift;
for (i = un - 2; i >= 0; i--)
{
n0 = up[i] << GMP_NAIL_BITS;
- nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
- udiv_qrnnd (dummy, r, r, nshift, d);
+ udiv_qrnnd (dummy, r, r,
+ (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt)),
+ d);
r >>= GMP_NAIL_BITS;
n1 = n0;
}
@@ -165,18 +129,19 @@ mpn_mod_1_unnorm (mp_srcptr up, mp_size_t un, mp_limb_t d)
}
else
{
- mp_limb_t inv, nshift;
+ mp_limb_t inv;
invert_limb (inv, d);
for (i = un - 2; i >= 0; i--)
{
n0 = up[i] << GMP_NAIL_BITS;
- nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
- udiv_rnnd_preinv (r, r, nshift, d, inv);
+ udiv_qrnnd_preinv (dummy, r, r,
+ (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt)),
+ d, inv);
r >>= GMP_NAIL_BITS;
n1 = n0;
}
- udiv_rnnd_preinv (r, r, n1 << cnt, d, inv);
+ udiv_qrnnd_preinv (dummy, r, r, n1 << cnt, d, inv);
r >>= GMP_NAIL_BITS;
return r >> cnt;
}
@@ -222,7 +187,7 @@ mpn_mod_1_norm (mp_srcptr up, mp_size_t un, mp_limb_t d)
for (i = un - 1; i >= 0; i--)
{
n0 = up[i] << GMP_NAIL_BITS;
- udiv_rnnd_preinv (r, r, n0, d, inv);
+ udiv_qrnnd_preinv (dummy, r, r, n0, d, inv);
r >>= GMP_NAIL_BITS;
}
return r;
@@ -242,40 +207,29 @@ mpn_mod_1 (mp_srcptr ap, mp_size_t n, mp_limb_t b)
if (UNLIKELY ((b & GMP_NUMB_HIGHBIT) != 0))
{
- if (BELOW_THRESHOLD (n, MOD_1N_TO_MOD_1_1_THRESHOLD))
- {
- return mpn_mod_1_norm (ap, n, b);
- }
- else
- {
- mp_limb_t pre[4];
- mpn_mod_1_1p_cps (pre, b);
- return mpn_mod_1_1p (ap, n, b, pre);
- }
+ /* The functions below do not handle this large divisor. */
+ return mpn_mod_1_norm (ap, n, b);
+ }
+ else if (BELOW_THRESHOLD (n, MOD_1_1_THRESHOLD))
+ {
+ return mpn_mod_1_unnorm (ap, n, b);
+ }
+ else if (BELOW_THRESHOLD (n, MOD_1_2_THRESHOLD))
+ {
+ mp_limb_t pre[4];
+ mpn_mod_1s_1p_cps (pre, b);
+ return mpn_mod_1s_1p (ap, n, b << pre[1], pre);
+ }
+ else if (BELOW_THRESHOLD (n, MOD_1_4_THRESHOLD) || UNLIKELY (b > GMP_NUMB_MASK / 4))
+ {
+ mp_limb_t pre[5];
+ mpn_mod_1s_2p_cps (pre, b);
+ return mpn_mod_1s_2p (ap, n, b << pre[1], pre);
}
else
{
- if (BELOW_THRESHOLD (n, MOD_1U_TO_MOD_1_1_THRESHOLD))
- {
- return mpn_mod_1_unnorm (ap, n, b);
- }
- else if (BELOW_THRESHOLD (n, MOD_1_1_TO_MOD_1_2_THRESHOLD))
- {
- mp_limb_t pre[4];
- mpn_mod_1_1p_cps (pre, b);
- return mpn_mod_1_1p (ap, n, b << pre[1], pre);
- }
- else if (BELOW_THRESHOLD (n, MOD_1_2_TO_MOD_1_4_THRESHOLD) || UNLIKELY (b > GMP_NUMB_MASK / 4))
- {
- mp_limb_t pre[5];
- mpn_mod_1s_2p_cps (pre, b);
- return mpn_mod_1s_2p (ap, n, b << pre[1], pre);
- }
- else
- {
- mp_limb_t pre[7];
- mpn_mod_1s_4p_cps (pre, b);
- return mpn_mod_1s_4p (ap, n, b << pre[1], pre);
- }
+ mp_limb_t pre[7];
+ mpn_mod_1s_4p_cps (pre, b);
+ return mpn_mod_1s_4p (ap, n, b << pre[1], pre);
}
}
diff --git a/gmp/mpn/generic/mod_1_1.c b/gmp/mpn/generic/mod_1_1.c
index 2e111399ed..27c7f8f1b6 100644
--- a/gmp/mpn/generic/mod_1_1.c
+++ b/gmp/mpn/generic/mod_1_1.c
@@ -1,208 +1,74 @@
-/* mpn_mod_1_1p (ap, n, b, cps)
+/* mpn_mod_1s_1p (ap, n, b, cps)
Divide (ap,,n) by b. Return the single-limb remainder.
+ Requires that b < B / 2.
- Contributed to the GNU project by Torbjorn Granlund and Niels Möller.
- Based on a suggestion by Peter L. Montgomery.
+ Contributed to the GNU project by Torbjorn Granlund.
THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2008-2011, 2013 Free Software Foundation, Inc.
+Copyright 2008, 2009 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
-#ifndef MOD_1_1P_METHOD
-# define MOD_1_1P_METHOD 1 /* need to make sure this is 2 for asm testing */
-#endif
-
-/* Define some longlong.h-style macros, but for wider operations.
- * add_mssaaaa is like longlong.h's add_ssaaaa, but also generates
- * carry out, in the form of a mask. */
-
-#if defined (__GNUC__)
-
-#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
-#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
- __asm__ ( "add %6, %k2\n\t" \
- "adc %4, %k1\n\t" \
- "sbb %k0, %k0" \
- : "=r" (m), "=r" (s1), "=&r" (s0) \
- : "1" ((USItype)(a1)), "g" ((USItype)(b1)), \
- "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
-#endif
-
-#if HAVE_HOST_CPU_FAMILY_x86_64 && W_TYPE_SIZE == 64
-#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
- __asm__ ( "add %6, %q2\n\t" \
- "adc %4, %q1\n\t" \
- "sbb %q0, %q0" \
- : "=r" (m), "=r" (s1), "=&r" (s0) \
- : "1" ((UDItype)(a1)), "rme" ((UDItype)(b1)), \
- "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
-#endif
-
-#if defined (__sparc__) && W_TYPE_SIZE == 32
-#define add_mssaaaa(m, sh, sl, ah, al, bh, bl) \
- __asm__ ( "addcc %r5, %6, %2\n\t" \
- "addxcc %r3, %4, %1\n\t" \
- "subx %%g0, %%g0, %0" \
- : "=r" (m), "=r" (sh), "=&r" (sl) \
- : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl) \
- __CLOBBER_CC)
-#endif
-
-#if defined (__sparc__) && W_TYPE_SIZE == 64
-#define add_mssaaaa(m, sh, sl, ah, al, bh, bl) \
- __asm__ ( "addcc %r5, %6, %2\n\t" \
- "addccc %r7, %8, %%g0\n\t" \
- "addccc %r3, %4, %1\n\t" \
- "clr %0\n\t" \
- "movcs %%xcc, -1, %0" \
- : "=r" (m), "=r" (sh), "=&r" (sl) \
- : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl), \
- "rJ" ((al) >> 32), "rI" ((bl) >> 32) \
- __CLOBBER_CC)
-#if __VIS__ >= 0x300
-#undef add_mssaaaa
-#define add_mssaaaa(m, sh, sl, ah, al, bh, bl) \
- __asm__ ( "addcc %r5, %6, %2\n\t" \
- "addxccc %r3, %4, %1\n\t" \
- "clr %0\n\t" \
- "movcs %%xcc, -1, %0" \
- : "=r" (m), "=r" (sh), "=&r" (sl) \
- : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl) \
- __CLOBBER_CC)
-#endif
-#endif
-
-#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
-/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
- processor running in 32-bit mode, since the carry flag then gets the 32-bit
- carry. */
-#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
- __asm__ ( "add%I6c %2, %5, %6\n\t" \
- "adde %1, %3, %4\n\t" \
- "subfe %0, %0, %0\n\t" \
- "nor %0, %0, %0" \
- : "=r" (m), "=r" (s1), "=&r" (s0) \
- : "r" (a1), "r" (b1), "%r" (a0), "rI" (b0))
-#endif
-
-#if defined (__s390x__) && W_TYPE_SIZE == 64
-#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
- __asm__ ( "algr %2, %6\n\t" \
- "alcgr %1, %4\n\t" \
- "lghi %0, 0\n\t" \
- "alcgr %0, %0\n\t" \
- "lcgr %0, %0" \
- : "=r" (m), "=r" (s1), "=&r" (s0) \
- : "1" ((UDItype)(a1)), "r" ((UDItype)(b1)), \
- "%2" ((UDItype)(a0)), "r" ((UDItype)(b0)) __CLOBBER_CC)
-#endif
-
-#if defined (__arm__) && W_TYPE_SIZE == 32
-#define add_mssaaaa(m, sh, sl, ah, al, bh, bl) \
- __asm__ ( "adds %2, %5, %6\n\t" \
- "adcs %1, %3, %4\n\t" \
- "movcc %0, #0\n\t" \
- "movcs %0, #-1" \
- : "=r" (m), "=r" (sh), "=&r" (sl) \
- : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
-#endif
-#endif /* defined (__GNUC__) */
-
-#ifndef add_mssaaaa
-#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
- do { \
- UWtype __s0, __s1, __c0, __c1; \
- __s0 = (a0) + (b0); \
- __s1 = (a1) + (b1); \
- __c0 = __s0 < (a0); \
- __c1 = __s1 < (a1); \
- (s0) = __s0; \
- __s1 = __s1 + __c0; \
- (s1) = __s1; \
- (m) = - (__c1 + (__s1 < __c0)); \
- } while (0)
-#endif
-
-#if MOD_1_1P_METHOD == 1
void
-mpn_mod_1_1p_cps (mp_limb_t cps[4], mp_limb_t b)
+mpn_mod_1s_1p_cps (mp_limb_t cps[4], mp_limb_t b)
{
mp_limb_t bi;
mp_limb_t B1modb, B2modb;
int cnt;
+ ASSERT (b <= GMP_NUMB_MAX / 2);
+
count_leading_zeros (cnt, b);
b <<= cnt;
invert_limb (bi, b);
- cps[0] = bi;
- cps[1] = cnt;
-
- B1modb = -b;
- if (LIKELY (cnt != 0))
- B1modb *= ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
+ B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
ASSERT (B1modb <= b); /* NB: not fully reduced mod b */
- cps[2] = B1modb >> cnt;
+ udiv_rnd_preinv (B2modb, B1modb, b, bi);
+
+ B1modb >>= cnt;
+ B2modb >>= cnt;
- /* In the normalized case, this can be simplified to
- *
- * B2modb = - b * bi;
- * ASSERT (B2modb <= b); // NB: equality iff b = B/2
- */
- udiv_rnnd_preinv (B2modb, B1modb, CNST_LIMB(0), b, bi);
- cps[3] = B2modb >> cnt;
+ cps[0] = bi;
+ cps[1] = cnt;
+ cps[2] = B1modb;
+ cps[3] = B2modb;
}
mp_limb_t
-mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t bmodb[4])
+mpn_mod_1s_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t bmodb[4])
{
- mp_limb_t rh, rl, bi, ph, pl, r;
+ mp_limb_t rh, rl, bi, q, ph, pl, r;
mp_limb_t B1modb, B2modb;
mp_size_t i;
int cnt;
- mp_limb_t mask;
-
- ASSERT (n >= 2); /* fix tuneup.c if this is changed */
B1modb = bmodb[2];
B2modb = bmodb[3];
- rl = ap[n - 1];
- umul_ppmm (ph, pl, rl, B1modb);
- add_ssaaaa (rh, rl, ph, pl, CNST_LIMB(0), ap[n - 2]);
+ umul_ppmm (ph, pl, ap[n - 1], B1modb);
+ add_ssaaaa (rh, rl, ph, pl, 0, ap[n - 2]);
for (i = n - 3; i >= 0; i -= 1)
{
@@ -211,122 +77,28 @@ mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t bmodb[4])
+ HI(rr) * (B^2 mod b) <= (B-1)(b-1)
*/
umul_ppmm (ph, pl, rl, B1modb);
- add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[i]);
+ add_ssaaaa (ph, pl, ph, pl, 0, ap[i]);
umul_ppmm (rh, rl, rh, B2modb);
add_ssaaaa (rh, rl, rh, rl, ph, pl);
}
- cnt = bmodb[1];
bi = bmodb[0];
-
- if (LIKELY (cnt != 0))
- rh = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
-
- mask = -(mp_limb_t) (rh >= b);
- rh -= mask & b;
-
- udiv_rnnd_preinv (r, rh, rl << cnt, b, bi);
-
- return r >> cnt;
-}
-#endif /* MOD_1_1P_METHOD == 1 */
-
-#if MOD_1_1P_METHOD == 2
-void
-mpn_mod_1_1p_cps (mp_limb_t cps[4], mp_limb_t b)
-{
- mp_limb_t bi;
- mp_limb_t B2modb;
- int cnt;
-
- count_leading_zeros (cnt, b);
-
- b <<= cnt;
- invert_limb (bi, b);
-
- cps[0] = bi;
- cps[1] = cnt;
-
- if (LIKELY (cnt != 0))
- {
- mp_limb_t B1modb = -b;
- B1modb *= ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
- ASSERT (B1modb <= b); /* NB: not fully reduced mod b */
- cps[2] = B1modb >> cnt;
- }
- B2modb = - b * bi;
- ASSERT (B2modb <= b); // NB: equality iff b = B/2
- cps[3] = B2modb;
-}
-
-mp_limb_t
-mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t bmodb[4])
-{
- int cnt;
- mp_limb_t bi, B1modb;
- mp_limb_t r0, r1;
- mp_limb_t r;
-
- ASSERT (n >= 2); /* fix tuneup.c if this is changed */
-
- r0 = ap[n-2];
- r1 = ap[n-1];
-
- if (n > 2)
- {
- mp_limb_t B2modb, B2mb;
- mp_limb_t p0, p1;
- mp_limb_t r2;
- mp_size_t j;
-
- B2modb = bmodb[3];
- B2mb = B2modb - b;
-
- umul_ppmm (p1, p0, r1, B2modb);
- add_mssaaaa (r2, r1, r0, r0, ap[n-3], p1, p0);
-
- for (j = n-4; j >= 0; j--)
- {
- mp_limb_t cy;
- /* mp_limb_t t = r0 + B2mb; */
- umul_ppmm (p1, p0, r1, B2modb);
-
- ADDC_LIMB (cy, r0, r0, r2 & B2modb);
- /* Alternative, for cmov: if (cy) r0 = t; */
- r0 -= (-cy) & b;
- add_mssaaaa (r2, r1, r0, r0, ap[j], p1, p0);
- }
-
- r1 -= (r2 & b);
- }
-
cnt = bmodb[1];
+#if 1
+ {
+ mp_limb_t mask;
+ r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
+ mask = -(mp_limb_t) (r >= b);
+ r -= mask & b;
+ }
+#else
+ udiv_qrnnd_preinv (q, r, rh >> (GMP_LIMB_BITS - cnt),
+ (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt)), b, bi);
+ ASSERT (q <= 1); /* optimize for small quotient? */
+#endif
- if (LIKELY (cnt != 0))
- {
- mp_limb_t t;
- mp_limb_t B1modb = bmodb[2];
-
- umul_ppmm (r1, t, r1, B1modb);
- r0 += t;
- r1 += (r0 < t);
-
- /* Normalize */
- r1 = (r1 << cnt) | (r0 >> (GMP_LIMB_BITS - cnt));
- r0 <<= cnt;
-
- /* NOTE: Might get r1 == b here, but udiv_rnnd_preinv allows that. */
- }
- else
- {
- mp_limb_t mask = -(mp_limb_t) (r1 >= b);
- r1 -= mask & b;
- }
-
- bi = bmodb[0];
+ udiv_qrnnd_preinv (q, r, r, rl << cnt, b, bi);
- udiv_rnnd_preinv (r, r1, r0, b, bi);
return r >> cnt;
}
-#endif /* MOD_1_1P_METHOD == 2 */
diff --git a/gmp/mpn/generic/mod_1_2.c b/gmp/mpn/generic/mod_1_2.c
index 7acf3dbdd1..ffadd536de 100644
--- a/gmp/mpn/generic/mod_1_2.c
+++ b/gmp/mpn/generic/mod_1_2.c
@@ -3,39 +3,27 @@
Requires that b < B / 2.
Contributed to the GNU project by Torbjorn Granlund.
- Based on a suggestion by Peter L. Montgomery.
THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2008-2010 Free Software Foundation, Inc.
+Copyright 2008, 2009 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
@@ -48,75 +36,49 @@ mpn_mod_1s_2p_cps (mp_limb_t cps[5], mp_limb_t b)
mp_limb_t B1modb, B2modb, B3modb;
int cnt;
- ASSERT (b <= (~(mp_limb_t) 0) / 2);
+ ASSERT (b <= GMP_NUMB_MAX / 2);
count_leading_zeros (cnt, b);
b <<= cnt;
invert_limb (bi, b);
- cps[0] = bi;
- cps[1] = cnt;
-
B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
ASSERT (B1modb <= b); /* NB: not fully reduced mod b */
- cps[2] = B1modb >> cnt;
+ udiv_rnd_preinv (B2modb, B1modb, b, bi);
+ udiv_rnd_preinv (B3modb, B2modb, b, bi);
- udiv_rnnd_preinv (B2modb, B1modb, CNST_LIMB(0), b, bi);
+ cps[0] = bi;
+ cps[1] = cnt;
+ cps[2] = B1modb >> cnt;
cps[3] = B2modb >> cnt;
-
- udiv_rnnd_preinv (B3modb, B2modb, CNST_LIMB(0), b, bi);
cps[4] = B3modb >> cnt;
-
-#if WANT_ASSERT
- {
- int i;
- b = cps[2];
- for (i = 3; i <= 4; i++)
- {
- b += cps[i];
- ASSERT (b >= cps[i]);
- }
- }
-#endif
}
mp_limb_t
-mpn_mod_1s_2p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[5])
+mpn_mod_1s_2p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[5])
{
- mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
+ mp_limb_t rh, rl, bi, q, ph, pl, ch, cl, r;
mp_limb_t B1modb, B2modb, B3modb;
mp_size_t i;
int cnt;
- ASSERT (n >= 1);
-
B1modb = cps[2];
B2modb = cps[3];
B3modb = cps[4];
if ((n & 1) != 0)
{
- if (n == 1)
- {
- rl = ap[n - 1];
- bi = cps[0];
- cnt = cps[1];
- udiv_rnnd_preinv (r, rl >> (GMP_LIMB_BITS - cnt),
- rl << cnt, b, bi);
- return r >> cnt;
- }
-
- umul_ppmm (ph, pl, ap[n - 2], B1modb);
- add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 3]);
umul_ppmm (rh, rl, ap[n - 1], B2modb);
+ umul_ppmm (ph, pl, ap[n - 2], B1modb);
+ add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 3]);
add_ssaaaa (rh, rl, rh, rl, ph, pl);
n--;
}
else
{
- rh = ap[n - 1];
- rl = ap[n - 2];
+ umul_ppmm (rh, rl, ap[n - 1], B1modb);
+ add_ssaaaa (rh, rl, rh, rl, 0, ap[n - 2]);
}
for (i = n - 4; i >= 0; i -= 2)
@@ -127,7 +89,7 @@ mpn_mod_1s_2p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[5])
+ HI(rr) * (B^3 mod b) <= (B-1)(b-1)
*/
umul_ppmm (ph, pl, ap[i + 1], B1modb);
- add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[i + 0]);
+ add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 0]);
umul_ppmm (ch, cl, rl, B2modb);
add_ssaaaa (ph, pl, ph, pl, ch, cl);
@@ -136,14 +98,20 @@ mpn_mod_1s_2p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[5])
add_ssaaaa (rh, rl, rh, rl, ph, pl);
}
- umul_ppmm (rh, cl, rh, B1modb);
- add_ssaaaa (rh, rl, rh, rl, CNST_LIMB(0), cl);
-
- cnt = cps[1];
bi = cps[0];
+ cnt = cps[1];
+#if 1
+ umul_ppmm (rh, cl, rh, B1modb);
+ add_ssaaaa (rh, rl, rh, rl, 0, cl);
r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
- udiv_rnnd_preinv (r, r, rl << cnt, b, bi);
+#else
+ udiv_qrnnd_preinv (q, r, rh >> (GMP_LIMB_BITS - cnt),
+ (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt)), b, bi);
+ ASSERT (q <= 2); /* optimize for small quotient? */
+#endif
+
+ udiv_qrnnd_preinv (q, r, r, rl << cnt, b, bi);
return r >> cnt;
}
diff --git a/gmp/mpn/generic/mod_1_3.c b/gmp/mpn/generic/mod_1_3.c
index f4137f4315..77989fc0ae 100644
--- a/gmp/mpn/generic/mod_1_3.c
+++ b/gmp/mpn/generic/mod_1_3.c
@@ -3,39 +3,27 @@
Requires that d < B / 3.
Contributed to the GNU project by Torbjorn Granlund.
- Based on a suggestion by Peter L. Montgomery.
THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2008-2010, 2013 Free Software Foundation, Inc.
+Copyright 2008, 2009 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
@@ -48,82 +36,46 @@ mpn_mod_1s_3p_cps (mp_limb_t cps[6], mp_limb_t b)
mp_limb_t B1modb, B2modb, B3modb, B4modb;
int cnt;
- ASSERT (b <= (~(mp_limb_t) 0) / 3);
+ ASSERT (b <= GMP_NUMB_MAX / 3);
count_leading_zeros (cnt, b);
b <<= cnt;
invert_limb (bi, b);
- cps[0] = bi;
- cps[1] = cnt;
-
B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
ASSERT (B1modb <= b); /* NB: not fully reduced mod b */
- cps[2] = B1modb >> cnt;
+ udiv_rnd_preinv (B2modb, B1modb, b, bi);
+ udiv_rnd_preinv (B3modb, B2modb, b, bi);
+ udiv_rnd_preinv (B4modb, B3modb, b, bi);
- udiv_rnnd_preinv (B2modb, B1modb, CNST_LIMB(0), b, bi);
+ cps[0] = bi;
+ cps[1] = cnt;
+ cps[2] = B1modb >> cnt;
cps[3] = B2modb >> cnt;
-
- udiv_rnnd_preinv (B3modb, B2modb, CNST_LIMB(0), b, bi);
cps[4] = B3modb >> cnt;
-
- udiv_rnnd_preinv (B4modb, B3modb, CNST_LIMB(0), b, bi);
cps[5] = B4modb >> cnt;
-
-#if WANT_ASSERT
- {
- int i;
- b = cps[2];
- for (i = 3; i <= 5; i++)
- {
- b += cps[i];
- ASSERT (b >= cps[i]);
- }
- }
-#endif
}
mp_limb_t
-mpn_mod_1s_3p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[6])
+mpn_mod_1s_3p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[6])
{
- mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
+ mp_limb_t rh, rl, bi, q, ph, pl, ch, cl, r;
mp_limb_t B1modb, B2modb, B3modb, B4modb;
mp_size_t i;
int cnt;
- ASSERT (n >= 1);
-
B1modb = cps[2];
B2modb = cps[3];
B3modb = cps[4];
B4modb = cps[5];
- /* We compute n mod 3 in a tricky way, which works except for when n is so
- close to the maximum size that we don't need to support it. The final
- cast to int is a workaround for HP cc. */
- switch ((int) ((mp_limb_t) n * MODLIMB_INVERSE_3 >> (GMP_NUMB_BITS - 2)))
- {
- case 0:
- umul_ppmm (ph, pl, ap[n - 2], B1modb);
- add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 3]);
- umul_ppmm (rh, rl, ap[n - 1], B2modb);
- add_ssaaaa (rh, rl, rh, rl, ph, pl);
- n -= 3;
- break;
- case 2: /* n mod 3 = 1 */
- rh = 0;
- rl = ap[n - 1];
- n -= 1;
- break;
- case 1: /* n mod 3 = 2 */
- rh = ap[n - 1];
- rl = ap[n - 2];
- n -= 2;
- break;
- }
+ umul_ppmm (ph, pl, ap[n - 2], B1modb);
+ add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 3]);
+ umul_ppmm (ch, cl, ap[n - 1], B2modb);
+ add_ssaaaa (rh, rl, ph, pl, ch, cl);
- for (i = n - 3; i >= 0; i -= 3)
+ for (i = n - 6; i >= 0; i -= 3)
{
/* rr = ap[i] < B
+ ap[i+1] * (B mod b) <= (B-1)(b-1)
@@ -132,7 +84,7 @@ mpn_mod_1s_3p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[6])
+ HI(rr) * (B^4 mod b) <= (B-1)(b-1)
*/
umul_ppmm (ph, pl, ap[i + 1], B1modb);
- add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[i + 0]);
+ add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 0]);
umul_ppmm (ch, cl, ap[i + 2], B2modb);
add_ssaaaa (ph, pl, ph, pl, ch, cl);
@@ -144,14 +96,35 @@ mpn_mod_1s_3p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[6])
add_ssaaaa (rh, rl, rh, rl, ph, pl);
}
- umul_ppmm (rh, cl, rh, B1modb);
- add_ssaaaa (rh, rl, rh, rl, CNST_LIMB(0), cl);
+ if (i >= -2)
+ {
+ umul_ppmm (ph, pl, rl, B1modb);
+ add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 2]);
+ umul_ppmm (rh, rl, rh, B2modb);
+ add_ssaaaa (rh, rl, rh, rl, ph, pl);
+ if (i >= -1)
+ {
+ umul_ppmm (ph, pl, rl, B1modb);
+ add_ssaaaa (ph, pl, ph, pl, 0, ap[0]);
+ umul_ppmm (rh, rl, rh, B2modb);
+ add_ssaaaa (rh, rl, rh, rl, ph, pl);
+ }
+ }
- cnt = cps[1];
bi = cps[0];
+ cnt = cps[1];
+#if 1
+ umul_ppmm (rh, cl, rh, B1modb);
+ add_ssaaaa (rh, rl, rh, rl, 0, cl);
r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
- udiv_rnnd_preinv (r, r, rl << cnt, b, bi);
+#else
+ udiv_qrnnd_preinv (q, r, rh >> (GMP_LIMB_BITS - cnt),
+ (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt)), b, bi);
+ ASSERT (q <= 3); /* optimize for small quotient? */
+#endif
+
+ udiv_qrnnd_preinv (q, r, r, rl << cnt, b, bi);
return r >> cnt;
}
diff --git a/gmp/mpn/generic/mod_1_4.c b/gmp/mpn/generic/mod_1_4.c
index 716a0c66de..74893386a9 100644
--- a/gmp/mpn/generic/mod_1_4.c
+++ b/gmp/mpn/generic/mod_1_4.c
@@ -1,41 +1,29 @@
-/* mpn_mod_1s_4p (ap, n, b, cps)
+/* mpn_mod_1s_3p (ap, n, b, cps)
Divide (ap,,n) by b. Return the single-limb remainder.
Requires that d < B / 4.
Contributed to the GNU project by Torbjorn Granlund.
- Based on a suggestion by Peter L. Montgomery.
THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2008-2010 Free Software Foundation, Inc.
+Copyright 2008, 2009 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
@@ -48,92 +36,53 @@ mpn_mod_1s_4p_cps (mp_limb_t cps[7], mp_limb_t b)
mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
int cnt;
- ASSERT (b <= (~(mp_limb_t) 0) / 4);
+ ASSERT (b <= GMP_NUMB_MAX / 4);
count_leading_zeros (cnt, b);
b <<= cnt;
invert_limb (bi, b);
- cps[0] = bi;
- cps[1] = cnt;
-
B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
ASSERT (B1modb <= b); /* NB: not fully reduced mod b */
- cps[2] = B1modb >> cnt;
+ udiv_rnd_preinv (B2modb, B1modb, b, bi);
+ udiv_rnd_preinv (B3modb, B2modb, b, bi);
+ udiv_rnd_preinv (B4modb, B3modb, b, bi);
+ udiv_rnd_preinv (B5modb, B4modb, b, bi);
- udiv_rnnd_preinv (B2modb, B1modb, CNST_LIMB(0), b, bi);
+ cps[0] = bi;
+ cps[1] = cnt;
+ cps[2] = B1modb >> cnt;
cps[3] = B2modb >> cnt;
-
- udiv_rnnd_preinv (B3modb, B2modb, CNST_LIMB(0), b, bi);
cps[4] = B3modb >> cnt;
-
- udiv_rnnd_preinv (B4modb, B3modb, CNST_LIMB(0), b, bi);
cps[5] = B4modb >> cnt;
-
- udiv_rnnd_preinv (B5modb, B4modb, CNST_LIMB(0), b, bi);
cps[6] = B5modb >> cnt;
-
-#if WANT_ASSERT
- {
- int i;
- b = cps[2];
- for (i = 3; i <= 6; i++)
- {
- b += cps[i];
- ASSERT (b >= cps[i]);
- }
- }
-#endif
}
mp_limb_t
-mpn_mod_1s_4p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[7])
+mpn_mod_1s_4p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[7])
{
- mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
+ mp_limb_t rh, rl, bi, q, ph, pl, ch, cl, r;
mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
mp_size_t i;
int cnt;
- ASSERT (n >= 1);
-
B1modb = cps[2];
B2modb = cps[3];
B3modb = cps[4];
B4modb = cps[5];
B5modb = cps[6];
- switch (n & 3)
- {
- case 0:
- umul_ppmm (ph, pl, ap[n - 3], B1modb);
- add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 4]);
- umul_ppmm (ch, cl, ap[n - 2], B2modb);
- add_ssaaaa (ph, pl, ph, pl, ch, cl);
- umul_ppmm (rh, rl, ap[n - 1], B3modb);
- add_ssaaaa (rh, rl, rh, rl, ph, pl);
- n -= 4;
- break;
- case 1:
- rh = 0;
- rl = ap[n - 1];
- n -= 1;
- break;
- case 2:
- rh = ap[n - 1];
- rl = ap[n - 2];
- n -= 2;
- break;
- case 3:
- umul_ppmm (ph, pl, ap[n - 2], B1modb);
- add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 3]);
- umul_ppmm (rh, rl, ap[n - 1], B2modb);
- add_ssaaaa (rh, rl, rh, rl, ph, pl);
- n -= 3;
- break;
- }
+ umul_ppmm (ph, pl, ap[n - 3], B1modb);
+ add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 4]);
+
+ umul_ppmm (ch, cl, ap[n - 2], B2modb);
+ add_ssaaaa (ph, pl, ph, pl, ch, cl);
- for (i = n - 4; i >= 0; i -= 4)
+ umul_ppmm (ch, cl, ap[n - 1], B3modb);
+ add_ssaaaa (rh, rl, ph, pl, ch, cl);
+
+ for (i = n - 8; i >= 0; i -= 4)
{
/* rr = ap[i] < B
+ ap[i+1] * (B mod b) <= (B-1)(b-1)
@@ -143,7 +92,7 @@ mpn_mod_1s_4p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[7])
+ HI(rr) * (B^5 mod b) <= (B-1)(b-1)
*/
umul_ppmm (ph, pl, ap[i + 1], B1modb);
- add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[i + 0]);
+ add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 0]);
umul_ppmm (ch, cl, ap[i + 2], B2modb);
add_ssaaaa (ph, pl, ph, pl, ch, cl);
@@ -158,14 +107,42 @@ mpn_mod_1s_4p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[7])
add_ssaaaa (rh, rl, rh, rl, ph, pl);
}
- umul_ppmm (rh, cl, rh, B1modb);
- add_ssaaaa (rh, rl, rh, rl, CNST_LIMB(0), cl);
+ if (i >= -3)
+ {
+ umul_ppmm (ph, pl, rl, B1modb);
+ add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 3]);
+ umul_ppmm (rh, rl, rh, B2modb);
+ add_ssaaaa (rh, rl, rh, rl, ph, pl);
+ if (i >= -2)
+ {
+ umul_ppmm (ph, pl, rl, B1modb);
+ add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 2]);
+ umul_ppmm (rh, rl, rh, B2modb);
+ add_ssaaaa (rh, rl, rh, rl, ph, pl);
+ if (i >= -1)
+ {
+ umul_ppmm (ph, pl, rl, B1modb);
+ add_ssaaaa (ph, pl, ph, pl, 0, ap[0]);
+ umul_ppmm (rh, rl, rh, B2modb);
+ add_ssaaaa (rh, rl, rh, rl, ph, pl);
+ }
+ }
+ }
- cnt = cps[1];
bi = cps[0];
+ cnt = cps[1];
+#if 1
+ umul_ppmm (rh, cl, rh, B1modb);
+ add_ssaaaa (rh, rl, rh, rl, 0, cl);
r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
- udiv_rnnd_preinv (r, r, rl << cnt, b, bi);
+#else
+ udiv_qrnnd_preinv (q, r, rh >> (GMP_LIMB_BITS - cnt),
+ (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt)), b, bi);
+ ASSERT (q <= 4); /* optimize for small quotient? */
+#endif
+
+ udiv_qrnnd_preinv (q, r, r, rl << cnt, b, bi);
return r >> cnt;
}
diff --git a/gmp/mpn/generic/mod_34lsub1.c b/gmp/mpn/generic/mod_34lsub1.c
index 7c07af7acc..6bd149892d 100644
--- a/gmp/mpn/generic/mod_34lsub1.c
+++ b/gmp/mpn/generic/mod_34lsub1.c
@@ -4,33 +4,22 @@
CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
FUTURE GNU MP RELEASES.
-Copyright 2000-2002 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
diff --git a/gmp/mpn/generic/mode1o.c b/gmp/mpn/generic/mode1o.c
index ec91da223d..064becdadf 100644
--- a/gmp/mpn/generic/mode1o.c
+++ b/gmp/mpn/generic/mode1o.c
@@ -4,33 +4,22 @@
CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
FUTURE GNU MP RELEASES.
-Copyright 2000-2004 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
@@ -41,7 +30,7 @@ see https://www.gnu.org/licenses/. */
r*B^k + a - c == q*d
- where B=2^GMP_LIMB_BITS, a is {src,size}, k is either size or size-1
+ where B=2^BITS_PER_MP_LIMB, a is {src,size}, k is either size or size-1
(the caller won't know which), and q is the quotient (discarded). d must
be odd, c can be any limb value.
diff --git a/gmp/mpn/generic/mu_bdiv_q.c b/gmp/mpn/generic/mu_bdiv_q.c
index 0a8010ec15..3b5f56d088 100644
--- a/gmp/mpn/generic/mu_bdiv_q.c
+++ b/gmp/mpn/generic/mu_bdiv_q.c
@@ -4,44 +4,40 @@
Contributed to the GNU project by Torbjorn Granlund.
- THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE. IT IS
+ ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
+ ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
+ RELEASE.
-Copyright 2005-2007, 2009, 2010 Free Software Foundation, Inc.
+Copyright 2005, 2006, 2007 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-/*
- The idea of the algorithm used herein is to compute a smaller inverted value
- than used in the standard Barrett algorithm, and thus save time in the
- Newton iterations, and pay just a small price when using the inverted value
- for developing quotient bits. This algorithm was presented at ICMS 2006.
+/* We use the "misunderstanding algorithm" (MU), discovered by Paul Zimmermann
+ and Torbjorn Granlund when Torbjorn misunderstood Paul's explanation of
+ Jebelean's bidirectional exact division algorithm.
+
+ The idea of this algorithm is to compute a smaller inverted value than used
+ in the standard Barrett algorithm, and thus save time in the Newton
+ iterations, and pay just a small price when using the inverted value for
+ developing quotient bits.
+
+ Written by Torbjorn Granlund. Paul Zimmermann suggested the use of the
+ "wrap around" trick.
*/
#include "gmp.h"
@@ -53,10 +49,11 @@ see https://www.gnu.org/licenses/. */
Requirements: N >= D
D >= 1
+ N mod D = 0
D odd
dn >= 2
nn >= 2
- scratch space as determined by mpn_mu_bdiv_q_itch(nn,dn).
+ scratch space as determined by mpn_divexact_itch(nn,dn).
Write quotient to Q = {qp,nn}.
@@ -72,10 +69,10 @@ mpn_mu_bdiv_q (mp_ptr qp,
mp_srcptr dp, mp_size_t dn,
mp_ptr scratch)
{
+ mp_ptr ip;
+ mp_ptr rp;
mp_size_t qn;
mp_size_t in;
- int cy, c0;
- mp_size_t tn, wn;
qn = nn;
@@ -85,52 +82,74 @@ mpn_mu_bdiv_q (mp_ptr qp,
if (qn > dn)
{
mp_size_t b;
+ mp_ptr tp;
+ mp_limb_t cy;
+ int k;
+ mp_size_t m, wn;
+ mp_size_t i;
/* |_______________________| dividend
|________| divisor */
-#define ip scratch /* in */
-#define rp (scratch + in) /* dn or rest >= binvert_itch(in) */
-#define tp (scratch + in + dn) /* dn+in or next_size(dn) */
-#define scratch_out (scratch + in + dn + tn) /* mulmod_bnm1_itch(next_size(dn)) */
-
/* Compute an inverse size that is a nice partition of the quotient. */
b = (qn - 1) / dn + 1; /* ceil(qn/dn), number of blocks */
in = (qn - 1) / b + 1; /* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
/* Some notes on allocation:
- When in = dn, R dies when mpn_mullo returns, if in < dn the low in
+ When in = dn, R dies when mpn_mullow returns, if in < dn the low in
limbs of R dies at that point. We could save memory by letting T live
just under R, and let the upper part of T expand into R. These changes
should reduce itch to perhaps 3dn.
*/
- mpn_binvert (ip, dp, in, rp);
+ ip = scratch; /* in limbs */
+ rp = scratch + in; /* dn limbs */
+ tp = scratch + in + dn; /* dn + in limbs FIXME: mpn_fft_next_size */
+ scratch += in; /* Roughly 2in+1 limbs */
+
+ mpn_binvert (ip, dp, in, scratch);
cy = 0;
MPN_COPY (rp, np, dn);
np += dn;
- mpn_mullo_n (qp, rp, ip, in);
+ mpn_mullow_n (qp, rp, ip, in);
qn -= in;
+ if (ABOVE_THRESHOLD (dn, MUL_FFT_MODF_THRESHOLD))
+ {
+ k = mpn_fft_best_k (dn, 0);
+ m = mpn_fft_next_size (dn, k);
+ wn = dn + in - m; /* number of wrapped limbs */
+ ASSERT_ALWAYS (wn >= 0); /* could handle this below */
+ }
+
while (qn > in)
{
- if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
- mpn_mul (tp, dp, dn, qp, in); /* mulhi, need tp[dn+in-1...in] */
- else
+#if WANT_FFT
+ if (ABOVE_THRESHOLD (dn, MUL_FFT_MODF_THRESHOLD))
{
- tn = mpn_mulmod_bnm1_next_size (dn);
- mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
- wn = dn + in - tn; /* number of wrapped limbs */
- if (wn > 0)
- {
- c0 = mpn_sub_n (tp + tn, tp, rp, wn);
- mpn_decr_u (tp + wn, c0);
- }
+ /* The two multiplicands are dn and 'in' limbs, with dn >= in.
+ The relevant part of the result will typically partially wrap,
+ and that part will come out as subtracted to the right. The
+ unwrapped part, m-in limbs at the high end of tp, is the lower
+ part of the sought product. The wrapped part, at the low end
+ of tp, will be subtracted from the low part of the partial
+ remainder; we undo that operation with another subtraction. */
+ int c0;
+
+ mpn_mul_fft (tp, m, dp, dn, qp, in, k);
+
+ c0 = mpn_sub_n (tp + m, rp, tp, wn);
+
+ for (i = wn; c0 != 0 && i < in; i++)
+ c0 = tp[i] == GMP_NUMB_MASK;
+ mpn_incr_u (tp + in, c0);
}
-
+ else
+#endif
+ mpn_mul (tp, dp, dn, qp, in); /* mulhi, need tp[dn+in-1...in] */
qp += in;
if (dn != in)
{
@@ -145,28 +164,29 @@ mpn_mu_bdiv_q (mp_ptr qp,
/* Subtract tp[dn+in-1...dn] from dividend. */
cy = mpn_sub_nc (rp + dn - in, np, tp + dn, in, cy);
np += in;
- mpn_mullo_n (qp, rp, ip, in);
+ mpn_mullow_n (qp, rp, ip, in);
qn -= in;
}
/* Generate last qn limbs.
FIXME: It should be possible to limit precision here, since qn is
typically somewhat smaller than dn. No big gains expected. */
-
- if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
- mpn_mul (tp, dp, dn, qp, in); /* mulhi, need tp[qn+in-1...in] */
- else
+#if WANT_FFT
+ if (ABOVE_THRESHOLD (dn, MUL_FFT_MODF_THRESHOLD))
{
- tn = mpn_mulmod_bnm1_next_size (dn);
- mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
- wn = dn + in - tn; /* number of wrapped limbs */
- if (wn > 0)
- {
- c0 = mpn_sub_n (tp + tn, tp, rp, wn);
- mpn_decr_u (tp + wn, c0);
- }
- }
+ int c0;
+
+ mpn_mul_fft (tp, m, dp, dn, qp, in, k);
+
+ c0 = mpn_sub_n (tp + m, rp, tp, wn);
+ for (i = wn; c0 != 0 && i < in; i++)
+ c0 = tp[i] == GMP_NUMB_MASK;
+ mpn_incr_u (tp + in, c0);
+ }
+ else
+#endif
+ mpn_mul (tp, dp, dn, qp, in); /* mulhi, need tp[qn+in-1...in] */
qp += in;
if (dn != in)
{
@@ -179,93 +199,57 @@ mpn_mu_bdiv_q (mp_ptr qp,
}
mpn_sub_nc (rp + dn - in, np, tp + dn, qn - (dn - in), cy);
- mpn_mullo_n (qp, rp, ip, qn);
-
-#undef ip
-#undef rp
-#undef tp
-#undef scratch_out
- }
+ mpn_mullow_n (qp, rp, ip, qn);
+ }
else
{
/* |_______________________| dividend
|________________| divisor */
-#define ip scratch /* in */
-#define tp (scratch + in) /* qn+in or next_size(qn) or rest >= binvert_itch(in) */
-#define scratch_out (scratch + in + tn)/* mulmod_bnm1_itch(next_size(qn)) */
-
/* Compute half-sized inverse. */
in = qn - (qn >> 1);
- mpn_binvert (ip, dp, in, tp);
+ ip = scratch; /* ceil(qn/2) limbs */
+ rp = scratch + in; /* ceil(qn/2)+qn limbs */
+ scratch += in; /* 2*ceil(qn/2)+2 */
- mpn_mullo_n (qp, np, ip, in); /* low `in' quotient limbs */
+ mpn_binvert (ip, dp, in, scratch);
- if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
- mpn_mul (tp, dp, qn, qp, in); /* mulhigh */
- else
+ mpn_mullow_n (qp, np, ip, in); /* low `in' quotient limbs */
+#if WANT_FFT
+ if (ABOVE_THRESHOLD (qn, MUL_FFT_MODF_THRESHOLD))
{
- tn = mpn_mulmod_bnm1_next_size (qn);
- mpn_mulmod_bnm1 (tp, tn, dp, qn, qp, in, scratch_out);
- wn = qn + in - tn; /* number of wrapped limbs */
- if (wn > 0)
- {
- c0 = mpn_cmp (tp, np, wn) < 0;
- mpn_decr_u (tp + wn, c0);
- }
+ int k;
+ mp_size_t m;
+
+ k = mpn_fft_best_k (qn, 0);
+ m = mpn_fft_next_size (qn, k);
+ mpn_mul_fft (rp, m, dp, qn, qp, in, k);
+ if (mpn_cmp (np, rp, in) < 0)
+ mpn_incr_u (rp + in, 1);
}
+ else
+#endif
+ mpn_mul (rp, dp, qn, qp, in); /* mulhigh */
- mpn_sub_n (tp, np + in, tp + in, qn - in);
- mpn_mullo_n (qp + in, tp, ip, qn - in); /* high qn-in quotient limbs */
-
-#undef ip
-#undef tp
-#undef scratch_out
+ mpn_sub_n (rp, np + in, rp + in, qn - in);
+ mpn_mullow_n (qp + in, rp, ip, qn - in); /* high qn-in quotient limbs */
}
}
mp_size_t
mpn_mu_bdiv_q_itch (mp_size_t nn, mp_size_t dn)
{
- mp_size_t qn, in, tn, itch_binvert, itch_out, itches;
- mp_size_t b;
+ mp_size_t qn;
qn = nn;
if (qn > dn)
{
- b = (qn - 1) / dn + 1; /* ceil(qn/dn), number of blocks */
- in = (qn - 1) / b + 1; /* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
- if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
- {
- tn = dn + in;
- itch_out = 0;
- }
- else
- {
- tn = mpn_mulmod_bnm1_next_size (dn);
- itch_out = mpn_mulmod_bnm1_itch (tn, dn, in);
- }
- itch_binvert = mpn_binvert_itch (in);
- itches = dn + tn + itch_out;
- return in + MAX (itches, itch_binvert);
+ return 4 * dn; /* FIXME FIXME FIXME need mpn_fft_next_size */
}
else
{
- in = qn - (qn >> 1);
- if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
- {
- tn = qn + in;
- itch_out = 0;
- }
- else
- {
- tn = mpn_mulmod_bnm1_next_size (qn);
- itch_out = mpn_mulmod_bnm1_itch (tn, qn, in);
- }
- itch_binvert = mpn_binvert_itch (in);
- itches = tn + itch_out;
- return in + MAX (itches, itch_binvert);
+ return 2 * qn + 1 + 2; /* FIXME FIXME FIXME need mpn_fft_next_size */
}
}
diff --git a/gmp/mpn/generic/mu_bdiv_qr.c b/gmp/mpn/generic/mu_bdiv_qr.c
index d265440f2b..e66b4a117e 100644
--- a/gmp/mpn/generic/mu_bdiv_qr.c
+++ b/gmp/mpn/generic/mu_bdiv_qr.c
@@ -1,289 +1,51 @@
-/* mpn_mu_bdiv_qr(qp,rp,np,nn,dp,dn,tp) -- Compute {np,nn} / {dp,dn} mod B^qn,
- where qn = nn-dn, storing the result in {qp,qn}. Overlap allowed between Q
- and N; all other overlap disallowed.
+/* mpn_mu_bdiv_qr -- divide-and-conquer Hensel division using a variant of
+ Barrett's algorithm, returning quotient and remainder.
- Contributed to the GNU project by Torbjorn Granlund.
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE. IT IS
+ ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
+ ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
+ RELEASE.
- THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-
-Copyright 2005-2007, 2009, 2010, 2012 Free Software Foundation, Inc.
+Copyright 2005, 2006, 2007 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-
-/*
- The idea of the algorithm used herein is to compute a smaller inverted value
- than used in the standard Barrett algorithm, and thus save time in the
- Newton iterations, and pay just a small price when using the inverted value
- for developing quotient bits. This algorithm was presented at ICMS 2006.
-*/
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
-/* N = {np,nn}
- D = {dp,dn}
+/* Computes Hensel binary division of {np, 2*n} by {dp, n}.
+
+ Output:
+
+ q = n * d^{-1} mod 2^{qn * GMP_NUMB_BITS},
- Requirements: N >= D
- D >= 1
- D odd
- dn >= 2
- nn >= 2
- scratch space as determined by mpn_mu_bdiv_qr_itch(nn,dn).
+ r = (n - q * d) * 2^{-qn * GMP_NUMB_BITS}
- Write quotient to Q = {qp,nn-dn}.
+ Stores q at qp. Stores the n least significant limbs of r at the high half
+ of np, and returns the borrow from the subtraction n - q*d.
- FIXME: When iterating, perhaps do the small step before loop, not after.
- FIXME: Try to avoid the scalar divisions when computing inverse size.
- FIXME: Trim allocation for (qn > dn) case, 3*dn might be possible. In
- particular, when dn==in, tp and rp could use the same space.
-*/
-mp_limb_t
+ d must be odd. dinv is (-d)^-1 mod 2^GMP_NUMB_BITS. */
+
+void
mpn_mu_bdiv_qr (mp_ptr qp,
mp_ptr rp,
mp_srcptr np, mp_size_t nn,
mp_srcptr dp, mp_size_t dn,
mp_ptr scratch)
{
- mp_size_t qn;
- mp_size_t in;
- mp_limb_t cy, c0;
- mp_size_t tn, wn;
-
- qn = nn - dn;
-
- ASSERT (dn >= 2);
- ASSERT (qn >= 2);
-
- if (qn > dn)
- {
- mp_size_t b;
-
- /* |_______________________| dividend
- |________| divisor */
-
-#define ip scratch /* in */
-#define tp (scratch + in) /* dn+in or next_size(dn) or rest >= binvert_itch(in) */
-#define scratch_out (scratch + in + tn)/* mulmod_bnm1_itch(next_size(dn)) */
-
- /* Compute an inverse size that is a nice partition of the quotient. */
- b = (qn - 1) / dn + 1; /* ceil(qn/dn), number of blocks */
- in = (qn - 1) / b + 1; /* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
-
- /* Some notes on allocation:
-
- When in = dn, R dies when mpn_mullo returns, if in < dn the low in
- limbs of R dies at that point. We could save memory by letting T live
- just under R, and let the upper part of T expand into R. These changes
- should reduce itch to perhaps 3dn.
- */
-
- mpn_binvert (ip, dp, in, tp);
-
- MPN_COPY (rp, np, dn);
- np += dn;
- cy = 0;
-
- while (qn > in)
- {
- mpn_mullo_n (qp, rp, ip, in);
-
- if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
- mpn_mul (tp, dp, dn, qp, in); /* mulhi, need tp[dn+in-1...in] */
- else
- {
- tn = mpn_mulmod_bnm1_next_size (dn);
- mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
- wn = dn + in - tn; /* number of wrapped limbs */
- if (wn > 0)
- {
- c0 = mpn_sub_n (tp + tn, tp, rp, wn);
- mpn_decr_u (tp + wn, c0);
- }
- }
-
- qp += in;
- qn -= in;
-
- if (dn != in)
- {
- /* Subtract tp[dn-1...in] from partial remainder. */
- cy += mpn_sub_n (rp, rp + in, tp + in, dn - in);
- if (cy == 2)
- {
- mpn_incr_u (tp + dn, 1);
- cy = 1;
- }
- }
- /* Subtract tp[dn+in-1...dn] from dividend. */
- cy = mpn_sub_nc (rp + dn - in, np, tp + dn, in, cy);
- np += in;
- }
-
- /* Generate last qn limbs. */
- mpn_mullo_n (qp, rp, ip, qn);
-
- if (BELOW_THRESHOLD (qn, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
- mpn_mul (tp, dp, dn, qp, qn); /* mulhi, need tp[qn+in-1...in] */
- else
- {
- tn = mpn_mulmod_bnm1_next_size (dn);
- mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, qn, scratch_out);
- wn = dn + qn - tn; /* number of wrapped limbs */
- if (wn > 0)
- {
- c0 = mpn_sub_n (tp + tn, tp, rp, wn);
- mpn_decr_u (tp + wn, c0);
- }
- }
-
- if (dn != qn)
- {
- cy += mpn_sub_n (rp, rp + qn, tp + qn, dn - qn);
- if (cy == 2)
- {
- mpn_incr_u (tp + dn, 1);
- cy = 1;
- }
- }
- return mpn_sub_nc (rp + dn - qn, np, tp + dn, qn, cy);
-
-#undef ip
-#undef tp
-#undef scratch_out
- }
- else
- {
- /* |_______________________| dividend
- |________________| divisor */
-
-#define ip scratch /* in */
-#define tp (scratch + in) /* dn+in or next_size(dn) or rest >= binvert_itch(in) */
-#define scratch_out (scratch + in + tn)/* mulmod_bnm1_itch(next_size(dn)) */
-
- /* Compute half-sized inverse. */
- in = qn - (qn >> 1);
-
- mpn_binvert (ip, dp, in, tp);
-
- mpn_mullo_n (qp, np, ip, in); /* low `in' quotient limbs */
-
- if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
- mpn_mul (tp, dp, dn, qp, in); /* mulhigh */
- else
- {
- tn = mpn_mulmod_bnm1_next_size (dn);
- mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
- wn = dn + in - tn; /* number of wrapped limbs */
- if (wn > 0)
- {
- c0 = mpn_sub_n (tp + tn, tp, np, wn);
- mpn_decr_u (tp + wn, c0);
- }
- }
-
- qp += in;
- qn -= in;
-
- cy = mpn_sub_n (rp, np + in, tp + in, dn);
- mpn_mullo_n (qp, rp, ip, qn); /* high qn quotient limbs */
-
- if (BELOW_THRESHOLD (qn, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
- mpn_mul (tp, dp, dn, qp, qn); /* mulhigh */
- else
- {
- tn = mpn_mulmod_bnm1_next_size (dn);
- mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, qn, scratch_out);
- wn = dn + qn - tn; /* number of wrapped limbs */
- if (wn > 0)
- {
- c0 = mpn_sub_n (tp + tn, tp, rp, wn);
- mpn_decr_u (tp + wn, c0);
- }
- }
-
- cy += mpn_sub_n (rp, rp + qn, tp + qn, dn - qn);
- if (cy == 2)
- {
- mpn_incr_u (tp + dn, 1);
- cy = 1;
- }
- return mpn_sub_nc (rp + dn - qn, np + dn + in, tp + dn, qn, cy);
-
-#undef ip
-#undef tp
-#undef scratch_out
- }
-}
-
-mp_size_t
-mpn_mu_bdiv_qr_itch (mp_size_t nn, mp_size_t dn)
-{
- mp_size_t qn, in, tn, itch_binvert, itch_out, itches;
- mp_size_t b;
-
- qn = nn - dn;
-
- if (qn > dn)
- {
- b = (qn - 1) / dn + 1; /* ceil(qn/dn), number of blocks */
- in = (qn - 1) / b + 1; /* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
- if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
- {
- tn = dn + in;
- itch_out = 0;
- }
- else
- {
- tn = mpn_mulmod_bnm1_next_size (dn);
- itch_out = mpn_mulmod_bnm1_itch (tn, dn, in);
- }
- itch_binvert = mpn_binvert_itch (in);
- itches = tn + itch_out;
- return in + MAX (itches, itch_binvert);
- }
- else
- {
- in = qn - (qn >> 1);
- if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
- {
- tn = dn + in;
- itch_out = 0;
- }
- else
- {
- tn = mpn_mulmod_bnm1_next_size (dn);
- itch_out = mpn_mulmod_bnm1_itch (tn, dn, in);
- }
- }
- itch_binvert = mpn_binvert_itch (in);
- itches = tn + itch_out;
- return in + MAX (itches, itch_binvert);
+ ASSERT_ALWAYS (0);
}
diff --git a/gmp/mpn/generic/mu_div_q.c b/gmp/mpn/generic/mu_div_q.c
index 8768ba6c60..150e8b77cd 100644
--- a/gmp/mpn/generic/mu_div_q.c
+++ b/gmp/mpn/generic/mu_div_q.c
@@ -1,46 +1,29 @@
-/* mpn_mu_div_q.
+/* mpn_mu_div_q, mpn_preinv_mu_div_q.
- Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+ Contributed to the GNU project by Torbjörn Granlund.
- THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE. IT IS
+ ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
+ ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
+ RELEASE.
-Copyright 2005-2007, 2009, 2010, 2013 Free Software Foundation, Inc.
+Copyright 2005, 2006, 2007 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-/*
- The idea of the algorithm used herein is to compute a smaller inverted value
- than used in the standard Barrett algorithm, and thus save time in the
- Newton iterations, and pay just a small price when using the inverted value
- for developing quotient bits. This algorithm was presented at ICMS 2006.
-*/
/*
Things to work on:
@@ -48,14 +31,18 @@ see https://www.gnu.org/licenses/. */
1. This is a rudimentary implementation of mpn_mu_div_q. The algorithm is
probably close to optimal, except when mpn_mu_divappr_q fails.
- 2. We used to fall back to mpn_mu_div_qr when we detect a possible
- mpn_mu_divappr_q rounding problem, now we multiply and compare.
+ An alternative which could be considered for much simpler code for the
+ complex qn>=dn arm would be to allocate a temporary nn+1 limb buffer, then
+ simply call mpn_mu_divappr_q. Such a temporary allocation is
+ unfortunately very large.
+
+ 2. Instead of falling back to mpn_mu_div_qr when we detect a possible
+ mpn_mu_divappr_q rounding problem, we could multiply and compare.
Unfortunately, since mpn_mu_divappr_q does not return the partial
- remainder, this also doesn't become optimal. A mpn_mu_divappr_qr could
- solve that.
+ remainder, this also doesn't become optimal. A mpn_mu_divappr_qr
+ could solve that.
- 3. The allocations done here should be made from the scratch area, which
- then would need to be amended.
+ 3. The allocations done here should be made from the scratch area.
*/
#include <stdlib.h> /* for NULL */
@@ -65,13 +52,13 @@ see https://www.gnu.org/licenses/. */
mp_limb_t
mpn_mu_div_q (mp_ptr qp,
- mp_srcptr np, mp_size_t nn,
+ mp_ptr np, mp_size_t nn,
mp_srcptr dp, mp_size_t dn,
mp_ptr scratch)
{
- mp_ptr tp, rp;
- mp_size_t qn;
- mp_limb_t cy, qh;
+ mp_ptr tp, rp, ip, this_ip;
+ mp_size_t qn, in, this_in;
+ mp_limb_t cy;
TMP_DECL;
TMP_MARK;
@@ -82,28 +69,59 @@ mpn_mu_div_q (mp_ptr qp,
if (qn >= dn) /* nn >= 2*dn + 1 */
{
- /* |_______________________| dividend
- |________| divisor */
+ /* Find max inverse size needed by the two preinv calls. */
+ if (dn != qn)
+ {
+ mp_size_t in1, in2;
- rp = TMP_BALLOC_LIMBS (nn + 1);
- MPN_COPY (rp + 1, np, nn);
- rp[0] = 0;
+ in1 = mpn_mu_div_qr_choose_in (qn - dn, dn, 0);
+ in2 = mpn_mu_divappr_q_choose_in (dn + 1, dn, 0);
+ in = MAX (in1, in2);
+ }
+ else
+ {
+ in = mpn_mu_divappr_q_choose_in (dn + 1, dn, 0);
+ }
- qh = mpn_cmp (rp + 1 + nn - dn, dp, dn) >= 0;
- if (qh != 0)
- mpn_sub_n (rp + 1 + nn - dn, rp + 1 + nn - dn, dp, dn);
+ ip = TMP_BALLOC_LIMBS (in + 1);
- cy = mpn_mu_divappr_q (tp, rp, nn + 1, dp, dn, scratch);
+ if (dn == in)
+ {
+ MPN_COPY (scratch + 1, dp, in);
+ scratch[0] = 1;
+ mpn_invert (ip, scratch, in + 1, NULL);
+ MPN_COPY_INCR (ip, ip + 1, in);
+ }
+ else
+ {
+ cy = mpn_add_1 (scratch, dp + dn - (in + 1), in + 1, 1);
+ if (UNLIKELY (cy != 0))
+ MPN_ZERO (ip, in);
+ else
+ {
+ mpn_invert (ip, scratch, in + 1, NULL);
+ MPN_COPY_INCR (ip, ip + 1, in);
+ }
+ }
- if (UNLIKELY (cy != 0))
+ /* |_______________________| dividend
+ |________| divisor */
+ rp = TMP_BALLOC_LIMBS (2 * dn + 1);
+ if (dn != qn) /* FIXME: perhaps mpn_mu_div_qr should DTRT */
{
- /* Since the partial remainder fed to mpn_preinv_mu_divappr_q was
- canonically reduced, replace the returned value of B^(qn-dn)+eps
- by the largest possible value. */
- mp_size_t i;
- for (i = 0; i < qn + 1; i++)
- tp[i] = GMP_NUMB_MAX;
+ this_in = mpn_mu_div_qr_choose_in (qn - dn, dn, 0);
+ this_ip = ip + in - this_in;
+ mpn_preinv_mu_div_qr (tp + dn + 1, rp + dn + 1, np + dn, qn, dp, dn,
+ this_ip, this_in, scratch);
}
+ else
+ MPN_COPY (rp + dn + 1, np + dn, dn);
+
+ MPN_COPY (rp + 1, np, dn);
+ rp[0] = 0;
+ this_in = mpn_mu_divappr_q_choose_in (dn + 1, dn, 0);
+ this_ip = ip + in - this_in;
+ mpn_preinv_mu_divappr_q (tp, rp, 2*dn + 1, dp, dn, this_ip, this_in, scratch);
/* The max error of mpn_mu_divappr_q is +4. If the low quotient limb is
greater than the max error, we cannot trust the quotient. */
@@ -113,73 +131,27 @@ mpn_mu_div_q (mp_ptr qp,
}
else
{
- mp_limb_t cy;
- mp_ptr pp;
-
- pp = rp;
- mpn_mul (pp, tp + 1, qn, dp, dn);
-
- cy = (qh != 0) ? mpn_add_n (pp + qn, pp + qn, dp, dn) : 0;
-
- if (cy || mpn_cmp (pp, np, nn) > 0) /* At most is wrong by one, no cycle. */
- qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
- else /* Same as above */
- MPN_COPY (qp, tp + 1, qn);
+ /* Fall back to plain mpn_mu_div_qr. */
+ mpn_mu_div_qr (qp, rp, np, nn, dp, dn, scratch);
}
}
else
{
/* |_______________________| dividend
|________________| divisor */
+ mpn_mu_divappr_q (tp, np + nn - (2*qn + 2), 2*qn + 2, dp + dn - (qn + 1), qn + 1, scratch);
- /* FIXME: When nn = 2dn-1, qn becomes dn-1, and the numerator size passed
- here becomes 2dn, i.e., more than nn. This shouldn't hurt, since only
- the most significant dn-1 limbs will actually be read, but it is not
- pretty. */
-
- qh = mpn_mu_divappr_q (tp, np + nn - (2 * qn + 2), 2 * qn + 2,
- dp + dn - (qn + 1), qn + 1, scratch);
-
- /* The max error of mpn_mu_divappr_q is +4, but we get an additional
- error from the divisor truncation. */
- if (tp[0] > 6)
+ if (tp[0] > 4)
{
MPN_COPY (qp, tp + 1, qn);
}
else
{
- mp_limb_t cy;
-
- /* FIXME: a shorter product should be enough; we may use already
- allocated space... */
- rp = TMP_BALLOC_LIMBS (nn);
- mpn_mul (rp, dp, dn, tp + 1, qn);
-
- cy = (qh != 0) ? mpn_add_n (rp + qn, rp + qn, dp, dn) : 0;
-
- if (cy || mpn_cmp (rp, np, nn) > 0) /* At most is wrong by one, no cycle. */
- qh -= mpn_sub_1 (qp, tp + 1, qn, 1);
- else /* Same as above */
- MPN_COPY (qp, tp + 1, qn);
+ rp = TMP_BALLOC_LIMBS (dn);
+ mpn_mu_div_qr (qp, rp, np, nn, dp, dn, scratch);
}
}
TMP_FREE;
- return qh;
-}
-
-mp_size_t
-mpn_mu_div_q_itch (mp_size_t nn, mp_size_t dn, int mua_k)
-{
- mp_size_t qn;
-
- qn = nn - dn;
- if (qn >= dn)
- {
- return mpn_mu_divappr_q_itch (nn + 1, dn, mua_k);
- }
- else
- {
- return mpn_mu_divappr_q_itch (2 * qn + 2, qn + 1, mua_k);
- }
+ return 0;
}
diff --git a/gmp/mpn/generic/mu_div_qr.c b/gmp/mpn/generic/mu_div_qr.c
index f4700a1ea6..9049e5907a 100644
--- a/gmp/mpn/generic/mu_div_qr.c
+++ b/gmp/mpn/generic/mu_div_qr.c
@@ -7,67 +7,87 @@
Contributed to the GNU project by Torbjorn Granlund.
- THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE. IT IS
+ ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
+ ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
+ RELEASE.
-Copyright 2005-2007, 2009, 2010 Free Software Foundation, Inc.
+Copyright 2005, 2006, 2007 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+/* We use the "misunderstanding algorithm" (MUA), discovered by Paul Zimmermann
+ and Torbjorn Granlund when Torbjorn misunderstood Paul's explanation of
+ Jebelean's bidirectional exact division algorithm.
+ The idea of this algorithm is to compute a smaller inverted value than used
+ in the standard Barrett algorithm, and thus save time in the Newton
+ iterations, and pay just a small price when using the inverted value for
+ developing quotient bits.
-/*
- The idea of the algorithm used herein is to compute a smaller inverted value
- than used in the standard Barrett algorithm, and thus save time in the
- Newton iterations, and pay just a small price when using the inverted value
- for developing quotient bits. This algorithm was presented at ICMS 2006.
+ Written by Torbjorn Granlund. Paul Zimmermann suggested the use of the
+ "wrap around" trick. Based on the GMP divexact code and inspired by code
+ contributed to GMP by Karl Hasselstroem.
*/
-/* CAUTION: This code and the code in mu_divappr_q.c should be edited in sync.
+
+/* CAUTION: This code and the code in mu_divappr_q.c should be edited in lockstep.
Things to work on:
+ * Passing k isn't a great interface. Either 'in' should be passed, or
+ determined by the code.
+
+ * The current mpn_mu_div_qr_itch isn't exactly scientifically written.
+ Scratch space buffer overruns are not unlikely before some analysis is
+ applied. Since scratch requirements are expected to change, such an
+ analysis will have to wait til things settle.
+
+ * This isn't optimal when the remainder isn't needed, since the final
+ multiplication could be made special and take O(1) time on average, in that
+ case. This is particularly bad when qn << dn. At some level, code as in
+ GMP 4 mpn_tdiv_qr should be used, effectively dividing the leading 2qn
+ dividend limbs by the qn divisor limbs.
+
* This isn't optimal when the quotient isn't needed, as it might take a lot
- of space. The computation is always needed, though, so there is no time to
- save with special code.
+ of space. The computation is always needed, though, so there is not time
+ to save with special code.
* The itch/scratch scheme isn't perhaps such a good idea as it once seemed,
- demonstrated by the fact that the mpn_invertappr function's scratch needs
- mean that we need to keep a large allocation long after it is needed.
- Things are worse as mpn_mul_fft does not accept any scratch parameter,
- which means we'll have a large memory hole while in mpn_mul_fft. In
- general, a peak scratch need in the beginning of a function isn't
- well-handled by the itch/scratch scheme.
+ demonstrated by the fact that the mpn_inv function's scratch needs means
+ that we need to keep a large allocation long after it is needed. Things
+ are worse as mpn_mul_fft does not accept any scratch parameter, which means
+ we'll have a large memory hole while in mpn_mul_fft. In general, a peak
+ scratch need in the beginning of a function isn't well-handled by the
+ itch/scratch scheme.
+
+ * Some ideas from comments in divexact.c apply to this code too.
*/
+/* the NOSTAT stuff handles properly the case where files are concatenated */
+#ifdef NOSTAT
+#undef STAT
+#endif
+
#ifdef STAT
#undef STAT
#define STAT(x) x
#else
+#define NOSTAT
#define STAT(x)
#endif
@@ -76,98 +96,65 @@ see https://www.gnu.org/licenses/. */
#include "gmp-impl.h"
-/* FIXME: The MU_DIV_QR_SKEW_THRESHOLD was not analysed properly. It gives a
- speedup according to old measurements, but does the decision mechanism
- really make sense? It seem like the quotient between dn and qn might be
- what we really should be checking. */
-#ifndef MU_DIV_QR_SKEW_THRESHOLD
-#define MU_DIV_QR_SKEW_THRESHOLD 100
-#endif
-
-#ifdef CHECK /* FIXME: Enable in minithres */
-#undef MU_DIV_QR_SKEW_THRESHOLD
-#define MU_DIV_QR_SKEW_THRESHOLD 1
-#endif
-
-
-static mp_limb_t mpn_mu_div_qr2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
-
-
-mp_limb_t
-mpn_mu_div_qr (mp_ptr qp,
- mp_ptr rp,
- mp_srcptr np,
- mp_size_t nn,
- mp_srcptr dp,
- mp_size_t dn,
- mp_ptr scratch)
+/* In case k=0 (automatic choice), we distinguish 3 cases:
+ (a) dn < qn: in = ceil(qn / ceil(qn/dn))
+ (b) dn/3 < qn <= dn: in = ceil(qn / 2)
+ (c) qn < dn/3: in = qn
+ In all cases we have in <= dn.
+ */
+mp_size_t
+mpn_mu_div_qr_choose_in (mp_size_t qn, mp_size_t dn, int k)
{
- mp_size_t qn;
- mp_limb_t cy, qh;
+ mp_size_t in;
- qn = nn - dn;
- if (qn + MU_DIV_QR_SKEW_THRESHOLD < dn)
+ if (k == 0)
{
- /* |______________|_ign_first__| dividend nn
- |_______|_ign_first__| divisor dn
-
- |______| quotient (prel) qn
-
- |___________________| quotient * ignored-divisor-part dn-1
- */
-
- /* Compute a preliminary quotient and a partial remainder by dividing the
- most significant limbs of each operand. */
- qh = mpn_mu_div_qr2 (qp, rp + nn - (2 * qn + 1),
- np + nn - (2 * qn + 1), 2 * qn + 1,
- dp + dn - (qn + 1), qn + 1,
- scratch);
-
- /* Multiply the quotient by the divisor limbs ignored above. */
- if (dn - (qn + 1) > qn)
- mpn_mul (scratch, dp, dn - (qn + 1), qp, qn); /* prod is dn-1 limbs */
- else
- mpn_mul (scratch, qp, qn, dp, dn - (qn + 1)); /* prod is dn-1 limbs */
-
- if (qh)
- cy = mpn_add_n (scratch + qn, scratch + qn, dp, dn - (qn + 1));
+ mp_size_t b;
+ if (qn > dn)
+ {
+ /* Compute an inverse size that is a nice partition of the quotient. */
+ b = (qn - 1) / dn + 1; /* ceil(qn/dn), number of blocks */
+ in = (qn - 1) / b + 1; /* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
+ }
+ else if (3 * qn > dn)
+ {
+ in = (qn - 1) / 2 + 1; /* b = 2 */
+ }
else
- cy = 0;
- scratch[dn - 1] = cy;
-
- cy = mpn_sub_n (rp, np, scratch, nn - (2 * qn + 1));
- cy = mpn_sub_nc (rp + nn - (2 * qn + 1),
- rp + nn - (2 * qn + 1),
- scratch + nn - (2 * qn + 1),
- qn + 1, cy);
- if (cy)
{
- qh -= mpn_sub_1 (qp, qp, qn, 1);
- mpn_add_n (rp, rp, dp, dn);
+ in = (qn - 1) / 1 + 1; /* b = 1 */
}
}
else
{
- qh = mpn_mu_div_qr2 (qp, rp, np, nn, dp, dn, scratch);
+ mp_size_t xn;
+ xn = MIN (dn, qn);
+ in = (xn - 1) / k + 1;
}
- return qh;
+ return in;
}
static mp_limb_t
mpn_mu_div_qr2 (mp_ptr qp,
mp_ptr rp,
- mp_srcptr np,
+ mp_ptr np,
mp_size_t nn,
mp_srcptr dp,
mp_size_t dn,
mp_ptr scratch)
{
mp_size_t qn, in;
- mp_limb_t cy, qh;
+ mp_limb_t cy;
mp_ptr ip, tp;
- ASSERT (dn > 1);
+ /* FIXME: We should probably not handle tiny operands, but do it for now. */
+ if (dn == 1)
+ {
+ rp[0] = mpn_divrem_1 (scratch, 0L, np, nn, dp[0]);
+ MPN_COPY (qp, scratch, nn - 1);
+ return scratch[nn - 1];
+ }
qn = nn - dn;
@@ -178,7 +165,7 @@ mpn_mu_div_qr2 (mp_ptr qp,
#if 1
/* This alternative inverse computation method gets slightly more accurate
results. FIXMEs: (1) Temp allocation needs not analysed (2) itch function
- not adapted (3) mpn_invertappr scratch needs not met. */
+ not adapted (3) mpn_invert scratch needs not met. */
ip = scratch;
tp = scratch + in + 1;
@@ -187,7 +174,7 @@ mpn_mu_div_qr2 (mp_ptr qp,
{
MPN_COPY (tp + 1, dp, in);
tp[0] = 1;
- mpn_invertappr (ip, tp, in + 1, NULL);
+ mpn_invert (ip, tp, in + 1, NULL);
MPN_COPY_INCR (ip, ip + 1, in);
}
else
@@ -197,7 +184,7 @@ mpn_mu_div_qr2 (mp_ptr qp,
MPN_ZERO (ip, in);
else
{
- mpn_invertappr (ip, tp, in + 1, NULL);
+ mpn_invert (ip, tp, in + 1, NULL);
MPN_COPY_INCR (ip, ip + 1, in);
}
}
@@ -213,11 +200,11 @@ mpn_mu_div_qr2 (mp_ptr qp,
{
tp[in + 1] = 0;
MPN_COPY (tp + in + 2, dp, in);
- mpn_invertappr (tp, tp + in + 1, in + 1, NULL);
+ mpn_invert (tp, tp + in + 1, in + 1, NULL);
}
else
{
- mpn_invertappr (tp, dp + dn - (in + 1), in + 1, NULL);
+ mpn_invert (tp, dp + dn - (in + 1), in + 1, NULL);
}
cy = mpn_sub_1 (tp, tp, in + 1, GMP_NUMB_HIGHBIT);
if (UNLIKELY (cy != 0))
@@ -225,15 +212,24 @@ mpn_mu_div_qr2 (mp_ptr qp,
MPN_COPY (ip, tp + 1, in);
#endif
- qh = mpn_preinv_mu_div_qr (qp, rp, np, nn, dp, dn, ip, in, scratch + in);
+/* We can't really handle qh = 1 like this since we'd here clobber N, which is
+ not allowed in the way we've defined this function's API. */
+#if 0
+ qh = mpn_cmp (np + qn, dp, dn) >= 0;
+ if (qh != 0)
+ mpn_sub_n (np + qn, np + qn, dp, dn);
+#endif
- return qh;
+ mpn_preinv_mu_div_qr (qp, rp, np, nn, dp, dn, ip, in, scratch + in);
+
+/* return qh; */
+ return 0;
}
-mp_limb_t
+void
mpn_preinv_mu_div_qr (mp_ptr qp,
mp_ptr rp,
- mp_srcptr np,
+ mp_ptr np,
mp_size_t nn,
mp_srcptr dp,
mp_size_t dn,
@@ -242,26 +238,24 @@ mpn_preinv_mu_div_qr (mp_ptr qp,
mp_ptr scratch)
{
mp_size_t qn;
- mp_limb_t cy, cx, qh;
+ mp_limb_t cy;
+ mp_ptr tp;
mp_limb_t r;
- mp_size_t tn, wn;
-
-#define tp scratch
-#define scratch_out (scratch + tn)
qn = nn - dn;
+ if (qn == 0)
+ {
+ MPN_COPY (rp, np, dn);
+ return;
+ }
+
+ tp = scratch;
+
np += qn;
qp += qn;
- qh = mpn_cmp (np, dp, dn) >= 0;
- if (qh != 0)
- mpn_sub_n (rp, np, dp, dn);
- else
- MPN_COPY_INCR (rp, np, dn);
-
- if (qn == 0)
- return qh; /* Degenerate use. Should we allow this? */
+ MPN_COPY (rp, np, dn);
while (qn > 0)
{
@@ -277,30 +271,36 @@ mpn_preinv_mu_div_qr (mp_ptr qp,
by the upper part of the partial remainder R. */
mpn_mul_n (tp, rp + dn - in, ip, in); /* mulhi */
cy = mpn_add_n (qp, tp + in, rp + dn - in, in); /* I's msb implicit */
- ASSERT_ALWAYS (cy == 0);
-
- qn -= in;
+ ASSERT_ALWAYS (cy == 0); /* FIXME */
/* Compute the product of the quotient block and the divisor D, to be
subtracted from the partial remainder combined with new limbs from the
- dividend N. We only really need the low dn+1 limbs. */
-
- if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
- mpn_mul (tp, dp, dn, qp, in); /* dn+in limbs, high 'in' cancels */
- else
+ dividend N. We only really need the low dn limbs. */
+#if WANT_FFT
+ if (ABOVE_THRESHOLD (dn, MUL_FFT_MODF_THRESHOLD))
{
- tn = mpn_mulmod_bnm1_next_size (dn + 1);
- mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
- wn = dn + in - tn; /* number of wrapped limbs */
+ /* Use the wrap-around trick. */
+ mp_size_t m, wn;
+ int k;
+
+ k = mpn_fft_best_k (dn + 1, 0);
+ m = mpn_fft_next_size (dn + 1, k);
+ wn = dn + in - m; /* number of wrapped limbs */
+
+ mpn_mul_fft (tp, m, dp, dn, qp, in, k);
+
if (wn > 0)
{
- cy = mpn_sub_n (tp, tp, rp + dn - wn, wn);
- cy = mpn_sub_1 (tp + wn, tp + wn, tn - wn, cy);
- cx = mpn_cmp (rp + dn - in, tp + dn, tn - dn) < 0;
- ASSERT_ALWAYS (cx >= cy);
- mpn_incr_u (tp, cx - cy);
+ cy = mpn_add_n (tp, tp, rp + dn - wn, wn);
+ mpn_incr_u (tp + wn, cy);
+
+ cy = mpn_cmp (rp + dn - in, tp + dn, m - dn) < 0;
+ mpn_decr_u (tp, cy);
}
}
+ else
+#endif
+ mpn_mul (tp, dp, dn, qp, in); /* dn+in limbs, high 'in' cancels */
r = rp[dn - in] - tp[dn];
@@ -352,65 +352,112 @@ mpn_preinv_mu_div_qr (mp_ptr qp,
printf ("\n");
}
);
- }
- return qh;
+ qn -= in;
+ }
}
-/* In case k=0 (automatic choice), we distinguish 3 cases:
- (a) dn < qn: in = ceil(qn / ceil(qn/dn))
- (b) dn/3 < qn <= dn: in = ceil(qn / 2)
- (c) qn < dn/3: in = qn
- In all cases we have in <= dn.
- */
-mp_size_t
-mpn_mu_div_qr_choose_in (mp_size_t qn, mp_size_t dn, int k)
+#define THRES 100 /* FIXME: somewhat arbitrary */
+
+#ifdef CHECK
+#undef THRES
+#define THRES 1
+#endif
+
+mp_limb_t
+mpn_mu_div_qr (mp_ptr qp,
+ mp_ptr rp,
+ mp_ptr np,
+ mp_size_t nn,
+ mp_srcptr dp,
+ mp_size_t dn,
+ mp_ptr scratch)
{
- mp_size_t in;
+ mp_size_t qn;
- if (k == 0)
+ qn = nn - dn;
+ if (qn + THRES < dn)
{
- mp_size_t b;
- if (qn > dn)
- {
- /* Compute an inverse size that is a nice partition of the quotient. */
- b = (qn - 1) / dn + 1; /* ceil(qn/dn), number of blocks */
- in = (qn - 1) / b + 1; /* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
- }
- else if (3 * qn > dn)
+ /* |______________|________| dividend nn
+ |_______|________| divisor dn
+
+ |______| quotient (prel) qn
+
+ |_______________| quotient * ignored-part-of(divisor) dn-1
+ */
+
+ mp_limb_t cy, x;
+
+ if (mpn_cmp (np + nn - (qn + 1), dp + dn - (qn + 1), qn + 1) >= 0)
{
- in = (qn - 1) / 2 + 1; /* b = 2 */
+ /* Quotient is 111...111, could optimize this rare case at some point. */
+ mpn_mu_div_qr2 (qp, rp, np, nn, dp, dn, scratch);
+ return 0;
}
+
+ /* Compute a preliminary quotient and a partial remainder by dividing the
+ most significant limbs of each operand. */
+ mpn_mu_div_qr2 (qp, rp + nn - (2 * qn + 1),
+ np + nn - (2 * qn + 1), 2 * qn + 1,
+ dp + dn - (qn + 1), qn + 1,
+ scratch);
+
+ /* Multiply the quotient by the divisor limbs ignored above. */
+ if (dn - (qn + 1) > qn)
+ mpn_mul (scratch, dp, dn - (qn + 1), qp, qn); /* prod is dn-1 limbs */
else
+ mpn_mul (scratch, qp, qn, dp, dn - (qn + 1)); /* prod is dn-1 limbs */
+
+ cy = mpn_sub_n (rp, np, scratch, nn - (2 * qn + 1));
+ cy = mpn_sub_nc (rp + nn - (2 * qn + 1),
+ rp + nn - (2 * qn + 1),
+ scratch + nn - (2 * qn + 1),
+ qn, cy);
+ x = rp[dn - 1];
+ rp[dn - 1] = x - cy;
+ if (cy > x)
{
- in = (qn - 1) / 1 + 1; /* b = 1 */
+ mpn_decr_u (qp, 1);
+ mpn_add_n (rp, rp, dp, dn);
}
}
else
{
- mp_size_t xn;
- xn = MIN (dn, qn);
- in = (xn - 1) / k + 1;
+ return mpn_mu_div_qr2 (qp, rp, np, nn, dp, dn, scratch);
}
- return in;
+ return 0; /* FIXME */
}
mp_size_t
mpn_mu_div_qr_itch (mp_size_t nn, mp_size_t dn, int mua_k)
{
- mp_size_t itch_local = mpn_mulmod_bnm1_next_size (dn + 1);
- mp_size_t in = mpn_mu_div_qr_choose_in (nn - dn, dn, mua_k);
- mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local, dn, in);
+ mp_size_t qn, m;
+ int k;
- return in + itch_local + itch_out;
-}
+ /* FIXME: This isn't very carefully written, and might grossly overestimate
+ the amount of scratch needed, and might perhaps also underestimate it,
+ leading to potential buffer overruns. In particular k=0 might lead to
+ gross overestimates. */
-mp_size_t
-mpn_preinv_mu_div_qr_itch (mp_size_t nn, mp_size_t dn, mp_size_t in)
-{
- mp_size_t itch_local = mpn_mulmod_bnm1_next_size (dn + 1);
- mp_size_t itch_out = mpn_mulmod_bnm1_itch (itch_local, dn, in);
+ if (dn == 1)
+ return nn;
- return itch_local + itch_out;
+ qn = nn - dn;
+ if (qn >= dn)
+ {
+ k = mpn_fft_best_k (dn + 1, 0);
+ m = mpn_fft_next_size (dn + 1, k);
+ return (mua_k <= 1
+ ? 6 * dn
+ : m + 2 * dn);
+ }
+ else
+ {
+ k = mpn_fft_best_k (dn + 1, 0);
+ m = mpn_fft_next_size (dn + 1, k);
+ return (mua_k <= 1
+ ? m + 4 * qn
+ : m + 2 * qn);
+ }
}
diff --git a/gmp/mpn/generic/mu_divappr_q.c b/gmp/mpn/generic/mu_divappr_q.c
index c218b59fee..0a0434399f 100644
--- a/gmp/mpn/generic/mu_divappr_q.c
+++ b/gmp/mpn/generic/mu_divappr_q.c
@@ -7,63 +7,87 @@
Contributed to the GNU project by Torbjorn Granlund.
- THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE. IT IS
+ ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
+ ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
+ RELEASE.
-Copyright 2005-2007, 2009, 2010 Free Software Foundation, Inc.
+Copyright 2005, 2006, 2007 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+/* We use the "misunderstanding algorithm" (MUA), discovered by Paul Zimmermann
+ and Torbjorn Granlund when Torbjorn misunderstood Paul's explanation of
+ Jebelean's bidirectional exact division algorithm.
-/*
- The idea of the algorithm used herein is to compute a smaller inverted value
- than used in the standard Barrett algorithm, and thus save time in the
- Newton iterations, and pay just a small price when using the inverted value
- for developing quotient bits. This algorithm was presented at ICMS 2006.
+ The idea of this algorithm is to compute a smaller inverted value than used
+ in the standard Barrett algorithm, and thus save time in the Newton
+ iterations, and pay just a small price when using the inverted value for
+ developing quotient bits.
+
+ Written by Torbjorn Granlund. Paul Zimmermann suggested the use of the
+ "wrap around" trick. Based on the GMP divexact code and inspired by code
+ contributed to GMP by Karl Hasselstroem.
*/
-/* CAUTION: This code and the code in mu_div_qr.c should be edited in sync.
+
+/* CAUTION: This code and the code in mu_div_qr.c should be edited in lockstep.
Things to work on:
+ * Passing k isn't a great interface. Either 'in' should be passed, or
+ determined by the code.
+
+ * The current mpn_mu_div_qr_itch isn't exactly scientifically written.
+ Scratch space buffer overruns are not unlikely before some analysis is
+ applied. Since scratch requirements are expected to change, such an
+ analysis will have to wait til things settle.
+
+ * This isn't optimal when the remainder isn't needed, since the final
+ multiplication could be made special and take O(1) time on average, in that
+ case. This is particularly bad when qn << dn. At some level, code as in
+ GMP 4 mpn_tdiv_qr should be used, effectively dividing the leading 2qn
+ dividend limbs by the qn divisor limbs.
+
+ * This isn't optimal when the quotient isn't needed, as it might take a lot
+ of space. The computation is always needed, though, so there is not time
+ to save with special code.
+
* The itch/scratch scheme isn't perhaps such a good idea as it once seemed,
- demonstrated by the fact that the mpn_invertappr function's scratch needs
- mean that we need to keep a large allocation long after it is needed.
- Things are worse as mpn_mul_fft does not accept any scratch parameter,
- which means we'll have a large memory hole while in mpn_mul_fft. In
- general, a peak scratch need in the beginning of a function isn't
- well-handled by the itch/scratch scheme.
+ demonstrated by the fact that the mpn_inv function's scratch needs means
+ that we need to keep a large allocation long after it is needed. Things
+ are worse as mpn_mul_fft does not accept any scratch parameter, which means
+ we'll have a large memory hole while in mpn_mul_fft. In general, a peak
+ scratch need in the beginning of a function isn't well-handled by the
+ itch/scratch scheme.
+
+ * Some ideas from comments in divexact.c apply to this code too.
*/
+/* the NOSTAT stuff handles properly the case where files are concatenated */
+#ifdef NOSTAT
+#undef STAT
+#endif
+
#ifdef STAT
#undef STAT
#define STAT(x) x
#else
+#define NOSTAT
#define STAT(x)
#endif
@@ -72,22 +96,68 @@ see https://www.gnu.org/licenses/. */
#include "gmp-impl.h"
+/* In case k=0 (automatic choice), we distinguish 3 cases:
+ (a) dn < qn: in = ceil(qn / ceil(qn/dn))
+ (b) dn/3 < qn <= dn: in = ceil(qn / 2)
+ (c) qn < dn/3: in = qn
+ In all cases we have in <= dn.
+ */
+mp_size_t
+mpn_mu_divappr_q_choose_in (mp_size_t qn, mp_size_t dn, int k)
+{
+ mp_size_t in;
+
+ if (k == 0)
+ {
+ mp_size_t b;
+ if (qn > dn)
+ {
+ /* Compute an inverse size that is a nice partition of the quotient. */
+ b = (qn - 1) / dn + 1; /* ceil(qn/dn), number of blocks */
+ in = (qn - 1) / b + 1; /* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
+ }
+ else if (3 * qn > dn)
+ {
+ in = (qn - 1) / 2 + 1; /* b = 2 */
+ }
+ else
+ {
+ in = (qn - 1) / 1 + 1; /* b = 1 */
+ }
+ }
+ else
+ {
+ mp_size_t xn;
+ xn = MIN (dn, qn);
+ in = (xn - 1) / k + 1;
+ }
+
+ return in;
+}
+
mp_limb_t
mpn_mu_divappr_q (mp_ptr qp,
- mp_srcptr np,
+ mp_ptr np,
mp_size_t nn,
mp_srcptr dp,
mp_size_t dn,
mp_ptr scratch)
{
mp_size_t qn, in;
- mp_limb_t cy, qh;
+ mp_limb_t cy;
mp_ptr ip, tp;
- ASSERT (dn > 1);
+ /* FIXME: We should probably not handle tiny operands, but do it for now. */
+ if (dn == 1)
+ {
+ mpn_divrem_1 (scratch, 0L, np, nn, dp[0]);
+ MPN_COPY (qp, scratch, nn - 1);
+ return scratch[nn - 1];
+ }
qn = nn - dn;
+#if 1
/* If Q is smaller than D, truncate operands. */
if (qn + 1 < dn)
{
@@ -95,7 +165,18 @@ mpn_mu_divappr_q (mp_ptr qp,
nn -= dn - (qn + 1);
dp += dn - (qn + 1);
dn = qn + 1;
+
+ /* Since D is cut here, we can have a carry in N'/D' even if we don't
+ have it for N/D. */
+ if (mpn_cmp (np + nn - (qn + 1), dp, qn + 1) >= 0)
+ { /* quotient is 111...111 */
+ mp_size_t i;
+ for (i = 0; i <= qn; i ++)
+ qp[i] = ~ (mp_limb_t) 0;
+ return 0;
+ }
}
+#endif
/* Compute the inverse size. */
in = mpn_mu_divappr_q_choose_in (qn, dn, 0);
@@ -104,7 +185,7 @@ mpn_mu_divappr_q (mp_ptr qp,
#if 1
/* This alternative inverse computation method gets slightly more accurate
results. FIXMEs: (1) Temp allocation needs not analysed (2) itch function
- not adapted (3) mpn_invertappr scratch needs not met. */
+ not adapted (3) mpn_invert scratch needs not met. */
ip = scratch;
tp = scratch + in + 1;
@@ -113,7 +194,7 @@ mpn_mu_divappr_q (mp_ptr qp,
{
MPN_COPY (tp + 1, dp, in);
tp[0] = 1;
- mpn_invertappr (ip, tp, in + 1, NULL);
+ mpn_invert (ip, tp, in + 1, NULL);
MPN_COPY_INCR (ip, ip + 1, in);
}
else
@@ -123,7 +204,7 @@ mpn_mu_divappr_q (mp_ptr qp,
MPN_ZERO (ip, in);
else
{
- mpn_invertappr (ip, tp, in + 1, NULL);
+ mpn_invert (ip, tp, in + 1, NULL);
MPN_COPY_INCR (ip, ip + 1, in);
}
}
@@ -139,11 +220,11 @@ mpn_mu_divappr_q (mp_ptr qp,
{
tp[in + 1] = 0;
MPN_COPY (tp + in + 2, dp, in);
- mpn_invertappr (tp, tp + in + 1, in + 1, NULL);
+ mpn_invert (tp, tp + in + 1, in + 1, NULL);
}
else
{
- mpn_invertappr (tp, dp + dn - (in + 1), in + 1, NULL);
+ mpn_invert (tp, dp + dn - (in + 1), in + 1, NULL);
}
cy = mpn_sub_1 (tp, tp, in + 1, GMP_NUMB_HIGHBIT);
if (UNLIKELY (cy != 0))
@@ -151,14 +232,23 @@ mpn_mu_divappr_q (mp_ptr qp,
MPN_COPY (ip, tp + 1, in);
#endif
- qh = mpn_preinv_mu_divappr_q (qp, np, nn, dp, dn, ip, in, scratch + in);
+/* We can't really handle qh = 1 like this since we'd here clobber N, which is
+ not allowed in the way we've defined this function's API. */
+#if 0
+ qh = mpn_cmp (np + qn, dp, dn) >= 0;
+ if (qh != 0)
+ mpn_sub_n (np + qn, np + qn, dp, dn);
+#endif
+
+ mpn_preinv_mu_divappr_q (qp, np, nn, dp, dn, ip, in, scratch + in);
- return qh;
+/* return qh; */
+ return 0;
}
-mp_limb_t
+void
mpn_preinv_mu_divappr_q (mp_ptr qp,
- mp_srcptr np,
+ mp_ptr np,
mp_size_t nn,
mp_srcptr dp,
mp_size_t dn,
@@ -166,28 +256,24 @@ mpn_preinv_mu_divappr_q (mp_ptr qp,
mp_size_t in,
mp_ptr scratch)
{
+ mp_ptr rp;
mp_size_t qn;
- mp_limb_t cy, cx, qh;
+ mp_limb_t cy;
+ mp_ptr tp;
mp_limb_t r;
- mp_size_t tn, wn;
-
-#define rp scratch
-#define tp (scratch + dn)
-#define scratch_out (scratch + dn + tn)
qn = nn - dn;
+ if (qn == 0)
+ return;
+
+ rp = scratch;
+ tp = scratch + dn;
+
np += qn;
qp += qn;
- qh = mpn_cmp (np, dp, dn) >= 0;
- if (qh != 0)
- mpn_sub_n (rp, np, dp, dn);
- else
- MPN_COPY (rp, np, dn);
-
- if (qn == 0)
- return qh; /* Degenerate use. Should we allow this? */
+ MPN_COPY (rp, np, dn);
while (qn > 0)
{
@@ -203,7 +289,7 @@ mpn_preinv_mu_divappr_q (mp_ptr qp,
by the upper part of the partial remainder R. */
mpn_mul_n (tp, rp + dn - in, ip, in); /* mulhi */
cy = mpn_add_n (qp, tp + in, rp + dn - in, in); /* I's msb implicit */
- ASSERT_ALWAYS (cy == 0);
+ ASSERT_ALWAYS (cy == 0); /* FIXME */
qn -= in;
if (qn == 0)
@@ -212,23 +298,31 @@ mpn_preinv_mu_divappr_q (mp_ptr qp,
/* Compute the product of the quotient block and the divisor D, to be
subtracted from the partial remainder combined with new limbs from the
dividend N. We only really need the low dn limbs. */
-
- if (BELOW_THRESHOLD (in, MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD))
- mpn_mul (tp, dp, dn, qp, in); /* dn+in limbs, high 'in' cancels */
- else
+#if WANT_FFT
+ if (ABOVE_THRESHOLD (dn, MUL_FFT_MODF_THRESHOLD))
{
- tn = mpn_mulmod_bnm1_next_size (dn + 1);
- mpn_mulmod_bnm1 (tp, tn, dp, dn, qp, in, scratch_out);
- wn = dn + in - tn; /* number of wrapped limbs */
+ /* Use the wrap-around trick. */
+ mp_size_t m, wn;
+ int k;
+
+ k = mpn_fft_best_k (dn + 1, 0);
+ m = mpn_fft_next_size (dn + 1, k);
+ wn = dn + in - m; /* number of wrapped limbs */
+
+ mpn_mul_fft (tp, m, dp, dn, qp, in, k);
+
if (wn > 0)
{
- cy = mpn_sub_n (tp, tp, rp + dn - wn, wn);
- cy = mpn_sub_1 (tp + wn, tp + wn, tn - wn, cy);
- cx = mpn_cmp (rp + dn - in, tp + dn, tn - dn) < 0;
- ASSERT_ALWAYS (cx >= cy);
- mpn_incr_u (tp, cx - cy);
+ cy = mpn_add_n (tp, tp, rp + dn - wn, wn);
+ mpn_incr_u (tp + wn, cy);
+
+ cy = mpn_cmp (rp + dn - in, tp + dn, m - dn) < 0;
+ mpn_decr_u (tp, cy);
}
}
+ else
+#endif
+ mpn_mul (tp, dp, dn, qp, in); /* dn+in limbs, high 'in' cancels */
r = rp[dn - in] - tp[dn];
@@ -284,80 +378,45 @@ mpn_preinv_mu_divappr_q (mp_ptr qp,
/* FIXME: We should perhaps be somewhat more elegant in our rounding of the
quotient. For now, just make sure the returned quotient is >= the real
- quotient; add 3 with saturating arithmetic. */
+ quotient. */
qn = nn - dn;
- cy += mpn_add_1 (qp, qp, qn, 3);
+ cy = mpn_add_1 (qp, qp, qn, 3);
if (cy != 0)
{
- if (qh != 0)
- {
- /* Return a quotient of just 1-bits, with qh set. */
- mp_size_t i;
- for (i = 0; i < qn; i++)
- qp[i] = GMP_NUMB_MAX;
- }
- else
- {
- /* Propagate carry into qh. */
- qh = 1;
- }
+ MPN_ZERO (qp, qn);
+ mpn_sub_1 (qp, qp, qn, 1);
}
-
- return qh;
}
-/* In case k=0 (automatic choice), we distinguish 3 cases:
- (a) dn < qn: in = ceil(qn / ceil(qn/dn))
- (b) dn/3 < qn <= dn: in = ceil(qn / 2)
- (c) qn < dn/3: in = qn
- In all cases we have in <= dn.
- */
mp_size_t
-mpn_mu_divappr_q_choose_in (mp_size_t qn, mp_size_t dn, int k)
+mpn_mu_divappr_q_itch (mp_size_t nn, mp_size_t dn, int mua_k)
{
- mp_size_t in;
+ mp_size_t qn, m;
+ int k;
- if (k == 0)
- {
- mp_size_t b;
- if (qn > dn)
- {
- /* Compute an inverse size that is a nice partition of the quotient. */
- b = (qn - 1) / dn + 1; /* ceil(qn/dn), number of blocks */
- in = (qn - 1) / b + 1; /* ceil(qn/b) = ceil(qn / ceil(qn/dn)) */
- }
- else if (3 * qn > dn)
- {
- in = (qn - 1) / 2 + 1; /* b = 2 */
- }
- else
- {
- in = (qn - 1) / 1 + 1; /* b = 1 */
- }
- }
- else
- {
- mp_size_t xn;
- xn = MIN (dn, qn);
- in = (xn - 1) / k + 1;
- }
+ /* FIXME: This isn't very carefully written, and might grossly overestimate
+ the amount of scratch needed, and might perhaps also underestimate it,
+ leading to potential buffer overruns. In particular k=0 might lead to
+ gross overestimates. */
- return in;
-}
-
-mp_size_t
-mpn_mu_divappr_q_itch (mp_size_t nn, mp_size_t dn, int mua_k)
-{
- mp_size_t qn, in, itch_local, itch_out;
+ if (dn == 1)
+ return nn;
qn = nn - dn;
- if (qn + 1 < dn)
+ if (qn >= dn)
{
- dn = qn + 1;
+ k = mpn_fft_best_k (dn + 1, 0);
+ m = mpn_fft_next_size (dn + 1, k);
+ return dn + (mua_k <= 1
+ ? 6 * dn
+ : m + 2 * dn);
+ }
+ else
+ {
+ k = mpn_fft_best_k (dn + 1, 0);
+ m = mpn_fft_next_size (dn + 1, k);
+ return dn + (mua_k <= 1
+ ? m + 4 * qn
+ : m + 2 * qn);
}
- in = mpn_mu_divappr_q_choose_in (qn, dn, mua_k);
-
- itch_local = mpn_mulmod_bnm1_next_size (dn + 1);
- itch_out = mpn_mulmod_bnm1_itch (itch_local, dn, in);
- return in + dn + itch_local + itch_out;
}
diff --git a/gmp/mpn/generic/mul.c b/gmp/mpn/generic/mul.c
index 2d72df3d4d..489e1f524f 100644
--- a/gmp/mpn/generic/mul.c
+++ b/gmp/mpn/generic/mul.c
@@ -2,34 +2,23 @@
Contributed to the GNU project by Torbjorn Granlund.
-Copyright 1991, 1993, 1994, 1996, 1997, 1999-2003, 2005-2007, 2009, 2010, 2012
-Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2005,
+2006, 2007 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
@@ -39,42 +28,6 @@ see https://www.gnu.org/licenses/. */
#define MUL_BASECASE_MAX_UN 500
#endif
-/* Areas where the different toom algorithms can be called (extracted
- from the t-toom*.c files, and ignoring small constant offsets):
-
- 1/6 1/5 1/4 4/13 1/3 3/8 2/5 5/11 1/2 3/5 2/3 3/4 4/5 1 vn/un
- 4/7 6/7
- 6/11
- |--------------------| toom22 (small)
- || toom22 (large)
- |xxxx| toom22 called
- |-------------------------------------| toom32
- |xxxxxxxxxxxxxxxx| | toom32 called
- |------------| toom33
- |x| toom33 called
- |---------------------------------| | toom42
- |xxxxxxxxxxxxxxxxxxxxxxxx| | toom42 called
- |--------------------| toom43
- |xxxxxxxxxx| toom43 called
- |-----------------------------| toom52 (unused)
- |--------| toom44
- |xxxxxxxx| toom44 called
- |--------------------| | toom53
- |xxxxxx| toom53 called
- |-------------------------| toom62 (unused)
- |----------------| toom54 (unused)
- |--------------------| toom63
- |xxxxxxxxx| | toom63 called
- |---------------------------------| toom6h
- |xxxxxxxx| toom6h called
- |-------------------------| toom8h (32 bit)
- |------------------------------------------| toom8h (64 bit)
- |xxxxxxxx| toom8h called
-*/
-
-#define TOOM33_OK(an,bn) (6 + 2 * an < 3 * bn)
-#define TOOM44_OK(an,bn) (12 + 3 * an < 4 * bn)
-
/* Multiply the natural numbers u (pointed to by UP, with UN limbs) and v
(pointed to by VP, with VN limbs), and store the result at PRODP. The
result is UN + VN limbs. Return the most significant limb of the result.
@@ -87,34 +40,6 @@ see https://www.gnu.org/licenses/. */
2. PRODP != UP and PRODP != VP, i.e. the destination must be distinct from
the multiplier and the multiplicand. */
-/*
- * The cutoff lines in the toomX2 and toomX3 code are now exactly between the
- ideal lines of the surrounding algorithms. Is that optimal?
-
- * The toomX3 code now uses a structure similar to the one of toomX2, except
- that it loops longer in the unbalanced case. The result is that the
- remaining area might have un < vn. Should we fix the toomX2 code in a
- similar way?
-
- * The toomX3 code is used for the largest non-FFT unbalanced operands. It
- therefore calls mpn_mul recursively for certain cases.
-
- * Allocate static temp space using THRESHOLD variables (except for toom44
- when !WANT_FFT). That way, we can typically have no TMP_ALLOC at all.
-
- * We sort ToomX2 algorithms together, assuming the toom22, toom32, toom42
- have the same vn threshold. This is not true, we should actually use
- mul_basecase for slightly larger operands for toom32 than for toom22, and
- even larger for toom42.
-
- * That problem is even more prevalent for toomX3. We therefore use special
- THRESHOLD variables there.
-
- * Is our ITCH allocation correct?
-*/
-
-#define ITCH (16*vn + 100)
-
mp_limb_t
mpn_mul (mp_ptr prodp,
mp_srcptr up, mp_size_t un,
@@ -128,11 +53,13 @@ mpn_mul (mp_ptr prodp,
if (un == vn)
{
if (up == vp)
- mpn_sqr (prodp, up, un);
+ mpn_sqr_n (prodp, up, un);
else
mpn_mul_n (prodp, up, vp, un);
+ return prodp[2 * un - 1];
}
- else if (vn < MUL_TOOM22_THRESHOLD)
+
+ if (vn < MUL_KARATSUBA_THRESHOLD)
{ /* plain schoolbook multiplication */
/* Unless un is very large, or else if have an applicable mpn_mul_N,
@@ -171,9 +98,9 @@ mpn_mul (mp_ptr prodp,
The parts marked with X are the parts whose sums are copied into
the temporary buffer. */
- mp_limb_t tp[MUL_TOOM22_THRESHOLD_LIMIT];
+ mp_limb_t tp[MUL_KARATSUBA_THRESHOLD_LIMIT];
mp_limb_t cy;
- ASSERT (MUL_TOOM22_THRESHOLD <= MUL_TOOM22_THRESHOLD_LIMIT);
+ ASSERT (MUL_KARATSUBA_THRESHOLD <= MUL_KARATSUBA_THRESHOLD_LIMIT);
mpn_mul_basecase (prodp, up, MUL_BASECASE_MAX_UN, vp, vn);
prodp += MUL_BASECASE_MAX_UN;
@@ -184,7 +111,7 @@ mpn_mul (mp_ptr prodp,
{
mpn_mul_basecase (prodp, up, MUL_BASECASE_MAX_UN, vp, vn);
cy = mpn_add_n (prodp, prodp, tp, vn); /* add back preserved triangle */
- mpn_incr_u (prodp + vn, cy);
+ mpn_incr_u (prodp + vn, cy); /* safe? */
prodp += MUL_BASECASE_MAX_UN;
MPN_COPY (tp, prodp, vn); /* preserve high triangle */
up += MUL_BASECASE_MAX_UN;
@@ -196,233 +123,100 @@ mpn_mul (mp_ptr prodp,
}
else
{
- ASSERT (un > 0);
+ ASSERT_ALWAYS (un > 0);
mpn_mul_basecase (prodp, vp, vn, up, un);
}
cy = mpn_add_n (prodp, prodp, tp, vn); /* add back preserved triangle */
- mpn_incr_u (prodp + vn, cy);
+ mpn_incr_u (prodp + vn, cy); /* safe? */
}
+ return prodp[un + vn - 1];
}
- else if (BELOW_THRESHOLD (vn, MUL_TOOM33_THRESHOLD))
- {
- /* Use ToomX2 variants */
- mp_ptr scratch;
- TMP_SDECL; TMP_SMARK;
-
- scratch = TMP_SALLOC_LIMBS (ITCH);
-
- /* FIXME: This condition (repeated in the loop below) leaves from a vn*vn
- square to a (3vn-1)*vn rectangle. Leaving such a rectangle is hardly
- wise; we would get better balance by slightly moving the bound. We
- will sometimes end up with un < vn, like in the X3 arm below. */
- if (un >= 3 * vn)
- {
- mp_limb_t cy;
- mp_ptr ws;
-
- /* The maximum ws usage is for the mpn_mul result. */
- ws = TMP_SALLOC_LIMBS (4 * vn);
- mpn_toom42_mul (prodp, up, 2 * vn, vp, vn, scratch);
- un -= 2 * vn;
- up += 2 * vn;
- prodp += 2 * vn;
-
- while (un >= 3 * vn)
- {
- mpn_toom42_mul (ws, up, 2 * vn, vp, vn, scratch);
- un -= 2 * vn;
- up += 2 * vn;
- cy = mpn_add_n (prodp, prodp, ws, vn);
- MPN_COPY (prodp + vn, ws + vn, 2 * vn);
- mpn_incr_u (prodp + vn, cy);
- prodp += 2 * vn;
- }
-
- /* vn <= un < 3vn */
-
- if (4 * un < 5 * vn)
- mpn_toom22_mul (ws, up, un, vp, vn, scratch);
- else if (4 * un < 7 * vn)
- mpn_toom32_mul (ws, up, un, vp, vn, scratch);
- else
- mpn_toom42_mul (ws, up, un, vp, vn, scratch);
-
- cy = mpn_add_n (prodp, prodp, ws, vn);
- MPN_COPY (prodp + vn, ws + vn, un);
- mpn_incr_u (prodp + vn, cy);
- }
- else
- {
- if (4 * un < 5 * vn)
- mpn_toom22_mul (prodp, up, un, vp, vn, scratch);
- else if (4 * un < 7 * vn)
- mpn_toom32_mul (prodp, up, un, vp, vn, scratch);
- else
- mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
- }
- TMP_SFREE;
- }
- else if (BELOW_THRESHOLD ((un + vn) >> 1, MUL_FFT_THRESHOLD) ||
- BELOW_THRESHOLD (3 * vn, MUL_FFT_THRESHOLD))
+ if (ABOVE_THRESHOLD ((un + vn) >> 1, MUL_FFT_THRESHOLD) &&
+ ABOVE_THRESHOLD (vn, MUL_FFT_THRESHOLD / 3)) /* FIXME */
{
- /* Handle the largest operands that are not in the FFT range. The 2nd
- condition makes very unbalanced operands avoid the FFT code (except
- perhaps as coefficient products of the Toom code. */
-
- if (BELOW_THRESHOLD (vn, MUL_TOOM44_THRESHOLD) || !TOOM44_OK (un, vn))
- {
- /* Use ToomX3 variants */
- mp_ptr scratch;
- TMP_SDECL; TMP_SMARK;
-
- scratch = TMP_SALLOC_LIMBS (ITCH);
-
- if (2 * un >= 5 * vn)
- {
- mp_limb_t cy;
- mp_ptr ws;
-
- /* The maximum ws usage is for the mpn_mul result. */
- ws = TMP_SALLOC_LIMBS (7 * vn >> 1);
-
- if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM63_THRESHOLD))
- mpn_toom42_mul (prodp, up, 2 * vn, vp, vn, scratch);
- else
- mpn_toom63_mul (prodp, up, 2 * vn, vp, vn, scratch);
- un -= 2 * vn;
- up += 2 * vn;
- prodp += 2 * vn;
-
- while (2 * un >= 5 * vn) /* un >= 2.5vn */
- {
- if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM63_THRESHOLD))
- mpn_toom42_mul (ws, up, 2 * vn, vp, vn, scratch);
- else
- mpn_toom63_mul (ws, up, 2 * vn, vp, vn, scratch);
- un -= 2 * vn;
- up += 2 * vn;
- cy = mpn_add_n (prodp, prodp, ws, vn);
- MPN_COPY (prodp + vn, ws + vn, 2 * vn);
- mpn_incr_u (prodp + vn, cy);
- prodp += 2 * vn;
- }
-
- /* vn / 2 <= un < 2.5vn */
-
- if (un < vn)
- mpn_mul (ws, vp, vn, up, un);
- else
- mpn_mul (ws, up, un, vp, vn);
-
- cy = mpn_add_n (prodp, prodp, ws, vn);
- MPN_COPY (prodp + vn, ws + vn, un);
- mpn_incr_u (prodp + vn, cy);
- }
- else
- {
- if (6 * un < 7 * vn)
- mpn_toom33_mul (prodp, up, un, vp, vn, scratch);
- else if (2 * un < 3 * vn)
- {
- if (BELOW_THRESHOLD (vn, MUL_TOOM32_TO_TOOM43_THRESHOLD))
- mpn_toom32_mul (prodp, up, un, vp, vn, scratch);
- else
- mpn_toom43_mul (prodp, up, un, vp, vn, scratch);
- }
- else if (6 * un < 11 * vn)
- {
- if (4 * un < 7 * vn)
- {
- if (BELOW_THRESHOLD (vn, MUL_TOOM32_TO_TOOM53_THRESHOLD))
- mpn_toom32_mul (prodp, up, un, vp, vn, scratch);
- else
- mpn_toom53_mul (prodp, up, un, vp, vn, scratch);
- }
- else
- {
- if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM53_THRESHOLD))
- mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
- else
- mpn_toom53_mul (prodp, up, un, vp, vn, scratch);
- }
- }
- else
- {
- if (BELOW_THRESHOLD (vn, MUL_TOOM42_TO_TOOM63_THRESHOLD))
- mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
- else
- mpn_toom63_mul (prodp, up, un, vp, vn, scratch);
- }
- }
- TMP_SFREE;
- }
- else
- {
- mp_ptr scratch;
- TMP_DECL; TMP_MARK;
-
- if (BELOW_THRESHOLD (vn, MUL_TOOM6H_THRESHOLD))
- {
- scratch = TMP_ALLOC_LIMBS (mpn_toom44_mul_itch (un, vn));
- mpn_toom44_mul (prodp, up, un, vp, vn, scratch);
- }
- else if (BELOW_THRESHOLD (vn, MUL_TOOM8H_THRESHOLD))
- {
- scratch = TMP_ALLOC_LIMBS (mpn_toom6h_mul_itch (un, vn));
- mpn_toom6h_mul (prodp, up, un, vp, vn, scratch);
- }
- else
- {
- scratch = TMP_ALLOC_LIMBS (mpn_toom8h_mul_itch (un, vn));
- mpn_toom8h_mul (prodp, up, un, vp, vn, scratch);
- }
- TMP_FREE;
- }
+ mpn_mul_fft_full (prodp, up, un, vp, vn);
+ return prodp[un + vn - 1];
}
- else
- {
- if (un >= 8 * vn)
- {
- mp_limb_t cy;
- mp_ptr ws;
- TMP_DECL; TMP_MARK;
-
- /* The maximum ws usage is for the mpn_mul result. */
- ws = TMP_BALLOC_LIMBS (9 * vn >> 1);
-
- mpn_fft_mul (prodp, up, 3 * vn, vp, vn);
- un -= 3 * vn;
- up += 3 * vn;
- prodp += 3 * vn;
- while (2 * un >= 7 * vn) /* un >= 3.5vn */
- {
- mpn_fft_mul (ws, up, 3 * vn, vp, vn);
- un -= 3 * vn;
- up += 3 * vn;
- cy = mpn_add_n (prodp, prodp, ws, vn);
- MPN_COPY (prodp + vn, ws + vn, 3 * vn);
- mpn_incr_u (prodp + vn, cy);
- prodp += 3 * vn;
- }
-
- /* vn / 2 <= un < 3.5vn */
-
- if (un < vn)
- mpn_mul (ws, vp, vn, up, un);
- else
- mpn_mul (ws, up, un, vp, vn);
-
- cy = mpn_add_n (prodp, prodp, ws, vn);
- MPN_COPY (prodp + vn, ws + vn, un);
- mpn_incr_u (prodp + vn, cy);
-
- TMP_FREE;
- }
- else
- mpn_fft_mul (prodp, up, un, vp, vn);
- }
+ {
+ mp_ptr ws;
+ mp_ptr scratch;
+#if WANT_ASSERT
+ mp_ptr ssssp;
+#endif
+ TMP_DECL;
+ TMP_MARK;
+
+#define WSALL (4 * vn)
+ ws = TMP_SALLOC_LIMBS (WSALL + 1);
+
+#define ITCH ((un + vn) * 4 + 100)
+ scratch = TMP_ALLOC_LIMBS (ITCH + 1);
+#if WANT_ASSERT
+ ssssp = scratch + ITCH;
+ ws[WSALL] = 0xbabecafe;
+ ssssp[0] = 0xbeef;
+#endif
- return prodp[un + vn - 1]; /* historic */
+ if (un >= 3 * vn)
+ {
+ mp_limb_t cy;
+
+ mpn_toom42_mul (prodp, up, 2 * vn, vp, vn, scratch);
+ un -= 2 * vn;
+ up += 2 * vn;
+ prodp += 2 * vn;
+
+ while (un >= 3 * vn)
+ {
+ mpn_toom42_mul (ws, up, 2 * vn, vp, vn, scratch);
+ un -= 2 * vn;
+ up += 2 * vn;
+ cy = mpn_add_n (prodp, prodp, ws, vn);
+ MPN_COPY (prodp + vn, ws + vn, 2 * vn);
+ mpn_incr_u (prodp + vn, cy);
+ prodp += 2 * vn;
+ }
+
+ if (5 * un > 9 * vn)
+ {
+ mpn_toom42_mul (ws, up, un, vp, vn, scratch);
+ cy = mpn_add_n (prodp, prodp, ws, vn);
+ MPN_COPY (prodp + vn, ws + vn, un);
+ mpn_incr_u (prodp + vn, cy);
+ }
+ else if (9 * un > 10 * vn)
+ {
+ mpn_toom32_mul (ws, up, un, vp, vn, scratch);
+ cy = mpn_add_n (prodp, prodp, ws, vn);
+ MPN_COPY (prodp + vn, ws + vn, un);
+ mpn_incr_u (prodp + vn, cy);
+ }
+ else
+ {
+ mpn_toom22_mul (ws, up, un, vp, vn, scratch);
+ cy = mpn_add_n (prodp, prodp, ws, vn);
+ MPN_COPY (prodp + vn, ws + vn, un);
+ mpn_incr_u (prodp + vn, cy);
+ }
+
+ ASSERT (ws[WSALL] == 0xbabecafe);
+ ASSERT (ssssp[0] == 0xbeef);
+ TMP_FREE;
+ return prodp[un + vn - 1];
+ }
+
+ if (un * 5 > vn * 9)
+ mpn_toom42_mul (prodp, up, un, vp, vn, scratch);
+ else if (9 * un > 10 * vn)
+ mpn_toom32_mul (prodp, up, un, vp, vn, scratch);
+ else
+ mpn_toom22_mul (prodp, up, un, vp, vn, scratch);
+
+ ASSERT (ws[WSALL] == 0xbabecafe);
+ ASSERT (ssssp[0] == 0xbeef);
+ TMP_FREE;
+ return prodp[un + vn - 1];
+ }
}
diff --git a/gmp/mpn/generic/mul_1.c b/gmp/mpn/generic/mul_1.c
index 6b2ee59a2c..b8290cc6af 100644
--- a/gmp/mpn/generic/mul_1.c
+++ b/gmp/mpn/generic/mul_1.c
@@ -1,33 +1,23 @@
/* mpn_mul_1 -- Multiply a limb vector with a single limb and store the
product in a second limb vector.
-Copyright 1991-1994, 1996, 2000-2002 Free Software Foundation, Inc.
+Copyright 1991, 1992, 1993, 1994, 1996, 2000, 2001, 2002 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
diff --git a/gmp/mpn/generic/mul_basecase.c b/gmp/mpn/generic/mul_basecase.c
index 9309ef72c8..4f02545d57 100644
--- a/gmp/mpn/generic/mul_basecase.c
+++ b/gmp/mpn/generic/mul_basecase.c
@@ -4,33 +4,24 @@
THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
-Copyright 1991-1994, 1996, 1997, 2000-2002 Free Software Foundation, Inc.
+
+Copyright 1991, 1992, 1993, 1994, 1996, 1997, 2000, 2001, 2002 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
@@ -42,7 +33,7 @@ see https://www.gnu.org/licenses/. */
Note that prodp gets usize+vsize limbs stored, even if the actual result
only needs usize+vsize-1.
- There's no good reason to call here with vsize>=MUL_TOOM22_THRESHOLD.
+ There's no good reason to call here with vsize>=MUL_KARATSUBA_THRESHOLD.
Currently this is allowed, but it might not be in the future.
This is the most critical code for multiplication. All multiplies rely
diff --git a/gmp/mpn/generic/mul_fft.c b/gmp/mpn/generic/mul_fft.c
index 5e763a3a73..836a89a001 100644
--- a/gmp/mpn/generic/mul_fft.c
+++ b/gmp/mpn/generic/mul_fft.c
@@ -6,33 +6,23 @@
SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 1998-2010, 2012, 2013 Free Software Foundation, Inc.
+Copyright 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
+Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
/* References:
@@ -70,79 +60,76 @@ see https://www.gnu.org/licenses/. */
#include "gmp-impl.h"
#ifdef WANT_ADDSUB
-#include "generic/add_n_sub_n.c"
-#define HAVE_NATIVE_mpn_add_n_sub_n 1
+#include "generic/addsub_n.c"
+#define HAVE_NATIVE_mpn_addsub_n 1
#endif
-static mp_limb_t mpn_mul_fft_internal (mp_ptr, mp_size_t, int, mp_ptr *,
- mp_ptr *, mp_ptr, mp_ptr, mp_size_t,
- mp_size_t, mp_size_t, int **, mp_ptr, int);
-static void mpn_mul_fft_decompose (mp_ptr, mp_ptr *, mp_size_t, mp_size_t, mp_srcptr,
- mp_size_t, mp_size_t, mp_size_t, mp_ptr);
+static mp_limb_t mpn_mul_fft_internal
+__GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, int, int, mp_ptr *, mp_ptr *,
+ mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_size_t, int **, mp_ptr,
+ int));
/* Find the best k to use for a mod 2^(m*GMP_NUMB_BITS)+1 FFT for m >= n.
- We have sqr=0 if for a multiply, sqr=1 for a square.
- There are three generations of this code; we keep the old ones as long as
- some gmp-mparam.h is not updated. */
-
-
-/*****************************************************************************/
-
-#if TUNE_PROGRAM_BUILD || (defined (MUL_FFT_TABLE3) && defined (SQR_FFT_TABLE3))
+ sqr==0 if for a multiply, sqr==1 for a square.
+ Don't declare it static since it is needed by tuneup.
+*/
+#ifdef MUL_FFT_TABLE2
-#ifndef FFT_TABLE3_SIZE /* When tuning this is defined in gmp-impl.h */
-#if defined (MUL_FFT_TABLE3_SIZE) && defined (SQR_FFT_TABLE3_SIZE)
-#if MUL_FFT_TABLE3_SIZE > SQR_FFT_TABLE3_SIZE
-#define FFT_TABLE3_SIZE MUL_FFT_TABLE3_SIZE
+#if defined (MUL_FFT_TABLE2_SIZE) && defined (SQR_FFT_TABLE2_SIZE)
+#if MUL_FFT_TABLE2_SIZE > SQR_FFT_TABLE2_SIZE
+#define FFT_TABLE2_SIZE MUL_FFT_TABLE2_SIZE
#else
-#define FFT_TABLE3_SIZE SQR_FFT_TABLE3_SIZE
-#endif
+#define FFT_TABLE2_SIZE SQR_FFT_TABLE2_SIZE
#endif
#endif
-#ifndef FFT_TABLE3_SIZE
-#define FFT_TABLE3_SIZE 200
+#ifndef FFT_TABLE2_SIZE
+#define FFT_TABLE2_SIZE 200
#endif
-FFT_TABLE_ATTRS struct fft_table_nk mpn_fft_table3[2][FFT_TABLE3_SIZE] =
+/* FIXME: The format of this should change to need less space.
+ Perhaps put n and k in the same 32-bit word, with n shifted-down
+ (k-2) steps, and k using the 4-5 lowest bits. That's possible since
+ n-1 is highly divisible.
+ Alternatively, separate n and k out into separate arrays. */
+struct nk {
+ unsigned int n:27;
+ unsigned int k:5;
+};
+
+static struct nk mpn_fft_table2[2][FFT_TABLE2_SIZE] =
{
- MUL_FFT_TABLE3,
- SQR_FFT_TABLE3
+ MUL_FFT_TABLE2,
+ SQR_FFT_TABLE2
};
int
mpn_fft_best_k (mp_size_t n, int sqr)
{
- FFT_TABLE_ATTRS struct fft_table_nk *fft_tab, *tab;
- mp_size_t tab_n, thres;
+ struct nk *tab;
int last_k;
- fft_tab = mpn_fft_table3[sqr];
- last_k = fft_tab->k;
- for (tab = fft_tab + 1; ; tab++)
+ last_k = 4;
+ for (tab = mpn_fft_table2[sqr] + 1; ; tab++)
{
- tab_n = tab->n;
- thres = tab_n << last_k;
- if (n <= thres)
+ if (n < tab->n)
break;
last_k = tab->k;
}
return last_k;
}
-
-#define MPN_FFT_BEST_READY 1
#endif
-/*****************************************************************************/
-
-#if ! defined (MPN_FFT_BEST_READY)
+#if !defined (MUL_FFT_TABLE2) || TUNE_PROGRAM_BUILD
FFT_TABLE_ATTRS mp_size_t mpn_fft_table[2][MPN_FFT_TABLE_SIZE] =
{
MUL_FFT_TABLE,
SQR_FFT_TABLE
};
+#endif
+#if !defined (MUL_FFT_TABLE2)
int
mpn_fft_best_k (mp_size_t n, int sqr)
{
@@ -160,9 +147,6 @@ mpn_fft_best_k (mp_size_t n, int sqr)
}
#endif
-/*****************************************************************************/
-
-
/* Returns smallest possible number of limbs >= pl for a fft of size 2^k,
i.e. smallest multiple of 2^k >= pl.
@@ -196,97 +180,137 @@ mpn_fft_initl (int **l, int k)
}
}
+/* Shift {up, n} of cnt bits to the left, store the complemented result
+ in {rp, n}, and output the shifted bits (not complemented).
+ Same as:
+ cc = mpn_lshift (rp, up, n, cnt);
+ mpn_com_n (rp, rp, n);
+ return cc;
-/* r <- a*2^d mod 2^(n*GMP_NUMB_BITS)+1 with a = {a, n+1}
+ Assumes n >= 1, 1 < cnt < GMP_NUMB_BITS, rp >= up.
+*/
+#ifndef HAVE_NATIVE_mpn_lshiftc
+#undef mpn_lshiftc
+static mp_limb_t
+mpn_lshiftc (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
+{
+ mp_limb_t high_limb, low_limb;
+ unsigned int tnc;
+ mp_size_t i;
+ mp_limb_t retval;
+
+ up += n;
+ rp += n;
+
+ tnc = GMP_NUMB_BITS - cnt;
+ low_limb = *--up;
+ retval = low_limb >> tnc;
+ high_limb = (low_limb << cnt);
+
+ for (i = n - 1; i != 0; i--)
+ {
+ low_limb = *--up;
+ *--rp = (~(high_limb | (low_limb >> tnc))) & GMP_NUMB_MASK;
+ high_limb = low_limb << cnt;
+ }
+ *--rp = (~high_limb) & GMP_NUMB_MASK;
+
+ return retval;
+}
+#endif
+
+/* r <- a*2^e mod 2^(n*GMP_NUMB_BITS)+1 with a = {a, n+1}
Assumes a is semi-normalized, i.e. a[n] <= 1.
r and a must have n+1 limbs, and not overlap.
*/
static void
-mpn_fft_mul_2exp_modF (mp_ptr r, mp_srcptr a, mp_bitcnt_t d, mp_size_t n)
+mpn_fft_mul_2exp_modF (mp_ptr r, mp_srcptr a, unsigned int d, mp_size_t n)
{
- unsigned int sh;
- mp_size_t m;
+ int sh, negate;
mp_limb_t cc, rd;
sh = d % GMP_NUMB_BITS;
- m = d / GMP_NUMB_BITS;
+ d /= GMP_NUMB_BITS;
+ negate = d >= n;
+ if (negate)
+ d -= n;
- if (m >= n) /* negate */
+ if (negate)
{
- /* r[0..m-1] <-- lshift(a[n-m]..a[n-1], sh)
- r[m..n-1] <-- -lshift(a[0]..a[n-m-1], sh) */
-
- m -= n;
+ /* r[0..d-1] <-- lshift(a[n-d]..a[n-1], sh)
+ r[d..n-1] <-- -lshift(a[0]..a[n-d-1], sh) */
if (sh != 0)
{
/* no out shift below since a[n] <= 1 */
- mpn_lshift (r, a + n - m, m + 1, sh);
- rd = r[m];
- cc = mpn_lshiftc (r + m, a, n - m, sh);
+ mpn_lshift (r, a + n - d, d + 1, sh);
+ rd = r[d];
+ cc = mpn_lshiftc (r + d, a, n - d, sh);
}
else
{
- MPN_COPY (r, a + n - m, m);
+ MPN_COPY (r, a + n - d, d);
rd = a[n];
- mpn_com (r + m, a, n - m);
+ mpn_com_n (r + d, a, n - d);
cc = 0;
}
- /* add cc to r[0], and add rd to r[m] */
+ /* add cc to r[0], and add rd to r[d] */
- /* now add 1 in r[m], subtract 1 in r[n], i.e. add 1 in r[0] */
+ /* now add 1 in r[d], subtract 1 in r[n], i.e. add 1 in r[0] */
r[n] = 0;
/* cc < 2^sh <= 2^(GMP_NUMB_BITS-1) thus no overflow here */
cc++;
mpn_incr_u (r, cc);
- rd++;
+ rd ++;
/* rd might overflow when sh=GMP_NUMB_BITS-1 */
cc = (rd == 0) ? 1 : rd;
- r = r + m + (rd == 0);
+ r = r + d + (rd == 0);
mpn_incr_u (r, cc);
+
+ return;
+ }
+
+ /* if negate=0,
+ r[0..d-1] <-- -lshift(a[n-d]..a[n-1], sh)
+ r[d..n-1] <-- lshift(a[0]..a[n-d-1], sh)
+ */
+ if (sh != 0)
+ {
+ /* no out bits below since a[n] <= 1 */
+ mpn_lshiftc (r, a + n - d, d + 1, sh);
+ rd = ~r[d];
+ /* {r, d+1} = {a+n-d, d+1} << sh */
+ cc = mpn_lshift (r + d, a, n - d, sh); /* {r+d, n-d} = {a, n-d}<<sh */
}
else
{
- /* r[0..m-1] <-- -lshift(a[n-m]..a[n-1], sh)
- r[m..n-1] <-- lshift(a[0]..a[n-m-1], sh) */
- if (sh != 0)
- {
- /* no out bits below since a[n] <= 1 */
- mpn_lshiftc (r, a + n - m, m + 1, sh);
- rd = ~r[m];
- /* {r, m+1} = {a+n-m, m+1} << sh */
- cc = mpn_lshift (r + m, a, n - m, sh); /* {r+m, n-m} = {a, n-m}<<sh */
- }
- else
- {
- /* r[m] is not used below, but we save a test for m=0 */
- mpn_com (r, a + n - m, m + 1);
- rd = a[n];
- MPN_COPY (r + m, a, n - m);
- cc = 0;
- }
+ /* r[d] is not used below, but we save a test for d=0 */
+ mpn_com_n (r, a + n - d, d + 1);
+ rd = a[n];
+ MPN_COPY (r + d, a, n - d);
+ cc = 0;
+ }
- /* now complement {r, m}, subtract cc from r[0], subtract rd from r[m] */
+ /* now complement {r, d}, subtract cc from r[0], subtract rd from r[d] */
- /* if m=0 we just have r[0]=a[n] << sh */
- if (m != 0)
- {
- /* now add 1 in r[0], subtract 1 in r[m] */
- if (cc-- == 0) /* then add 1 to r[0] */
- cc = mpn_add_1 (r, r, n, CNST_LIMB(1));
- cc = mpn_sub_1 (r, r, m, cc) + 1;
- /* add 1 to cc instead of rd since rd might overflow */
- }
+ /* if d=0 we just have r[0]=a[n] << sh */
+ if (d != 0)
+ {
+ /* now add 1 in r[0], subtract 1 in r[d] */
+ if (cc-- == 0) /* then add 1 to r[0] */
+ cc = mpn_add_1 (r, r, n, CNST_LIMB(1));
+ cc = mpn_sub_1 (r, r, d, cc) + 1;
+ /* add 1 to cc instead of rd since rd might overflow */
+ }
- /* now subtract cc and rd from r[m..n] */
+ /* now subtract cc and rd from r[d..n] */
- r[n] = -mpn_sub_1 (r + m, r + m, n - m, cc);
- r[n] -= mpn_sub_1 (r + m, r + m, n - m, rd);
- if (r[n] & GMP_LIMB_HIGHBIT)
- r[n] = mpn_add_1 (r, r, n, CNST_LIMB(1));
- }
+ r[n] = -mpn_sub_1 (r + d, r + d, n - d, cc);
+ r[n] -= mpn_sub_1 (r + d, r + d, n - d, rd);
+ if (r[n] & GMP_LIMB_HIGHBIT)
+ r[n] = mpn_add_1 (r, r, n, CNST_LIMB(1));
}
@@ -294,7 +318,7 @@ mpn_fft_mul_2exp_modF (mp_ptr r, mp_srcptr a, mp_bitcnt_t d, mp_size_t n)
Assumes a and b are semi-normalized.
*/
static inline void
-mpn_fft_add_modF (mp_ptr r, mp_srcptr a, mp_srcptr b, mp_size_t n)
+mpn_fft_add_modF (mp_ptr r, mp_srcptr a, mp_srcptr b, int n)
{
mp_limb_t c, x;
@@ -325,7 +349,7 @@ mpn_fft_add_modF (mp_ptr r, mp_srcptr a, mp_srcptr b, mp_size_t n)
Assumes a and b are semi-normalized.
*/
static inline void
-mpn_fft_sub_modF (mp_ptr r, mp_srcptr a, mp_srcptr b, mp_size_t n)
+mpn_fft_sub_modF (mp_ptr r, mp_srcptr a, mp_srcptr b, int n)
{
mp_limb_t c, x;
@@ -363,8 +387,8 @@ mpn_fft_fft (mp_ptr *Ap, mp_size_t K, int **ll,
if (K == 2)
{
mp_limb_t cy;
-#if HAVE_NATIVE_mpn_add_n_sub_n
- cy = mpn_add_n_sub_n (Ap[0], Ap[inc], Ap[0], Ap[inc], n + 1) & 1;
+#if HAVE_NATIVE_mpn_addsub_n
+ cy = mpn_addsub_n (Ap[0], Ap[inc], Ap[0], Ap[inc], n + 1) & 1;
#else
MPN_COPY (tp, Ap[0], n + 1);
mpn_add_n (Ap[0], Ap[0], Ap[inc], n + 1);
@@ -377,14 +401,14 @@ mpn_fft_fft (mp_ptr *Ap, mp_size_t K, int **ll,
}
else
{
- mp_size_t j, K2 = K >> 1;
+ int j;
int *lk = *ll;
- mpn_fft_fft (Ap, K2, ll-1, 2 * omega, n, inc * 2, tp);
- mpn_fft_fft (Ap+inc, K2, ll-1, 2 * omega, n, inc * 2, tp);
+ mpn_fft_fft (Ap, K >> 1, ll-1, 2 * omega, n, inc * 2, tp);
+ mpn_fft_fft (Ap+inc, K >> 1, ll-1, 2 * omega, n, inc * 2, tp);
/* A[2*j*inc] <- A[2*j*inc] + omega^l[k][2*j*inc] A[(2j+1)inc]
A[(2j+1)inc] <- A[2*j*inc] + omega^l[k][(2j+1)inc] A[(2j+1)inc] */
- for (j = 0; j < K2; j++, lk += 2, Ap += 2 * inc)
+ for (j = 0; j < (K >> 1); j++, lk += 2, Ap += 2 * inc)
{
/* Ap[inc] <- Ap[0] + Ap[inc] * 2^(lk[1] * omega)
Ap[0] <- Ap[0] + Ap[inc] * 2^(lk[0] * omega) */
@@ -429,7 +453,7 @@ mpn_fft_normalize (mp_ptr ap, mp_size_t n)
/* a[i] <- a[i]*b[i] mod 2^(n*GMP_NUMB_BITS)+1 for 0 <= i < K */
static void
-mpn_fft_mul_modF_K (mp_ptr *ap, mp_ptr *bp, mp_size_t n, mp_size_t K)
+mpn_fft_mul_modF_K (mp_ptr *ap, mp_ptr *bp, mp_size_t n, int K)
{
int i;
int sqr = (ap == bp);
@@ -439,13 +463,12 @@ mpn_fft_mul_modF_K (mp_ptr *ap, mp_ptr *bp, mp_size_t n, mp_size_t K)
if (n >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
{
- mp_size_t K2, nprime2, Nprime2, M2, maxLK, l, Mp2;
- int k;
- int **fft_l, *tmp;
+ int k, K2, nprime2, Nprime2, M2, maxLK, l, Mp2;
+ int **_fft_l;
mp_ptr *Ap, *Bp, A, B, T;
k = mpn_fft_best_k (n, sqr);
- K2 = (mp_size_t) 1 << k;
+ K2 = 1 << k;
ASSERT_ALWAYS((n & (K2 - 1)) == 0);
maxLK = (K2 > GMP_NUMB_BITS) ? K2 : GMP_NUMB_BITS;
M2 = n * GMP_NUMB_BITS >> k;
@@ -457,10 +480,10 @@ mpn_fft_mul_modF_K (mp_ptr *ap, mp_ptr *bp, mp_size_t n, mp_size_t K)
/* we should ensure that nprime2 is a multiple of the next K */
if (nprime2 >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
{
- mp_size_t K3;
+ unsigned long K3;
for (;;)
{
- K3 = (mp_size_t) 1 << mpn_fft_best_k (nprime2, sqr);
+ K3 = 1L << mpn_fft_best_k (nprime2, sqr);
if ((nprime2 & (K3 - 1)) == 0)
break;
nprime2 = (nprime2 + K3 - 1) & -K3;
@@ -472,53 +495,41 @@ mpn_fft_mul_modF_K (mp_ptr *ap, mp_ptr *bp, mp_size_t n, mp_size_t K)
Mp2 = Nprime2 >> k;
- Ap = TMP_BALLOC_MP_PTRS (K2);
- Bp = TMP_BALLOC_MP_PTRS (K2);
- A = TMP_BALLOC_LIMBS (2 * (nprime2 + 1) << k);
- T = TMP_BALLOC_LIMBS (2 * (nprime2 + 1));
- B = A + ((nprime2 + 1) << k);
- fft_l = TMP_BALLOC_TYPE (k + 1, int *);
- tmp = TMP_BALLOC_TYPE ((size_t) 2 << k, int);
+ Ap = TMP_ALLOC_MP_PTRS (K2);
+ Bp = TMP_ALLOC_MP_PTRS (K2);
+ A = TMP_ALLOC_LIMBS (2 * K2 * (nprime2 + 1));
+ T = TMP_ALLOC_LIMBS (2 * (nprime2 + 1));
+ B = A + K2 * (nprime2 + 1);
+ _fft_l = TMP_ALLOC_TYPE (k + 1, int *);
for (i = 0; i <= k; i++)
- {
- fft_l[i] = tmp;
- tmp += (mp_size_t) 1 << i;
- }
+ _fft_l[i] = TMP_ALLOC_TYPE (1<<i, int);
+ mpn_fft_initl (_fft_l, k);
- mpn_fft_initl (fft_l, k);
-
- TRACE (printf ("recurse: %ldx%ld limbs -> %ld times %ldx%ld (%1.2f)\n", n,
+ TRACE (printf ("recurse: %ldx%ld limbs -> %d times %dx%d (%1.2f)\n", n,
n, K2, nprime2, nprime2, 2.0*(double)n/nprime2/K2));
for (i = 0; i < K; i++, ap++, bp++)
{
- mp_limb_t cy;
mpn_fft_normalize (*ap, n);
if (!sqr)
mpn_fft_normalize (*bp, n);
-
- mpn_mul_fft_decompose (A, Ap, K2, nprime2, *ap, (l << k) + 1, l, Mp2, T);
- if (!sqr)
- mpn_mul_fft_decompose (B, Bp, K2, nprime2, *bp, (l << k) + 1, l, Mp2, T);
-
- cy = mpn_mul_fft_internal (*ap, n, k, Ap, Bp, A, B, nprime2,
- l, Mp2, fft_l, T, sqr);
- (*ap)[n] = cy;
+ mpn_mul_fft_internal (*ap, *ap, *bp, n, k, K2, Ap, Bp, A, B, nprime2,
+ l, Mp2, _fft_l, T, 1);
}
}
else
{
mp_ptr a, b, tp, tpn;
mp_limb_t cc;
- mp_size_t n2 = 2 * n;
- tp = TMP_BALLOC_LIMBS (n2);
+ int n2 = 2 * n;
+ tp = TMP_ALLOC_LIMBS (n2);
tpn = tp + n;
- TRACE (printf (" mpn_mul_n %ld of %ld limbs\n", K, n));
+ TRACE (printf (" mpn_mul_n %d of %ld limbs\n", K, n));
for (i = 0; i < K; i++)
{
a = *ap++;
b = *bp++;
if (sqr)
- mpn_sqr (tp, a, n);
+ mpn_sqr_n (tp, a, n);
else
mpn_mul_n (tp, b, a, n);
if (a[n] != 0)
@@ -546,13 +557,13 @@ mpn_fft_mul_modF_K (mp_ptr *ap, mp_ptr *bp, mp_size_t n, mp_size_t K)
This condition is also fulfilled at exit.
*/
static void
-mpn_fft_fftinv (mp_ptr *Ap, mp_size_t K, mp_size_t omega, mp_size_t n, mp_ptr tp)
+mpn_fft_fftinv (mp_ptr *Ap, int K, mp_size_t omega, mp_size_t n, mp_ptr tp)
{
if (K == 2)
{
mp_limb_t cy;
-#if HAVE_NATIVE_mpn_add_n_sub_n
- cy = mpn_add_n_sub_n (Ap[0], Ap[1], Ap[0], Ap[1], n + 1) & 1;
+#if HAVE_NATIVE_mpn_addsub_n
+ cy = mpn_addsub_n (Ap[0], Ap[1], Ap[0], Ap[1], n + 1) & 1;
#else
MPN_COPY (tp, Ap[0], n + 1);
mpn_add_n (Ap[0], Ap[0], Ap[1], n + 1);
@@ -565,7 +576,7 @@ mpn_fft_fftinv (mp_ptr *Ap, mp_size_t K, mp_size_t omega, mp_size_t n, mp_ptr tp
}
else
{
- mp_size_t j, K2 = K >> 1;
+ int j, K2 = K >> 1;
mpn_fft_fftinv (Ap, K2, 2 * omega, n, tp);
mpn_fft_fftinv (Ap + K2, K2, 2 * omega, n, tp);
@@ -583,14 +594,15 @@ mpn_fft_fftinv (mp_ptr *Ap, mp_size_t K, mp_size_t omega, mp_size_t n, mp_ptr tp
}
-/* R <- A/2^k mod 2^(n*GMP_NUMB_BITS)+1 */
+/* A <- A/2^k mod 2^(n*GMP_NUMB_BITS)+1 */
static void
-mpn_fft_div_2exp_modF (mp_ptr r, mp_srcptr a, mp_bitcnt_t k, mp_size_t n)
+mpn_fft_div_2exp_modF (mp_ptr r, mp_srcptr a, int k, mp_size_t n)
{
- mp_bitcnt_t i;
+ int i;
ASSERT (r != a);
- i = (mp_bitcnt_t) 2 * n * GMP_NUMB_BITS - k;
+ i = 2 * n * GMP_NUMB_BITS;
+ i = (i - k) % i; /* FIXME: This % looks superfluous */
mpn_fft_mul_2exp_modF (r, a, i, n);
/* 1/2^k = 2^(2nL-k) mod 2^(n*GMP_NUMB_BITS)+1 */
/* normalize so that R < 2^(n*GMP_NUMB_BITS)+1 */
@@ -602,11 +614,13 @@ mpn_fft_div_2exp_modF (mp_ptr r, mp_srcptr a, mp_bitcnt_t k, mp_size_t n)
Returns carry out, i.e. 1 iff {ap,an} = -1 mod 2^(n*GMP_NUMB_BITS)+1,
then {rp,n}=0.
*/
-static mp_size_t
+static int
mpn_fft_norm_modF (mp_ptr rp, mp_size_t n, mp_ptr ap, mp_size_t an)
{
- mp_size_t l, m, rpn;
+ mp_size_t l;
+ long int m;
mp_limb_t cc;
+ int rpn;
ASSERT ((n <= an) && (an <= 3 * n));
m = an - 2 * n;
@@ -640,11 +654,10 @@ mpn_fft_norm_modF (mp_ptr rp, mp_size_t n, mp_ptr ap, mp_size_t an)
We must have nl <= 2*K*l.
*/
static void
-mpn_mul_fft_decompose (mp_ptr A, mp_ptr *Ap, mp_size_t K, mp_size_t nprime,
- mp_srcptr n, mp_size_t nl, mp_size_t l, mp_size_t Mp,
- mp_ptr T)
+mpn_mul_fft_decompose (mp_ptr A, mp_ptr *Ap, int K, int nprime, mp_srcptr n,
+ mp_size_t nl, int l, int Mp, mp_ptr T)
{
- mp_size_t i, j;
+ int i, j;
mp_ptr tmp;
mp_size_t Kl = K * l;
TMP_DECL;
@@ -655,7 +668,7 @@ mpn_mul_fft_decompose (mp_ptr A, mp_ptr *Ap, mp_size_t K, mp_size_t nprime,
mp_size_t dif = nl - Kl;
mp_limb_signed_t cy;
- tmp = TMP_BALLOC_LIMBS(Kl + 1);
+ tmp = TMP_ALLOC_LIMBS(Kl + 1);
if (dif > Kl)
{
@@ -717,30 +730,48 @@ mpn_mul_fft_decompose (mp_ptr A, mp_ptr *Ap, mp_size_t K, mp_size_t nprime,
}
/* op <- n*m mod 2^N+1 with fft of size 2^k where N=pl*GMP_NUMB_BITS
- op is pl limbs, its high bit is returned.
+ n and m have respectively nl and ml limbs
+ op must have space for pl+1 limbs if rec=1 (and pl limbs if rec=0).
One must have pl = mpn_fft_next_size (pl, k).
T must have space for 2 * (nprime + 1) limbs.
+
+ If rec=0, then store only the pl low bits of the result, and return
+ the out carry.
*/
static mp_limb_t
-mpn_mul_fft_internal (mp_ptr op, mp_size_t pl, int k,
- mp_ptr *Ap, mp_ptr *Bp, mp_ptr A, mp_ptr B,
+mpn_mul_fft_internal (mp_ptr op, mp_srcptr n, mp_srcptr m, mp_size_t pl,
+ int k, int K,
+ mp_ptr *Ap, mp_ptr *Bp,
+ mp_ptr A, mp_ptr B,
mp_size_t nprime, mp_size_t l, mp_size_t Mp,
- int **fft_l, mp_ptr T, int sqr)
+ int **_fft_l,
+ mp_ptr T, int rec)
{
- mp_size_t K, i, pla, lo, sh, j;
+ int i, sqr, pla, lo, sh, j;
mp_ptr p;
mp_limb_t cc;
- K = (mp_size_t) 1 << k;
+ sqr = n == m;
+
+ TRACE (printf ("pl=%ld k=%d K=%d np=%ld l=%ld Mp=%ld rec=%d sqr=%d\n",
+ pl,k,K,nprime,l,Mp,rec,sqr));
+
+ /* decomposition of inputs into arrays Ap[i] and Bp[i] */
+ if (rec)
+ {
+ mpn_mul_fft_decompose (A, Ap, K, nprime, n, K * l + 1, l, Mp, T);
+ if (!sqr)
+ mpn_mul_fft_decompose (B, Bp, K, nprime, m, K * l + 1, l, Mp, T);
+ }
/* direct fft's */
- mpn_fft_fft (Ap, K, fft_l + k, 2 * Mp, nprime, 1, T);
+ mpn_fft_fft (Ap, K, _fft_l + k, 2 * Mp, nprime, 1, T);
if (!sqr)
- mpn_fft_fft (Bp, K, fft_l + k, 2 * Mp, nprime, 1, T);
+ mpn_fft_fft (Bp, K, _fft_l + k, 2 * Mp, nprime, 1, T);
/* term to term multiplications */
- mpn_fft_mul_modF_K (Ap, sqr ? Ap : Bp, nprime, K);
+ mpn_fft_mul_modF_K (Ap, (sqr) ? Ap : Bp, nprime, K);
/* inverse fft's */
mpn_fft_fftinv (Ap, K, 2 * Mp, nprime, T);
@@ -804,14 +835,18 @@ mpn_mul_fft_internal (mp_ptr op, mp_size_t pl, int k,
/* here p < 2^(2M) [K 2^(M(K-1)) + (K-1) 2^(M(K-2)) + ... ]
< K 2^(2M) [2^(M(K-1)) + 2^(M(K-2)) + ... ]
< K 2^(2M) 2^(M(K-1))*2 = 2^(M*K+M+k+1) */
- return mpn_fft_norm_modF (op, pl, p, pla);
+ i = mpn_fft_norm_modF (op, pl, p, pla);
+ if (rec) /* store the carry out */
+ op[pl] = i;
+
+ return i;
}
/* return the lcm of a and 2^k */
-static mp_bitcnt_t
-mpn_mul_fft_lcm (mp_bitcnt_t a, int k)
+static unsigned long int
+mpn_mul_fft_lcm (unsigned long int a, unsigned int k)
{
- mp_bitcnt_t l = k;
+ unsigned long int l = k;
while (a % 2 == 0 && k > 0)
{
@@ -828,11 +863,10 @@ mpn_mul_fft (mp_ptr op, mp_size_t pl,
mp_srcptr m, mp_size_t ml,
int k)
{
- int i;
- mp_size_t K, maxLK;
+ int K, maxLK, i;
mp_size_t N, Nprime, nprime, M, Mp, l;
mp_ptr *Ap, *Bp, A, T, B;
- int **fft_l, *tmp;
+ int **_fft_l;
int sqr = (n == m && nl == ml);
mp_limb_t h;
TMP_DECL;
@@ -842,72 +876,63 @@ mpn_mul_fft (mp_ptr op, mp_size_t pl,
TMP_MARK;
N = pl * GMP_NUMB_BITS;
- fft_l = TMP_BALLOC_TYPE (k + 1, int *);
- tmp = TMP_BALLOC_TYPE ((size_t) 2 << k, int);
+ _fft_l = TMP_ALLOC_TYPE (k + 1, int *);
for (i = 0; i <= k; i++)
- {
- fft_l[i] = tmp;
- tmp += (mp_size_t) 1 << i;
- }
-
- mpn_fft_initl (fft_l, k);
- K = (mp_size_t) 1 << k;
+ _fft_l[i] = TMP_ALLOC_TYPE (1 << i, int);
+ mpn_fft_initl (_fft_l, k);
+ K = 1 << k;
M = N >> k; /* N = 2^k M */
l = 1 + (M - 1) / GMP_NUMB_BITS;
- maxLK = mpn_mul_fft_lcm (GMP_NUMB_BITS, k); /* lcm (GMP_NUMB_BITS, 2^k) */
+ maxLK = mpn_mul_fft_lcm ((unsigned long) GMP_NUMB_BITS, k); /* lcm (GMP_NUMB_BITS, 2^k) */
Nprime = (1 + (2 * M + k + 2) / maxLK) * maxLK;
/* Nprime = ceil((2*M+k+3)/maxLK)*maxLK; */
nprime = Nprime / GMP_NUMB_BITS;
- TRACE (printf ("N=%ld K=%ld, M=%ld, l=%ld, maxLK=%ld, Np=%ld, np=%ld\n",
+ TRACE (printf ("N=%ld K=%d, M=%ld, l=%ld, maxLK=%d, Np=%ld, np=%ld\n",
N, K, M, l, maxLK, Nprime, nprime));
/* we should ensure that recursively, nprime is a multiple of the next K */
if (nprime >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
{
- mp_size_t K2;
+ unsigned long K2;
for (;;)
{
- K2 = (mp_size_t) 1 << mpn_fft_best_k (nprime, sqr);
+ K2 = 1L << mpn_fft_best_k (nprime, sqr);
if ((nprime & (K2 - 1)) == 0)
break;
nprime = (nprime + K2 - 1) & -K2;
Nprime = nprime * GMP_LIMB_BITS;
/* warning: since nprime changed, K2 may change too! */
}
- TRACE (printf ("new maxLK=%ld, Np=%ld, np=%ld\n", maxLK, Nprime, nprime));
+ TRACE (printf ("new maxLK=%d, Np=%ld, np=%ld\n", maxLK, Nprime, nprime));
}
ASSERT_ALWAYS (nprime < pl); /* otherwise we'll loop */
- T = TMP_BALLOC_LIMBS (2 * (nprime + 1));
+ T = TMP_ALLOC_LIMBS (2 * (nprime + 1));
Mp = Nprime >> k;
- TRACE (printf ("%ldx%ld limbs -> %ld times %ldx%ld limbs (%1.2f)\n",
+ TRACE (printf ("%ldx%ld limbs -> %d times %ldx%ld limbs (%1.2f)\n",
pl, pl, K, nprime, nprime, 2.0 * (double) N / Nprime / K);
printf (" temp space %ld\n", 2 * K * (nprime + 1)));
- A = TMP_BALLOC_LIMBS (K * (nprime + 1));
- Ap = TMP_BALLOC_MP_PTRS (K);
+ A = __GMP_ALLOCATE_FUNC_LIMBS (2 * K * (nprime + 1));
+ B = A + K * (nprime + 1);
+ Ap = TMP_ALLOC_MP_PTRS (K);
+ Bp = TMP_ALLOC_MP_PTRS (K);
+
+ /* special decomposition for main call */
+ /* nl is the number of significant limbs in n */
mpn_mul_fft_decompose (A, Ap, K, nprime, n, nl, l, Mp, T);
- if (sqr)
- {
- mp_size_t pla;
- pla = l * (K - 1) + nprime + 1; /* number of required limbs for p */
- B = TMP_BALLOC_LIMBS (pla);
- Bp = TMP_BALLOC_MP_PTRS (K);
- }
- else
- {
- B = TMP_BALLOC_LIMBS (K * (nprime + 1));
- Bp = TMP_BALLOC_MP_PTRS (K);
- mpn_mul_fft_decompose (B, Bp, K, nprime, m, ml, l, Mp, T);
- }
- h = mpn_mul_fft_internal (op, pl, k, Ap, Bp, A, B, nprime, l, Mp, fft_l, T, sqr);
+ if (n != m)
+ mpn_mul_fft_decompose (B, Bp, K, nprime, m, ml, l, Mp, T);
+
+ h = mpn_mul_fft_internal (op, n, m, pl, k, K, Ap, Bp, A, B, nprime, l, Mp, _fft_l, T, 0);
TMP_FREE;
+ __GMP_FREE_FUNC_LIMBS (A, 2 * K * (nprime + 1));
+
return h;
}
-#if WANT_OLD_FFT_FULL
/* multiply {n, nl} by {m, ml}, and put the result in {op, nl+ml} */
void
mpn_mul_fft_full (mp_ptr op,
@@ -916,9 +941,9 @@ mpn_mul_fft_full (mp_ptr op,
{
mp_ptr pad_op;
mp_size_t pl, pl2, pl3, l;
- mp_size_t cc, c2, oldcc;
int k2, k3;
int sqr = (n == m && nl == ml);
+ int cc, c2, oldcc;
pl = nl + ml; /* total number of limbs of the result */
@@ -935,7 +960,7 @@ mpn_mul_fft_full (mp_ptr op,
pl2 = (2 * pl - 1) / 5; /* ceil (2pl/5) - 1 */
do
{
- pl2++;
+ pl2 ++;
k2 = mpn_fft_best_k (pl2, sqr); /* best fft size for pl2 limbs */
pl2 = mpn_fft_next_size (pl2, k2);
pl3 = 3 * pl2 / 2; /* since k>=FFT_FIRST_K=4, pl2 is a multiple of 2^4,
@@ -949,23 +974,23 @@ mpn_mul_fft_full (mp_ptr op,
ASSERT_ALWAYS(pl3 <= pl);
cc = mpn_mul_fft (op, pl3, n, nl, m, ml, k3); /* mu */
- ASSERT(cc == 0);
+ ASSERT_ALWAYS(cc == 0);
pad_op = __GMP_ALLOCATE_FUNC_LIMBS (pl2);
cc = mpn_mul_fft (pad_op, pl2, n, nl, m, ml, k2); /* lambda */
cc = -cc + mpn_sub_n (pad_op, pad_op, op, pl2); /* lambda - low(mu) */
/* 0 <= cc <= 1 */
- ASSERT(0 <= cc && cc <= 1);
+ ASSERT_ALWAYS(0 <= cc && cc <= 1);
l = pl3 - pl2; /* l = pl2 / 2 since pl3 = 3/2 * pl2 */
c2 = mpn_add_n (pad_op, pad_op, op + pl2, l);
cc = mpn_add_1 (pad_op + l, pad_op + l, l, (mp_limb_t) c2) - cc;
- ASSERT(-1 <= cc && cc <= 1);
+ ASSERT_ALWAYS(-1 <= cc && cc <= 1);
if (cc < 0)
cc = mpn_add_1 (pad_op, pad_op, pl2, (mp_limb_t) -cc);
- ASSERT(0 <= cc && cc <= 1);
+ ASSERT_ALWAYS(0 <= cc && cc <= 1);
/* now lambda-mu = {pad_op, pl2} - cc mod 2^(pl2*GMP_NUMB_BITS)+1 */
oldcc = cc;
-#if HAVE_NATIVE_mpn_add_n_sub_n
- c2 = mpn_add_n_sub_n (pad_op + l, pad_op, pad_op, pad_op + l, l);
+#if HAVE_NATIVE_mpn_addsub_n
+ c2 = mpn_addsub_n (pad_op + l, pad_op, pad_op, pad_op + l, l);
/* c2 & 1 is the borrow, c2 & 2 is the carry */
cc += c2 >> 1; /* carry out from high <- low + high */
c2 = c2 & 1; /* borrow out from low <- low - high */
@@ -975,7 +1000,7 @@ mpn_mul_fft_full (mp_ptr op,
TMP_DECL;
TMP_MARK;
- tmp = TMP_BALLOC_LIMBS (l);
+ tmp = TMP_ALLOC_LIMBS (l);
MPN_COPY (tmp, pad_op, l);
c2 = mpn_sub_n (pad_op, pad_op, pad_op + l, l);
cc += mpn_add_n (pad_op + l, tmp, pad_op + l, l);
@@ -1011,4 +1036,3 @@ mpn_mul_fft_full (mp_ptr op,
/* since the final result has at most pl limbs, no carry out below */
mpn_add_1 (op + pl2, op + pl2, pl - pl2, (mp_limb_t) c2);
}
-#endif
diff --git a/gmp/mpn/generic/mul_n.c b/gmp/mpn/generic/mul_n.c
index 5df8b16fa0..4aa25f9b58 100644
--- a/gmp/mpn/generic/mul_n.c
+++ b/gmp/mpn/generic/mul_n.c
@@ -1,38 +1,695 @@
-/* mpn_mul_n -- multiply natural numbers.
+/* mpn_mul_n and helper function -- Multiply/square natural numbers.
-Copyright 1991, 1993, 1994, 1996-2003, 2005, 2008, 2009 Free Software
-Foundation, Inc.
+ THE HELPER FUNCTIONS IN THIS FILE (meaning everything except mpn_mul_n) ARE
+ INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY SAFE TO REACH THEM THROUGH
+ DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST GUARANTEED THAT THEY'LL CHANGE
+ OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
+2005 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
+
+/* Multiplies using 3 half-sized mults and so on recursively.
+ * p[0..2*n-1] := product of a[0..n-1] and b[0..n-1].
+ * No overlap of p[...] with a[...] or b[...].
+ * ws is workspace.
+ */
+
+void
+mpn_kara_mul_n (mp_ptr p, mp_srcptr a, mp_srcptr b, mp_size_t n, mp_ptr ws)
+{
+ mp_limb_t w, w0, w1;
+ mp_size_t n2;
+ mp_srcptr x, y;
+ mp_size_t i;
+ int sign;
+
+ n2 = n >> 1;
+ ASSERT (n2 > 0);
+
+ if ((n & 1) != 0)
+ {
+ /* Odd length. */
+ mp_size_t n1, n3, nm1;
+
+ n3 = n - n2;
+
+ sign = 0;
+ w = a[n2];
+ if (w != 0)
+ w -= mpn_sub_n (p, a, a + n3, n2);
+ else
+ {
+ i = n2;
+ do
+ {
+ --i;
+ w0 = a[i];
+ w1 = a[n3 + i];
+ }
+ while (w0 == w1 && i != 0);
+ if (w0 < w1)
+ {
+ x = a + n3;
+ y = a;
+ sign = ~0;
+ }
+ else
+ {
+ x = a;
+ y = a + n3;
+ }
+ mpn_sub_n (p, x, y, n2);
+ }
+ p[n2] = w;
+
+ w = b[n2];
+ if (w != 0)
+ w -= mpn_sub_n (p + n3, b, b + n3, n2);
+ else
+ {
+ i = n2;
+ do
+ {
+ --i;
+ w0 = b[i];
+ w1 = b[n3 + i];
+ }
+ while (w0 == w1 && i != 0);
+ if (w0 < w1)
+ {
+ x = b + n3;
+ y = b;
+ sign = ~sign;
+ }
+ else
+ {
+ x = b;
+ y = b + n3;
+ }
+ mpn_sub_n (p + n3, x, y, n2);
+ }
+ p[n] = w;
+
+ n1 = n + 1;
+ if (n2 < MUL_KARATSUBA_THRESHOLD)
+ {
+ if (n3 < MUL_KARATSUBA_THRESHOLD)
+ {
+ mpn_mul_basecase (ws, p, n3, p + n3, n3);
+ mpn_mul_basecase (p, a, n3, b, n3);
+ }
+ else
+ {
+ mpn_kara_mul_n (ws, p, p + n3, n3, ws + n1);
+ mpn_kara_mul_n (p, a, b, n3, ws + n1);
+ }
+ mpn_mul_basecase (p + n1, a + n3, n2, b + n3, n2);
+ }
+ else
+ {
+ mpn_kara_mul_n (ws, p, p + n3, n3, ws + n1);
+ mpn_kara_mul_n (p, a, b, n3, ws + n1);
+ mpn_kara_mul_n (p + n1, a + n3, b + n3, n2, ws + n1);
+ }
+
+ if (sign)
+ mpn_add_n (ws, p, ws, n1);
+ else
+ mpn_sub_n (ws, p, ws, n1);
+
+ nm1 = n - 1;
+ if (mpn_add_n (ws, p + n1, ws, nm1))
+ {
+ mp_limb_t x = (ws[nm1] + 1) & GMP_NUMB_MASK;
+ ws[nm1] = x;
+ if (x == 0)
+ ws[n] = (ws[n] + 1) & GMP_NUMB_MASK;
+ }
+ if (mpn_add_n (p + n3, p + n3, ws, n1))
+ {
+ mpn_incr_u (p + n1 + n3, 1);
+ }
+ }
+ else
+ {
+ /* Even length. */
+ i = n2;
+ do
+ {
+ --i;
+ w0 = a[i];
+ w1 = a[n2 + i];
+ }
+ while (w0 == w1 && i != 0);
+ sign = 0;
+ if (w0 < w1)
+ {
+ x = a + n2;
+ y = a;
+ sign = ~0;
+ }
+ else
+ {
+ x = a;
+ y = a + n2;
+ }
+ mpn_sub_n (p, x, y, n2);
+
+ i = n2;
+ do
+ {
+ --i;
+ w0 = b[i];
+ w1 = b[n2 + i];
+ }
+ while (w0 == w1 && i != 0);
+ if (w0 < w1)
+ {
+ x = b + n2;
+ y = b;
+ sign = ~sign;
+ }
+ else
+ {
+ x = b;
+ y = b + n2;
+ }
+ mpn_sub_n (p + n2, x, y, n2);
+
+ /* Pointwise products. */
+ if (n2 < MUL_KARATSUBA_THRESHOLD)
+ {
+ mpn_mul_basecase (ws, p, n2, p + n2, n2);
+ mpn_mul_basecase (p, a, n2, b, n2);
+ mpn_mul_basecase (p + n, a + n2, n2, b + n2, n2);
+ }
+ else
+ {
+ mpn_kara_mul_n (ws, p, p + n2, n2, ws + n);
+ mpn_kara_mul_n (p, a, b, n2, ws + n);
+ mpn_kara_mul_n (p + n, a + n2, b + n2, n2, ws + n);
+ }
+
+ /* Interpolate. */
+ if (sign)
+ w = mpn_add_n (ws, p, ws, n);
+ else
+ w = -mpn_sub_n (ws, p, ws, n);
+ w += mpn_add_n (ws, p + n, ws, n);
+ w += mpn_add_n (p + n2, p + n2, ws, n);
+ MPN_INCR_U (p + n2 + n, 2 * n - (n2 + n), w);
+ }
+}
+
+void
+mpn_kara_sqr_n (mp_ptr p, mp_srcptr a, mp_size_t n, mp_ptr ws)
+{
+ mp_limb_t w, w0, w1;
+ mp_size_t n2;
+ mp_srcptr x, y;
+ mp_size_t i;
+
+ n2 = n >> 1;
+ ASSERT (n2 > 0);
+
+ if ((n & 1) != 0)
+ {
+ /* Odd length. */
+ mp_size_t n1, n3, nm1;
+
+ n3 = n - n2;
+
+ w = a[n2];
+ if (w != 0)
+ w -= mpn_sub_n (p, a, a + n3, n2);
+ else
+ {
+ i = n2;
+ do
+ {
+ --i;
+ w0 = a[i];
+ w1 = a[n3 + i];
+ }
+ while (w0 == w1 && i != 0);
+ if (w0 < w1)
+ {
+ x = a + n3;
+ y = a;
+ }
+ else
+ {
+ x = a;
+ y = a + n3;
+ }
+ mpn_sub_n (p, x, y, n2);
+ }
+ p[n2] = w;
+
+ n1 = n + 1;
+
+ /* n2 is always either n3 or n3-1 so maybe the two sets of tests here
+ could be combined. But that's not important, since the tests will
+ take a miniscule amount of time compared to the function calls. */
+ if (BELOW_THRESHOLD (n3, SQR_BASECASE_THRESHOLD))
+ {
+ mpn_mul_basecase (ws, p, n3, p, n3);
+ mpn_mul_basecase (p, a, n3, a, n3);
+ }
+ else if (BELOW_THRESHOLD (n3, SQR_KARATSUBA_THRESHOLD))
+ {
+ mpn_sqr_basecase (ws, p, n3);
+ mpn_sqr_basecase (p, a, n3);
+ }
+ else
+ {
+ mpn_kara_sqr_n (ws, p, n3, ws + n1); /* (x-y)^2 */
+ mpn_kara_sqr_n (p, a, n3, ws + n1); /* x^2 */
+ }
+ if (BELOW_THRESHOLD (n2, SQR_BASECASE_THRESHOLD))
+ mpn_mul_basecase (p + n1, a + n3, n2, a + n3, n2);
+ else if (BELOW_THRESHOLD (n2, SQR_KARATSUBA_THRESHOLD))
+ mpn_sqr_basecase (p + n1, a + n3, n2);
+ else
+ mpn_kara_sqr_n (p + n1, a + n3, n2, ws + n1); /* y^2 */
+
+ /* Since x^2+y^2-(x-y)^2 = 2xy >= 0 there's no need to track the
+ borrow from mpn_sub_n. If it occurs then it'll be cancelled by a
+ carry from ws[n]. Further, since 2xy fits in n1 limbs there won't
+ be any carry out of ws[n] other than cancelling that borrow. */
+
+ mpn_sub_n (ws, p, ws, n1); /* x^2-(x-y)^2 */
+
+ nm1 = n - 1;
+ if (mpn_add_n (ws, p + n1, ws, nm1)) /* x^2+y^2-(x-y)^2 = 2xy */
+ {
+ mp_limb_t x = (ws[nm1] + 1) & GMP_NUMB_MASK;
+ ws[nm1] = x;
+ if (x == 0)
+ ws[n] = (ws[n] + 1) & GMP_NUMB_MASK;
+ }
+ if (mpn_add_n (p + n3, p + n3, ws, n1))
+ {
+ mpn_incr_u (p + n1 + n3, 1);
+ }
+ }
+ else
+ {
+ /* Even length. */
+ i = n2;
+ do
+ {
+ --i;
+ w0 = a[i];
+ w1 = a[n2 + i];
+ }
+ while (w0 == w1 && i != 0);
+ if (w0 < w1)
+ {
+ x = a + n2;
+ y = a;
+ }
+ else
+ {
+ x = a;
+ y = a + n2;
+ }
+ mpn_sub_n (p, x, y, n2);
+
+ /* Pointwise products. */
+ if (BELOW_THRESHOLD (n2, SQR_BASECASE_THRESHOLD))
+ {
+ mpn_mul_basecase (ws, p, n2, p, n2);
+ mpn_mul_basecase (p, a, n2, a, n2);
+ mpn_mul_basecase (p + n, a + n2, n2, a + n2, n2);
+ }
+ else if (BELOW_THRESHOLD (n2, SQR_KARATSUBA_THRESHOLD))
+ {
+ mpn_sqr_basecase (ws, p, n2);
+ mpn_sqr_basecase (p, a, n2);
+ mpn_sqr_basecase (p + n, a + n2, n2);
+ }
+ else
+ {
+ mpn_kara_sqr_n (ws, p, n2, ws + n);
+ mpn_kara_sqr_n (p, a, n2, ws + n);
+ mpn_kara_sqr_n (p + n, a + n2, n2, ws + n);
+ }
+
+ /* Interpolate. */
+ w = -mpn_sub_n (ws, p, ws, n);
+ w += mpn_add_n (ws, p + n, ws, n);
+ w += mpn_add_n (p + n2, p + n2, ws, n);
+ MPN_INCR_U (p + n2 + n, 2 * n - (n2 + n), w);
+ }
+}
+
+/******************************************************************************
+ * *
+ * Toom 3-way multiplication and squaring *
+ * *
+ *****************************************************************************/
+
+/* Starts from:
+ {v0,2k} (stored in {c,2k})
+ {vm1,2k+1} (which sign is sa, and absolute value is stored in {vm1,2k+1})
+ {v1,2k+1} (stored in {c+2k,2k+1})
+ {v2,2k+1}
+ {vinf,twor} (stored in {c+4k,twor}, except the first limb, saved in vinf0)
+
+ ws is temporary space, and should have at least twor limbs.
+
+ put in {c, 2n} where n = 2k+twor the value of {v0,2k} (already in place)
+ + B^k * {tm1, 2k+1}
+ + B^(2k) * {t1, 2k+1}
+ + B^(3k) * {t2, 2k+1}
+ + B^(4k) * {vinf,twor} (high twor-1 limbs already in place)
+ where {t1, 2k+1} = ({v1, 2k+1} + sa * {vm1, 2k+1}- 2*{v0,2k})/2-*{vinf,twor}
+ {t2, 2k+1} = (3*({v1,2k+1}-{v0,2k})-sa*{vm1,2k+1}+{v2,2k+1})/6-2*{vinf,twor}
+ {tm1,2k+1} = ({v1,2k+1}-sa*{vm1,2k+1}/2-{t2,2k+1}
+
+ Exact sequence described in a comment in mpn_toom3_mul_n.
+ mpn_toom3_mul_n() and mpn_toom3_sqr_n() implement steps 1-2.
+ mpn_toom_interpolate_5pts() implements steps 3-4.
+
+ Reference: What About Toom-Cook Matrices Optimality? Marco Bodrato
+ and Alberto Zanoni, October 19, 2006, http://bodrato.it/papers/#CIVV2006
+
+ ************* saved note ****************
+ Think about:
+
+ The evaluated point a-b+c stands a good chance of having a zero carry
+ limb, a+b+c would have a 1/4 chance, and 4*a+2*b+c a 1/8 chance, roughly.
+ Perhaps this could be tested and stripped. Doing so before recursing
+ would be better than stripping at the start of mpn_toom3_mul_n/sqr_n,
+ since then the recursion could be based on the new size. Although in
+ truth the kara vs toom3 crossover is never so exact that one limb either
+ way makes a difference.
+
+ A small value like 1 or 2 for the carry could perhaps also be handled
+ with an add_n or addlsh1_n. Would that be faster than an extra limb on a
+ (recursed) multiply/square?
+*/
+
+#define TOOM3_MUL_REC(p, a, b, n, ws) \
+ do { \
+ if (MUL_TOOM3_THRESHOLD / 3 < MUL_KARATSUBA_THRESHOLD \
+ && BELOW_THRESHOLD (n, MUL_KARATSUBA_THRESHOLD)) \
+ mpn_mul_basecase (p, a, n, b, n); \
+ else if (BELOW_THRESHOLD (n, MUL_TOOM3_THRESHOLD)) \
+ mpn_kara_mul_n (p, a, b, n, ws); \
+ else \
+ mpn_toom3_mul_n (p, a, b, n, ws); \
+ } while (0)
+
+#define TOOM3_SQR_REC(p, a, n, ws) \
+ do { \
+ if (SQR_TOOM3_THRESHOLD / 3 < SQR_BASECASE_THRESHOLD \
+ && BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD)) \
+ mpn_mul_basecase (p, a, n, a, n); \
+ else if (SQR_TOOM3_THRESHOLD / 3 < SQR_KARATSUBA_THRESHOLD \
+ && BELOW_THRESHOLD (n, SQR_KARATSUBA_THRESHOLD)) \
+ mpn_sqr_basecase (p, a, n); \
+ else if (BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD)) \
+ mpn_kara_sqr_n (p, a, n, ws); \
+ else \
+ mpn_toom3_sqr_n (p, a, n, ws); \
+ } while (0)
+
+/* The necessary temporary space T(n) satisfies T(n)=0 for n < THRESHOLD,
+ and T(n) <= max(2n+2, 6k+3, 4k+3+T(k+1)) otherwise, where k = ceil(n/3).
+
+ Assuming T(n) >= 2n, 6k+3 <= 4k+3+T(k+1).
+ Similarly, 2n+2 <= 6k+2 <= 4k+3+T(k+1).
+
+ With T(n) = 2n+S(n), this simplifies to S(n) <= 9 + S(k+1).
+ Since THRESHOLD >= 17, we have n/(k+1) >= 19/8
+ thus S(n) <= S(n/(19/8)) + 9 thus S(n) <= 9*log(n)/log(19/8) <= 8*log2(n).
+*/
+
+void
+mpn_toom3_mul_n (mp_ptr c, mp_srcptr a, mp_srcptr b, mp_size_t n, mp_ptr t)
+{
+ mp_size_t k, k1, kk1, r, twok, twor;
+ mp_limb_t cy, cc, saved, vinf0;
+ mp_ptr trec;
+ int sa, sb;
+ mp_ptr c1, c2, c3, c4, c5;
+
+ ASSERT(GMP_NUMB_BITS >= 6);
+ ASSERT(n >= 17); /* so that r <> 0 and 5k+3 <= 2n */
+
+ /*
+ The algorithm is the following:
+
+ 0. k = ceil(n/3), r = n - 2k, B = 2^(GMP_NUMB_BITS), t = B^k
+ 1. split a and b in three parts each a0, a1, a2 and b0, b1, b2
+ with a0, a1, b0, b1 of k limbs, and a2, b2 of r limbs
+ 2. Evaluation: vm1 may be negative, the other can not.
+ v0 <- a0*b0
+ v1 <- (a0+a1+a2)*(b0+b1+b2)
+ v2 <- (a0+2*a1+4*a2)*(b0+2*b1+4*b2)
+ vm1 <- (a0-a1+a2)*(b0-b1+b2)
+ vinf <- a2*b2
+ 3. Interpolation: every result is positive, all divisions are exact
+ t2 <- (v2 - vm1)/3
+ tm1 <- (v1 - vm1)/2
+ t1 <- (v1 - v0)
+ t2 <- (t2 - t1)/2
+ t1 <- (t1 - tm1 - vinf)
+ t2 <- (t2 - 2*vinf)
+ tm1 <- (tm1 - t2)
+ 4. result is c0+c1*t+c2*t^2+c3*t^3+c4*t^4 where
+ c0 <- v0
+ c1 <- tm1
+ c2 <- t1
+ c3 <- t2
+ c4 <- vinf
+ */
+
+ k = (n + 2) / 3; /* ceil(n/3) */
+ twok = 2 * k;
+ k1 = k + 1;
+ kk1 = k + k1;
+ r = n - twok; /* last chunk */
+ twor = 2 * r;
+
+ c1 = c + k;
+ c2 = c1 + k;
+ c3 = c2 + k;
+ c4 = c3 + k;
+ c5 = c4 + k;
+
+ trec = t + 4 * k + 3; /* trec = v2 + (2k+2) */
+
+ /* put a0+a2 in {c, k+1}, and b0+b2 in {c+4k+2, k+1};
+ put a0+a1+a2 in {t, k+1} and b0+b1+b2 in {t+k+1,k+1}
+ [????requires 5k+3 <= 2n, ie. n >= 9] */
+ cy = mpn_add_n (c, a, a + twok, r);
+ cc = mpn_add_n (c4 + 2, b, b + twok, r);
+ if (r < k)
+ {
+ __GMPN_ADD_1 (cy, c + r, a + r, k - r, cy);
+ __GMPN_ADD_1 (cc, c4 + 2 + r, b + r, k - r, cc);
+ }
+
+ /* Put in {t, k+1} the sum
+ * (a_0+a_2) - stored in {c, k+1} -
+ * +
+ * a_1 - stored in {a+k, k} */
+ t[k] = (c1[0] = cy) + mpn_add_n (t, c, a + k, k);
+ /* ^ ^
+ * carry of a_0 + a_2 carry of (a_0+a_2) + a_1
+
+ */
+
+ /* Put in {t+k+1, k+1} the sum of the two values
+ * (b_0+b_2) - stored in {c1+1, k+1} -
+ * +
+ * b_1 - stored in {b+k, k} */
+ t[kk1] = (c5[3] = cc) + mpn_add_n (t + k1, c4 + 2, b + k, k);
+ /* ^ ^
+ * carry of b_0 + b_2 carry of (b_0+b_2) + b_1 */
+
+#define v2 (t+2*k+1)
+
+ /* compute v1 := (a0+a1+a2)*(b0+b1+b2) in {t, 2k+1};
+ since v1 < 9*B^(2k), v1 uses only 2k+1 words if GMP_NUMB_BITS >= 4 */
+ TOOM3_MUL_REC (c2, t, t + k1, k1, trec);
+
+ /* c c2 c4 t
+ {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+ v1 */
+
+ /* put |a0-a1+a2| in {c, k+1} and |b0-b1+b2| in {c+4k+2,k+1} */
+ /* (They're already there, actually) */
+
+ /* sa = sign(a0-a1+a2) */
+ sa = (cy != 0) ? 1 : mpn_cmp (c, a + k, k);
+ c[k] = (sa >= 0) ? cy - mpn_sub_n (c, c, a + k, k)
+ : mpn_sub_n (c, a + k, c, k);
+
+ sb = (cc != 0) ? 1 : mpn_cmp (c4 + 2, b + k, k);
+ c5[2] = (sb >= 0) ? cc - mpn_sub_n (c4 + 2, c4 + 2, b + k, k)
+ : mpn_sub_n (c4 + 2, b + k, c4 + 2, k);
+ sa *= sb; /* sign of vm1 */
+
+ /* compute vm1 := (a0-a1+a2)*(b0-b1+b2) in {t, 2k+1};
+ since |vm1| < 4*B^(2k), vm1 uses only 2k+1 limbs */
+ TOOM3_MUL_REC (t, c, c4 + 2, k1, trec);
+
+ /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+ v1 vm1
+ */
+
+ /* compute a0+2a1+4a2 in {c, k+1} and b0+2b1+4b2 in {c+4k+2, k+1}
+ [requires 5k+3 <= 2n, i.e. n >= 17] */
+#ifdef HAVE_NATIVE_mpn_addlsh1_n
+ c1[0] = mpn_addlsh1_n (c, a + k, a + twok, r);
+ c5[2] = mpn_addlsh1_n (c4 + 2, b + k, b + twok, r);
+ if (r < k)
+ {
+ __GMPN_ADD_1 (c1[0], c + r, a + k + r, k - r, c1[0]);
+ __GMPN_ADD_1 (c5[2], c4 + 2 + r, b + k + r, k - r, c5[2]);
+ }
+ c1[0] = 2 * c1[0] + mpn_addlsh1_n (c, a, c, k);
+ c5[2] = 2 * c5[2] + mpn_addlsh1_n (c4 + 2, b, c4 + 2, k);
+#else
+ c[r] = mpn_lshift (c, a + twok, r, 1);
+ c4[r + 2] = mpn_lshift (c4 + 2, b + twok, r, 1);
+ if (r < k)
+ {
+ MPN_ZERO(c + r + 1, k - r);
+ MPN_ZERO(c4 + r + 3, k - r);
+ }
+ c1[0] += mpn_add_n (c, c, a + k, k);
+ c5[2] += mpn_add_n (c4 + 2, c4 + 2, b + k, k);
+ mpn_lshift (c, c, k1, 1);
+ mpn_lshift (c4 + 2, c4 + 2, k1, 1);
+ c1[0] += mpn_add_n (c, c, a, k);
+ c5[2] += mpn_add_n (c4 + 2, c4 + 2, b, k);
+#endif
+
+ /* compute v2 := (a0+2a1+4a2)*(b0+2b1+4b2) in {t+2k+1, 2k+1}
+ v2 < 49*B^k so v2 uses at most 2k+1 limbs if GMP_NUMB_BITS >= 6 */
+ TOOM3_MUL_REC (v2, c, c4 + 2, k1, trec);
+
+ /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+ v1 vm1 v2
+ */
+
+ /* compute v0 := a0*b0 in {c, 2k} */
+ TOOM3_MUL_REC (c, a, b, k, trec);
+
+ /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+ v0 v1 vm1 v2 */
+
+ /* compute vinf := a2*b2 in {t+4k+2, 2r}: in {c4, 2r} */
+
+ saved = c4[0]; /* Remember v1's highest byte (will be overwritten). */
+ TOOM3_MUL_REC (c4, a + twok, b + twok, r, trec); /* Overwrites c4[0]. */
+ vinf0 = c4[0]; /* Remember vinf's lowest byte (will be overwritten).*/
+ c4[0] = saved; /* Overwriting. Now v1 value is correct. */
+
+ /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+ v0 v1 vinf[1..] vm1 v2 */
+
+ mpn_toom_interpolate_5pts (c, v2, t, k, 2*r, sa, vinf0, trec);
+
+#undef v2
+}
+
+void
+mpn_toom3_sqr_n (mp_ptr c, mp_srcptr a, mp_size_t n, mp_ptr t)
+{
+ mp_size_t k, k1, kk1, r, twok, twor;
+ mp_limb_t cy, saved, vinf0;
+ mp_ptr trec;
+ int sa;
+ mp_ptr c1, c2, c3, c4;
+
+ ASSERT(GMP_NUMB_BITS >= 6);
+ ASSERT(n >= 17); /* so that r <> 0 and 5k+3 <= 2n */
+
+ /* the algorithm is the same as mpn_toom3_mul_n, with b=a */
+
+ k = (n + 2) / 3; /* ceil(n/3) */
+ twok = 2 * k;
+ k1 = k + 1;
+ kk1 = k + k1;
+ r = n - twok; /* last chunk */
+ twor = 2 * r;
+
+ c1 = c + k;
+ c2 = c1 + k;
+ c3 = c2 + k;
+ c4 = c3 + k;
+
+ trec = t + 4 * k + 3; /* trec = v2 + (2k+2) */
+
+ cy = mpn_add_n (c, a, a + twok, r);
+ if (r < k)
+ __GMPN_ADD_1 (cy, c + r, a + r, k - r, cy);
+ t[k] = (c1[0] = cy) + mpn_add_n (t, c, a + k, k);
+
+#define v2 (t+2*k+1)
+
+ TOOM3_SQR_REC (c2, t, k1, trec);
+
+ sa = (cy != 0) ? 1 : mpn_cmp (c, a + k, k);
+ c[k] = (sa >= 0) ? cy - mpn_sub_n (c, c, a + k, k)
+ : mpn_sub_n (c, a + k, c, k);
+
+ TOOM3_SQR_REC (t, c, k1, trec);
+
+#ifdef HAVE_NATIVE_mpn_addlsh1_n
+ c1[0] = mpn_addlsh1_n (c, a + k, a + twok, r);
+ if (r < k)
+ __GMPN_ADD_1 (c1[0], c + r, a + k + r, k - r, c1[0]);
+ c1[0] = 2 * c1[0] + mpn_addlsh1_n (c, a, c, k);
+#else
+ c[r] = mpn_lshift (c, a + twok, r, 1);
+ if (r < k)
+ MPN_ZERO(c + r + 1, k - r);
+ c1[0] += mpn_add_n (c, c, a + k, k);
+ mpn_lshift (c, c, k1, 1);
+ c1[0] += mpn_add_n (c, c, a, k);
+#endif
+
+ TOOM3_SQR_REC (v2, c, k1, trec);
+
+ TOOM3_SQR_REC (c, a, k, trec);
+
+ saved = c4[0];
+ TOOM3_SQR_REC (c4, a + twok, r, trec);
+ vinf0 = c4[0];
+ c4[0] = saved;
+
+ mpn_toom_interpolate_5pts (c, v2, t, k, 2*r, 1, vinf0, trec);
+
+#undef v2
+}
+
void
mpn_mul_n (mp_ptr p, mp_srcptr a, mp_srcptr b, mp_size_t n)
{
@@ -40,28 +697,31 @@ mpn_mul_n (mp_ptr p, mp_srcptr a, mp_srcptr b, mp_size_t n)
ASSERT (! MPN_OVERLAP_P (p, 2 * n, a, n));
ASSERT (! MPN_OVERLAP_P (p, 2 * n, b, n));
- if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+ if (BELOW_THRESHOLD (n, MUL_KARATSUBA_THRESHOLD))
{
mpn_mul_basecase (p, a, n, b, n);
}
- else if (BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD))
+ else if (BELOW_THRESHOLD (n, MUL_TOOM3_THRESHOLD))
{
/* Allocate workspace of fixed size on stack: fast! */
- mp_limb_t ws[mpn_toom22_mul_itch (MUL_TOOM33_THRESHOLD_LIMIT-1,
- MUL_TOOM33_THRESHOLD_LIMIT-1)];
- ASSERT (MUL_TOOM33_THRESHOLD <= MUL_TOOM33_THRESHOLD_LIMIT);
- mpn_toom22_mul (p, a, n, b, n, ws);
+ mp_limb_t ws[MPN_KARA_MUL_N_TSIZE (MUL_TOOM3_THRESHOLD_LIMIT-1)];
+ ASSERT (MUL_TOOM3_THRESHOLD <= MUL_TOOM3_THRESHOLD_LIMIT);
+ mpn_kara_mul_n (p, a, b, n, ws);
}
else if (BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD))
{
mp_ptr ws;
TMP_SDECL;
TMP_SMARK;
- ws = TMP_SALLOC_LIMBS (mpn_toom33_mul_itch (n, n));
- mpn_toom33_mul (p, a, n, b, n, ws);
+ ws = TMP_SALLOC_LIMBS (MPN_TOOM3_MUL_N_TSIZE (n));
+ mpn_toom3_mul_n (p, a, b, n, ws);
TMP_SFREE;
}
- else if (BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD))
+#if WANT_FFT || TUNE_PROGRAM_BUILD
+ else if (BELOW_THRESHOLD (n, MUL_FFT_THRESHOLD))
+#else
+ else if (BELOW_THRESHOLD (n, MPN_TOOM44_MAX_N))
+#endif
{
mp_ptr ws;
TMP_SDECL;
@@ -70,28 +730,91 @@ mpn_mul_n (mp_ptr p, mp_srcptr a, mp_srcptr b, mp_size_t n)
mpn_toom44_mul (p, a, n, b, n, ws);
TMP_SFREE;
}
- else if (BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD))
+ else
+#if WANT_FFT || TUNE_PROGRAM_BUILD
+ {
+ /* The current FFT code allocates its own space. That should probably
+ change. */
+ mpn_mul_fft_full (p, a, n, b, n);
+ }
+#else
+ {
+ /* Toom4 for large operands. */
+ mp_ptr ws;
+ TMP_DECL;
+ TMP_MARK;
+ ws = TMP_BALLOC_LIMBS (mpn_toom44_mul_itch (n, n));
+ mpn_toom44_mul (p, a, n, b, n, ws);
+ TMP_FREE;
+ }
+#endif
+}
+
+void
+mpn_sqr (mp_ptr p, mp_srcptr a, mp_size_t n)
+{
+ ASSERT (n >= 1);
+ ASSERT (! MPN_OVERLAP_P (p, 2 * n, a, n));
+
+#if 0
+ /* FIXME: Can this be removed? */
+ if (n == 0)
+ return;
+#endif
+
+ if (BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+ { /* mul_basecase is faster than sqr_basecase on small sizes sometimes */
+ mpn_mul_basecase (p, a, n, a, n);
+ }
+ else if (BELOW_THRESHOLD (n, SQR_KARATSUBA_THRESHOLD))
+ {
+ mpn_sqr_basecase (p, a, n);
+ }
+ else if (BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))
+ {
+ /* Allocate workspace of fixed size on stack: fast! */
+ mp_limb_t ws[MPN_KARA_SQR_N_TSIZE (SQR_TOOM3_THRESHOLD_LIMIT-1)];
+ ASSERT (SQR_TOOM3_THRESHOLD <= SQR_TOOM3_THRESHOLD_LIMIT);
+ mpn_kara_sqr_n (p, a, n, ws);
+ }
+ else if (BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))
{
mp_ptr ws;
TMP_SDECL;
TMP_SMARK;
- ws = TMP_SALLOC_LIMBS (mpn_toom6_mul_n_itch (n));
- mpn_toom6h_mul (p, a, n, b, n, ws);
+ ws = TMP_SALLOC_LIMBS (MPN_TOOM3_SQR_N_TSIZE (n));
+ mpn_toom3_sqr_n (p, a, n, ws);
TMP_SFREE;
}
- else if (BELOW_THRESHOLD (n, MUL_FFT_THRESHOLD))
+#if WANT_FFT || TUNE_PROGRAM_BUILD
+ else if (BELOW_THRESHOLD (n, SQR_FFT_THRESHOLD))
+#else
+ else if (BELOW_THRESHOLD (n, MPN_TOOM44_MAX_N))
+#endif
{
mp_ptr ws;
- TMP_DECL;
- TMP_MARK;
- ws = TMP_ALLOC_LIMBS (mpn_toom8_mul_n_itch (n));
- mpn_toom8h_mul (p, a, n, b, n, ws);
- TMP_FREE;
+ TMP_SDECL;
+ TMP_SMARK;
+ ws = TMP_SALLOC_LIMBS (mpn_toom4_sqr_itch (n));
+ mpn_toom4_sqr (p, a, n, ws);
+ TMP_SFREE;
}
else
+#if WANT_FFT || TUNE_PROGRAM_BUILD
{
/* The current FFT code allocates its own space. That should probably
change. */
- mpn_fft_mul (p, a, n, b, n);
+ mpn_mul_fft_full (p, a, n, a, n);
+ }
+#else
+ {
+ /* Toom4 for large operands. */
+ mp_ptr ws;
+ TMP_DECL;
+ TMP_MARK;
+ ws = TMP_BALLOC_LIMBS (mpn_toom4_sqr_itch (n));
+ mpn_toom4_sqr (p, a, n, ws);
+ TMP_FREE;
}
+#endif
}
diff --git a/gmp/mpn/generic/mullo_basecase.c b/gmp/mpn/generic/mullo_basecase.c
deleted file mode 100644
index 2120f44c3d..0000000000
--- a/gmp/mpn/generic/mullo_basecase.c
+++ /dev/null
@@ -1,52 +0,0 @@
-/* mpn_mullo_basecase -- Internal routine to multiply two natural
- numbers of length m and n and return the low part.
-
- THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
-
-
-Copyright (C) 2000, 2002, 2004 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/*
- FIXME: Should use mpn_addmul_2 (and higher).
-*/
-
-void
-mpn_mullo_basecase (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
-{
- mp_size_t i;
-
- mpn_mul_1 (rp, up, n, vp[0]);
-
- for (i = 1; i < n; i++)
- mpn_addmul_1 (rp + i, up, n - i, vp[i]);
-}
diff --git a/gmp/mpn/generic/mullo_n.c b/gmp/mpn/generic/mullo_n.c
deleted file mode 100644
index dad75ee8f7..0000000000
--- a/gmp/mpn/generic/mullo_n.c
+++ /dev/null
@@ -1,256 +0,0 @@
-/* mpn_mullo_n -- multiply two n-limb numbers and return the low n limbs
- of their products.
-
- Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
-
- THIS IS (FOR NOW) AN INTERNAL FUNCTION. IT IS ONLY SAFE TO REACH THIS
- FUNCTION THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST GUARANTEED
- THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2004, 2005, 2009, 2010, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-#ifndef MULLO_BASECASE_THRESHOLD
-#define MULLO_BASECASE_THRESHOLD 0 /* never use mpn_mul_basecase */
-#endif
-
-#ifndef MULLO_DC_THRESHOLD
-#define MULLO_DC_THRESHOLD 3*MUL_TOOM22_THRESHOLD
-#endif
-
-#ifndef MULLO_MUL_N_THRESHOLD
-#define MULLO_MUL_N_THRESHOLD MUL_FFT_THRESHOLD
-#endif
-
-#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
-#define MAYBE_range_basecase 1
-#define MAYBE_range_toom22 1
-#else
-#define MAYBE_range_basecase \
- ((MULLO_DC_THRESHOLD == 0 ? MULLO_BASECASE_THRESHOLD : MULLO_DC_THRESHOLD) < MUL_TOOM22_THRESHOLD*36/(36-11))
-#define MAYBE_range_toom22 \
- ((MULLO_DC_THRESHOLD == 0 ? MULLO_BASECASE_THRESHOLD : MULLO_DC_THRESHOLD) < MUL_TOOM33_THRESHOLD*36/(36-11) )
-#endif
-
-/* THINK: The DC strategy uses different constants in different Toom's
- ranges. Something smoother?
-*/
-
-/*
- Compute the least significant half of the product {xy,n}*{yp,n}, or
- formally {rp,n} = {xy,n}*{yp,n} Mod (B^n).
-
- Above the given threshold, the Divide and Conquer strategy is used.
- The operands are split in two, and a full product plus two mullo
- are used to obtain the final result. The more natural strategy is to
- split in two halves, but this is far from optimal when a
- sub-quadratic multiplication is used.
-
- Mulders suggests an unbalanced split in favour of the full product,
- split n = n1 + n2, where an = n1 <= n2 = (1-a)n; i.e. 0 < a <= 1/2.
-
- To compute the value of a, we assume that the cost of mullo for a
- given size ML(n) is a fraction of the cost of a full product with
- same size M(n), and the cost M(n)=n^e for some exponent 1 < e <= 2;
- then we can write:
-
- ML(n) = 2*ML(an) + M((1-a)n) => k*M(n) = 2*k*M(n)*a^e + M(n)*(1-a)^e
-
- Given a value for e, want to minimise the value of k, i.e. the
- function k=(1-a)^e/(1-2*a^e).
-
- With e=2, the exponent for schoolbook multiplication, the minimum is
- given by the values a=1-a=1/2.
-
- With e=log(3)/log(2), the exponent for Karatsuba (aka toom22),
- Mulders compute (1-a) = 0.694... and we approximate a with 11/36.
-
- Other possible approximations follow:
- e=log(5)/log(3) [Toom-3] -> a ~= 9/40
- e=log(7)/log(4) [Toom-4] -> a ~= 7/39
- e=log(11)/log(6) [Toom-6] -> a ~= 1/8
- e=log(15)/log(8) [Toom-8] -> a ~= 1/10
-
- The values above where obtained with the following trivial commands
- in the gp-pari shell:
-
-fun(e,a)=(1-a)^e/(1-2*a^e)
-mul(a,b,c)={local(m,x,p);if(b-c<1/10000,(b+c)/2,m=1;x=b;forstep(p=c,b,(b-c)/8,if(fun(a,p)<m,m=fun(a,p);x=p));mul(a,(b+x)/2,(c+x)/2))}
-contfracpnqn(contfrac(mul(log(2*2-1)/log(2),1/2,0),5))
-contfracpnqn(contfrac(mul(log(3*2-1)/log(3),1/2,0),5))
-contfracpnqn(contfrac(mul(log(4*2-1)/log(4),1/2,0),5))
-contfracpnqn(contfrac(mul(log(6*2-1)/log(6),1/2,0),3))
-contfracpnqn(contfrac(mul(log(8*2-1)/log(8),1/2,0),3))
-
- ,
- |\
- | \
- +----,
- | |
- | |
- | |\
- | | \
- +----+--`
- ^ n2 ^n1^
-
- For an actual implementation, the assumption that M(n)=n^e is
- incorrect, as a consequence also the assumption that ML(n)=k*M(n)
- with a constant k is wrong.
-
- But theory suggest us two things:
- - the best the multiplication product is (lower e), the more k
- approaches 1, and a approaches 0.
-
- - A value for a smaller than optimal is probably less bad than a
- bigger one: e.g. let e=log(3)/log(2), a=0.3058_ the optimal
- value, and k(a)=0.808_ the mul/mullo speed ratio. We get
- k(a+1/6)=0.929_ but k(a-1/6)=0.865_.
-*/
-
-static mp_size_t
-mpn_mullo_n_itch (mp_size_t n)
-{
- return 2*n;
-}
-
-/*
- mpn_dc_mullo_n requires a scratch space of 2*n limbs at tp.
- It accepts tp == rp.
-*/
-static void
-mpn_dc_mullo_n (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n, mp_ptr tp)
-{
- mp_size_t n2, n1;
- ASSERT (n >= 2);
- ASSERT (! MPN_OVERLAP_P (rp, n, xp, n));
- ASSERT (! MPN_OVERLAP_P (rp, n, yp, n));
- ASSERT (MPN_SAME_OR_SEPARATE2_P(rp, n, tp, 2*n));
-
- /* Divide-and-conquer */
-
- /* We need fractional approximation of the value 0 < a <= 1/2
- giving the minimum in the function k=(1-a)^e/(1-2*a^e).
- */
- if (MAYBE_range_basecase && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD*36/(36-11)))
- n1 = n >> 1;
- else if (MAYBE_range_toom22 && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD*36/(36-11)))
- n1 = n * 11 / (size_t) 36; /* n1 ~= n*(1-.694...) */
- else if (BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD*40/(40-9)))
- n1 = n * 9 / (size_t) 40; /* n1 ~= n*(1-.775...) */
- else if (BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD*10/9))
- n1 = n * 7 / (size_t) 39; /* n1 ~= n*(1-.821...) */
- /* n1 = n * 4 / (size_t) 31; // n1 ~= n*(1-.871...) [TOOM66] */
- else
- n1 = n / (size_t) 10; /* n1 ~= n*(1-.899...) [TOOM88] */
-
- n2 = n - n1;
-
- /* Split as x = x1 2^(n2 GMP_NUMB_BITS) + x0,
- y = y1 2^(n2 GMP_NUMB_BITS) + y0 */
-
- /* x0 * y0 */
- mpn_mul_n (tp, xp, yp, n2);
- MPN_COPY (rp, tp, n2);
-
- /* x1 * y0 * 2^(n2 GMP_NUMB_BITS) */
- if (BELOW_THRESHOLD (n1, MULLO_BASECASE_THRESHOLD))
- mpn_mul_basecase (tp + n, xp + n2, n1, yp, n1);
- else if (BELOW_THRESHOLD (n1, MULLO_DC_THRESHOLD))
- mpn_mullo_basecase (tp + n, xp + n2, yp, n1);
- else
- mpn_dc_mullo_n (tp + n, xp + n2, yp, n1, tp + n);
- mpn_add_n (rp + n2, tp + n2, tp + n, n1);
-
- /* x0 * y1 * 2^(n2 GMP_NUMB_BITS) */
- if (BELOW_THRESHOLD (n1, MULLO_BASECASE_THRESHOLD))
- mpn_mul_basecase (tp + n, xp, n1, yp + n2, n1);
- else if (BELOW_THRESHOLD (n1, MULLO_DC_THRESHOLD))
- mpn_mullo_basecase (tp + n, xp, yp + n2, n1);
- else
- mpn_dc_mullo_n (tp + n, xp, yp + n2, n1, tp + n);
- mpn_add_n (rp + n2, rp + n2, tp + n, n1);
-}
-
-/* Avoid zero allocations when MULLO_BASECASE_THRESHOLD is 0. */
-#define MUL_BASECASE_ALLOC \
- (MULLO_BASECASE_THRESHOLD_LIMIT == 0 ? 1 : 2*MULLO_BASECASE_THRESHOLD_LIMIT)
-
-/* FIXME: This function should accept a temporary area; dc_mullow_n
- accepts a pointer tp, and handle the case tp == rp, do the same here.
- Maybe recombine the two functions.
- THINK: If mpn_mul_basecase is always faster than mpn_mullo_basecase
- (typically thanks to mpn_addmul_2) should we unconditionally use
- mpn_mul_n?
-*/
-
-void
-mpn_mullo_n (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
-{
- ASSERT (n >= 1);
- ASSERT (! MPN_OVERLAP_P (rp, n, xp, n));
- ASSERT (! MPN_OVERLAP_P (rp, n, yp, n));
-
- if (BELOW_THRESHOLD (n, MULLO_BASECASE_THRESHOLD))
- {
- /* Allocate workspace of fixed size on stack: fast! */
- mp_limb_t tp[MUL_BASECASE_ALLOC];
- mpn_mul_basecase (tp, xp, n, yp, n);
- MPN_COPY (rp, tp, n);
- }
- else if (BELOW_THRESHOLD (n, MULLO_DC_THRESHOLD))
- {
- mpn_mullo_basecase (rp, xp, yp, n);
- }
- else
- {
- mp_ptr tp;
- TMP_DECL;
- TMP_MARK;
- tp = TMP_ALLOC_LIMBS (mpn_mullo_n_itch (n));
- if (BELOW_THRESHOLD (n, MULLO_MUL_N_THRESHOLD))
- {
- mpn_dc_mullo_n (rp, xp, yp, n, tp);
- }
- else
- {
- /* For really large operands, use plain mpn_mul_n but throw away upper n
- limbs of result. */
-#if !TUNE_PROGRAM_BUILD && (MULLO_MUL_N_THRESHOLD > MUL_FFT_THRESHOLD)
- mpn_fft_mul (tp, xp, n, yp, n);
-#else
- mpn_mul_n (tp, xp, yp, n);
-#endif
- MPN_COPY (rp, tp, n);
- }
- TMP_FREE;
- }
-}
diff --git a/gmp/mpn/generic/mullow_basecase.c b/gmp/mpn/generic/mullow_basecase.c
new file mode 100644
index 0000000000..72c48f65b4
--- /dev/null
+++ b/gmp/mpn/generic/mullow_basecase.c
@@ -0,0 +1,41 @@
+/* mpn_mullow_basecase -- Internal routine to multiply two natural
+ numbers of length m and n and return the low part.
+
+ THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+
+Copyright (C) 2000, 2002, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/*
+ FIXME: Should use mpn_addmul_2 (and higher).
+*/
+
+void
+mpn_mullow_basecase (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+ mp_size_t i;
+
+ mpn_mul_1 (rp, up, n, vp[0]);
+
+ for (i = 1; i < n; i++)
+ mpn_addmul_1 (rp + i, up, n - i, vp[i]);
+}
diff --git a/gmp/mpn/generic/mullow_n.c b/gmp/mpn/generic/mullow_n.c
new file mode 100644
index 0000000000..e92a554616
--- /dev/null
+++ b/gmp/mpn/generic/mullow_n.c
@@ -0,0 +1,111 @@
+/* mpn_mullow_n -- multiply two n-limb nunbers and return the low n limbs
+ of their products.
+
+ THIS IS (FOR NOW) AN INTERNAL FUNCTION. IT IS ONLY SAFE TO REACH THIS
+ FUNCTION THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST GUARANTEED
+ THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2004, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+#ifndef MULLOW_BASECASE_THRESHOLD
+#define MULLOW_BASECASE_THRESHOLD 0 /* never use mpn_mul_basecase */
+#endif
+
+#ifndef MULLOW_DC_THRESHOLD
+#define MULLOW_DC_THRESHOLD 3*MUL_KARATSUBA_THRESHOLD
+#endif
+
+#ifndef MULLOW_MUL_N_THRESHOLD
+#define MULLOW_MUL_N_THRESHOLD 10*MULLOW_DC_THRESHOLD
+#endif
+
+/* Avoid zero allocations when MULLOW_BASECASE_THRESHOLD is 0. */
+#define MUL_BASECASE_ALLOC \
+ (MULLOW_BASECASE_THRESHOLD_LIMIT == 0 ? 1 : 2*MULLOW_BASECASE_THRESHOLD_LIMIT)
+
+/*
+ FIXME: This function should accept a temporary area.
+ FIXME: Perhaps call mpn_kara_mul_n instead of mpn_mul_n?
+ THINK: If mpn_mul_basecase is always faster than mpn_mullow_basecase
+ (typically thanks to mpn_addmul_2) should we unconditionally use
+ mpn_mul_n?
+ FIXME: The recursive calls to mpn_mullow_n use sizes n/2 (one uses floor(n/2)
+ and the other ceil(n/2)). Depending on the values of the various
+ _THRESHOLDs, this may never trigger MULLOW_BASECASE_THRESHOLD.
+ Should we worry about this overhead?
+*/
+
+void
+mpn_mullow_n (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
+{
+ if (BELOW_THRESHOLD (n, MULLOW_BASECASE_THRESHOLD))
+ {
+ /* Allocate workspace of fixed size on stack: fast! */
+ mp_limb_t ws[MUL_BASECASE_ALLOC];
+ mpn_mul_basecase (ws, xp, n, yp, n);
+ MPN_COPY (rp, ws, n);
+ }
+ else if (BELOW_THRESHOLD (n, MULLOW_DC_THRESHOLD))
+ {
+ mpn_mullow_basecase (rp, xp, yp, n);
+ }
+ else if (BELOW_THRESHOLD (n, MULLOW_MUL_N_THRESHOLD))
+ {
+ /* Divide-and-conquer */
+ mp_size_t n2 = n >> 1; /* floor(n/2) */
+ mp_size_t n1 = n - n2; /* ceil(n/2) */
+ mp_ptr tp;
+ TMP_SDECL;
+ TMP_SMARK;
+ tp = TMP_SALLOC_LIMBS (n1);
+
+ /* Split as x = x1 2^(n1 GMP_NUMB_BITS) + x0,
+ y = y1 2^(n2 GMP_NUMB_BITS) + y0 */
+
+ /* x0 * y0 */
+ mpn_mul_n (rp, xp, yp, n2);
+ if (n1 != n2)
+ rp[2 * n2] = mpn_addmul_1 (rp + n2, yp, n2, xp[n2]);
+
+ /* x1 * y0 * 2^(n1 GMP_NUMB_BITS) */
+ mpn_mullow_n (tp, xp + n1, yp, n2);
+ mpn_add_n (rp + n1, rp + n1, tp, n2);
+
+ /* x0 * y1 * 2^(n2 GMP_NUMB_BITS) */
+ mpn_mullow_n (tp, yp + n2, xp, n1);
+ mpn_add_n (rp + n2, rp + n2, tp, n1);
+ TMP_SFREE;
+ }
+ else
+ {
+ /* For really large operands, use plain mpn_mul_n but throw away upper n
+ limbs of result. */
+ mp_ptr tp;
+ TMP_DECL;
+ TMP_MARK;
+ tp = TMP_ALLOC_LIMBS (2 * n);
+
+ mpn_mul_n (tp, xp, yp, n);
+ MPN_COPY (rp, tp, n);
+ TMP_FREE;
+ }
+}
diff --git a/gmp/mpn/generic/mulmid.c b/gmp/mpn/generic/mulmid.c
deleted file mode 100644
index 6b4ea3253d..0000000000
--- a/gmp/mpn/generic/mulmid.c
+++ /dev/null
@@ -1,256 +0,0 @@
-/* mpn_mulmid -- middle product
-
- Contributed by David Harvey.
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-#define CHUNK (200 + MULMID_TOOM42_THRESHOLD)
-
-
-void
-mpn_mulmid (mp_ptr rp,
- mp_srcptr ap, mp_size_t an,
- mp_srcptr bp, mp_size_t bn)
-{
- mp_size_t rn, k;
- mp_ptr scratch, temp;
-
- ASSERT (an >= bn);
- ASSERT (bn >= 1);
- ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, ap, an));
- ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, bp, bn));
-
- if (bn < MULMID_TOOM42_THRESHOLD)
- {
- /* region not tall enough to make toom42 worthwhile for any portion */
-
- if (an < CHUNK)
- {
- /* region not too wide either, just call basecase directly */
- mpn_mulmid_basecase (rp, ap, an, bp, bn);
- return;
- }
-
- /* Region quite wide. For better locality, use basecase on chunks:
-
- AAABBBCC..
- .AAABBBCC.
- ..AAABBBCC
- */
-
- k = CHUNK - bn + 1; /* number of diagonals per chunk */
-
- /* first chunk (marked A in the above diagram) */
- mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
-
- /* remaining chunks (B, C, etc) */
- an -= k;
-
- while (an >= CHUNK)
- {
- mp_limb_t t0, t1, cy;
- ap += k, rp += k;
- t0 = rp[0], t1 = rp[1];
- mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
- ADDC_LIMB (cy, rp[0], rp[0], t0); /* add back saved limbs */
- MPN_INCR_U (rp + 1, k + 1, t1 + cy);
- an -= k;
- }
-
- if (an >= bn)
- {
- /* last remaining chunk */
- mp_limb_t t0, t1, cy;
- ap += k, rp += k;
- t0 = rp[0], t1 = rp[1];
- mpn_mulmid_basecase (rp, ap, an, bp, bn);
- ADDC_LIMB (cy, rp[0], rp[0], t0);
- MPN_INCR_U (rp + 1, an - bn + 2, t1 + cy);
- }
-
- return;
- }
-
- /* region is tall enough for toom42 */
-
- rn = an - bn + 1;
-
- if (rn < MULMID_TOOM42_THRESHOLD)
- {
- /* region not wide enough to make toom42 worthwhile for any portion */
-
- TMP_DECL;
-
- if (bn < CHUNK)
- {
- /* region not too tall either, just call basecase directly */
- mpn_mulmid_basecase (rp, ap, an, bp, bn);
- return;
- }
-
- /* Region quite tall. For better locality, use basecase on chunks:
-
- AAAAA....
- .AAAAA...
- ..BBBBB..
- ...BBBBB.
- ....CCCCC
- */
-
- TMP_MARK;
-
- temp = TMP_ALLOC_LIMBS (rn + 2);
-
- /* first chunk (marked A in the above diagram) */
- bp += bn - CHUNK, an -= bn - CHUNK;
- mpn_mulmid_basecase (rp, ap, an, bp, CHUNK);
-
- /* remaining chunks (B, C, etc) */
- bn -= CHUNK;
-
- while (bn >= CHUNK)
- {
- ap += CHUNK, bp -= CHUNK;
- mpn_mulmid_basecase (temp, ap, an, bp, CHUNK);
- mpn_add_n (rp, rp, temp, rn + 2);
- bn -= CHUNK;
- }
-
- if (bn)
- {
- /* last remaining chunk */
- ap += CHUNK, bp -= bn;
- mpn_mulmid_basecase (temp, ap, rn + bn - 1, bp, bn);
- mpn_add_n (rp, rp, temp, rn + 2);
- }
-
- TMP_FREE;
- return;
- }
-
- /* we're definitely going to use toom42 somewhere */
-
- if (bn > rn)
- {
- /* slice region into chunks, use toom42 on all chunks except possibly
- the last:
-
- AA....
- .AA...
- ..BB..
- ...BB.
- ....CC
- */
-
- TMP_DECL;
- TMP_MARK;
-
- temp = TMP_ALLOC_LIMBS (rn + 2 + mpn_toom42_mulmid_itch (rn));
- scratch = temp + rn + 2;
-
- /* first chunk (marked A in the above diagram) */
- bp += bn - rn;
- mpn_toom42_mulmid (rp, ap, bp, rn, scratch);
-
- /* remaining chunks (B, C, etc) */
- bn -= rn;
-
- while (bn >= rn)
- {
- ap += rn, bp -= rn;
- mpn_toom42_mulmid (temp, ap, bp, rn, scratch);
- mpn_add_n (rp, rp, temp, rn + 2);
- bn -= rn;
- }
-
- if (bn)
- {
- /* last remaining chunk */
- ap += rn, bp -= bn;
- mpn_mulmid (temp, ap, rn + bn - 1, bp, bn);
- mpn_add_n (rp, rp, temp, rn + 2);
- }
-
- TMP_FREE;
- }
- else
- {
- /* slice region into chunks, use toom42 on all chunks except possibly
- the last:
-
- AAABBBCC..
- .AAABBBCC.
- ..AAABBBCC
- */
-
- TMP_DECL;
- TMP_MARK;
-
- scratch = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (bn));
-
- /* first chunk (marked A in the above diagram) */
- mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
-
- /* remaining chunks (B, C, etc) */
- rn -= bn;
-
- while (rn >= bn)
- {
- mp_limb_t t0, t1, cy;
- ap += bn, rp += bn;
- t0 = rp[0], t1 = rp[1];
- mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
- ADDC_LIMB (cy, rp[0], rp[0], t0); /* add back saved limbs */
- MPN_INCR_U (rp + 1, bn + 1, t1 + cy);
- rn -= bn;
- }
-
- TMP_FREE;
-
- if (rn)
- {
- /* last remaining chunk */
- mp_limb_t t0, t1, cy;
- ap += bn, rp += bn;
- t0 = rp[0], t1 = rp[1];
- mpn_mulmid (rp, ap, rn + bn - 1, bp, bn);
- ADDC_LIMB (cy, rp[0], rp[0], t0);
- MPN_INCR_U (rp + 1, rn + 1, t1 + cy);
- }
- }
-}
diff --git a/gmp/mpn/generic/mulmid_basecase.c b/gmp/mpn/generic/mulmid_basecase.c
deleted file mode 100644
index 400e976424..0000000000
--- a/gmp/mpn/generic/mulmid_basecase.c
+++ /dev/null
@@ -1,83 +0,0 @@
-/* mpn_mulmid_basecase -- classical middle product algorithm
-
- Contributed by David Harvey.
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-/* Middle product of {up,un} and {vp,vn}, write result to {rp,un-vn+3}.
- Must have un >= vn >= 1.
-
- Neither input buffer may overlap with the output buffer. */
-
-void
-mpn_mulmid_basecase (mp_ptr rp,
- mp_srcptr up, mp_size_t un,
- mp_srcptr vp, mp_size_t vn)
-{
- mp_limb_t lo, hi; /* last two limbs of output */
- mp_limb_t cy;
-
- ASSERT (un >= vn);
- ASSERT (vn >= 1);
- ASSERT (! MPN_OVERLAP_P (rp, un - vn + 3, up, un));
- ASSERT (! MPN_OVERLAP_P (rp, un - vn + 3, vp, vn));
-
- up += vn - 1;
- un -= vn - 1;
-
- /* multiply by first limb, store result */
- lo = mpn_mul_1 (rp, up, un, vp[0]);
- hi = 0;
-
- /* accumulate remaining rows */
- for (vn--; vn; vn--)
- {
- up--, vp++;
- cy = mpn_addmul_1 (rp, up, un, vp[0]);
- add_ssaaaa (hi, lo, hi, lo, CNST_LIMB(0), cy);
- }
-
- /* store final limbs */
-#if GMP_NAIL_BITS != 0
- hi = (hi << GMP_NAIL_BITS) + (lo >> GMP_NUMB_BITS);
- lo &= GMP_NUMB_MASK;
-#endif
-
- rp[un] = lo;
- rp[un + 1] = hi;
-}
diff --git a/gmp/mpn/generic/mulmid_n.c b/gmp/mpn/generic/mulmid_n.c
deleted file mode 100644
index 2280ba3a36..0000000000
--- a/gmp/mpn/generic/mulmid_n.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/* mpn_mulmid_n -- balanced middle product
-
- Contributed by David Harvey.
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-void
-mpn_mulmid_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
-{
- ASSERT (n >= 1);
- ASSERT (! MPN_OVERLAP_P (rp, n + 2, ap, 2*n - 1));
- ASSERT (! MPN_OVERLAP_P (rp, n + 2, bp, n));
-
- if (n < MULMID_TOOM42_THRESHOLD)
- {
- mpn_mulmid_basecase (rp, ap, 2*n - 1, bp, n);
- }
- else
- {
- mp_ptr scratch;
- TMP_DECL;
- TMP_MARK;
- scratch = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (n));
- mpn_toom42_mulmid (rp, ap, bp, n, scratch);
- TMP_FREE;
- }
-}
diff --git a/gmp/mpn/generic/mulmod_bnm1.c b/gmp/mpn/generic/mulmod_bnm1.c
deleted file mode 100644
index 8710324583..0000000000
--- a/gmp/mpn/generic/mulmod_bnm1.c
+++ /dev/null
@@ -1,355 +0,0 @@
-/* mulmod_bnm1.c -- multiplication mod B^n-1.
-
- Contributed to the GNU project by Niels Möller, Torbjorn Granlund and
- Marco Bodrato.
-
- THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009, 2010, 2012, 2013 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-/* Inputs are {ap,rn} and {bp,rn}; output is {rp,rn}, computation is
- mod B^rn - 1, and values are semi-normalised; zero is represented
- as either 0 or B^n - 1. Needs a scratch of 2rn limbs at tp.
- tp==rp is allowed. */
-void
-mpn_bc_mulmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
- mp_ptr tp)
-{
- mp_limb_t cy;
-
- ASSERT (0 < rn);
-
- mpn_mul_n (tp, ap, bp, rn);
- cy = mpn_add_n (rp, tp, tp + rn, rn);
- /* If cy == 1, then the value of rp is at most B^rn - 2, so there can
- * be no overflow when adding in the carry. */
- MPN_INCR_U (rp, rn, cy);
-}
-
-
-/* Inputs are {ap,rn+1} and {bp,rn+1}; output is {rp,rn+1}, in
- semi-normalised representation, computation is mod B^rn + 1. Needs
- a scratch area of 2rn + 2 limbs at tp; tp == rp is allowed.
- Output is normalised. */
-static void
-mpn_bc_mulmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
- mp_ptr tp)
-{
- mp_limb_t cy;
-
- ASSERT (0 < rn);
-
- mpn_mul_n (tp, ap, bp, rn + 1);
- ASSERT (tp[2*rn+1] == 0);
- ASSERT (tp[2*rn] < GMP_NUMB_MAX);
- cy = tp[2*rn] + mpn_sub_n (rp, tp, tp+rn, rn);
- rp[rn] = 0;
- MPN_INCR_U (rp, rn+1, cy );
-}
-
-
-/* Computes {rp,MIN(rn,an+bn)} <- {ap,an}*{bp,bn} Mod(B^rn-1)
- *
- * The result is expected to be ZERO if and only if one of the operand
- * already is. Otherwise the class [0] Mod(B^rn-1) is represented by
- * B^rn-1. This should not be a problem if mulmod_bnm1 is used to
- * combine results and obtain a natural number when one knows in
- * advance that the final value is less than (B^rn-1).
- * Moreover it should not be a problem if mulmod_bnm1 is used to
- * compute the full product with an+bn <= rn, because this condition
- * implies (B^an-1)(B^bn-1) < (B^rn-1) .
- *
- * Requires 0 < bn <= an <= rn and an + bn > rn/2
- * Scratch need: rn + (need for recursive call OR rn + 4). This gives
- *
- * S(n) <= rn + MAX (rn + 4, S(n/2)) <= 2rn + 4
- */
-void
-mpn_mulmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn, mp_ptr tp)
-{
- ASSERT (0 < bn);
- ASSERT (bn <= an);
- ASSERT (an <= rn);
-
- if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, MULMOD_BNM1_THRESHOLD))
- {
- if (UNLIKELY (bn < rn))
- {
- if (UNLIKELY (an + bn <= rn))
- {
- mpn_mul (rp, ap, an, bp, bn);
- }
- else
- {
- mp_limb_t cy;
- mpn_mul (tp, ap, an, bp, bn);
- cy = mpn_add (rp, tp, rn, tp + rn, an + bn - rn);
- MPN_INCR_U (rp, rn, cy);
- }
- }
- else
- mpn_bc_mulmod_bnm1 (rp, ap, bp, rn, tp);
- }
- else
- {
- mp_size_t n;
- mp_limb_t cy;
- mp_limb_t hi;
-
- n = rn >> 1;
-
- /* We need at least an + bn >= n, to be able to fit one of the
- recursive products at rp. Requiring strict inequality makes
- the coded slightly simpler. If desired, we could avoid this
- restriction by initially halving rn as long as rn is even and
- an + bn <= rn/2. */
-
- ASSERT (an + bn > n);
-
- /* Compute xm = a*b mod (B^n - 1), xp = a*b mod (B^n + 1)
- and crt together as
-
- x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)]
- */
-
-#define a0 ap
-#define a1 (ap + n)
-#define b0 bp
-#define b1 (bp + n)
-
-#define xp tp /* 2n + 2 */
- /* am1 maybe in {xp, n} */
- /* bm1 maybe in {xp + n, n} */
-#define sp1 (tp + 2*n + 2)
- /* ap1 maybe in {sp1, n + 1} */
- /* bp1 maybe in {sp1 + n + 1, n + 1} */
-
- {
- mp_srcptr am1, bm1;
- mp_size_t anm, bnm;
- mp_ptr so;
-
- bm1 = b0;
- bnm = bn;
- if (LIKELY (an > n))
- {
- am1 = xp;
- cy = mpn_add (xp, a0, n, a1, an - n);
- MPN_INCR_U (xp, n, cy);
- anm = n;
- so = xp + n;
- if (LIKELY (bn > n))
- {
- bm1 = so;
- cy = mpn_add (so, b0, n, b1, bn - n);
- MPN_INCR_U (so, n, cy);
- bnm = n;
- so += n;
- }
- }
- else
- {
- so = xp;
- am1 = a0;
- anm = an;
- }
-
- mpn_mulmod_bnm1 (rp, n, am1, anm, bm1, bnm, so);
- }
-
- {
- int k;
- mp_srcptr ap1, bp1;
- mp_size_t anp, bnp;
-
- bp1 = b0;
- bnp = bn;
- if (LIKELY (an > n)) {
- ap1 = sp1;
- cy = mpn_sub (sp1, a0, n, a1, an - n);
- sp1[n] = 0;
- MPN_INCR_U (sp1, n + 1, cy);
- anp = n + ap1[n];
- if (LIKELY (bn > n)) {
- bp1 = sp1 + n + 1;
- cy = mpn_sub (sp1 + n + 1, b0, n, b1, bn - n);
- sp1[2*n+1] = 0;
- MPN_INCR_U (sp1 + n + 1, n + 1, cy);
- bnp = n + bp1[n];
- }
- } else {
- ap1 = a0;
- anp = an;
- }
-
- if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD))
- k=0;
- else
- {
- int mask;
- k = mpn_fft_best_k (n, 0);
- mask = (1<<k) - 1;
- while (n & mask) {k--; mask >>=1;};
- }
- if (k >= FFT_FIRST_K)
- xp[n] = mpn_mul_fft (xp, n, ap1, anp, bp1, bnp, k);
- else if (UNLIKELY (bp1 == b0))
- {
- ASSERT (anp + bnp <= 2*n+1);
- ASSERT (anp + bnp > n);
- ASSERT (anp >= bnp);
- mpn_mul (xp, ap1, anp, bp1, bnp);
- anp = anp + bnp - n;
- ASSERT (anp <= n || xp[2*n]==0);
- anp-= anp > n;
- cy = mpn_sub (xp, xp, n, xp + n, anp);
- xp[n] = 0;
- MPN_INCR_U (xp, n+1, cy);
- }
- else
- mpn_bc_mulmod_bnp1 (xp, ap1, bp1, n, xp);
- }
-
- /* Here the CRT recomposition begins.
-
- xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1)
- Division by 2 is a bitwise rotation.
-
- Assumes xp normalised mod (B^n+1).
-
- The residue class [0] is represented by [B^n-1]; except when
- both input are ZERO.
- */
-
-#if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc
-#if HAVE_NATIVE_mpn_rsh1add_nc
- cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */
- hi = cy << (GMP_NUMB_BITS - 1);
- cy = 0;
- /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi
- overflows, i.e. a further increment will not overflow again. */
-#else /* ! _nc */
- cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */
- hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
- cy >>= 1;
- /* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that
- the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */
-#endif
-#if GMP_NAIL_BITS == 0
- add_ssaaaa(cy, rp[n-1], cy, rp[n-1], 0, hi);
-#else
- cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1);
- rp[n-1] ^= hi;
-#endif
-#else /* ! HAVE_NATIVE_mpn_rsh1add_n */
-#if HAVE_NATIVE_mpn_add_nc
- cy = mpn_add_nc(rp, rp, xp, n, xp[n]);
-#else /* ! _nc */
- cy = xp[n] + mpn_add_n(rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */
-#endif
- cy += (rp[0]&1);
- mpn_rshift(rp, rp, n, 1);
- ASSERT (cy <= 2);
- hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
- cy >>= 1;
- /* We can have cy != 0 only if hi = 0... */
- ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0);
- rp[n-1] |= hi;
- /* ... rp[n-1] + cy can not overflow, the following INCR is correct. */
-#endif
- ASSERT (cy <= 1);
- /* Next increment can not overflow, read the previous comments about cy. */
- ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0));
- MPN_INCR_U(rp, n, cy);
-
- /* Compute the highest half:
- ([(xp + xm)/2 mod (B^n-1)] - xp ) * B^n
- */
- if (UNLIKELY (an + bn < rn))
- {
- /* Note that in this case, the only way the result can equal
- zero mod B^{rn} - 1 is if one of the inputs is zero, and
- then the output of both the recursive calls and this CRT
- reconstruction is zero, not B^{rn} - 1. Which is good,
- since the latter representation doesn't fit in the output
- area.*/
- cy = mpn_sub_n (rp + n, rp, xp, an + bn - n);
-
- /* FIXME: This subtraction of the high parts is not really
- necessary, we do it to get the carry out, and for sanity
- checking. */
- cy = xp[n] + mpn_sub_nc (xp + an + bn - n, rp + an + bn - n,
- xp + an + bn - n, rn - (an + bn), cy);
- ASSERT (an + bn == rn - 1 ||
- mpn_zero_p (xp + an + bn - n + 1, rn - 1 - (an + bn)));
- cy = mpn_sub_1 (rp, rp, an + bn, cy);
- ASSERT (cy == (xp + an + bn - n)[0]);
- }
- else
- {
- cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n);
- /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO.
- DECR will affect _at most_ the lowest n limbs. */
- MPN_DECR_U (rp, 2*n, cy);
- }
-#undef a0
-#undef a1
-#undef b0
-#undef b1
-#undef xp
-#undef sp1
- }
-}
-
-mp_size_t
-mpn_mulmod_bnm1_next_size (mp_size_t n)
-{
- mp_size_t nh;
-
- if (BELOW_THRESHOLD (n, MULMOD_BNM1_THRESHOLD))
- return n;
- if (BELOW_THRESHOLD (n, 4 * (MULMOD_BNM1_THRESHOLD - 1) + 1))
- return (n + (2-1)) & (-2);
- if (BELOW_THRESHOLD (n, 8 * (MULMOD_BNM1_THRESHOLD - 1) + 1))
- return (n + (4-1)) & (-4);
-
- nh = (n + 1) >> 1;
-
- if (BELOW_THRESHOLD (nh, MUL_FFT_MODF_THRESHOLD))
- return (n + (8-1)) & (-8);
-
- return 2 * mpn_fft_next_size (nh, mpn_fft_best_k (nh, 0));
-}
diff --git a/gmp/mpn/generic/neg.c b/gmp/mpn/generic/neg.c
deleted file mode 100644
index 2d752e912d..0000000000
--- a/gmp/mpn/generic/neg.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/* mpn_neg - negate an mpn.
-
-Copyright 2001, 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#define __GMP_FORCE_mpn_neg 1
-
-#include "gmp.h"
-#include "gmp-impl.h"
diff --git a/gmp/mpn/generic/neg_n.c b/gmp/mpn/generic/neg_n.c
new file mode 100644
index 0000000000..1609204c90
--- /dev/null
+++ b/gmp/mpn/generic/neg_n.c
@@ -0,0 +1,23 @@
+/* mpn_neg_n - negate an mpn.
+
+Copyright 2001, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#define __GMP_FORCE_mpn_neg_n 1
+
+#include "gmp.h"
+#include "gmp-impl.h"
diff --git a/gmp/mpn/generic/nussbaumer_mul.c b/gmp/mpn/generic/nussbaumer_mul.c
deleted file mode 100644
index d2bf19ad56..0000000000
--- a/gmp/mpn/generic/nussbaumer_mul.c
+++ /dev/null
@@ -1,71 +0,0 @@
-/* mpn_nussbaumer_mul -- Multiply {ap,an} and {bp,bn} using
- Nussbaumer's negacyclic convolution.
-
- Contributed to the GNU project by Marco Bodrato.
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/* Multiply {ap,an} by {bp,bn}, and put the result in {pp, an+bn} */
-void
-mpn_nussbaumer_mul (mp_ptr pp,
- mp_srcptr ap, mp_size_t an,
- mp_srcptr bp, mp_size_t bn)
-{
- mp_size_t rn;
- mp_ptr tp;
- TMP_DECL;
-
- ASSERT (an >= bn);
- ASSERT (bn > 0);
-
- TMP_MARK;
-
- if ((ap == bp) && (an == bn))
- {
- rn = mpn_sqrmod_bnm1_next_size (2*an);
- tp = TMP_ALLOC_LIMBS (mpn_sqrmod_bnm1_itch (rn, an));
- mpn_sqrmod_bnm1 (pp, rn, ap, an, tp);
- }
- else
- {
- rn = mpn_mulmod_bnm1_next_size (an + bn);
- tp = TMP_ALLOC_LIMBS (mpn_mulmod_bnm1_itch (rn, an, bn));
- mpn_mulmod_bnm1 (pp, rn, ap, an, bp, bn, tp);
- }
-
- TMP_FREE;
-}
diff --git a/gmp/mpn/generic/perfpow.c b/gmp/mpn/generic/perfpow.c
deleted file mode 100644
index bbed6309d5..0000000000
--- a/gmp/mpn/generic/perfpow.c
+++ /dev/null
@@ -1,417 +0,0 @@
-/* mpn_perfect_power_p -- mpn perfect power detection.
-
- Contributed to the GNU project by Martin Boij.
-
-Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-#define SMALL 20
-#define MEDIUM 100
-
-/* Return non-zero if {np,nn} == {xp,xn} ^ k.
- Algorithm:
- For s = 1, 2, 4, ..., s_max, compute the s least significant limbs of
- {xp,xn}^k. Stop if they don't match the s least significant limbs of
- {np,nn}.
-
- FIXME: Low xn limbs can be expected to always match, if computed as a mod
- B^{xn} root. So instead of using mpn_powlo, compute an approximation of the
- most significant (normalized) limb of {xp,xn} ^ k (and an error bound), and
- compare to {np, nn}. Or use an even cruder approximation based on fix-point
- base 2 logarithm. */
-static int
-pow_equals (mp_srcptr np, mp_size_t n,
- mp_srcptr xp,mp_size_t xn,
- mp_limb_t k, mp_bitcnt_t f,
- mp_ptr tp)
-{
- mp_limb_t *tp2;
- mp_bitcnt_t y, z;
- mp_size_t i, bn;
- int ans;
- mp_limb_t h, l;
- TMP_DECL;
-
- ASSERT (n > 1 || (n == 1 && np[0] > 1));
- ASSERT (np[n - 1] > 0);
- ASSERT (xn > 0);
-
- if (xn == 1 && xp[0] == 1)
- return 0;
-
- z = 1 + (n >> 1);
- for (bn = 1; bn < z; bn <<= 1)
- {
- mpn_powlo (tp, xp, &k, 1, bn, tp + bn);
- if (mpn_cmp (tp, np, bn) != 0)
- return 0;
- }
-
- TMP_MARK;
-
- /* Final check. Estimate the size of {xp,xn}^k before computing the power
- with full precision. Optimization: It might pay off to make a more
- accurate estimation of the logarithm of {xp,xn}, rather than using the
- index of the MSB. */
-
- MPN_SIZEINBASE_2EXP(y, xp, xn, 1);
- y -= 1; /* msb_index (xp, xn) */
-
- umul_ppmm (h, l, k, y);
- h -= l == 0; l--; /* two-limb decrement */
-
- z = f - 1; /* msb_index (np, n) */
- if (h == 0 && l <= z)
- {
- mp_limb_t size;
- size = l + k;
- ASSERT_ALWAYS (size >= k);
-
- y = 2 + size / GMP_LIMB_BITS;
- tp2 = TMP_ALLOC_LIMBS (y);
-
- i = mpn_pow_1 (tp, xp, xn, k, tp2);
- if (i == n && mpn_cmp (tp, np, n) == 0)
- ans = 1;
- else
- ans = 0;
- }
- else
- {
- ans = 0;
- }
-
- TMP_FREE;
- return ans;
-}
-
-
-/* Return non-zero if N = {np,n} is a kth power.
- I = {ip,n} = N^(-1) mod B^n. */
-static int
-is_kth_power (mp_ptr rp, mp_srcptr np,
- mp_limb_t k, mp_srcptr ip,
- mp_size_t n, mp_bitcnt_t f,
- mp_ptr tp)
-{
- mp_bitcnt_t b;
- mp_size_t rn, xn;
-
- ASSERT (n > 0);
- ASSERT ((k & 1) != 0 || k == 2);
- ASSERT ((np[0] & 1) != 0);
-
- if (k == 2)
- {
- b = (f + 1) >> 1;
- rn = 1 + b / GMP_LIMB_BITS;
- if (mpn_bsqrtinv (rp, ip, b, tp) != 0)
- {
- rp[rn - 1] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
- xn = rn;
- MPN_NORMALIZE (rp, xn);
- if (pow_equals (np, n, rp, xn, k, f, tp) != 0)
- return 1;
-
- /* Check if (2^b - r)^2 == n */
- mpn_neg (rp, rp, rn);
- rp[rn - 1] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
- MPN_NORMALIZE (rp, rn);
- if (pow_equals (np, n, rp, rn, k, f, tp) != 0)
- return 1;
- }
- }
- else
- {
- b = 1 + (f - 1) / k;
- rn = 1 + (b - 1) / GMP_LIMB_BITS;
- mpn_brootinv (rp, ip, rn, k, tp);
- if ((b % GMP_LIMB_BITS) != 0)
- rp[rn - 1] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
- MPN_NORMALIZE (rp, rn);
- if (pow_equals (np, n, rp, rn, k, f, tp) != 0)
- return 1;
- }
- MPN_ZERO (rp, rn); /* Untrash rp */
- return 0;
-}
-
-static int
-perfpow (mp_srcptr np, mp_size_t n,
- mp_limb_t ub, mp_limb_t g,
- mp_bitcnt_t f, int neg)
-{
- mp_ptr ip, tp, rp;
- mp_limb_t k;
- int ans;
- mp_bitcnt_t b;
- gmp_primesieve_t ps;
- TMP_DECL;
-
- ASSERT (n > 0);
- ASSERT ((np[0] & 1) != 0);
- ASSERT (ub > 0);
-
- TMP_MARK;
- gmp_init_primesieve (&ps);
- b = (f + 3) >> 1;
-
- ip = TMP_ALLOC_LIMBS (n);
- rp = TMP_ALLOC_LIMBS (n);
- tp = TMP_ALLOC_LIMBS (5 * n); /* FIXME */
- MPN_ZERO (rp, n);
-
- /* FIXME: It seems the inverse in ninv is needed only to get non-inverted
- roots. I.e., is_kth_power computes n^{1/2} as (n^{-1})^{-1/2} and
- similarly for nth roots. It should be more efficient to compute n^{1/2} as
- n * n^{-1/2}, with a mullo instead of a binvert. And we can do something
- similar for kth roots if we switch to an iteration converging to n^{1/k -
- 1}, and we can then eliminate this binvert call. */
- mpn_binvert (ip, np, 1 + (b - 1) / GMP_LIMB_BITS, tp);
- if (b % GMP_LIMB_BITS)
- ip[(b - 1) / GMP_LIMB_BITS] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
-
- if (neg)
- gmp_nextprime (&ps);
-
- ans = 0;
- if (g > 0)
- {
- ub = MIN (ub, g + 1);
- while ((k = gmp_nextprime (&ps)) < ub)
- {
- if ((g % k) == 0)
- {
- if (is_kth_power (rp, np, k, ip, n, f, tp) != 0)
- {
- ans = 1;
- goto ret;
- }
- }
- }
- }
- else
- {
- while ((k = gmp_nextprime (&ps)) < ub)
- {
- if (is_kth_power (rp, np, k, ip, n, f, tp) != 0)
- {
- ans = 1;
- goto ret;
- }
- }
- }
- ret:
- TMP_FREE;
- return ans;
-}
-
-static const unsigned short nrtrial[] = { 100, 500, 1000 };
-
-/* Table of (log_{p_i} 2) values, where p_i is the (nrtrial[i] + 1)'th prime
- number. */
-static const double logs[] =
- { 0.1099457228193620, 0.0847016403115322, 0.0772048195144415 };
-
-int
-mpn_perfect_power_p (mp_srcptr np, mp_size_t n)
-{
- mp_size_t ncn, s, pn, xn;
- mp_limb_t *nc, factor, g;
- mp_limb_t exp, *prev, *next, d, l, r, c, *tp, cry;
- mp_bitcnt_t twos, count;
- int ans, where, neg, trial;
- TMP_DECL;
-
- nc = (mp_ptr) np;
-
- neg = 0;
- if (n < 0)
- {
- neg = 1;
- n = -n;
- }
-
- if (n == 0 || (n == 1 && np[0] == 1))
- return 1;
-
- TMP_MARK;
-
- g = 0;
-
- ncn = n;
- twos = mpn_scan1 (np, 0);
- if (twos > 0)
- {
- if (twos == 1)
- {
- ans = 0;
- goto ret;
- }
- s = twos / GMP_LIMB_BITS;
- if (s + 1 == n && POW2_P (np[s]))
- {
- ans = ! (neg && POW2_P (twos));
- goto ret;
- }
- count = twos % GMP_LIMB_BITS;
- ncn = n - s;
- nc = TMP_ALLOC_LIMBS (ncn);
- if (count > 0)
- {
- mpn_rshift (nc, np + s, ncn, count);
- ncn -= (nc[ncn - 1] == 0);
- }
- else
- {
- MPN_COPY (nc, np + s, ncn);
- }
- g = twos;
- }
-
- if (ncn <= SMALL)
- trial = 0;
- else if (ncn <= MEDIUM)
- trial = 1;
- else
- trial = 2;
-
- where = 0;
- factor = mpn_trialdiv (nc, ncn, nrtrial[trial], &where);
-
- if (factor != 0)
- {
- if (twos == 0)
- {
- nc = TMP_ALLOC_LIMBS (ncn);
- MPN_COPY (nc, np, ncn);
- }
-
- /* Remove factors found by trialdiv. Optimization: Perhaps better to use
- the strategy in mpz_remove (). */
- prev = TMP_ALLOC_LIMBS (ncn + 2);
- next = TMP_ALLOC_LIMBS (ncn + 2);
- tp = TMP_ALLOC_LIMBS (4 * ncn);
-
- do
- {
- binvert_limb (d, factor);
- prev[0] = d;
- pn = 1;
- exp = 1;
- while (2 * pn - 1 <= ncn)
- {
- mpn_sqr (next, prev, pn);
- xn = 2 * pn;
- xn -= (next[xn - 1] == 0);
-
- if (mpn_divisible_p (nc, ncn, next, xn) == 0)
- break;
-
- exp <<= 1;
- pn = xn;
- MP_PTR_SWAP (next, prev);
- }
-
- /* Binary search for the exponent */
- l = exp + 1;
- r = 2 * exp - 1;
- while (l <= r)
- {
- c = (l + r) >> 1;
- if (c - exp > 1)
- {
- xn = mpn_pow_1 (tp, &d, 1, c - exp, next);
- if (pn + xn - 1 > ncn)
- {
- r = c - 1;
- continue;
- }
- mpn_mul (next, prev, pn, tp, xn);
- xn += pn;
- xn -= (next[xn - 1] == 0);
- }
- else
- {
- cry = mpn_mul_1 (next, prev, pn, d);
- next[pn] = cry;
- xn = pn + (cry != 0);
- }
-
- if (mpn_divisible_p (nc, ncn, next, xn) == 0)
- {
- r = c - 1;
- }
- else
- {
- exp = c;
- l = c + 1;
- MP_PTR_SWAP (next, prev);
- pn = xn;
- }
- }
-
- if (g == 0)
- g = exp;
- else
- g = mpn_gcd_1 (&g, 1, exp);
-
- if (g == 1)
- {
- ans = 0;
- goto ret;
- }
-
- mpn_divexact (next, nc, ncn, prev, pn);
- ncn = ncn - pn;
- ncn += next[ncn] != 0;
- MPN_COPY (nc, next, ncn);
-
- if (ncn == 1 && nc[0] == 1)
- {
- ans = ! (neg && POW2_P (g));
- goto ret;
- }
-
- factor = mpn_trialdiv (nc, ncn, nrtrial[trial], &where);
- }
- while (factor != 0);
- }
-
- MPN_SIZEINBASE_2EXP(count, nc, ncn, 1); /* log (nc) + 1 */
- d = (mp_limb_t) (count * logs[trial] + 1e-9) + 1;
- ans = perfpow (nc, ncn, d, g, count, neg);
-
- ret:
- TMP_FREE;
- return ans;
-}
diff --git a/gmp/mpn/generic/perfsqr.c b/gmp/mpn/generic/perfsqr.c
index bdd82ccd96..1995a944df 100644
--- a/gmp/mpn/generic/perfsqr.c
+++ b/gmp/mpn/generic/perfsqr.c
@@ -1,34 +1,23 @@
/* mpn_perfect_square_p(u,usize) -- Return non-zero if U is a perfect square,
zero otherwise.
-Copyright 1991, 1993, 1994, 1996, 1997, 2000-2002, 2005, 2012 Free Software
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2005 Free Software
Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h> /* for NULL */
#include "gmp.h"
@@ -113,20 +102,20 @@ see https://www.gnu.org/licenses/. */
/* FIXME: The %= here isn't good, and might destroy any savings from keeping
the PERFSQR_MOD_IDX stuff within a limb (rather than needing umul_ppmm).
Maybe a new sort of mpn_preinv_mod_1 could accept an unnormalized divisor
- and a shift count, like mpn_preinv_divrem_1. But mod_34lsub1 is our
- normal case, so lets not worry too much about mod_1. */
-#define PERFSQR_MOD_PP(r, up, usize) \
- do { \
- if (BELOW_THRESHOLD (usize, PREINV_MOD_1_TO_MOD_1_THRESHOLD)) \
- { \
- (r) = mpn_preinv_mod_1 (up, usize, PERFSQR_PP_NORM, \
- PERFSQR_PP_INVERTED); \
- (r) %= PERFSQR_PP; \
- } \
- else \
- { \
- (r) = mpn_mod_1 (up, usize, PERFSQR_PP); \
- } \
+ and a shift count, like mpn_preinv_divrem_1. But mod_34lsub1 is our
+ normal case, so lets not worry too much about mod_1. */
+#define PERFSQR_MOD_PP(r, up, usize) \
+ do { \
+ if (USE_PREINV_MOD_1) \
+ { \
+ (r) = mpn_preinv_mod_1 (up, usize, PERFSQR_PP_NORM, \
+ PERFSQR_PP_INVERTED); \
+ (r) %= PERFSQR_PP; \
+ } \
+ else \
+ { \
+ (r) = mpn_mod_1 (up, usize, PERFSQR_PP); \
+ } \
} while (0)
#define PERFSQR_MOD_IDX(idx, r, d, inv) \
@@ -156,7 +145,7 @@ see https://www.gnu.org/licenses/. */
} while (0)
/* The expression "(int) idx - GMP_LIMB_BITS < 0" lets the compiler use the
- sign bit from "idx-GMP_LIMB_BITS", which might help avoid a branch. */
+ sign bit from "idx-GMP_LIMB_BITS", which might help avoid a branch. */
#define PERFSQR_MOD_2(r, d, inv, mhi, mlo) \
do { \
mp_limb_t m; \
@@ -196,7 +185,7 @@ mpn_perfect_square_p (mp_srcptr up, mp_size_t usize)
/* Check that we have even multiplicity of 2, and then check that the rest is
a possible perfect square. Leave disabled until we can determine this
really is an improvement. It it is, it could completely replace the
- simple probe above, since this should throw out more non-squares, but at
+ simple probe above, since this should through out more non-squares, but at
the expense of somewhat more cycles. */
{
mp_limb_t lo;
@@ -229,7 +218,7 @@ mpn_perfect_square_p (mp_srcptr up, mp_size_t usize)
TMP_DECL;
TMP_MARK;
- root_ptr = TMP_ALLOC_LIMBS ((usize + 1) / 2);
+ root_ptr = (mp_ptr) TMP_ALLOC ((usize + 1) / 2 * BYTES_PER_MP_LIMB);
/* Iff mpn_sqrtrem returns zero, the square is perfect. */
res = ! mpn_sqrtrem (root_ptr, NULL, up, usize);
diff --git a/gmp/mpn/generic/popham.c b/gmp/mpn/generic/popham.c
index 13e529b7cd..be7c525036 100644
--- a/gmp/mpn/generic/popham.c
+++ b/gmp/mpn/generic/popham.c
@@ -1,33 +1,21 @@
/* mpn_popcount, mpn_hamdist -- mpn bit population count/hamming distance.
-Copyright 1994, 1996, 2000-2002, 2005, 2011, 2012 Free Software Foundation,
-Inc.
+Copyright 1994, 1996, 2000, 2001, 2002, 2005 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
@@ -42,14 +30,14 @@ see https://www.gnu.org/licenses/. */
#define POPHAM(u,v) u ^ v
#endif
-mp_bitcnt_t
+unsigned long
FNAME (mp_srcptr up,
#if OPERATION_hamdist
mp_srcptr vp,
#endif
- mp_size_t n) __GMP_NOTHROW
+ mp_size_t n)
{
- mp_bitcnt_t result = 0;
+ unsigned long result = 0;
mp_limb_t p0, p1, p2, p3, x, p01, p23;
mp_size_t i;
diff --git a/gmp/mpn/generic/pow_1.c b/gmp/mpn/generic/pow_1.c
index 2333206554..4bc9f434bc 100644
--- a/gmp/mpn/generic/pow_1.c
+++ b/gmp/mpn/generic/pow_1.c
@@ -4,33 +4,22 @@
CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
FUTURE GNU MP RELEASES.
-Copyright 2002, 2014 Free Software Foundation, Inc.
+Copyright 2002 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+The GNU MP Library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License along
+with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
@@ -45,9 +34,6 @@ mpn_pow_1 (mp_ptr rp, mp_srcptr bp, mp_size_t bn, mp_limb_t exp, mp_ptr tp)
mp_size_t rn;
int par;
- ASSERT (bn >= 1);
- /* FIXME: Add operand overlap criteria */
-
if (exp <= 1)
{
if (exp == 0)
@@ -68,13 +54,11 @@ mpn_pow_1 (mp_ptr rp, mp_srcptr bp, mp_size_t bn, mp_limb_t exp, mp_ptr tp)
so much time that the slowness of this code will be negligible. */
par = 0;
cnt = GMP_LIMB_BITS;
- x = exp;
- do
+ for (x = exp; x != 0; x >>= 1)
{
- par ^= x;
+ par ^= x & 1;
cnt--;
- x >>= 1;
- } while (x != 0);
+ }
exp <<= cnt;
if (bn == 1)
@@ -84,7 +68,7 @@ mpn_pow_1 (mp_ptr rp, mp_srcptr bp, mp_size_t bn, mp_limb_t exp, mp_ptr tp)
if ((cnt & 1) != 0)
MP_PTR_SWAP (rp, tp);
- mpn_sqr (rp, bp, bn);
+ mpn_sqr_n (rp, bp, bn);
rn = 2 * bn; rn -= rp[rn - 1] == 0;
for (i = GMP_LIMB_BITS - cnt - 1;;)
@@ -99,7 +83,7 @@ mpn_pow_1 (mp_ptr rp, mp_srcptr bp, mp_size_t bn, mp_limb_t exp, mp_ptr tp)
if (--i == 0)
break;
- mpn_sqr (tp, rp, rn);
+ mpn_sqr_n (tp, rp, rn);
rn = 2 * rn; rn -= tp[rn - 1] == 0;
MP_PTR_SWAP (rp, tp);
}
@@ -109,7 +93,7 @@ mpn_pow_1 (mp_ptr rp, mp_srcptr bp, mp_size_t bn, mp_limb_t exp, mp_ptr tp)
if (((par ^ cnt) & 1) == 0)
MP_PTR_SWAP (rp, tp);
- mpn_sqr (rp, bp, bn);
+ mpn_sqr_n (rp, bp, bn);
rn = 2 * bn; rn -= rp[rn - 1] == 0;
for (i = GMP_LIMB_BITS - cnt - 1;;)
@@ -124,7 +108,7 @@ mpn_pow_1 (mp_ptr rp, mp_srcptr bp, mp_size_t bn, mp_limb_t exp, mp_ptr tp)
if (--i == 0)
break;
- mpn_sqr (tp, rp, rn);
+ mpn_sqr_n (tp, rp, rn);
rn = 2 * rn; rn -= tp[rn - 1] == 0;
MP_PTR_SWAP (rp, tp);
}
diff --git a/gmp/mpn/generic/powlo.c b/gmp/mpn/generic/powlo.c
index adcd96eb51..ca3e1e9448 100644
--- a/gmp/mpn/generic/powlo.c
+++ b/gmp/mpn/generic/powlo.c
@@ -1,32 +1,21 @@
-/* mpn_powlo -- Compute R = U^E mod B^n, where B is the limb base.
+/* mpn_powlo -- Compute R = U^E mod R^n, where R is the limb base.
-Copyright 2007-2009, 2012 Free Software Foundation, Inc.
+Copyright 2007, 2008, 2009 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
@@ -38,7 +27,7 @@ see https://www.gnu.org/licenses/. */
((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1)
static inline mp_limb_t
-getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
+getbits (const mp_limb_t *p, unsigned long bi, int nbits)
{
int nbits_in_r;
mp_limb_t r;
@@ -51,10 +40,10 @@ getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
else
{
bi -= nbits; /* bit index of low bit to extract */
- i = bi / GMP_NUMB_BITS; /* word index of low bit to extract */
- bi %= GMP_NUMB_BITS; /* bit index in low word */
+ i = bi / GMP_LIMB_BITS; /* word index of low bit to extract */
+ bi %= GMP_LIMB_BITS; /* bit index in low word */
r = p[i] >> bi; /* extract (low) bits */
- nbits_in_r = GMP_NUMB_BITS - bi; /* number of bits now in r */
+ nbits_in_r = GMP_LIMB_BITS - bi; /* number of bits now in r */
if (nbits_in_r < nbits) /* did we get enough bits? */
r += p[i + 1] << nbits_in_r; /* prepend bits from higher word */
return r & (((mp_limb_t ) 1 << nbits) - 1);
@@ -62,16 +51,16 @@ getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
}
static inline int
-win_size (mp_bitcnt_t eb)
+win_size (unsigned long eb)
{
int k;
- static mp_bitcnt_t x[] = {1,7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0};
+ static unsigned long x[] = {1,7,25,81,241,673,1793,4609,11521,28161,~0ul};
for (k = 0; eb > x[k]; k++)
;
return k;
}
-/* rp[n-1..0] = bp[n-1..0] ^ ep[en-1..0] mod B^n, B is the limb base.
+/* rp[n-1..0] = bp[n-1..0] ^ ep[en-1..0] mod R^n, R is the limb base.
Requires that ep[en-1] is non-zero.
Uses scratch space tp[3n-1..0], i.e., 3n words. */
void
@@ -80,7 +69,7 @@ mpn_powlo (mp_ptr rp, mp_srcptr bp,
mp_size_t n, mp_ptr tp)
{
int cnt;
- mp_bitcnt_t ebi;
+ long ebi;
int windowsize, this_windowsize;
mp_limb_t expbits;
mp_limb_t *pp, *this_pp, *last_pp;
@@ -92,11 +81,12 @@ mpn_powlo (mp_ptr rp, mp_srcptr bp,
TMP_MARK;
- MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
+ count_leading_zeros (cnt, ep[en - 1]);
+ ebi = en * GMP_LIMB_BITS - cnt;
windowsize = win_size (ebi);
- pp = TMP_ALLOC_LIMBS ((n << (windowsize - 1)) + n); /* + n is for mullo ign part */
+ pp = TMP_ALLOC_LIMBS ((n << (windowsize - 1)) + n); /* + n is for mullow ign part */
this_pp = pp;
@@ -105,7 +95,7 @@ mpn_powlo (mp_ptr rp, mp_srcptr bp,
b2p = tp + 2*n;
/* Store b^2 in b2. */
- mpn_sqr (tp, bp, n); /* FIXME: Use "mpn_sqrlo" */
+ mpn_sqr_n (tp, bp, n); /* FIXME: Use "mpn_sqrlo" */
MPN_COPY (b2p, tp, n);
/* Precompute odd powers of b and put them in the temporary area at pp. */
@@ -113,14 +103,13 @@ mpn_powlo (mp_ptr rp, mp_srcptr bp,
{
last_pp = this_pp;
this_pp += n;
- mpn_mullo_n (this_pp, last_pp, b2p, n);
+ mpn_mullow_n (this_pp, last_pp, b2p, n);
}
expbits = getbits (ep, ebi, windowsize);
- if (ebi < windowsize)
+ ebi -= windowsize;
+ if (ebi < 0)
ebi = 0;
- else
- ebi -= windowsize;
count_trailing_zeros (cnt, expbits);
ebi += cnt;
@@ -132,7 +121,7 @@ mpn_powlo (mp_ptr rp, mp_srcptr bp,
{
while (getbit (ep, ebi) == 0)
{
- mpn_sqr (tp, rp, n); /* FIXME: Use "mpn_sqrlo" */
+ mpn_sqr_n (tp, rp, n); /* FIXME: Use "mpn_sqrlo" */
MPN_COPY (rp, tp, n);
ebi--;
if (ebi == 0)
@@ -143,14 +132,13 @@ mpn_powlo (mp_ptr rp, mp_srcptr bp,
bits <= windowsize, and such that the least significant bit is 1. */
expbits = getbits (ep, ebi, windowsize);
+ ebi -= windowsize;
this_windowsize = windowsize;
- if (ebi < windowsize)
+ if (ebi < 0)
{
- this_windowsize -= windowsize - ebi;
+ this_windowsize += ebi;
ebi = 0;
}
- else
- ebi -= windowsize;
count_trailing_zeros (cnt, expbits);
this_windowsize -= cnt;
@@ -159,13 +147,13 @@ mpn_powlo (mp_ptr rp, mp_srcptr bp,
do
{
- mpn_sqr (tp, rp, n);
+ mpn_sqr_n (tp, rp, n);
MPN_COPY (rp, tp, n);
this_windowsize--;
}
while (this_windowsize != 0);
- mpn_mullo_n (tp, rp, pp + n * (expbits >> 1), n);
+ mpn_mullow_n (tp, rp, pp + n * (expbits >> 1), n);
MPN_COPY (rp, tp, n);
}
diff --git a/gmp/mpn/generic/powm.c b/gmp/mpn/generic/powm.c
index 9968116016..c057ec2156 100644
--- a/gmp/mpn/generic/powm.c
+++ b/gmp/mpn/generic/powm.c
@@ -1,51 +1,37 @@
/* mpn_powm -- Compute R = U^E mod M.
- Contributed to the GNU project by Torbjorn Granlund.
-
- THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2007-2012 Free Software Foundation, Inc.
+Copyright 2007, 2008, 2009 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
/*
- BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd.
+ BASIC ALGORITHM, Compute b^e mod n, where n is odd.
- 1. W <- U
+ 1. w <- b
- 2. T <- (B^n * U) mod M Convert to REDC form
+ 2. While w^2 < n (and there are more bits in e)
+ w <- power left-to-right base-2 without reduction
- 3. Compute table U^1, U^3, U^5... of E-dependent size
+ 3. t <- (B^n * b) / n Convert to REDC form
- 4. While there are more bits in E
- W <- power left-to-right base-k
+ 4. Compute power table of e-dependent size
+
+ 5. While there are more bits in e
+ w <- power left-to-right base-k with reduction
TODO:
@@ -54,64 +40,51 @@ see https://www.gnu.org/licenses/. */
That will simplify the code using getbits. (Perhaps make getbits' sibling
getbit then have similar form, for symmetry.)
- * Write an itch function. Or perhaps get rid of tp parameter since the huge
- pp area is allocated locally anyway?
+ * Write an itch function.
* Choose window size without looping. (Superoptimize or think(tm).)
- * Handle small bases with initial, reduction-free exponentiation.
+ * How do we handle small bases?
+
+ * This is slower than old mpz code, in particular if we base it on redc_1
+ (use: #undef HAVE_NATIVE_mpn_addmul_2). Why?
+
+ * Make it sub-quadratic.
* Call new division functions, not mpn_tdiv_qr.
+ * Is redc obsolete with improved SB division?
+
* Consider special code for one-limb M.
- * How should we handle the redc1/redc2/redc_n choice?
- - redc1: T(binvert_1limb) + e * (n) * (T(mullo-1x1) + n*T(addmul_1))
- - redc2: T(binvert_2limbs) + e * (n/2) * (T(mullo-2x2) + n*T(addmul_2))
- - redc_n: T(binvert_nlimbs) + e * (T(mullo-nxn) + T(M(n)))
+ * CRT for N = odd*2^t:
+ Using Newton's method and 2-adic arithmetic:
+ m1_inv_m2 = 1/odd mod 2^t
+ Plain 2-adic (REDC) modexp:
+ r1 = a ^ b mod odd
+ Mullo+sqrlo-based modexp:
+ r2 = a ^ b mod 2^t
+ mullo, mul, add:
+ r = ((r2 - r1) * m1_i_m2 mod 2^t) * odd + r1
+
+ * How should we handle the redc1/redc2/redc2/redc4/redc_subquad choice?
+ - redc1: T(binvert_1limb) + e * (n) * (T(mullo1x1) + n*T(addmul_1))
+ - redc2: T(binvert_2limbs) + e * (n/2) * (T(mullo2x2) + n*T(addmul_2))
+ - redc3: T(binvert_3limbs) + e * (n/3) * (T(mullo3x3) + n*T(addmul_3))
This disregards the addmul_N constant term, but we could think of
- that as part of the respective mullo.
-
- * When U (the base) is small, we should start the exponentiation with plain
- operations, then convert that partial result to REDC form.
-
- * When U is just one limb, should it be handled without the k-ary tricks?
- We could keep a factor of B^n in W, but use U' = BU as base. After
- multiplying by this (pseudo two-limb) number, we need to multiply by 1/B
- mod M.
+ that as part of the respective mulloNxN.
*/
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
-#undef MPN_REDC_1
-#define MPN_REDC_1(rp, up, mp, n, invm) \
- do { \
- mp_limb_t cy; \
- cy = mpn_redc_1 (rp, up, mp, n, invm); \
- if (cy != 0) \
- mpn_sub_n (rp, rp, mp, n); \
- } while (0)
-
-#undef MPN_REDC_2
-#define MPN_REDC_2(rp, up, mp, n, mip) \
- do { \
- mp_limb_t cy; \
- cy = mpn_redc_2 (rp, up, mp, n, mip); \
- if (cy != 0) \
- mpn_sub_n (rp, rp, mp, n); \
- } while (0)
-
-#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
-#define WANT_REDC_2 1
-#endif
#define getbit(p,bi) \
((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1)
static inline mp_limb_t
-getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
+getbits (const mp_limb_t *p, unsigned long bi, int nbits)
{
int nbits_in_r;
mp_limb_t r;
@@ -124,27 +97,49 @@ getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
else
{
bi -= nbits; /* bit index of low bit to extract */
- i = bi / GMP_NUMB_BITS; /* word index of low bit to extract */
- bi %= GMP_NUMB_BITS; /* bit index in low word */
+ i = bi / GMP_LIMB_BITS; /* word index of low bit to extract */
+ bi %= GMP_LIMB_BITS; /* bit index in low word */
r = p[i] >> bi; /* extract (low) bits */
- nbits_in_r = GMP_NUMB_BITS - bi; /* number of bits now in r */
+ nbits_in_r = GMP_LIMB_BITS - bi; /* number of bits now in r */
if (nbits_in_r < nbits) /* did we get enough bits? */
r += p[i + 1] << nbits_in_r; /* prepend bits from higher word */
return r & (((mp_limb_t ) 1 << nbits) - 1);
}
}
+#undef HAVE_NATIVE_mpn_addmul_2
+
+#ifndef HAVE_NATIVE_mpn_addmul_2
+#define REDC_2_THRESHOLD MP_SIZE_T_MAX
+#endif
+
+#ifndef REDC_2_THRESHOLD
+#define REDC_2_THRESHOLD 4
+#endif
+
+static void mpn_redc_n () {ASSERT_ALWAYS(0);}
+
static inline int
-win_size (mp_bitcnt_t eb)
+win_size (unsigned long eb)
{
int k;
- static mp_bitcnt_t x[] = {0,7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0};
- for (k = 1; eb > x[k]; k++)
+ static unsigned long x[] = {1,7,25,81,241,673,1793,4609,11521,28161,~0ul};
+ for (k = 0; eb > x[k]; k++)
;
return k;
}
-/* Convert U to REDC form, U_r = B^n * U mod M */
+#define MPN_REDC_X(rp, tp, mp, n, mip) \
+ do { \
+ if (redc_x == 1) \
+ mpn_redc_1 (rp, tp, mp, n, mip[0]); \
+ else if (redc_x == 2) \
+ mpn_redc_2 (rp, tp, mp, n, mip); \
+ else \
+ mpn_redc_n (rp, tp, mp, n, mip); \
+ } while (0)
+
+ /* Convert U to REDC form, U_r = B^n * U mod M */
static void
redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n)
{
@@ -164,19 +159,21 @@ redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n)
/* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
Requires that mp[n-1..0] is odd.
Requires that ep[en-1..0] is > 1.
- Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs. */
+ Uses scratch space tp[3n..0], i.e., 3n+1 words. */
void
mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
mp_srcptr ep, mp_size_t en,
mp_srcptr mp, mp_size_t n, mp_ptr tp)
{
- mp_limb_t ip[2], *mip;
+ mp_limb_t mip[2];
int cnt;
- mp_bitcnt_t ebi;
+ long ebi;
int windowsize, this_windowsize;
mp_limb_t expbits;
- mp_ptr pp, this_pp;
+ mp_ptr pp, this_pp, last_pp;
+ mp_ptr b2p;
long i;
+ int redc_x;
TMP_DECL;
ASSERT (en > 1 || (en == 1 && ep[0] > 1));
@@ -184,7 +181,8 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
TMP_MARK;
- MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
+ count_leading_zeros (cnt, ep[en - 1]);
+ ebi = en * GMP_LIMB_BITS - cnt;
#if 0
if (bn < n)
@@ -193,7 +191,7 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
until the result is greater than the mod argument. */
for (;;)
{
- mpn_sqr (tp, this_pp, tn);
+ mpn_sqr_n (tp, this_pp, tn);
tn = tn * 2 - 1, tn += tp[tn] != 0;
if (getbit (ep, ebi) != 0)
mpn_mul (..., tp, tn, bp, bn);
@@ -204,75 +202,49 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
windowsize = win_size (ebi);
-#if WANT_REDC_2
- if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+ if (BELOW_THRESHOLD (n, REDC_2_THRESHOLD))
{
- mip = ip;
binvert_limb (mip[0], mp[0]);
mip[0] = -mip[0];
+ redc_x = 1;
}
- else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+#if defined (HAVE_NATIVE_mpn_addmul_2)
+ else
{
- mip = ip;
mpn_binvert (mip, mp, 2, tp);
mip[0] = -mip[0]; mip[1] = ~mip[1];
- }
-#else
- if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
- {
- mip = ip;
- binvert_limb (mip[0], mp[0]);
- mip[0] = -mip[0];
+ redc_x = 2;
}
#endif
- else
- {
- mip = TMP_ALLOC_LIMBS (n);
- mpn_binvert (mip, mp, n, tp);
- }
+#if 0
+ mpn_binvert (mip, mp, n, tp);
+ redc_x = 0;
+#endif
pp = TMP_ALLOC_LIMBS (n << (windowsize - 1));
this_pp = pp;
redcify (this_pp, bp, bn, mp, n);
- /* Store b^2 at rp. */
- mpn_sqr (tp, this_pp, n);
-#if WANT_REDC_2
- if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
- MPN_REDC_1 (rp, tp, mp, n, mip[0]);
- else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
- MPN_REDC_2 (rp, tp, mp, n, mip);
-#else
- if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
- MPN_REDC_1 (rp, tp, mp, n, mip[0]);
-#endif
- else
- mpn_redc_n (rp, tp, mp, n, mip);
+ b2p = tp + 2*n;
+
+ /* Store b^2 in b2. */
+ mpn_sqr_n (tp, this_pp, n);
+ MPN_REDC_X (b2p, tp, mp, n, mip);
/* Precompute odd powers of b and put them in the temporary area at pp. */
for (i = (1 << (windowsize - 1)) - 1; i > 0; i--)
{
- mpn_mul_n (tp, this_pp, rp, n);
+ last_pp = this_pp;
this_pp += n;
-#if WANT_REDC_2
- if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
- MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
- else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
- MPN_REDC_2 (this_pp, tp, mp, n, mip);
-#else
- if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
- MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
-#endif
- else
- mpn_redc_n (this_pp, tp, mp, n, mip);
+ mpn_mul_n (tp, last_pp, b2p, n);
+ MPN_REDC_X (this_pp, tp, mp, n, mip);
}
expbits = getbits (ep, ebi, windowsize);
- if (ebi < windowsize)
+ ebi -= windowsize;
+ if (ebi < 0)
ebi = 0;
- else
- ebi -= windowsize;
count_trailing_zeros (cnt, expbits);
ebi += cnt;
@@ -280,311 +252,51 @@ mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
MPN_COPY (rp, pp + n * (expbits >> 1), n);
-#define INNERLOOP \
- while (ebi != 0) \
- { \
- while (getbit (ep, ebi) == 0) \
- { \
- MPN_SQR (tp, rp, n); \
- MPN_REDUCE (rp, tp, mp, n, mip); \
- ebi--; \
- if (ebi == 0) \
- goto done; \
- } \
- \
- /* The next bit of the exponent is 1. Now extract the largest \
- block of bits <= windowsize, and such that the least \
- significant bit is 1. */ \
- \
- expbits = getbits (ep, ebi, windowsize); \
- this_windowsize = windowsize; \
- if (ebi < windowsize) \
- { \
- this_windowsize -= windowsize - ebi; \
- ebi = 0; \
- } \
- else \
- ebi -= windowsize; \
- \
- count_trailing_zeros (cnt, expbits); \
- this_windowsize -= cnt; \
- ebi += cnt; \
- expbits >>= cnt; \
- \
- do \
- { \
- MPN_SQR (tp, rp, n); \
- MPN_REDUCE (rp, tp, mp, n, mip); \
- this_windowsize--; \
- } \
- while (this_windowsize != 0); \
- \
- MPN_MUL_N (tp, rp, pp + n * (expbits >> 1), n); \
- MPN_REDUCE (rp, tp, mp, n, mip); \
- }
-
-
-#if WANT_REDC_2
- if (REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD)
- {
- if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
- {
- if (REDC_1_TO_REDC_2_THRESHOLD < SQR_BASECASE_THRESHOLD
- || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
- {
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
-#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
- INNERLOOP;
- }
- else
- {
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
-#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
- INNERLOOP;
- }
- }
- else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
- {
- if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
- || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
- {
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
-#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
- INNERLOOP;
- }
- else
- {
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
-#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
- INNERLOOP;
- }
- }
- else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
- {
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
-#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
- INNERLOOP;
- }
- else
- {
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
-#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
- INNERLOOP;
- }
- }
- else
+ while (ebi != 0)
{
- if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+ while (getbit (ep, ebi) == 0)
{
- if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
- || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
- {
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
-#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
- INNERLOOP;
- }
- else
- {
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
-#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
- INNERLOOP;
- }
- }
- else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
- {
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
-#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
- INNERLOOP;
- }
- else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
- {
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
-#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
- INNERLOOP;
- }
- else
- {
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
-#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
- INNERLOOP;
+ mpn_sqr_n (tp, rp, n);
+ MPN_REDC_X (rp, tp, mp, n, mip);
+ ebi--;
+ if (ebi == 0)
+ goto done;
}
- }
-#else /* WANT_REDC_2 */
+ /* The next bit of the exponent is 1. Now extract the largest block of
+ bits <= windowsize, and such that the least significant bit is 1. */
- if (REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD)
- {
- if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
- {
- if (REDC_1_TO_REDC_N_THRESHOLD < SQR_BASECASE_THRESHOLD
- || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
- {
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
-#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
- INNERLOOP;
- }
- else
- {
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
-#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
- INNERLOOP;
- }
- }
- else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+ expbits = getbits (ep, ebi, windowsize);
+ ebi -= windowsize;
+ this_windowsize = windowsize;
+ if (ebi < 0)
{
- if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
- || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
- {
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
-#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
- INNERLOOP;
- }
- else
- {
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
-#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
- INNERLOOP;
- }
+ this_windowsize += ebi;
+ ebi = 0;
}
- else
- {
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
-#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
- INNERLOOP;
- }
- }
- else
- {
- if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
- {
- if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
- || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
- {
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
-#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
- INNERLOOP;
- }
- else
- {
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
-#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
- INNERLOOP;
- }
- }
- else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
- {
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
-#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
- INNERLOOP;
- }
- else
+
+ count_trailing_zeros (cnt, expbits);
+ this_windowsize -= cnt;
+ ebi += cnt;
+ expbits >>= cnt;
+
+ do
{
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
-#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
- INNERLOOP;
+ mpn_sqr_n (tp, rp, n);
+ MPN_REDC_X (rp, tp, mp, n, mip);
+ this_windowsize--;
}
+ while (this_windowsize != 0);
+
+ mpn_mul_n (tp, rp, pp + n * (expbits >> 1), n);
+ MPN_REDC_X (rp, tp, mp, n, mip);
}
-#endif /* WANT_REDC_2 */
done:
-
MPN_COPY (tp, rp, n);
MPN_ZERO (tp + n, n);
-
-#if WANT_REDC_2
- if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
- MPN_REDC_1 (rp, tp, mp, n, mip[0]);
- else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
- MPN_REDC_2 (rp, tp, mp, n, mip);
-#else
- if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
- MPN_REDC_1 (rp, tp, mp, n, mip[0]);
-#endif
- else
- mpn_redc_n (rp, tp, mp, n, mip);
-
+ MPN_REDC_X (rp, tp, mp, n, mip);
if (mpn_cmp (rp, mp, n) >= 0)
mpn_sub_n (rp, rp, mp, n);
-
TMP_FREE;
}
diff --git a/gmp/mpn/generic/powm_sec.c b/gmp/mpn/generic/powm_sec.c
new file mode 100644
index 0000000000..26d77b5c81
--- /dev/null
+++ b/gmp/mpn/generic/powm_sec.c
@@ -0,0 +1,272 @@
+/* mpn_powm_sec -- Compute R = U^E mod M. Safe variant, not leaking time info.
+
+Copyright 2007, 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+
+/*
+ BASIC ALGORITHM, Compute b^e mod n, where n is odd.
+
+ 1. w <- b
+
+ 2. While w^2 < n (and there are more bits in e)
+ w <- power left-to-right base-2 without reduction
+
+ 3. t <- (B^n * b) / n Convert to REDC form
+
+ 4. Compute power table of e-dependent size
+
+ 5. While there are more bits in e
+ w <- power left-to-right base-k with reduction
+
+
+ TODO:
+
+ * Make getbits a macro, thereby allowing it to update the index operand.
+ That will simplify the code using getbits. (Perhaps make getbits' sibling
+ getbit then have similar form, for symmetry.)
+
+ * Write an itch function.
+
+ * Choose window size without looping. (Superoptimize or think(tm).)
+
+ * Make it sub-quadratic.
+
+ * Call new division functions, not mpn_tdiv_qr.
+
+ * Is redc obsolete with improved SB division?
+
+ * Consider special code for one-limb M.
+
+ * Handle even M (in mpz_powm_sec) with two modexps and CRT.
+*/
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#define WANT_CACHE_SECURITY 1
+
+
+#define getbit(p,bi) \
+ ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1)
+
+static inline mp_limb_t
+getbits (const mp_limb_t *p, unsigned long bi, int nbits)
+{
+ int nbits_in_r;
+ mp_limb_t r;
+ mp_size_t i;
+
+ if (bi < nbits)
+ {
+ return p[0] & (((mp_limb_t) 1 << bi) - 1);
+ }
+ else
+ {
+ bi -= nbits; /* bit index of low bit to extract */
+ i = bi / GMP_LIMB_BITS; /* word index of low bit to extract */
+ bi %= GMP_LIMB_BITS; /* bit index in low word */
+ r = p[i] >> bi; /* extract (low) bits */
+ nbits_in_r = GMP_LIMB_BITS - bi; /* number of bits now in r */
+ if (nbits_in_r < nbits) /* did we get enough bits? */
+ r += p[i + 1] << nbits_in_r; /* prepend bits from higher word */
+ return r & (((mp_limb_t ) 1 << nbits) - 1);
+ }
+}
+
+#undef HAVE_NATIVE_mpn_addmul_2
+
+#ifndef HAVE_NATIVE_mpn_addmul_2
+#define REDC_2_THRESHOLD MP_SIZE_T_MAX
+#endif
+
+#ifndef REDC_2_THRESHOLD
+#define REDC_2_THRESHOLD 4
+#endif
+
+static void mpn_redc_n () {ASSERT_ALWAYS(0);}
+
+static inline int
+win_size (unsigned long eb)
+{
+ int k;
+ static unsigned long x[] = {1,4,27,100,325,1026,2905,7848,20457,51670,~0ul};
+ for (k = 0; eb > x[k]; k++)
+ ;
+ return k;
+}
+
+#define MPN_REDC_X(rp, tp, mp, n, mip) \
+ do { \
+ if (redc_x == 1) \
+ mpn_redc_1 (rp, tp, mp, n, mip[0]); \
+ else if (redc_x == 2) \
+ mpn_redc_2 (rp, tp, mp, n, mip); \
+ else \
+ mpn_redc_n (rp, tp, mp, n, mip); \
+ } while (0)
+
+ /* Convert U to REDC form, U_r = B^n * U mod M */
+static void
+redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n)
+{
+ mp_ptr tp, qp;
+ TMP_DECL;
+ TMP_MARK;
+
+ tp = TMP_ALLOC_LIMBS (un + n);
+ qp = TMP_ALLOC_LIMBS (un + 1); /* FIXME: Put at tp+? */
+
+ MPN_ZERO (tp, n);
+ MPN_COPY (tp + n, up, un);
+ mpn_tdiv_qr (qp, rp, 0L, tp, un + n, mp, n);
+ TMP_FREE;
+}
+
+/* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
+ Requires that mp[n-1..0] is odd.
+ Requires that ep[en-1..0] is > 1.
+ Uses scratch space tp[3n..0], i.e., 3n+1 words. */
+void
+mpn_powm_sec (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
+ mp_srcptr ep, mp_size_t en,
+ mp_srcptr mp, mp_size_t n, mp_ptr tp)
+{
+ mp_limb_t mip[2];
+ int cnt;
+ long ebi;
+ int windowsize, this_windowsize;
+ mp_limb_t expbits;
+ mp_ptr pp, this_pp, last_pp;
+ long i;
+ int redc_x;
+ TMP_DECL;
+
+ ASSERT (en > 1 || (en == 1 && ep[0] > 1));
+ ASSERT (n >= 1 && ((mp[0] & 1) != 0));
+
+ TMP_MARK;
+
+ count_leading_zeros (cnt, ep[en - 1]);
+ ebi = en * GMP_LIMB_BITS - cnt;
+
+ windowsize = win_size (ebi);
+
+ if (BELOW_THRESHOLD (n, REDC_2_THRESHOLD))
+ {
+ binvert_limb (mip[0], mp[0]);
+ mip[0] = -mip[0];
+ redc_x = 1;
+ }
+#if defined (HAVE_NATIVE_mpn_addmul_2)
+ else
+ {
+ mpn_binvert (mip, mp, 2, tp);
+ mip[0] = -mip[0]; mip[1] = ~mip[1];
+ redc_x = 2;
+ }
+#endif
+#if 0
+ mpn_binvert (mip, mp, n, tp);
+ redc_x = 0;
+#endif
+
+ pp = TMP_ALLOC_LIMBS (n << windowsize);
+
+ this_pp = pp;
+ this_pp[n] = 1;
+ redcify (this_pp, this_pp + n, 1, mp, n);
+ this_pp += n;
+ redcify (this_pp, bp, bn, mp, n);
+
+ /* Precompute powers of b and put them in the temporary area at pp. */
+ for (i = (1 << windowsize) - 2; i > 0; i--)
+ {
+ last_pp = this_pp;
+ this_pp += n;
+ mpn_mul_n (tp, last_pp, pp + n, n);
+ MPN_REDC_X (this_pp, tp, mp, n, mip);
+ }
+
+ expbits = getbits (ep, ebi, windowsize);
+ ebi -= windowsize;
+ if (ebi < 0)
+ ebi = 0;
+
+ MPN_COPY (rp, pp + n * expbits, n);
+
+ while (ebi != 0)
+ {
+ expbits = getbits (ep, ebi, windowsize);
+ ebi -= windowsize;
+ this_windowsize = windowsize;
+ if (ebi < 0)
+ {
+ this_windowsize += ebi;
+ ebi = 0;
+ }
+
+ do
+ {
+ mpn_sqr_n (tp, rp, n);
+ MPN_REDC_X (rp, tp, mp, n, mip);
+ this_windowsize--;
+ }
+ while (this_windowsize != 0);
+
+#if WANT_CACHE_SECURITY
+ mpn_tabselect (tp + 2*n, pp, n, 1 << windowsize, expbits);
+ mpn_mul_n (tp, rp, tp + 2*n, n);
+#else
+ mpn_mul_n (tp, rp, pp + n * expbits, n);
+#endif
+ MPN_REDC_X (rp, tp, mp, n, mip);
+ }
+
+ MPN_COPY (tp, rp, n);
+ MPN_ZERO (tp + n, n);
+ MPN_REDC_X (rp, tp, mp, n, mip);
+ if (mpn_cmp (rp, mp, n) >= 0)
+ mpn_sub_n (rp, rp, mp, n);
+ TMP_FREE;
+}
+
+#if ! HAVE_NATIVE_mpn_tabselect
+/* Select entry `which' from table `tab', which has nents entries, each `n'
+ limbs. Store the selected entry at rp. Reads entire table to avoid
+ sideband information leaks. O(n*nents). */
+
+void
+mpn_tabselect (volatile mp_limb_t *rp, volatile mp_limb_t *tab, mp_size_t n,
+ mp_size_t nents, mp_size_t which)
+{
+ mp_size_t k, i;
+ mp_limb_t mask;
+ volatile mp_limb_t *tp;
+
+ for (k = 0; k < nents; k++)
+ {
+ mask = -(mp_limb_t) (which == k);
+ tp = tab + n * k;
+ for (i = 0; i < n; i++)
+ {
+ rp[i] = (rp[i] & ~mask) | (tp[i] & mask);
+ }
+ }
+}
+#endif
diff --git a/gmp/mpn/generic/pre_divrem_1.c b/gmp/mpn/generic/pre_divrem_1.c
index 8027f0216e..6badf63192 100644
--- a/gmp/mpn/generic/pre_divrem_1.c
+++ b/gmp/mpn/generic/pre_divrem_1.c
@@ -4,33 +4,22 @@
CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
FUTURE GNU MP RELEASES.
-Copyright 2000-2003 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
@@ -43,8 +32,8 @@ see https://www.gnu.org/licenses/. */
/* Same test here for skipping one divide step as in mpn_divrem_1.
The main reason for a separate shift==0 case is that not all CPUs give
- zero for "n0 >> GMP_LIMB_BITS" which would arise in the general case
- code used on shift==0. shift==0 is also reasonably common in mp_bases
+ zero for "n0 >> BITS_PER_MP_LIMB" which would arise in the general case
+ code used on shift==0. shift==0 is also reasonably common in __mp_bases
big_base, for instance base==10 on a 64-bit limb.
Under shift!=0 it would be possible to call mpn_lshift to adjust the
@@ -117,14 +106,14 @@ mpn_preinv_divrem_1 (mp_ptr qp, mp_size_t xsize,
}
n1 = ap[size-1];
- r |= n1 >> (GMP_LIMB_BITS - shift);
+ r |= n1 >> (BITS_PER_MP_LIMB - shift);
for (i = size-2; i >= 0; i--)
{
ASSERT (r < d);
n0 = ap[i];
udiv_qrnnd_preinv (*qp, r, r,
- ((n1 << shift) | (n0 >> (GMP_LIMB_BITS - shift))),
+ ((n1 << shift) | (n0 >> (BITS_PER_MP_LIMB - shift))),
d, dinv);
qp--;
n1 = n0;
diff --git a/gmp/mpn/generic/pre_mod_1.c b/gmp/mpn/generic/pre_mod_1.c
index cb38f4a48f..961733ba34 100644
--- a/gmp/mpn/generic/pre_mod_1.c
+++ b/gmp/mpn/generic/pre_mod_1.c
@@ -2,34 +2,23 @@
DINV should be 2^(2*GMP_LIMB_BITS) / D - 2^GMP_LIMB_BITS.
Return the single-limb remainder.
-Copyright 1991, 1993, 1994, 2000-2002, 2004, 2005 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2004, 2005 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+The GNU MP Library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License along
+with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
@@ -45,6 +34,7 @@ mpn_preinv_mod_1 (mp_srcptr up, mp_size_t un, mp_limb_t d, mp_limb_t dinv)
{
mp_size_t i;
mp_limb_t n0, r;
+ mp_limb_t dummy;
ASSERT (un >= 1);
ASSERT (d & GMP_LIMB_HIGHBIT);
@@ -56,7 +46,7 @@ mpn_preinv_mod_1 (mp_srcptr up, mp_size_t un, mp_limb_t d, mp_limb_t dinv)
for (i = un - 2; i >= 0; i--)
{
n0 = up[i];
- udiv_rnnd_preinv (r, r, n0, d, dinv);
+ udiv_qrnnd_preinv (dummy, r, r, n0, d, dinv);
}
return r;
}
diff --git a/gmp/mpn/generic/random.c b/gmp/mpn/generic/random.c
index 5489becf4d..c0b85ea075 100644
--- a/gmp/mpn/generic/random.c
+++ b/gmp/mpn/generic/random.c
@@ -5,28 +5,17 @@ Copyright 2001, 2002 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
diff --git a/gmp/mpn/generic/random2.c b/gmp/mpn/generic/random2.c
index 980b15367f..e29238c514 100644
--- a/gmp/mpn/generic/random2.c
+++ b/gmp/mpn/generic/random2.c
@@ -1,38 +1,28 @@
/* mpn_random2 -- Generate random numbers with relatively long strings
of ones and zeroes. Suitable for border testing.
-Copyright 1992-1994, 1996, 2000-2002, 2004, 2012 Free Software Foundation, Inc.
+Copyright 1992, 1993, 1994, 1996, 2000, 2001, 2002, 2004 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
-static void gmp_rrandomb (mp_ptr, gmp_randstate_t, mp_bitcnt_t);
+static void gmp_rrandomb __GMP_PROTO ((mp_ptr, gmp_randstate_t, unsigned long int));
/* Ask _gmp_rand for 32 bits per call unless that's more than a limb can hold.
Thus, we get the same random number sequence in the common cases.
@@ -64,15 +54,15 @@ mpn_random2 (mp_ptr rp, mp_size_t n)
}
static void
-gmp_rrandomb (mp_ptr rp, gmp_randstate_t rstate, mp_bitcnt_t nbits)
+gmp_rrandomb (mp_ptr rp, gmp_randstate_t rstate, unsigned long int nbits)
{
- mp_bitcnt_t bi;
+ unsigned long int bi;
mp_limb_t ranm; /* buffer for random bits */
unsigned cap_chunksize, chunksize;
mp_size_t i;
/* Set entire result to 111..1 */
- i = BITS_TO_LIMBS (nbits) - 1;
+ i = (nbits + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS - 1;
rp[i] = GMP_NUMB_MAX >> (GMP_NUMB_BITS - (nbits % GMP_NUMB_BITS)) % GMP_NUMB_BITS;
for (i = i - 1; i >= 0; i--)
rp[i] = GMP_NUMB_MAX;
diff --git a/gmp/mpn/generic/redc_1.c b/gmp/mpn/generic/redc_1.c
index 0d33421f63..47bee8220b 100644
--- a/gmp/mpn/generic/redc_1.c
+++ b/gmp/mpn/generic/redc_1.c
@@ -1,57 +1,43 @@
-/* mpn_redc_1. Set rp[] <- up[]/R^n mod mp[]. Clobber up[].
+/* mpn_redc_1. Set cp[] <- up[]/R^n mod mp[]. Clobber up[].
mp[] is n limbs; up[] is 2n limbs.
THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
-Copyright (C) 2000-2002, 2004, 2008, 2009, 2012 Free Software Foundation, Inc.
+Copyright (C) 2000, 2001, 2002, 2004, 2008 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
-mp_limb_t
+void
mpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
{
mp_size_t j;
mp_limb_t cy;
- ASSERT (n > 0);
ASSERT_MPN (up, 2*n);
for (j = n - 1; j >= 0; j--)
{
- cy = mpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK);
- ASSERT (up[0] == 0);
- up[0] = cy;
+ up[0] = mpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK);
up++;
}
-
cy = mpn_add_n (rp, up, up - n, n);
- return cy;
+ if (cy != 0)
+ mpn_sub_n (rp, rp, mp, n);
}
diff --git a/gmp/mpn/generic/redc_2.c b/gmp/mpn/generic/redc_2.c
index 07d90fa20d..0efbd9d4c7 100644
--- a/gmp/mpn/generic/redc_2.c
+++ b/gmp/mpn/generic/redc_2.c
@@ -1,36 +1,25 @@
-/* mpn_redc_2. Set rp[] <- up[]/R^n mod mp[]. Clobber up[].
+/* mpn_redc_2. Set cp[] <- up[]/R^n mod mp[]. Clobber up[].
mp[] is n limbs; up[] is 2n limbs.
THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
-Copyright (C) 2000-2002, 2004, 2008, 2012 Free Software Foundation, Inc.
+Copyright (C) 2000, 2001, 2002, 2004, 2008 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
@@ -44,8 +33,7 @@ you lose
/* For testing purposes, define our own mpn_addmul_2 if there is none already
available. */
#ifndef HAVE_NATIVE_mpn_addmul_2
-#undef mpn_addmul_2
-static mp_limb_t
+mp_limb_t
mpn_addmul_2 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_srcptr vp)
{
rp[n] = mpn_addmul_1 (rp, up, n, vp[0]);
@@ -53,7 +41,7 @@ mpn_addmul_2 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_srcptr vp)
}
#endif
-#if defined (__GNUC__) && defined (__ia64) && W_TYPE_SIZE == 64
+#if defined (__ia64) && W_TYPE_SIZE == 64
#define umul2low(ph, pl, uh, ul, vh, vl) \
do { \
mp_limb_t _ph, _pl; \
@@ -78,7 +66,7 @@ mpn_addmul_2 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_srcptr vp)
} while (0)
#endif
-mp_limb_t
+void
mpn_redc_2 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_srcptr mip)
{
mp_limb_t q[2];
@@ -86,7 +74,6 @@ mpn_redc_2 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_srcptr mip)
mp_limb_t upn;
mp_limb_t cy;
- ASSERT (n > 0);
ASSERT_MPN (up, 2*n);
if ((n & 1) != 0)
@@ -104,7 +91,7 @@ mpn_redc_2 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_srcptr mip)
up[n] = upn;
up += 2;
}
-
cy = mpn_add_n (rp, up, up - n, n);
- return cy;
+ if (cy != 0)
+ mpn_sub_n (rp, rp, mp, n);
}
diff --git a/gmp/mpn/generic/redc_n.c b/gmp/mpn/generic/redc_n.c
deleted file mode 100644
index c3d0cfe7fa..0000000000
--- a/gmp/mpn/generic/redc_n.c
+++ /dev/null
@@ -1,81 +0,0 @@
-/* mpn_redc_n. Set rp[] <- up[]/R^n mod mp[]. Clobber up[].
- mp[] is n limbs; up[] is 2n limbs, the inverse ip[] is n limbs.
-
- THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
-
-Copyright 2009, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/*
- TODO
-
- * We assume mpn_mulmod_bnm1 is always faster than plain mpn_mul_n (or a
- future mpn_mulhi) for the range we will be called. Follow up that
- assumption.
-
- * Decrease scratch usage.
-
- * Consider removing the residue canonicalisation.
-*/
-
-void
-mpn_redc_n (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_srcptr ip)
-{
- mp_ptr xp, yp, scratch;
- mp_limb_t cy;
- mp_size_t rn;
- TMP_DECL;
- TMP_MARK;
-
- ASSERT (n > 8);
-
- rn = mpn_mulmod_bnm1_next_size (n);
-
- scratch = TMP_ALLOC_LIMBS (n + rn + mpn_mulmod_bnm1_itch (rn, n, n));
-
- xp = scratch;
- mpn_mullo_n (xp, up, ip, n);
-
- yp = scratch + n;
- mpn_mulmod_bnm1 (yp, rn, xp, n, mp, n, scratch + n + rn);
-
- ASSERT_ALWAYS (2 * n > rn); /* could handle this */
-
- cy = mpn_sub_n (yp + rn, yp, up, 2*n - rn); /* undo wrap around */
- MPN_DECR_U (yp + 2*n - rn, rn, cy);
-
- cy = mpn_sub_n (rp, up + n, yp + n, n);
- if (cy != 0)
- mpn_add_n (rp, rp, mp, n);
-
- TMP_FREE;
-}
diff --git a/gmp/mpn/generic/remove.c b/gmp/mpn/generic/remove.c
deleted file mode 100644
index ef1a06ea14..0000000000
--- a/gmp/mpn/generic/remove.c
+++ /dev/null
@@ -1,172 +0,0 @@
-/* mpn_remove -- divide out all multiples of odd mpn number from another mpn
- number.
-
- Contributed to the GNU project by Torbjorn Granlund.
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-
-Copyright 2009, 2012, 2013 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#if GMP_LIMB_BITS > 50
-#define LOG 50
-#else
-#define LOG GMP_LIMB_BITS
-#endif
-
-
-/* Input: U = {up,un}, V = {vp,vn} must be odd, cap
- Ouput W = {wp,*wn} allocation need is exactly *wn
-
- Set W = U / V^k, where k is the largest integer <= cap such that the
- division yields an integer.
-
- FIXME: We currently allow any operand overlap. This is quite non mpn-ish
- and might be changed, since it cost significant temporary space.
- * If we require W to have space for un + 1 limbs, we could save qp or qp2
- (but we will still need to copy things into wp 50% of the time).
- * If we allow ourselves to clobber U, we could save the other of qp and qp2,
- and the initial COPY (but also here we would need un + 1 limbs).
-*/
-
-/* FIXME: We need to wrap mpn_bdiv_qr due to the itch interface. This need
- indicates a flaw in the current itch mechanism: Which operands not greater
- than un,un will incur the worst itch? We need a parallel foo_maxitch set
- of functions. */
-static void
-mpn_bdiv_qr_wrap (mp_ptr qp, mp_ptr rp,
- mp_srcptr np, mp_size_t nn,
- mp_srcptr dp, mp_size_t dn)
-{
- mp_ptr scratch_out;
- TMP_DECL;
-
- TMP_MARK;
- scratch_out = TMP_ALLOC_LIMBS (mpn_bdiv_qr_itch (nn, dn));
- mpn_bdiv_qr (qp, rp, np, nn, dp, dn, scratch_out);
-
- TMP_FREE;
-}
-
-mp_bitcnt_t
-mpn_remove (mp_ptr wp, mp_size_t *wn,
- mp_ptr up, mp_size_t un, mp_ptr vp, mp_size_t vn,
- mp_bitcnt_t cap)
-{
- mp_ptr pwpsp[LOG];
- mp_size_t pwpsn[LOG];
- mp_size_t npowers;
- mp_ptr tp, qp, np, pp, qp2;
- mp_size_t pn, nn, qn, i;
- mp_bitcnt_t pwr;
- TMP_DECL;
-
- ASSERT (un > 0);
- ASSERT (vn > 0);
- ASSERT (vp[0] % 2 != 0); /* 2-adic division wants odd numbers */
- ASSERT (vn > 1 || vp[0] > 1); /* else we would loop indefinitely */
-
- TMP_MARK;
-
- tp = TMP_ALLOC_LIMBS ((un + 1 + vn) / 2); /* remainder */
- qp = TMP_ALLOC_LIMBS (un + 1); /* quotient, alternating */
- qp2 = TMP_ALLOC_LIMBS (un + 1); /* quotient, alternating */
- pp = vp;
- pn = vn;
-
- MPN_COPY (qp, up, un);
- qn = un;
-
- npowers = 0;
- while (qn >= pn)
- {
- qp[qn] = 0;
- mpn_bdiv_qr_wrap (qp2, tp, qp, qn + 1, pp, pn);
- if (!mpn_zero_p (tp, pn))
- break; /* could not divide by V^npowers */
-
- MP_PTR_SWAP (qp, qp2);
- qn = qn - pn;
- qn += qp[qn] != 0;
-
- pwpsp[npowers] = pp;
- pwpsn[npowers] = pn;
- npowers++;
-
- if (((mp_bitcnt_t) 2 << npowers) - 1 > cap)
- break;
-
- nn = 2 * pn - 1; /* next power will be at least this large */
- if (nn > qn)
- break; /* next power would be overlarge */
-
- if (npowers == 1) /* Alloc once, but only if it's needed */
- np = TMP_ALLOC_LIMBS (qn + LOG); /* powers of V */
- else
- np += pn;
-
- mpn_sqr (np, pp, pn);
- pn = nn + (np[nn] != 0);
- pp = np;
- }
-
- pwr = ((mp_bitcnt_t) 1 << npowers) - 1;
-
- for (i = npowers - 1; i >= 0; i--)
- {
- pn = pwpsn[i];
- if (qn < pn)
- continue;
-
- if (pwr + ((mp_bitcnt_t) 1 << i) > cap)
- continue; /* V^i would bring us past cap */
-
- qp[qn] = 0;
- mpn_bdiv_qr_wrap (qp2, tp, qp, qn + 1, pwpsp[i], pn);
- if (!mpn_zero_p (tp, pn))
- continue; /* could not divide by V^i */
-
- MP_PTR_SWAP (qp, qp2);
- qn = qn - pn;
- qn += qp[qn] != 0;
-
- pwr += (mp_bitcnt_t) 1 << i;
- }
-
- MPN_COPY (wp, qp, qn);
- *wn = qn;
-
- TMP_FREE;
-
- return pwr;
-}
diff --git a/gmp/mpn/generic/rootrem.c b/gmp/mpn/generic/rootrem.c
index 2edc74baa3..657e543ab3 100644
--- a/gmp/mpn/generic/rootrem.c
+++ b/gmp/mpn/generic/rootrem.c
@@ -8,37 +8,29 @@
ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT'S ALMOST
GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2002, 2005, 2009-2012 Free Software Foundation, Inc.
+Copyright 2002, 2005, 2009 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
/* FIXME:
- This implementation is not optimal when remp == NULL, since the complexity
- is M(n), whereas it should be M(n/k) on average.
+ (a) Once there is a native mpn_tdiv_q function in GMP (division without
+ remainder), replace the quick-and-dirty implementation below by it.
+ (b) The implementation below is not optimal when remp == NULL, since the
+ complexity is M(n) where n is the input size, whereas it should be
+ only M(n/k) on average.
*/
#include <stdio.h> /* for NULL */
@@ -49,6 +41,8 @@ see https://www.gnu.org/licenses/. */
static mp_size_t mpn_rootrem_internal (mp_ptr, mp_ptr, mp_srcptr, mp_size_t,
mp_limb_t, int);
+static void mpn_tdiv_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t,
+ mp_srcptr, mp_size_t);
#define MPN_RSHIFT(cy,rp,up,un,cnt) \
do { \
@@ -90,15 +84,14 @@ mp_size_t
mpn_rootrem (mp_ptr rootp, mp_ptr remp,
mp_srcptr up, mp_size_t un, mp_limb_t k)
{
- mp_size_t m;
ASSERT (un > 0);
ASSERT (up[un - 1] != 0);
ASSERT (k > 1);
- m = (un - 1) / k; /* ceil(un/k) - 1 */
- if (remp == NULL && m > 2)
- /* Pad {up,un} with k zero limbs. This will produce an approximate root
- with one more limb, allowing us to compute the exact integral result. */
+ if ((remp == NULL) && (un / k > 2))
+ /* call mpn_rootrem recursively, padding {up,un} with k zero limbs,
+ which will produce an approximate root with one more limb,
+ so that in most cases we can conclude. */
{
mp_ptr sp, wp;
mp_size_t rn, sn, wn;
@@ -106,21 +99,21 @@ mpn_rootrem (mp_ptr rootp, mp_ptr remp,
TMP_MARK;
wn = un + k;
wp = TMP_ALLOC_LIMBS (wn); /* will contain the padded input */
- sn = m + 2; /* ceil(un/k) + 1 */
+ sn = (un - 1) / k + 2; /* ceil(un/k) + 1 */
sp = TMP_ALLOC_LIMBS (sn); /* approximate root of padded input */
MPN_COPY (wp + k, up, un);
MPN_ZERO (wp, k);
rn = mpn_rootrem_internal (sp, NULL, wp, wn, k, 1);
- /* The approximate root S = {sp,sn} is either the correct root of
- {sp,sn}, or 1 too large. Thus unless the least significant limb of
- S is 0 or 1, we can deduce the root of {up,un} is S truncated by one
- limb. (In case sp[0]=1, we can deduce the root, but not decide
+ /* the approximate root S = {sp,sn} is either the correct root of
+ {sp,sn}, or one too large. Thus unless the least significant limb
+ of S is 0 or 1, we can deduce the root of {up,un} is S truncated by
+ one limb. (In case sp[0]=1, we can deduce the root, but not decide
whether it is exact or not.) */
MPN_COPY (rootp, sp + 1, sn - 1);
TMP_FREE;
return rn;
}
- else
+ else /* remp <> NULL */
{
return mpn_rootrem_internal (rootp, remp, up, un, k, 0);
}
@@ -131,11 +124,12 @@ static mp_size_t
mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
mp_limb_t k, int approx)
{
- mp_ptr qp, rp, sp, wp, scratch;
+ mp_ptr qp, rp, sp, wp;
mp_size_t qn, rn, sn, wn, nl, bn;
mp_limb_t save, save2, cy;
unsigned long int unb; /* number of significant bits of {up,un} */
unsigned long int xnb; /* number of significant bits of the result */
+ unsigned int cnt;
unsigned long b, kk;
unsigned long sizes[GMP_NUMB_BITS + 1];
int ni, i;
@@ -145,19 +139,25 @@ mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
TMP_MARK;
+ /* qp and wp need enough space to store S'^k where S' is an approximate
+ root. Since S' can be as large as S+2, the worst case is when S=2 and
+ S'=4. But then since we know the number of bits of S in advance, S'
+ can only be 3 at most. Similarly for S=4, then S' can be 6 at most.
+ So the worst case is S'/S=3/2, thus S'^k <= (3/2)^k * S^k. Since S^k
+ fits in un limbs, the number of extra limbs needed is bounded by
+ ceil(k*log2(3/2)/GMP_NUMB_BITS). */
+#define EXTRA 2 + (mp_size_t) (0.585 * (double) k / (double) GMP_NUMB_BITS)
+ qp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain quotient and remainder
+ of R/(k*S^(k-1)), and S^k */
if (remp == NULL)
- {
- rp = TMP_ALLOC_LIMBS (un + 1); /* will contain the remainder */
- scratch = rp; /* used by mpn_div_q */
- }
+ rp = TMP_ALLOC_LIMBS (un); /* will contain the remainder */
else
- {
- scratch = TMP_ALLOC_LIMBS (un + 1); /* used by mpn_div_q */
- rp = remp;
- }
+ rp = remp;
sp = rootp;
-
- MPN_SIZEINBASE_2EXP(unb, up, un, 1);
+ wp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain S^(k-1), k*S^(k-1),
+ and temporary for mpn_pow_1 */
+ count_leading_zeros (cnt, up[un - 1]);
+ unb = un * GMP_NUMB_BITS - cnt + GMP_NAIL_BITS;
/* unb is the number of bits of the input U */
xnb = (unb - 1) / k + 1; /* ceil (unb / k) */
@@ -216,19 +216,6 @@ mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
Newton iteration will first compute sizes[ni-1] extra bits,
then sizes[ni-2], ..., then sizes[0] = b. */
- /* qp and wp need enough space to store S'^k where S' is an approximate
- root. Since S' can be as large as S+2, the worst case is when S=2 and
- S'=4. But then since we know the number of bits of S in advance, S'
- can only be 3 at most. Similarly for S=4, then S' can be 6 at most.
- So the worst case is S'/S=3/2, thus S'^k <= (3/2)^k * S^k. Since S^k
- fits in un limbs, the number of extra limbs needed is bounded by
- ceil(k*log2(3/2)/GMP_NUMB_BITS). */
-#define EXTRA 2 + (mp_size_t) (0.585 * (double) k / (double) GMP_NUMB_BITS)
- qp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain quotient and remainder
- of R/(k*S^(k-1)), and S^k */
- wp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain S^(k-1), k*S^(k-1),
- and temporary for mpn_pow_1 */
-
wp[0] = 1; /* {sp,sn}^(k-1) = 1 */
wn = 1;
for (i = ni; i != 0; i--)
@@ -304,8 +291,13 @@ mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
}
else
{
+ mp_ptr tp;
qn = rn - wn; /* expected quotient size */
- mpn_div_q (qp, rp, rn, wp, wn, scratch);
+ /* tp must have space for wn limbs.
+ The quotient needs rn-wn+1 limbs, thus quotient+remainder
+ need altogether rn+1 limbs. */
+ tp = qp + qn + 1; /* put remainder in Q buffer */
+ mpn_tdiv_q (qp, tp, 0, rp, rn, wp, wn);
qn += qp[qn] != 0;
}
@@ -400,7 +392,7 @@ mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
ASSERT_ALWAYS (rn >= qn);
/* R = R - Q = floor(U/2^kk) - S^k */
- if (i > 1 || approx == 0)
+ if ((i > 1) || (approx == 0))
{
mpn_sub (rp, rp, rn, qp, qn);
MPN_NORMALIZE (rp, rn);
@@ -413,3 +405,47 @@ mpn_rootrem_internal (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un,
TMP_FREE;
return rn;
}
+
+/* return the quotient Q = {np, nn} divided by {dp, dn} only */
+static void
+mpn_tdiv_q (mp_ptr qp, mp_ptr rp, mp_size_t qxn, mp_srcptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn)
+{
+ mp_size_t qn = nn - dn; /* expected quotient size is qn+1 */
+ mp_size_t cut;
+
+ ASSERT_ALWAYS (qxn == 0);
+ if (dn <= qn + 3)
+ {
+ mpn_tdiv_qr (qp, rp, 0, np, nn, dp, dn);
+ }
+ else
+ {
+ mp_ptr tp;
+ TMP_DECL;
+ TMP_MARK;
+ tp = TMP_ALLOC_LIMBS (qn + 2);
+ cut = dn - (qn + 3);
+ /* perform a first division with divisor cut to dn-cut=qn+3 limbs
+ and dividend to nn-(cut-1) limbs, i.e. the quotient will be one
+ limb more than the final quotient.
+ The quotient will have qn+2 < dn-cut limbs,
+ and the remainder dn-cut = qn+3 limbs. */
+ mpn_tdiv_qr (tp, rp, 0, np + cut - 1, nn - cut + 1, dp + cut, dn - cut);
+ /* let Q' be the quotient of B * {np, nn} by {dp, dn} [qn+2 limbs]
+ and T be the approximation of Q' computed above, where
+ B = 2^GMP_NUMB_BITS.
+ We have Q' <= T <= Q'+1, and since floor(Q'/B) = Q, we have
+ Q = floor(T/B), unless the last limb of T only consists of zeroes. */
+ if (tp[0] != 0)
+ {
+ /* simply truncate one limb of T */
+ MPN_COPY (qp, tp + 1, qn + 1);
+ }
+ else /* too bad: perform the expensive division */
+ {
+ mpn_tdiv_qr (qp, rp, 0, np, nn, dp, dn);
+ }
+ TMP_FREE;
+ }
+}
diff --git a/gmp/mpn/generic/rshift.c b/gmp/mpn/generic/rshift.c
index ec61f2f7e2..62256656de 100644
--- a/gmp/mpn/generic/rshift.c
+++ b/gmp/mpn/generic/rshift.c
@@ -1,32 +1,22 @@
/* mpn_rshift -- Shift right low level.
-Copyright 1991, 1993, 1994, 1996, 2000-2002 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002 Free Software Foundation,
+Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
diff --git a/gmp/mpn/generic/sb_bdiv_q.c b/gmp/mpn/generic/sb_bdiv_q.c
new file mode 100644
index 0000000000..474c804d48
--- /dev/null
+++ b/gmp/mpn/generic/sb_bdiv_q.c
@@ -0,0 +1,91 @@
+/* mpn_sb_bdiv_q -- schoolbook Hensel division with precomputed inverse,
+ returning quotient only.
+
+ Contributed to the GNU project by Niels Möller.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.
+ IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
+ ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2005, 2006 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Computes Q = N / D mod B^nn, destroys N.
+ Clobbers N.
+
+ D must be odd. dinv is (-D)^-1 mod B.
+
+
+ The straightforward way to compute Q is to cancel one limb at a time, using
+
+ qp[i] = D^{-1} * np[i] (mod B)
+ N -= B^i * qp[i] * D
+
+ But we prefer addition to subtraction, since mpn_addmul_1 is often faster
+ than mpn_submul_1. Q = - N / D can be computed by iterating
+
+ qp[i] = (-D)^{-1} * np[i] (mod B)
+ N += B^i * qp[i] * D
+
+ And then we flip the sign, -Q = (not Q) + 1.
+*/
+
+void
+mpn_sb_bdiv_q (mp_ptr qp,
+ mp_ptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn,
+ mp_limb_t dinv)
+{
+ mp_size_t i;
+ mp_limb_t qh;
+
+ ASSERT (nn > 0);
+ ASSERT (dn > 0);
+ ASSERT (nn >= dn);
+ ASSERT (dp[0] & 1);
+
+ for (i = 0; i < nn - dn; i++)
+ {
+ mp_limb_t cy;
+ mp_limb_t q;
+
+ q = dinv * np[i];
+ qp[i] = ~q;
+ cy = mpn_addmul_1 (np + i, dp, dn, q);
+ mpn_add_1 (np + i + dn, np + i + dn, nn - i - dn, cy);
+ ASSERT (np[i] == 0);
+ }
+
+ for (; i < nn - 1; i++)
+ {
+ mp_limb_t q;
+
+ q = dinv * np[i];
+ qp[i] = ~q;
+ mpn_addmul_1 (np + i, dp, nn - i, q);
+
+ ASSERT (np[i] == 0);
+ }
+
+ /* Final limb */
+ qp[nn - 1] = ~(dinv * np[nn - 1]);
+ qh = mpn_add_1 (qp, qp, nn, 1); /* FIXME: can we get carry? */
+}
diff --git a/gmp/mpn/generic/sbpi1_bdiv_qr.c b/gmp/mpn/generic/sb_bdiv_qr.c
index 0e56f58148..d1cd0dee32 100644
--- a/gmp/mpn/generic/sbpi1_bdiv_qr.c
+++ b/gmp/mpn/generic/sb_bdiv_qr.c
@@ -1,39 +1,27 @@
-/* mpn_sbpi1_bdiv_qr -- schoolbook Hensel division with precomputed inverse,
+/* mpn_sb_bdiv_qr -- schoolbook Hensel division with precomputed inverse,
returning quotient and remainder.
- Contributed to the GNU project by Niels Möller.
+ Contributed to the GNU project by Niels Möller.
THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.
- IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
- ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+ IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
-Copyright 2006, 2009, 2011, 2012 Free Software Foundation, Inc.
+Copyright 2006 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
@@ -52,20 +40,19 @@ see https://www.gnu.org/licenses/. */
D must be odd. dinv is (-D)^-1 mod B. */
mp_limb_t
-mpn_sbpi1_bdiv_qr (mp_ptr qp,
- mp_ptr np, mp_size_t nn,
- mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)
+mpn_sb_bdiv_qr (mp_ptr qp,
+ mp_ptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn, mp_limb_t dinv)
{
mp_size_t qn;
mp_size_t i;
mp_limb_t rh;
mp_limb_t ql;
+ ASSERT (nn > 0);
ASSERT (dn > 0);
ASSERT (nn > dn);
- ASSERT ((dp[0] & 1) != 0);
- /* FIXME: Add ASSERTs for allowable overlapping; i.e., that qp = np is OK,
- but some over N/Q overlaps will not work. */
+ ASSERT (dp[0] & 1);
qn = nn - dn;
@@ -80,8 +67,9 @@ mpn_sbpi1_bdiv_qr (mp_ptr qp,
mp_limb_t q;
q = dinv * np[i];
- np[i] = mpn_addmul_1 (np + i, dp, dn, q);
qp[i] = ~q;
+
+ np[i] = mpn_addmul_1 (np + i, dp, dn, q);
}
rh += mpn_add (np + dn, np + dn, qn, np, dn);
ql = mpn_add_1 (qp, qp, dn, ql);
@@ -95,8 +83,9 @@ mpn_sbpi1_bdiv_qr (mp_ptr qp,
mp_limb_t q;
q = dinv * np[i];
- np[i] = mpn_addmul_1 (np + i, dp, dn, q);
qp[i] = ~q;
+
+ np[i] = mpn_addmul_1 (np + i, dp, dn, q);
}
rh += mpn_add_n (np + dn, np + dn, np, qn);
diff --git a/gmp/mpn/generic/sb_div_q.c b/gmp/mpn/generic/sb_div_q.c
new file mode 100644
index 0000000000..609c4ae7f2
--- /dev/null
+++ b/gmp/mpn/generic/sb_div_q.c
@@ -0,0 +1,240 @@
+/* mpn_sb_div_q -- schoolbook division with 2-limb sloppy non-greater
+ precomputed inverse, returning an accurate quotient.
+
+ Contributed to the GNU project by Torbjörn Granlund.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE. IT IS
+ ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
+ ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
+ RELEASE.
+
+Copyright 2006, 2007 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/*
+ CAVEATS:
+ 1. Should it demand normalized operands like now, or normalize on-the-fly?
+ 2. Overwrites {np,nn}.
+ 3. Uses mpn_submul_1. It would be nice to somehow make it use mpn_addmul_1
+ instead. (That would open for mpn_addmul_2 straightforwardly.)
+*/
+
+mp_limb_t
+mpn_sb_div_q (mp_ptr qp,
+ mp_ptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn,
+ mp_srcptr dip)
+{
+ mp_limb_t q, q10, q01a, q00a, q01b, q00b;
+ mp_limb_t cy;
+ mp_size_t i;
+ mp_limb_t qh;
+ mp_limb_t di1, di0;
+ mp_size_t qn;
+
+ mp_size_t dn_orig = dn;
+ mp_srcptr dp_orig = dp;
+ mp_ptr np_orig = np;
+
+ ASSERT (dn > 0);
+ ASSERT (nn >= dn);
+ ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
+ ASSERT (! MPN_OVERLAP_P (np, nn, dp, dn));
+ ASSERT (! MPN_OVERLAP_P (qp, nn-dn, dp, dn));
+ ASSERT (! MPN_OVERLAP_P (qp, nn-dn, np, nn) || qp+dn >= np);
+ ASSERT_MPN (np, nn);
+ ASSERT_MPN (dp, dn);
+
+ np += nn;
+ qn = nn - dn;
+ if (qn + 1 < dn)
+ {
+ dp += dn - (qn + 1);
+ dn = qn + 1;
+ }
+
+ qh = mpn_cmp (np - dn, dp, dn) >= 0;
+ if (qh != 0)
+ mpn_sub_n (np - dn, np - dn, dp, dn);
+
+ qp += qn;
+ di1 = dip[1]; di0 = dip[0];
+ for (i = qn; i >= dn; i--)
+ {
+ np--;
+ umul_ppmm (q, q10, np[0], di1);
+ umul_ppmm (q01a, q00a, np[-1], di1);
+ add_ssaaaa (q, q10, q, q10, np[0], q01a);
+ umul_ppmm (q01b, q00b, np[0], di0);
+ add_ssaaaa (q, q10, q, q10, 0, q01b);
+ add_ssaaaa (q, q10, q, q10, 0, np[-1]);
+
+ cy = mpn_submul_1 (np - dn, dp, dn, q);
+
+ if (UNLIKELY (np[0] > cy || mpn_cmp (np - dn, dp, dn) >= 0))
+ {
+ q = q + 1;
+ mpn_sub_n (np - dn, np - dn, dp, dn);
+ }
+
+ *--qp = q;
+ }
+
+ for (i = dn - 1; i > 0; i--)
+ {
+ np--;
+ umul_ppmm (q, q10, np[0], di1);
+ umul_ppmm (q01a, q00a, np[-1], di1);
+ add_ssaaaa (q, q10, q, q10, np[0], q01a);
+ umul_ppmm (q01b, q00b, np[0], di0);
+ add_ssaaaa (q, q10, q, q10, 0, q01b);
+ add_ssaaaa (q, q10, q, q10, 0, np[-1]);
+
+ cy = mpn_submul_1 (np - dn, dp, dn, q);
+
+ if (UNLIKELY (np[0] > cy || mpn_cmp (np - dn, dp, dn) >= 0))
+ {
+ q = q + 1;
+ if (q == 0)
+ q = GMP_NUMB_MAX;
+ else
+ mpn_sub_n (np - dn, np - dn, dp, dn);
+ }
+
+ *--qp = q;
+
+ /* Truncate operands. */
+ dn--;
+ dp++;
+
+ /* The partial remainder might be equal to the truncated divisor,
+ thus non-canonical. When that happens, the rest of the quotient
+ should be all ones. */
+ if (UNLIKELY (mpn_cmp (np - dn, dp, dn) == 0))
+ {
+ while (--i)
+ *--qp = GMP_NUMB_MAX;
+ break;
+ }
+ }
+
+ dn = dn_orig;
+ if (UNLIKELY (np[-1] < dn))
+ {
+ mp_limb_t q, x;
+
+ /* The quotient may be too large if the remainder is small. Recompute
+ for above ignored operand parts, until the remainder spills.
+
+ FIXME: The quality of this code isn't the same as the code above.
+ 1. We don't compute things in an optimal order, high-to-low, in order
+ to terminate as quickly as possible.
+ 2. We mess with pointers and sizes, adding and subtracting and
+ adjusting to get things right. It surely could be streamlined.
+ 3. The only termination criteria are that we determine that the
+ quotient needs to be adjusted, or that we have recomputed
+ everything. We should stop when the remainder is so large
+ that no additional subtracting could make it spill.
+ 4. If nothing else, we should not do two loops of submul_1 over the
+ data, instead handle both the triangularization and chopping at
+ once. */
+
+ x = np[-1];
+
+ if (dn > 2)
+ {
+ /* Compensate for triangularization. */
+ mp_limb_t y;
+
+ dp = dp_orig;
+ if (qn + 1 < dn)
+ {
+ dp += dn - (qn + 1);
+ dn = qn + 1;
+ }
+
+ y = np[-2];
+
+ for (i = dn - 3; i >= 0; i--)
+ {
+ q = qp[i];
+ cy = mpn_submul_1 (np - (dn - i), dp, dn - i - 2, q);
+
+ if (y < cy)
+ {
+ if (x == 0)
+ {
+ cy = mpn_sub_1 (qp, qp, qn, 1);
+ ASSERT_ALWAYS (cy == 0);
+ return qh - cy;
+ }
+ x--;
+ }
+ y -= cy;
+ }
+ np[-2] = y;
+ }
+
+ dn = dn_orig;
+ if (qn + 1 < dn)
+ {
+ /* Compensate for ignored dividend and divisor tails. */
+
+ if (qn == 0)
+ return qh;
+
+ dp = dp_orig;
+ np = np_orig;
+
+ if (qh != 0)
+ {
+ cy = mpn_sub_n (np + qn, np + qn, dp, dn - (qn + 1));
+ if (cy != 0)
+ {
+ if (x == 0)
+ {
+ cy = mpn_sub_1 (qp, qp, qn, 1);
+ return qh - cy;
+ }
+ x--;
+ }
+ }
+
+ for (i = dn - qn - 2; i >= 0; i--)
+ {
+ cy = mpn_submul_1 (np + i, qp, qn, dp[i]);
+ cy = mpn_sub_1 (np + qn + i, np + qn + i, dn - qn - i - 1, cy);
+ if (cy != 0)
+ {
+ if (x == 0)
+ {
+ cy = mpn_sub_1 (qp, qp, qn, 1);
+ ASSERT_ALWAYS (cy == 0);
+ return qh - cy;
+ }
+ x--;
+ }
+ }
+ }
+ }
+
+ return qh;
+}
diff --git a/gmp/mpn/generic/sb_div_qr.c b/gmp/mpn/generic/sb_div_qr.c
new file mode 100644
index 0000000000..40e4442e21
--- /dev/null
+++ b/gmp/mpn/generic/sb_div_qr.c
@@ -0,0 +1,91 @@
+/* mpn_sb_div_qr -- schoolbook division with 2-limb sloppy non-greater
+ precomputed inverse, returning quotient and remainder.
+
+ Contributed to the GNU project by Torbjörn Granlund.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE. IT IS
+ ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
+ ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
+ RELEASE.
+
+Copyright 2006, 2007 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/*
+ CAVEATS:
+ 1. Should it demand normalized operands like now, or normalize on-the-fly?
+ 2. Overwrites {np,nn} instead of writing remainder to a designated area.
+ 3. Uses mpn_submul_1. It would be nice to somehow make it use mpn_addmul_1
+ instead. (That would open for mpn_addmul_2 straightforwardly.)
+*/
+
+mp_limb_t
+mpn_sb_div_qr (mp_ptr qp,
+ mp_ptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn,
+ mp_srcptr dip)
+{
+ mp_limb_t q, q10, q01a, q00a, q01b, q00b;
+ mp_limb_t cy;
+ mp_size_t i;
+ mp_limb_t qh;
+ mp_limb_t di1, di0;
+
+ ASSERT (dn > 0);
+ ASSERT (nn >= dn);
+ ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
+ ASSERT (! MPN_OVERLAP_P (np, nn, dp, dn));
+ ASSERT (! MPN_OVERLAP_P (qp, nn-dn, dp, dn));
+ ASSERT (! MPN_OVERLAP_P (qp, nn-dn, np, nn) || qp+dn >= np);
+ ASSERT_MPN (np, nn);
+ ASSERT_MPN (dp, dn);
+
+ np += nn;
+
+ qh = mpn_cmp (np - dn, dp, dn) >= 0;
+ if (qh != 0)
+ mpn_sub_n (np - dn, np - dn, dp, dn);
+
+ qp += nn - dn;
+ di1 = dip[1]; di0 = dip[0];
+ for (i = nn - dn; i > 0; i--)
+ {
+ np--;
+ umul_ppmm (q, q10, np[0], di1);
+ umul_ppmm (q01a, q00a, np[-1], di1);
+ add_ssaaaa (q, q10, q, q10, np[0], q01a);
+ umul_ppmm (q01b, q00b, np[0], di0);
+ add_ssaaaa (q, q10, q, q10, 0, q01b);
+ add_ssaaaa (q, q10, q, q10, 0, np[-1]);
+
+ cy = mpn_submul_1 (np - dn, dp, dn, q);
+
+ if (UNLIKELY (np[0] > cy || mpn_cmp (np - dn, dp, dn) >= 0))
+ {
+ q = q + 1;
+ mpn_sub_n (np - dn, np - dn, dp, dn);
+ }
+
+ *--qp = q;
+ }
+
+ return qh;
+}
diff --git a/gmp/mpn/generic/sb_divappr_q.c b/gmp/mpn/generic/sb_divappr_q.c
new file mode 100644
index 0000000000..42a39be009
--- /dev/null
+++ b/gmp/mpn/generic/sb_divappr_q.c
@@ -0,0 +1,136 @@
+/* mpn_sb_divappr_q -- schoolbook division with 2-limb sloppy non-greater
+ precomputed inverse, returning approximate quotient.
+
+ Contributed to the GNU project by Torbjörn Granlund.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH A MUTABLE INTERFACE. IT IS
+ ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
+ ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP
+ RELEASE.
+
+Copyright 2006, 2007 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/*
+ CAVEATS:
+ 1. Should it demand normalized operands like now, or normalize on-the-fly?
+ 2. Overwrites {np,nn}.
+ 3. Uses mpn_submul_1. It would be nice to somehow make it use mpn_addmul_1
+ instead. (That would open for mpn_addmul_2 straightforwardly.)
+*/
+
+mp_limb_t
+mpn_sb_divappr_q (mp_ptr qp,
+ mp_ptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn,
+ mp_srcptr dip)
+{
+ mp_limb_t q, q10, q01a, q00a, q01b, q00b;
+ mp_limb_t cy;
+ mp_size_t i;
+ mp_limb_t qh;
+ mp_limb_t di1, di0;
+ mp_size_t qn;
+
+ ASSERT (dn > 0);
+ ASSERT (nn >= dn);
+ ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
+ ASSERT (! MPN_OVERLAP_P (np, nn, dp, dn));
+ ASSERT (! MPN_OVERLAP_P (qp, nn-dn, dp, dn));
+ ASSERT (! MPN_OVERLAP_P (qp, nn-dn, np, nn) || qp+dn >= np);
+ ASSERT_MPN (np, nn);
+ ASSERT_MPN (dp, dn);
+
+ np += nn;
+ qn = nn - dn;
+ if (qn + 1 < dn)
+ {
+ dp += dn - (qn + 1);
+ dn = qn + 1;
+ }
+
+ qh = mpn_cmp (np - dn, dp, dn) >= 0;
+ if (qh != 0)
+ mpn_sub_n (np - dn, np - dn, dp, dn);
+
+ qp += qn;
+ di1 = dip[1]; di0 = dip[0];
+ for (i = qn; i >= dn; i--)
+ {
+ np--;
+ umul_ppmm (q, q10, np[0], di1);
+ umul_ppmm (q01a, q00a, np[-1], di1);
+ add_ssaaaa (q, q10, q, q10, np[0], q01a);
+ umul_ppmm (q01b, q00b, np[0], di0);
+ add_ssaaaa (q, q10, q, q10, 0, q01b);
+ add_ssaaaa (q, q10, q, q10, 0, np[-1]);
+
+ cy = mpn_submul_1 (np - dn, dp, dn, q);
+
+ if (UNLIKELY (np[0] > cy || mpn_cmp (np - dn, dp, dn) >= 0))
+ {
+ q = q + 1;
+ mpn_sub_n (np - dn, np - dn, dp, dn);
+ }
+
+ *--qp = q;
+ }
+
+ for (i = dn - 1; i > 0; i--)
+ {
+ np--;
+ umul_ppmm (q, q10, np[0], di1);
+ umul_ppmm (q01a, q00a, np[-1], di1);
+ add_ssaaaa (q, q10, q, q10, np[0], q01a);
+ umul_ppmm (q01b, q00b, np[0], di0);
+ add_ssaaaa (q, q10, q, q10, 0, q01b);
+ add_ssaaaa (q, q10, q, q10, 0, np[-1]);
+
+ cy = mpn_submul_1 (np - dn, dp, dn, q);
+
+ if (UNLIKELY (np[0] > cy || mpn_cmp (np - dn, dp, dn) >= 0))
+ {
+ q = q + 1;
+ if (q == 0)
+ q = GMP_NUMB_MAX;
+ else
+ mpn_sub_n (np - dn, np - dn, dp, dn);
+ }
+
+ *--qp = q;
+
+ /* Truncate operands. */
+ dn--;
+ dp++;
+
+ /* The partial remainder might be equal to the truncated divisor,
+ thus non-canonical. When that happens, the rest of the quotient
+ should be all ones. */
+ if (UNLIKELY (mpn_cmp (np - dn, dp, dn) == 0))
+ {
+ while (--i)
+ *--qp = GMP_NUMB_MAX;
+ break;
+ }
+ }
+
+ return qh;
+}
diff --git a/gmp/mpn/generic/sb_divrem_mn.c b/gmp/mpn/generic/sb_divrem_mn.c
new file mode 100644
index 0000000000..06e2f4ca0d
--- /dev/null
+++ b/gmp/mpn/generic/sb_divrem_mn.c
@@ -0,0 +1,205 @@
+/* mpn_sb_divrem_mn -- Divide natural numbers, producing both remainder and
+ quotient.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE
+ INTERFACES. IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
+ IN FACT, IT IS ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A
+ FUTURE GNU MP RELEASE.
+
+
+Copyright 1993, 1994, 1995, 1996, 2000, 2001, 2002 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
+ meaning the quotient size where that should happen, the quotient size
+ being how many udiv divisions will be done.
+
+ The default is to use preinv always, CPUs where this doesn't suit have
+ tuned thresholds. Note in particular that preinv should certainly be
+ used if that's the only division available (USE_PREINV_ALWAYS). */
+
+#ifndef DIV_SB_PREINV_THRESHOLD
+#define DIV_SB_PREINV_THRESHOLD 0
+#endif
+
+
+/* Divide num (NP/NSIZE) by den (DP/DSIZE) and write
+ the NSIZE-DSIZE least significant quotient limbs at QP
+ and the DSIZE long remainder at NP.
+ Return the most significant limb of the quotient, this is always 0 or 1.
+
+ Preconditions:
+ 0. NSIZE >= DSIZE.
+ 1. The most significant bit of the divisor must be set.
+ 2. QP must either not overlap with the input operands at all, or
+ QP + DSIZE >= NP must hold true. (This means that it's
+ possible to put the quotient in the high part of NUM, right after the
+ remainder in NUM.
+ 3. NSIZE >= DSIZE.
+ 4. DSIZE > 2. */
+
+
+mp_limb_t
+mpn_sb_divrem_mn (mp_ptr qp,
+ mp_ptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn)
+{
+ mp_limb_t most_significant_q_limb = 0;
+ mp_size_t qn = nn - dn;
+ mp_size_t i;
+ mp_limb_t dx, d1, n0;
+ mp_limb_t dxinv;
+ int use_preinv;
+
+ ASSERT (dn > 2);
+ ASSERT (nn >= dn);
+ ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
+ ASSERT (! MPN_OVERLAP_P (np, nn, dp, dn));
+ ASSERT (! MPN_OVERLAP_P (qp, nn-dn, dp, dn));
+ ASSERT (! MPN_OVERLAP_P (qp, nn-dn, np, nn) || qp+dn >= np);
+ ASSERT_MPN (np, nn);
+ ASSERT_MPN (dp, dn);
+
+ np += qn;
+ dx = dp[dn - 1];
+ d1 = dp[dn - 2];
+ n0 = np[dn - 1];
+
+ if (n0 >= dx)
+ {
+ if (n0 > dx || mpn_cmp (np, dp, dn - 1) >= 0)
+ {
+ mpn_sub_n (np, np, dp, dn);
+ most_significant_q_limb = 1;
+ }
+ }
+
+ use_preinv = ABOVE_THRESHOLD (qn, DIV_SB_PREINV_THRESHOLD);
+ if (use_preinv)
+ invert_limb (dxinv, dx);
+
+ for (i = qn - 1; i >= 0; i--)
+ {
+ mp_limb_t q;
+ mp_limb_t nx;
+ mp_limb_t cy_limb;
+
+ nx = np[dn - 1]; /* FIXME: could get value from r1 */
+ np--;
+
+ if (nx == dx)
+ {
+ /* This might over-estimate q, but it's probably not worth
+ the extra code here to find out. */
+ q = GMP_NUMB_MASK;
+
+#if 1
+ cy_limb = mpn_submul_1 (np, dp, dn, q);
+#else
+ /* This should be faster on many machines */
+ cy_limb = mpn_sub_n (np + 1, np + 1, dp, dn);
+ cy = mpn_add_n (np, np, dp, dn);
+ np[dn] += cy;
+#endif
+
+ if (nx != cy_limb)
+ {
+ mpn_add_n (np, np, dp, dn);
+ q--;
+ }
+
+ qp[i] = q;
+ }
+ else
+ {
+ mp_limb_t rx, r1, r0, p1, p0;
+
+ /* "workaround" avoids a problem with gcc 2.7.2.3 i386 register usage
+ when np[dn-1] is used in an asm statement like umul_ppmm in
+ udiv_qrnnd_preinv. The symptom is seg faults due to registers
+ being clobbered. gcc 2.95 i386 doesn't have the problem. */
+ {
+ mp_limb_t workaround = np[dn - 1];
+ if (CACHED_ABOVE_THRESHOLD (use_preinv, DIV_SB_PREINV_THRESHOLD))
+ udiv_qrnnd_preinv (q, r1, nx, workaround, dx, dxinv);
+ else
+ {
+ udiv_qrnnd (q, r1, nx, workaround << GMP_NAIL_BITS,
+ dx << GMP_NAIL_BITS);
+ r1 >>= GMP_NAIL_BITS;
+ }
+ }
+ umul_ppmm (p1, p0, d1, q << GMP_NAIL_BITS);
+ p0 >>= GMP_NAIL_BITS;
+
+ r0 = np[dn - 2];
+ rx = 0;
+ if (r1 < p1 || (r1 == p1 && r0 < p0))
+ {
+ p1 -= p0 < d1;
+ p0 = (p0 - d1) & GMP_NUMB_MASK;
+ q--;
+ r1 = (r1 + dx) & GMP_NUMB_MASK;
+ rx = r1 < dx;
+ }
+
+ p1 += r0 < p0; /* cannot carry! */
+ rx -= r1 < p1; /* may become 11..1 if q is still too large */
+ r1 = (r1 - p1) & GMP_NUMB_MASK;
+ r0 = (r0 - p0) & GMP_NUMB_MASK;
+
+ cy_limb = mpn_submul_1 (np, dp, dn - 2, q);
+
+ /* Check if we've over-estimated q, and adjust as needed. */
+ {
+ mp_limb_t cy1, cy2;
+ cy1 = r0 < cy_limb;
+ r0 = (r0 - cy_limb) & GMP_NUMB_MASK;
+ cy2 = r1 < cy1;
+ r1 -= cy1;
+ np[dn - 1] = r1;
+ np[dn - 2] = r0;
+ if (cy2 != rx)
+ {
+ mpn_add_n (np, np, dp, dn);
+ q--;
+ }
+ }
+ qp[i] = q;
+ }
+ }
+
+ /* ______ ______ ______
+ |__rx__|__r1__|__r0__| partial remainder
+ ______ ______
+ - |__p1__|__p0__| partial product to subtract
+ ______ ______
+ - |______|cylimb|
+
+ rx is -1, 0 or 1. If rx=1, then q is correct (it should match
+ carry out). If rx=-1 then q is too large. If rx=0, then q might
+ be too large, but it is most likely correct.
+ */
+
+ return most_significant_q_limb;
+}
diff --git a/gmp/mpn/generic/sbpi1_bdiv_q.c b/gmp/mpn/generic/sbpi1_bdiv_q.c
deleted file mode 100644
index 645b1d9b6a..0000000000
--- a/gmp/mpn/generic/sbpi1_bdiv_q.c
+++ /dev/null
@@ -1,100 +0,0 @@
-/* mpn_sbpi1_bdiv_q -- schoolbook Hensel division with precomputed inverse,
- returning quotient only.
-
- Contributed to the GNU project by Niels Möller.
-
- THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.
- IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
- ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-
-Copyright 2005, 2006, 2009, 2011, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-/* Computes Q = N / D mod B^nn, destroys N.
-
- D must be odd. dinv is (-D)^-1 mod B.
-
-
- The straightforward way to compute Q is to cancel one limb at a time, using
-
- qp[i] = D^{-1} * np[i] (mod B)
- N -= B^i * qp[i] * D
-
- But we prefer addition to subtraction, since mpn_addmul_1 is often faster
- than mpn_submul_1. Q = - N / D can be computed by iterating
-
- qp[i] = (-D)^{-1} * np[i] (mod B)
- N += B^i * qp[i] * D
-
- And then we flip the sign, -Q = (not Q) + 1. */
-
-void
-mpn_sbpi1_bdiv_q (mp_ptr qp,
- mp_ptr np, mp_size_t nn,
- mp_srcptr dp, mp_size_t dn,
- mp_limb_t dinv)
-{
- mp_size_t i;
- mp_limb_t cy, q;
-
- ASSERT (dn > 0);
- ASSERT (nn >= dn);
- ASSERT ((dp[0] & 1) != 0);
- /* FIXME: Add ASSERTs for allowable overlapping; i.e., that qp = np is OK,
- but some over N/Q overlaps will not work. */
-
- for (i = nn - dn; i > 0; i--)
- {
- q = dinv * np[0];
- cy = mpn_addmul_1 (np, dp, dn, q);
- mpn_add_1 (np + dn, np + dn, i, cy);
- ASSERT (np[0] == 0);
- qp[0] = ~q;
- qp++;
- np++;
- }
-
- for (i = dn; i > 1; i--)
- {
- q = dinv * np[0];
- mpn_addmul_1 (np, dp, i, q);
- ASSERT (np[0] == 0);
- qp[0] = ~q;
- qp++;
- np++;
- }
-
- /* Final limb */
- q = dinv * np[0];
- qp[0] = ~q;
- mpn_add_1 (qp - nn + 1, qp - nn + 1, nn, 1);
-}
diff --git a/gmp/mpn/generic/sbpi1_div_q.c b/gmp/mpn/generic/sbpi1_div_q.c
deleted file mode 100644
index 3abbd57933..0000000000
--- a/gmp/mpn/generic/sbpi1_div_q.c
+++ /dev/null
@@ -1,303 +0,0 @@
-/* mpn_sbpi1_div_q -- Schoolbook division using the Möller-Granlund 3/2
- division algorithm.
-
- Contributed to the GNU project by Torbjorn Granlund.
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-
-Copyright 2007, 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-mp_limb_t
-mpn_sbpi1_div_q (mp_ptr qp,
- mp_ptr np, mp_size_t nn,
- mp_srcptr dp, mp_size_t dn,
- mp_limb_t dinv)
-{
- mp_limb_t qh;
- mp_size_t qn, i;
- mp_limb_t n1, n0;
- mp_limb_t d1, d0;
- mp_limb_t cy, cy1;
- mp_limb_t q;
- mp_limb_t flag;
-
- mp_size_t dn_orig = dn;
- mp_srcptr dp_orig = dp;
- mp_ptr np_orig = np;
-
- ASSERT (dn > 2);
- ASSERT (nn >= dn);
- ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
-
- np += nn;
-
- qn = nn - dn;
- if (qn + 1 < dn)
- {
- dp += dn - (qn + 1);
- dn = qn + 1;
- }
-
- qh = mpn_cmp (np - dn, dp, dn) >= 0;
- if (qh != 0)
- mpn_sub_n (np - dn, np - dn, dp, dn);
-
- qp += qn;
-
- dn -= 2; /* offset dn by 2 for main division loops,
- saving two iterations in mpn_submul_1. */
- d1 = dp[dn + 1];
- d0 = dp[dn + 0];
-
- np -= 2;
-
- n1 = np[1];
-
- for (i = qn - (dn + 2); i >= 0; i--)
- {
- np--;
- if (UNLIKELY (n1 == d1) && np[1] == d0)
- {
- q = GMP_NUMB_MASK;
- mpn_submul_1 (np - dn, dp, dn + 2, q);
- n1 = np[1]; /* update n1, last loop's value will now be invalid */
- }
- else
- {
- udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
-
- cy = mpn_submul_1 (np - dn, dp, dn, q);
-
- cy1 = n0 < cy;
- n0 = (n0 - cy) & GMP_NUMB_MASK;
- cy = n1 < cy1;
- n1 -= cy1;
- np[0] = n0;
-
- if (UNLIKELY (cy != 0))
- {
- n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
- q--;
- }
- }
-
- *--qp = q;
- }
-
- flag = ~CNST_LIMB(0);
-
- if (dn >= 0)
- {
- for (i = dn; i > 0; i--)
- {
- np--;
- if (UNLIKELY (n1 >= (d1 & flag)))
- {
- q = GMP_NUMB_MASK;
- cy = mpn_submul_1 (np - dn, dp, dn + 2, q);
-
- if (UNLIKELY (n1 != cy))
- {
- if (n1 < (cy & flag))
- {
- q--;
- mpn_add_n (np - dn, np - dn, dp, dn + 2);
- }
- else
- flag = 0;
- }
- n1 = np[1];
- }
- else
- {
- udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
-
- cy = mpn_submul_1 (np - dn, dp, dn, q);
-
- cy1 = n0 < cy;
- n0 = (n0 - cy) & GMP_NUMB_MASK;
- cy = n1 < cy1;
- n1 -= cy1;
- np[0] = n0;
-
- if (UNLIKELY (cy != 0))
- {
- n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
- q--;
- }
- }
-
- *--qp = q;
-
- /* Truncate operands. */
- dn--;
- dp++;
- }
-
- np--;
- if (UNLIKELY (n1 >= (d1 & flag)))
- {
- q = GMP_NUMB_MASK;
- cy = mpn_submul_1 (np, dp, 2, q);
-
- if (UNLIKELY (n1 != cy))
- {
- if (n1 < (cy & flag))
- {
- q--;
- add_ssaaaa (np[1], np[0], np[1], np[0], dp[1], dp[0]);
- }
- else
- flag = 0;
- }
- n1 = np[1];
- }
- else
- {
- udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
-
- np[0] = n0;
- np[1] = n1;
- }
-
- *--qp = q;
- }
- ASSERT_ALWAYS (np[1] == n1);
- np += 2;
-
-
- dn = dn_orig;
- if (UNLIKELY (n1 < (dn & flag)))
- {
- mp_limb_t q, x;
-
- /* The quotient may be too large if the remainder is small. Recompute
- for above ignored operand parts, until the remainder spills.
-
- FIXME: The quality of this code isn't the same as the code above.
- 1. We don't compute things in an optimal order, high-to-low, in order
- to terminate as quickly as possible.
- 2. We mess with pointers and sizes, adding and subtracting and
- adjusting to get things right. It surely could be streamlined.
- 3. The only termination criteria are that we determine that the
- quotient needs to be adjusted, or that we have recomputed
- everything. We should stop when the remainder is so large
- that no additional subtracting could make it spill.
- 4. If nothing else, we should not do two loops of submul_1 over the
- data, instead handle both the triangularization and chopping at
- once. */
-
- x = n1;
-
- if (dn > 2)
- {
- /* Compensate for triangularization. */
- mp_limb_t y;
-
- dp = dp_orig;
- if (qn + 1 < dn)
- {
- dp += dn - (qn + 1);
- dn = qn + 1;
- }
-
- y = np[-2];
-
- for (i = dn - 3; i >= 0; i--)
- {
- q = qp[i];
- cy = mpn_submul_1 (np - (dn - i), dp, dn - i - 2, q);
-
- if (y < cy)
- {
- if (x == 0)
- {
- cy = mpn_sub_1 (qp, qp, qn, 1);
- ASSERT_ALWAYS (cy == 0);
- return qh - cy;
- }
- x--;
- }
- y -= cy;
- }
- np[-2] = y;
- }
-
- dn = dn_orig;
- if (qn + 1 < dn)
- {
- /* Compensate for ignored dividend and divisor tails. */
-
- dp = dp_orig;
- np = np_orig;
-
- if (qh != 0)
- {
- cy = mpn_sub_n (np + qn, np + qn, dp, dn - (qn + 1));
- if (cy != 0)
- {
- if (x == 0)
- {
- if (qn != 0)
- cy = mpn_sub_1 (qp, qp, qn, 1);
- return qh - cy;
- }
- x--;
- }
- }
-
- if (qn == 0)
- return qh;
-
- for (i = dn - qn - 2; i >= 0; i--)
- {
- cy = mpn_submul_1 (np + i, qp, qn, dp[i]);
- cy = mpn_sub_1 (np + qn + i, np + qn + i, dn - qn - i - 1, cy);
- if (cy != 0)
- {
- if (x == 0)
- {
- cy = mpn_sub_1 (qp, qp, qn, 1);
- return qh;
- }
- x--;
- }
- }
- }
- }
-
- return qh;
-}
diff --git a/gmp/mpn/generic/sbpi1_div_qr.c b/gmp/mpn/generic/sbpi1_div_qr.c
deleted file mode 100644
index 0c3e4cb729..0000000000
--- a/gmp/mpn/generic/sbpi1_div_qr.c
+++ /dev/null
@@ -1,110 +0,0 @@
-/* mpn_sbpi1_div_qr -- Schoolbook division using the Möller-Granlund 3/2
- division algorithm.
-
- Contributed to the GNU project by Torbjorn Granlund.
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-
-Copyright 2007, 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-mp_limb_t
-mpn_sbpi1_div_qr (mp_ptr qp,
- mp_ptr np, mp_size_t nn,
- mp_srcptr dp, mp_size_t dn,
- mp_limb_t dinv)
-{
- mp_limb_t qh;
- mp_size_t i;
- mp_limb_t n1, n0;
- mp_limb_t d1, d0;
- mp_limb_t cy, cy1;
- mp_limb_t q;
-
- ASSERT (dn > 2);
- ASSERT (nn >= dn);
- ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
-
- np += nn;
-
- qh = mpn_cmp (np - dn, dp, dn) >= 0;
- if (qh != 0)
- mpn_sub_n (np - dn, np - dn, dp, dn);
-
- qp += nn - dn;
-
- dn -= 2; /* offset dn by 2 for main division loops,
- saving two iterations in mpn_submul_1. */
- d1 = dp[dn + 1];
- d0 = dp[dn + 0];
-
- np -= 2;
-
- n1 = np[1];
-
- for (i = nn - (dn + 2); i > 0; i--)
- {
- np--;
- if (UNLIKELY (n1 == d1) && np[1] == d0)
- {
- q = GMP_NUMB_MASK;
- mpn_submul_1 (np - dn, dp, dn + 2, q);
- n1 = np[1]; /* update n1, last loop's value will now be invalid */
- }
- else
- {
- udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
-
- cy = mpn_submul_1 (np - dn, dp, dn, q);
-
- cy1 = n0 < cy;
- n0 = (n0 - cy) & GMP_NUMB_MASK;
- cy = n1 < cy1;
- n1 = (n1 - cy1) & GMP_NUMB_MASK;
- np[0] = n0;
-
- if (UNLIKELY (cy != 0))
- {
- n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
- q--;
- }
- }
-
- *--qp = q;
- }
- np[1] = n1;
-
- return qh;
-}
diff --git a/gmp/mpn/generic/sbpi1_divappr_q.c b/gmp/mpn/generic/sbpi1_divappr_q.c
deleted file mode 100644
index 3e7cf91ba6..0000000000
--- a/gmp/mpn/generic/sbpi1_divappr_q.c
+++ /dev/null
@@ -1,199 +0,0 @@
-/* mpn_sbpi1_divappr_q -- Schoolbook division using the Möller-Granlund 3/2
- division algorithm, returning approximate quotient. The quotient returned
- is either correct, or one too large.
-
- Contributed to the GNU project by Torbjorn Granlund.
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-
-Copyright 2007, 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-mp_limb_t
-mpn_sbpi1_divappr_q (mp_ptr qp,
- mp_ptr np, mp_size_t nn,
- mp_srcptr dp, mp_size_t dn,
- mp_limb_t dinv)
-{
- mp_limb_t qh;
- mp_size_t qn, i;
- mp_limb_t n1, n0;
- mp_limb_t d1, d0;
- mp_limb_t cy, cy1;
- mp_limb_t q;
- mp_limb_t flag;
-
- ASSERT (dn > 2);
- ASSERT (nn >= dn);
- ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
-
- np += nn;
-
- qn = nn - dn;
- if (qn + 1 < dn)
- {
- dp += dn - (qn + 1);
- dn = qn + 1;
- }
-
- qh = mpn_cmp (np - dn, dp, dn) >= 0;
- if (qh != 0)
- mpn_sub_n (np - dn, np - dn, dp, dn);
-
- qp += qn;
-
- dn -= 2; /* offset dn by 2 for main division loops,
- saving two iterations in mpn_submul_1. */
- d1 = dp[dn + 1];
- d0 = dp[dn + 0];
-
- np -= 2;
-
- n1 = np[1];
-
- for (i = qn - (dn + 2); i >= 0; i--)
- {
- np--;
- if (UNLIKELY (n1 == d1) && np[1] == d0)
- {
- q = GMP_NUMB_MASK;
- mpn_submul_1 (np - dn, dp, dn + 2, q);
- n1 = np[1]; /* update n1, last loop's value will now be invalid */
- }
- else
- {
- udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
-
- cy = mpn_submul_1 (np - dn, dp, dn, q);
-
- cy1 = n0 < cy;
- n0 = (n0 - cy) & GMP_NUMB_MASK;
- cy = n1 < cy1;
- n1 -= cy1;
- np[0] = n0;
-
- if (UNLIKELY (cy != 0))
- {
- n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
- q--;
- }
- }
-
- *--qp = q;
- }
-
- flag = ~CNST_LIMB(0);
-
- if (dn >= 0)
- {
- for (i = dn; i > 0; i--)
- {
- np--;
- if (UNLIKELY (n1 >= (d1 & flag)))
- {
- q = GMP_NUMB_MASK;
- cy = mpn_submul_1 (np - dn, dp, dn + 2, q);
-
- if (UNLIKELY (n1 != cy))
- {
- if (n1 < (cy & flag))
- {
- q--;
- mpn_add_n (np - dn, np - dn, dp, dn + 2);
- }
- else
- flag = 0;
- }
- n1 = np[1];
- }
- else
- {
- udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
-
- cy = mpn_submul_1 (np - dn, dp, dn, q);
-
- cy1 = n0 < cy;
- n0 = (n0 - cy) & GMP_NUMB_MASK;
- cy = n1 < cy1;
- n1 -= cy1;
- np[0] = n0;
-
- if (UNLIKELY (cy != 0))
- {
- n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
- q--;
- }
- }
-
- *--qp = q;
-
- /* Truncate operands. */
- dn--;
- dp++;
- }
-
- np--;
- if (UNLIKELY (n1 >= (d1 & flag)))
- {
- q = GMP_NUMB_MASK;
- cy = mpn_submul_1 (np, dp, 2, q);
-
- if (UNLIKELY (n1 != cy))
- {
- if (n1 < (cy & flag))
- {
- q--;
- add_ssaaaa (np[1], np[0], np[1], np[0], dp[1], dp[0]);
- }
- else
- flag = 0;
- }
- n1 = np[1];
- }
- else
- {
- udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
-
- np[1] = n1;
- np[0] = n0;
- }
-
- *--qp = q;
- }
-
- ASSERT_ALWAYS (np[1] == n1);
-
- return qh;
-}
diff --git a/gmp/mpn/generic/scan0.c b/gmp/mpn/generic/scan0.c
index 8171fd5afe..2e9f3a43da 100644
--- a/gmp/mpn/generic/scan0.c
+++ b/gmp/mpn/generic/scan0.c
@@ -5,28 +5,17 @@ Copyright 1994, 1996, 2001, 2002, 2004 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
@@ -36,8 +25,9 @@ see https://www.gnu.org/licenses/. */
1. U must sooner or later have a limb with a clear bit.
*/
-mp_bitcnt_t
-mpn_scan0 (mp_srcptr up, mp_bitcnt_t starting_bit)
+unsigned long int
+mpn_scan0 (register mp_srcptr up,
+ register unsigned long int starting_bit)
{
mp_size_t starting_word;
mp_limb_t alimb;
diff --git a/gmp/mpn/generic/scan1.c b/gmp/mpn/generic/scan1.c
index e22ad5d827..d0d9a3feea 100644
--- a/gmp/mpn/generic/scan1.c
+++ b/gmp/mpn/generic/scan1.c
@@ -5,28 +5,17 @@ Copyright 1994, 1996, 2001, 2002, 2004 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
@@ -36,8 +25,9 @@ see https://www.gnu.org/licenses/. */
1. U must sooner or later have a limb != 0.
*/
-mp_bitcnt_t
-mpn_scan1 (mp_srcptr up, mp_bitcnt_t starting_bit)
+unsigned long int
+mpn_scan1 (register mp_srcptr up,
+ register unsigned long int starting_bit)
{
mp_size_t starting_word;
mp_limb_t alimb;
diff --git a/gmp/mpn/generic/sec_aors_1.c b/gmp/mpn/generic/sec_aors_1.c
deleted file mode 100644
index d789a5792e..0000000000
--- a/gmp/mpn/generic/sec_aors_1.c
+++ /dev/null
@@ -1,60 +0,0 @@
-/* mpn_sec_add_1, mpn_sec_sub_1
-
- Contributed to the GNU project by Niels Möller
-
-Copyright 2013, 2014 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#if OPERATION_sec_add_1
-#define FNAME mpn_sec_add_1
-#define FNAME_itch mpn_sec_add_1_itch
-#define OP_N mpn_add_n
-#endif
-#if OPERATION_sec_sub_1
-#define FNAME mpn_sec_sub_1
-#define FNAME_itch mpn_sec_sub_1_itch
-#define OP_N mpn_sub_n
-#endif
-
-/* It's annoying to that we need scratch space */
-mp_size_t
-FNAME_itch (mp_size_t n)
-{
- return n;
-}
-
-mp_limb_t
-FNAME (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_ptr scratch)
-{
- scratch[0] = b;
- MPN_ZERO (scratch + 1, n-1);
- return OP_N (rp, ap, scratch, n);
-}
diff --git a/gmp/mpn/generic/sec_div.c b/gmp/mpn/generic/sec_div.c
deleted file mode 100644
index 483b118d0d..0000000000
--- a/gmp/mpn/generic/sec_div.c
+++ /dev/null
@@ -1,133 +0,0 @@
-/* mpn_sec_div_qr, mpn_sec_div_r -- Compute Q = floor(U / V), U = U mod V.
- Side-channel silent under the assumption that the used instructions are
- side-channel silent.
-
- Contributed to the GNU project by Torbjörn Granlund.
-
-Copyright 2011-2014 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-#if OPERATION_sec_div_qr
-#define FNAME mpn_sec_div_qr
-#define FNAME_itch mpn_sec_div_qr_itch
-#define Q(q) q,
-#define RETTYPE mp_limb_t
-#endif
-#if OPERATION_sec_div_r
-#define FNAME mpn_sec_div_r
-#define FNAME_itch mpn_sec_div_r_itch
-#define Q(q)
-#define RETTYPE void
-#endif
-
-mp_size_t
-FNAME_itch (mp_size_t nn, mp_size_t dn)
-{
-#if OPERATION_sec_div_qr
-/* Needs (nn + dn + 1) + mpn_sec_pi1_div_qr's needs of (2nn' - dn + 1) for a
- total of 3nn + 4 limbs at tp. Note that mpn_sec_pi1_div_qr's nn is one
- greater than ours, therefore +4 and not just +2. */
- return 3 * nn + 4;
-#endif
-#if OPERATION_sec_div_r
-/* Needs (nn + dn + 1) + mpn_sec_pi1_div_r's needs of (dn + 1) for a total of
- nn + 2dn + 2 limbs at tp. */
- return nn + 2 * dn + 2;
-#endif
-}
-
-RETTYPE
-FNAME (Q(mp_ptr qp)
- mp_ptr np, mp_size_t nn,
- mp_srcptr dp, mp_size_t dn,
- mp_ptr tp)
-{
- mp_limb_t d1, d0;
- unsigned int cnt;
- gmp_pi1_t dinv;
- mp_limb_t inv32;
-
- ASSERT (dn >= 1);
- ASSERT (nn >= dn);
- ASSERT (dp[dn - 1] != 0);
-
- d1 = dp[dn - 1];
- count_leading_zeros (cnt, d1);
-
- if (cnt != 0)
- {
- mp_limb_t qh, cy;
- mp_ptr np2, dp2;
- dp2 = tp; /* dn limbs */
- mpn_lshift (dp2, dp, dn, cnt);
-
- np2 = tp + dn; /* (nn + 1) limbs */
- cy = mpn_lshift (np2, np, nn, cnt);
- np2[nn++] = cy;
-
- d0 = dp2[dn - 1];
- d0 += (~d0 != 0);
- invert_limb (inv32, d0);
-
- /* We add nn + dn to tp here, not nn + 1 + dn, as expected. This is
- since nn here will have been incremented. */
-#if OPERATION_sec_div_qr
- qh = mpn_sec_pi1_div_qr (np2 + dn, np2, nn, dp2, dn, inv32, tp + nn + dn);
- ASSERT (qh == 0); /* FIXME: this indicates inefficiency! */
- MPN_COPY (qp, np2 + dn, nn - dn - 1);
- qh = np2[nn - 1];
-#else
- mpn_sec_pi1_div_r (np2, nn, dp2, dn, inv32, tp + nn + dn);
-#endif
-
- mpn_rshift (np, np2, dn, cnt);
-
-#if OPERATION_sec_div_qr
- return qh;
-#endif
- }
- else
- {
- /* FIXME: Consider copying np => np2 here, adding a 0-limb at the top.
- That would simplify the underlying pi1 function, since then it could
- assume nn > dn. */
- d0 = dp[dn - 1];
- d0 += (~d0 != 0);
- invert_limb (inv32, d0);
-
-#if OPERATION_sec_div_qr
- return mpn_sec_pi1_div_qr (qp, np, nn, dp, dn, inv32, tp);
-#else
- mpn_sec_pi1_div_r (np, nn, dp, dn, inv32, tp);
-#endif
- }
-}
diff --git a/gmp/mpn/generic/sec_invert.c b/gmp/mpn/generic/sec_invert.c
deleted file mode 100644
index 43a578b2a1..0000000000
--- a/gmp/mpn/generic/sec_invert.c
+++ /dev/null
@@ -1,195 +0,0 @@
-/* mpn_sec_invert
-
- Contributed to the GNU project by Niels Möller
-
-Copyright 2013 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#if 0
-/* Currently unused. Should be resurrected once mpn_cnd_neg is
- advertised. */
-static mp_size_t
-mpn_cnd_neg_itch (mp_size_t n)
-{
- return n;
-}
-#endif
-
-/* FIXME: Ought to return carry */
-static void
-mpn_cnd_neg (int cnd, mp_limb_t *rp, const mp_limb_t *ap, mp_size_t n,
- mp_ptr scratch)
-{
- mpn_lshift (scratch, ap, n, 1);
- mpn_cnd_sub_n (cnd, rp, ap, scratch, n);
-}
-
-static void
-mpn_cnd_swap (int cnd, volatile mp_limb_t *ap, volatile mp_limb_t *bp,
- mp_size_t n)
-{
- volatile mp_limb_t mask = - (mp_limb_t) (cnd != 0);
- mp_size_t i;
- for (i = 0; i < n; i++)
- {
- mp_limb_t a, b, t;
- a = ap[i];
- b = bp[i];
- t = (a ^ b) & mask;
- ap[i] = a ^ t;
- bp[i] = b ^ t;
- }
-}
-
-static int
-mpn_sec_eq_ui (mp_srcptr ap, mp_size_t n, mp_limb_t b)
-{
- mp_limb_t d;
- ASSERT (n > 0);
-
- d = ap[0] ^ b;
-
- while (--n > 0)
- d |= ap[n];
-
- return d == 0;
-}
-
-mp_size_t
-mpn_sec_invert_itch (mp_size_t n)
-{
- return 4*n;
-}
-
-/* Compute V <-- A^{-1} (mod M), in data-independent time. M must be
- odd. Returns 1 on success, and 0 on failure (i.e., if gcd (A, m) !=
- 1). Inputs and outputs of size n, and no overlap allowed. The {ap,
- n} area is destroyed. For arbitrary inputs, bit_size should be
- 2*n*GMP_NUMB_BITS, but if A or M are known to be smaller, e.g., if
- M = 2^521 - 1 and A < M, bit_size can be any bound on the sum of
- the bit sizes of A and M. */
-int
-mpn_sec_invert (mp_ptr vp, mp_ptr ap, mp_srcptr mp,
- mp_size_t n, mp_bitcnt_t bit_size,
- mp_ptr scratch)
-{
- ASSERT (n > 0);
- ASSERT (bit_size > 0);
- ASSERT (mp[0] & 1);
- ASSERT (! MPN_OVERLAP_P (ap, n, vp, n));
-#define bp (scratch + n)
-#define up (scratch + 2*n)
-#define m1hp (scratch + 3*n)
-
- /* Maintain
-
- a = u * orig_a (mod m)
- b = v * orig_a (mod m)
-
- and b odd at all times. Initially,
-
- a = a_orig, u = 1
- b = m, v = 0
- */
-
-
- up[0] = 1;
- mpn_zero (up+1, n - 1);
- mpn_copyi (bp, mp, n);
- mpn_zero (vp, n);
-
- ASSERT_CARRY (mpn_rshift (m1hp, mp, n, 1));
- ASSERT_NOCARRY (mpn_sec_add_1 (m1hp, m1hp, n, 1, scratch));
-
- while (bit_size-- > 0)
- {
- mp_limb_t odd, swap, cy;
-
- /* Always maintain b odd. The logic of the iteration is as
- follows. For a, b:
-
- odd = a & 1
- a -= odd * b
- if (underflow from a-b)
- {
- b += a, assigns old a
- a = B^n-a
- }
-
- a /= 2
-
- For u, v:
-
- if (underflow from a - b)
- swap u, v
- u -= odd * v
- if (underflow from u - v)
- u += m
-
- u /= 2
- if (a one bit was shifted out)
- u += (m+1)/2
-
- As long as a > 0, the quantity
-
- (bitsize of a) + (bitsize of b)
-
- is reduced by at least one bit per iteration, hence after (bit_size of
- orig_a) + (bit_size of m) - 1 iterations we surely have a = 0. Then b
- = gcd(orig_a, m) and if b = 1 then also v = orig_a^{-1} (mod m).
- */
-
- ASSERT (bp[0] & 1);
- odd = ap[0] & 1;
-
- swap = mpn_cnd_sub_n (odd, ap, ap, bp, n);
- mpn_cnd_add_n (swap, bp, bp, ap, n);
- mpn_cnd_neg (swap, ap, ap, n, scratch);
-
- mpn_cnd_swap (swap, up, vp, n);
- cy = mpn_cnd_sub_n (odd, up, up, vp, n);
- cy -= mpn_cnd_add_n (cy, up, up, mp, n);
- ASSERT (cy == 0);
-
- cy = mpn_rshift (ap, ap, n, 1);
- ASSERT (cy == 0);
- cy = mpn_rshift (up, up, n, 1);
- cy = mpn_cnd_add_n (cy, up, up, m1hp, n);
- ASSERT (cy == 0);
- }
- /* Should be all zeros, but check only extreme limbs */
- ASSERT ( (ap[0] | ap[n-1]) == 0);
- /* Check if indeed gcd == 1. */
- return mpn_sec_eq_ui (bp, n, 1);
-#undef bp
-#undef up
-#undef m1hp
-}
diff --git a/gmp/mpn/generic/sec_mul.c b/gmp/mpn/generic/sec_mul.c
deleted file mode 100644
index 2cd87fab1d..0000000000
--- a/gmp/mpn/generic/sec_mul.c
+++ /dev/null
@@ -1,49 +0,0 @@
-/* mpn_sec_mul.
-
- Contributed to the GNU project by Torbjörn Granlund.
-
-Copyright 2013 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-void
-mpn_sec_mul (mp_ptr rp,
- mp_srcptr ap, mp_size_t an,
- mp_srcptr bp, mp_size_t bn,
- mp_ptr tp)
-{
- mpn_mul_basecase (rp, ap, an, bp, bn);
-}
-
-mp_size_t
-mpn_sec_mul_itch (mp_size_t an, mp_size_t bn)
-{
- return 0;
-}
diff --git a/gmp/mpn/generic/sec_pi1_div.c b/gmp/mpn/generic/sec_pi1_div.c
deleted file mode 100644
index 1e075daf73..0000000000
--- a/gmp/mpn/generic/sec_pi1_div.c
+++ /dev/null
@@ -1,173 +0,0 @@
-/* mpn_sec_pi1_div_qr, mpn_sec_pi1_div_r -- Compute Q = floor(U / V), U = U
- mod V. Side-channel silent under the assumption that the used instructions
- are side-channel silent.
-
- Contributed to the GNU project by Torbjörn Granlund.
-
- THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2011-2013 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-/* This side-channel silent division algorithm reduces the partial remainder by
- GMP_NUMB_BITS/2 bits at a time, compared to GMP_NUMB_BITS for the main
- division algorithm. We actually do not insist on reducing by exactly
- GMP_NUMB_BITS/2, but may leave a partial remainder that is D*B^i to 3D*B^i
- too large (B is the limb base, D is the divisor, and i is the induction
- variable); the subsequent step will handle the extra partial remainder bits.
-
- With that partial remainder reduction, each step generates a quotient "half
- limb". The outer loop generates two quotient half limbs, an upper (q1h) and
- a lower (q0h) which are stored sparsely in separate limb arrays. These
- arrays are added at the end; using separate arrays avoids data-dependent
- carry propagation which could else pose a side-channel leakage problem.
-
- The quotient half limbs may be between -3 to 0 from the accurate value
- ("accurate" being the one which corresponds to a reduction to a principal
- partial remainder). Too small quotient half limbs correspond to too large
- remainders, which we reduce later, as described above.
-
- In order to keep quotients from getting too big, corresponding to a negative
- partial remainder, we use an inverse which is slightly smaller than usually.
-*/
-
-#if OPERATION_sec_pi1_div_qr
-/* Needs (dn + 1) + (nn - dn) + (nn - dn) = 2nn - dn + 1 limbs at tp. */
-#define FNAME mpn_sec_pi1_div_qr
-#define Q(q) q,
-#define RETTYPE mp_limb_t
-#endif
-#if OPERATION_sec_pi1_div_r
-/* Needs (dn + 1) limbs at tp. */
-#define FNAME mpn_sec_pi1_div_r
-#define Q(q)
-#define RETTYPE void
-#endif
-
-RETTYPE
-FNAME (Q(mp_ptr qp)
- mp_ptr np, mp_size_t nn,
- mp_srcptr dp, mp_size_t dn,
- mp_limb_t dinv,
- mp_ptr tp)
-{
- mp_limb_t nh, cy, q1h, q0h, dummy, cnd;
- mp_size_t i;
- mp_ptr hp;
-#if OPERATION_sec_pi1_div_qr
- mp_limb_t qh;
- mp_ptr qlp, qhp;
-#endif
-
- ASSERT (dn >= 1);
- ASSERT (nn >= dn);
- ASSERT ((dp[dn - 1] & GMP_NUMB_HIGHBIT) != 0);
-
- if (nn == dn)
- {
- cy = mpn_sub_n (np, np, dp, dn);
- mpn_cnd_add_n (cy, np, np, dp, dn);
-#if OPERATION_sec_pi1_div_qr
- return 1 - cy;
-#else
- return;
-#endif
- }
-
- /* Create a divisor copy shifted half a limb. */
- hp = tp; /* (dn + 1) limbs */
- hp[dn] = mpn_lshift (hp, dp, dn, GMP_NUMB_BITS / 2);
-
-#if OPERATION_sec_pi1_div_qr
- qlp = tp + (dn + 1); /* (nn - dn) limbs */
- qhp = tp + (nn + 1); /* (nn - dn) limbs */
-#endif
-
- np += nn - dn;
- nh = 0;
-
- for (i = nn - dn - 1; i >= 0; i--)
- {
- np--;
-
- nh = (nh << GMP_NUMB_BITS/2) + (np[dn] >> GMP_NUMB_BITS/2);
- umul_ppmm (q1h, dummy, nh, dinv);
- q1h += nh;
-#if OPERATION_sec_pi1_div_qr
- qhp[i] = q1h;
-#endif
- mpn_submul_1 (np, hp, dn + 1, q1h);
-
- nh = np[dn];
- umul_ppmm (q0h, dummy, nh, dinv);
- q0h += nh;
-#if OPERATION_sec_pi1_div_qr
- qlp[i] = q0h;
-#endif
- nh -= mpn_submul_1 (np, dp, dn, q0h);
- }
-
- /* 1st adjustment depends on extra high remainder limb. */
- cnd = nh != 0; /* FIXME: cmp-to-int */
-#if OPERATION_sec_pi1_div_qr
- qlp[0] += cnd;
-#endif
- nh -= mpn_cnd_sub_n (cnd, np, np, dp, dn);
-
- /* 2nd adjustment depends on remainder/divisor comparison as well as whether
- extra remainder limb was nullified by previous subtract. */
- cy = mpn_sub_n (np, np, dp, dn);
- cy = cy - nh;
-#if OPERATION_sec_pi1_div_qr
- qlp[0] += 1 - cy;
-#endif
- mpn_cnd_add_n (cy, np, np, dp, dn);
-
- /* 3rd adjustment depends on remainder/divisor comparison. */
- cy = mpn_sub_n (np, np, dp, dn);
-#if OPERATION_sec_pi1_div_qr
- qlp[0] += 1 - cy;
-#endif
- mpn_cnd_add_n (cy, np, np, dp, dn);
-
-#if OPERATION_sec_pi1_div_qr
- /* Combine quotient halves into final quotient. */
- qh = mpn_lshift (qhp, qhp, nn - dn, GMP_NUMB_BITS/2);
- qh += mpn_add_n (qp, qhp, qlp, nn - dn);
-
- return qh;
-#else
- return;
-#endif
-}
diff --git a/gmp/mpn/generic/sec_powm.c b/gmp/mpn/generic/sec_powm.c
deleted file mode 100644
index 67de44e10a..0000000000
--- a/gmp/mpn/generic/sec_powm.c
+++ /dev/null
@@ -1,438 +0,0 @@
-/* mpn_sec_powm -- Compute R = U^E mod M. Secure variant, side-channel silent
- under the assumption that the multiply instruction is side channel silent.
-
- Contributed to the GNU project by Torbjörn Granlund.
-
-Copyright 2007-2009, 2011, 2012, 2013 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-
-/*
- BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd.
-
- 1. T <- (B^n * U) mod M Convert to REDC form
-
- 2. Compute table U^0, U^1, U^2... of E-dependent size
-
- 3. While there are more bits in E
- W <- power left-to-right base-k
-
-
- TODO:
-
- * Make getbits a macro, thereby allowing it to update the index operand.
- That will simplify the code using getbits. (Perhaps make getbits' sibling
- getbit then have similar form, for symmetry.)
-
- * Choose window size without looping. (Superoptimize or think(tm).)
-
- * REDC_1_TO_REDC_2_THRESHOLD might actually represent the cutoff between
- redc_1 and redc_n. On such systems, we will switch to redc_2 causing
- slowdown.
-*/
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-#undef MPN_REDC_1_SEC
-#define MPN_REDC_1_SEC(rp, up, mp, n, invm) \
- do { \
- mp_limb_t cy; \
- cy = mpn_redc_1 (rp, up, mp, n, invm); \
- mpn_cnd_sub_n (cy, rp, rp, mp, n); \
- } while (0)
-
-#undef MPN_REDC_2_SEC
-#define MPN_REDC_2_SEC(rp, up, mp, n, mip) \
- do { \
- mp_limb_t cy; \
- cy = mpn_redc_2 (rp, up, mp, n, mip); \
- mpn_cnd_sub_n (cy, rp, rp, mp, n); \
- } while (0)
-
-#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
-#define WANT_REDC_2 1
-#endif
-
-/* Define our own mpn squaring function. We do this since we cannot use a
- native mpn_sqr_basecase over TUNE_SQR_TOOM2_MAX, or a non-native one over
- SQR_TOOM2_THRESHOLD. This is so because of fixed size stack allocations
- made inside mpn_sqr_basecase. */
-
-#if HAVE_NATIVE_mpn_sqr_diagonal
-#define MPN_SQR_DIAGONAL(rp, up, n) \
- mpn_sqr_diagonal (rp, up, n)
-#else
-#define MPN_SQR_DIAGONAL(rp, up, n) \
- do { \
- mp_size_t _i; \
- for (_i = 0; _i < (n); _i++) \
- { \
- mp_limb_t ul, lpl; \
- ul = (up)[_i]; \
- umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS); \
- (rp)[2 * _i] = lpl >> GMP_NAIL_BITS; \
- } \
- } while (0)
-#endif
-
-
-#if ! HAVE_NATIVE_mpn_sqr_basecase
-/* The limit of the generic code is SQR_TOOM2_THRESHOLD. */
-#define SQR_BASECASE_LIM SQR_TOOM2_THRESHOLD
-#endif
-
-#if HAVE_NATIVE_mpn_sqr_basecase
-#ifdef TUNE_SQR_TOOM2_MAX
-/* We slightly abuse TUNE_SQR_TOOM2_MAX here. If it is set for an assembly
- mpn_sqr_basecase, it comes from SQR_TOOM2_THRESHOLD_MAX in the assembly
- file. An assembly mpn_sqr_basecase that does not define it, should allow
- any size. */
-#define SQR_BASECASE_LIM SQR_TOOM2_THRESHOLD
-#endif
-#endif
-
-#ifdef WANT_FAT_BINARY
-/* For fat builds, we use SQR_TOOM2_THRESHOLD which will expand to a read from
- __gmpn_cpuvec. Perhaps any possible sqr_basecase.asm allow any size, and we
- limit the use unnecessarily. We cannot tell, so play it safe. FIXME. */
-#define SQR_BASECASE_LIM SQR_TOOM2_THRESHOLD
-#endif
-
-#ifndef SQR_BASECASE_LIM
-/* If SQR_BASECASE_LIM is now not defined, use mpn_sqr_basecase for any operand
- size. */
-#define mpn_local_sqr(rp,up,n,tp) mpn_sqr_basecase(rp,up,n)
-#else
-/* Define our own squaring function, which uses mpn_sqr_basecase for its
- allowed sizes, but its own code for larger sizes. */
-static void
-mpn_local_sqr (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr tp)
-{
- mp_size_t i;
-
- ASSERT (n >= 1);
- ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n));
-
- if (BELOW_THRESHOLD (n, SQR_BASECASE_LIM))
- {
- mpn_sqr_basecase (rp, up, n);
- return;
- }
-
- {
- mp_limb_t ul, lpl;
- ul = up[0];
- umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
- rp[0] = lpl >> GMP_NAIL_BITS;
- }
- if (n > 1)
- {
- mp_limb_t cy;
-
- cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
- tp[n - 1] = cy;
- for (i = 2; i < n; i++)
- {
- mp_limb_t cy;
- cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
- tp[n + i - 2] = cy;
- }
- MPN_SQR_DIAGONAL (rp + 2, up + 1, n - 1);
-
- {
- mp_limb_t cy;
-#if HAVE_NATIVE_mpn_addlsh1_n
- cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
-#else
- cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
- cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
-#endif
- rp[2 * n - 1] += cy;
- }
- }
-}
-#endif
-
-#define getbit(p,bi) \
- ((p[(bi - 1) / GMP_NUMB_BITS] >> (bi - 1) % GMP_NUMB_BITS) & 1)
-
-/* FIXME: Maybe some things would get simpler if all callers ensure
- that bi >= nbits. As far as I understand, with the current code bi
- < nbits can happen only for the final iteration. */
-static inline mp_limb_t
-getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
-{
- int nbits_in_r;
- mp_limb_t r;
- mp_size_t i;
-
- if (bi < nbits)
- {
- return p[0] & (((mp_limb_t) 1 << bi) - 1);
- }
- else
- {
- bi -= nbits; /* bit index of low bit to extract */
- i = bi / GMP_NUMB_BITS; /* word index of low bit to extract */
- bi %= GMP_NUMB_BITS; /* bit index in low word */
- r = p[i] >> bi; /* extract (low) bits */
- nbits_in_r = GMP_NUMB_BITS - bi; /* number of bits now in r */
- if (nbits_in_r < nbits) /* did we get enough bits? */
- r += p[i + 1] << nbits_in_r; /* prepend bits from higher word */
- return r & (((mp_limb_t ) 1 << nbits) - 1);
- }
-}
-
-#ifndef POWM_SEC_TABLE
-#if GMP_NUMB_BITS < 50
-#define POWM_SEC_TABLE 2,33,96,780,2741
-#else
-#define POWM_SEC_TABLE 2,130,524,2578
-#endif
-#endif
-
-#if TUNE_PROGRAM_BUILD
-extern int win_size (mp_bitcnt_t);
-#else
-static inline int
-win_size (mp_bitcnt_t enb)
-{
- int k;
- /* Find k, such that x[k-1] < enb <= x[k].
-
- We require that x[k] >= k, then it follows that enb > x[k-1] >=
- k-1, which implies k <= enb.
- */
- static const mp_bitcnt_t x[] = {0,POWM_SEC_TABLE,~(mp_bitcnt_t)0};
- for (k = 1; enb > x[k]; k++)
- ;
- ASSERT (k <= enb);
- return k;
-}
-#endif
-
-/* Convert U to REDC form, U_r = B^n * U mod M.
- Uses scratch space at tp of size 2un + n + 1. */
-static void
-redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n, mp_ptr tp)
-{
- MPN_ZERO (tp, n);
- MPN_COPY (tp + n, up, un);
-
- mpn_sec_div_r (tp, un + n, mp, n, tp + un + n);
- MPN_COPY (rp, tp, n);
-}
-
-/* {rp, n} <-- {bp, bn} ^ {ep, en} mod {mp, n},
- where en = ceil (enb / GMP_NUMB_BITS)
- Requires that {mp, n} is odd (and hence also mp[0] odd).
- Uses scratch space at tp as defined by mpn_sec_powm_itch. */
-void
-mpn_sec_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
- mp_srcptr ep, mp_bitcnt_t enb,
- mp_srcptr mp, mp_size_t n, mp_ptr tp)
-{
- mp_limb_t ip[2], *mip;
- int windowsize, this_windowsize;
- mp_limb_t expbits;
- mp_ptr pp, this_pp;
- long i;
- int cnd;
-
- ASSERT (enb > 0);
- ASSERT (n > 0);
- /* The code works for bn = 0, but the defined scratch space is 2 limbs
- greater than we supply, when converting 1 to redc form . */
- ASSERT (bn > 0);
- ASSERT ((mp[0] & 1) != 0);
-
- windowsize = win_size (enb);
-
-#if WANT_REDC_2
- if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
- {
- mip = ip;
- binvert_limb (mip[0], mp[0]);
- mip[0] = -mip[0];
- }
- else
- {
- mip = ip;
- mpn_binvert (mip, mp, 2, tp);
- mip[0] = -mip[0]; mip[1] = ~mip[1];
- }
-#else
- mip = ip;
- binvert_limb (mip[0], mp[0]);
- mip[0] = -mip[0];
-#endif
-
- pp = tp;
- tp += (n << windowsize); /* put tp after power table */
-
- /* Compute pp[0] table entry */
- /* scratch: | n | 1 | n+2 | */
- /* | pp[0] | 1 | redcify | */
- this_pp = pp;
- this_pp[n] = 1;
- redcify (this_pp, this_pp + n, 1, mp, n, this_pp + n + 1);
- this_pp += n;
-
- /* Compute pp[1] table entry. To avoid excessive scratch usage in the
- degenerate situation where B >> M, we let redcify use scratch space which
- will later be used by the pp table (element 2 and up). */
- /* scratch: | n | n | bn + n + 1 | */
- /* | pp[0] | pp[1] | redcify | */
- redcify (this_pp, bp, bn, mp, n, this_pp + n);
-
- /* Precompute powers of b and put them in the temporary area at pp. */
- /* scratch: | n | n | ... | | 2n | */
- /* | pp[0] | pp[1] | ... | pp[2^windowsize-1] | product | */
- for (i = (1 << windowsize) - 2; i > 0; i--)
- {
- mpn_mul_basecase (tp, this_pp, n, pp + n, n);
- this_pp += n;
-#if WANT_REDC_2
- if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
- MPN_REDC_1_SEC (this_pp, tp, mp, n, mip[0]);
- else
- MPN_REDC_2_SEC (this_pp, tp, mp, n, mip);
-#else
- MPN_REDC_1_SEC (this_pp, tp, mp, n, mip[0]);
-#endif
- }
-
- expbits = getbits (ep, enb, windowsize);
- ASSERT_ALWAYS (enb >= windowsize);
- enb -= windowsize;
-
- mpn_sec_tabselect (rp, pp, n, 1 << windowsize, expbits);
-
- /* Main exponentiation loop. */
- /* scratch: | n | n | ... | | 3n-4n | */
- /* | pp[0] | pp[1] | ... | pp[2^windowsize-1] | loop scratch | */
-
-#define INNERLOOP \
- while (enb != 0) \
- { \
- expbits = getbits (ep, enb, windowsize); \
- this_windowsize = windowsize; \
- if (enb < windowsize) \
- { \
- this_windowsize -= windowsize - enb; \
- enb = 0; \
- } \
- else \
- enb -= windowsize; \
- \
- do \
- { \
- mpn_local_sqr (tp, rp, n, tp + 2 * n); \
- MPN_REDUCE (rp, tp, mp, n, mip); \
- this_windowsize--; \
- } \
- while (this_windowsize != 0); \
- \
- mpn_sec_tabselect (tp + 2*n, pp, n, 1 << windowsize, expbits); \
- mpn_mul_basecase (tp, rp, n, tp + 2*n, n); \
- \
- MPN_REDUCE (rp, tp, mp, n, mip); \
- }
-
-#if WANT_REDC_2
- if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
- {
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
-#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1_SEC (rp, tp, mp, n, mip[0])
- INNERLOOP;
- }
- else
- {
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
-#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2_SEC (rp, tp, mp, n, mip)
- INNERLOOP;
- }
-#else
-#undef MPN_MUL_N
-#undef MPN_SQR
-#undef MPN_REDUCE
-#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
-#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1_SEC (rp, tp, mp, n, mip[0])
- INNERLOOP;
-#endif
-
- MPN_COPY (tp, rp, n);
- MPN_ZERO (tp + n, n);
-
-#if WANT_REDC_2
- if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
- MPN_REDC_1_SEC (rp, tp, mp, n, mip[0]);
- else
- MPN_REDC_2_SEC (rp, tp, mp, n, mip);
-#else
- MPN_REDC_1_SEC (rp, tp, mp, n, mip[0]);
-#endif
- cnd = mpn_sub_n (tp, rp, mp, n); /* we need just retval */
- mpn_cnd_sub_n (!cnd, rp, rp, mp, n);
-}
-
-mp_size_t
-mpn_sec_powm_itch (mp_size_t bn, mp_bitcnt_t enb, mp_size_t n)
-{
- int windowsize;
- mp_size_t redcify_itch, itch;
-
- /* The top scratch usage will either be when reducing B in the 2nd redcify
- call, or more typically n*2^windowsize + 3n or 4n, in the main loop. (It
- is 3n or 4n depending on if we use mpn_local_sqr or a native
- mpn_sqr_basecase. We assume 4n always for now.) */
-
- windowsize = win_size (enb);
-
- /* The 2n term is due to pp[0] and pp[1] at the time of the 2nd redcify call,
- the (bn + n) term is due to redcify's own usage, and the rest is due to
- mpn_sec_div_r's usage when called from redcify. */
- redcify_itch = (2 * n) + (bn + n) + ((bn + n) + 2 * n + 2);
-
- /* The n * 2^windowsize term is due to the power table, the 4n term is due to
- scratch needs of squaring/multiplication in the exponentiation loop. */
- itch = (n << windowsize) + (4 * n);
-
- return MAX (itch, redcify_itch);
-}
diff --git a/gmp/mpn/generic/sec_sqr.c b/gmp/mpn/generic/sec_sqr.c
deleted file mode 100644
index 736924cc22..0000000000
--- a/gmp/mpn/generic/sec_sqr.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/* mpn_sec_sqr.
-
- Contributed to the GNU project by Torbjörn Granlund.
-
-Copyright 2013 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-void
-mpn_sec_sqr (mp_ptr rp,
- mp_srcptr ap, mp_size_t an,
- mp_ptr tp)
-{
- mpn_sqr_basecase (rp, ap, an);
-}
-
-mp_size_t
-mpn_sec_sqr_itch (mp_size_t an)
-{
- return 0;
-}
diff --git a/gmp/mpn/generic/sec_tabselect.c b/gmp/mpn/generic/sec_tabselect.c
deleted file mode 100644
index a79c73a575..0000000000
--- a/gmp/mpn/generic/sec_tabselect.c
+++ /dev/null
@@ -1,55 +0,0 @@
-/* mpn_sec_tabselect.
-
-Copyright 2007-2009, 2011, 2013 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-/* Select entry `which' from table `tab', which has nents entries, each `n'
- limbs. Store the selected entry at rp. Reads entire table to avoid
- side-channel information leaks. O(n*nents). */
-void
-mpn_sec_tabselect (volatile mp_limb_t *rp, volatile const mp_limb_t *tab,
- mp_size_t n, mp_size_t nents, mp_size_t which)
-{
- mp_size_t k, i;
- mp_limb_t mask;
- volatile mp_limb_t *tp;
-
- for (k = 0; k < nents; k++)
- {
- mask = -(mp_limb_t) (which == k);
- tp = tab + n * k;
- for (i = 0; i < n; i++)
- {
- rp[i] = (rp[i] & ~mask) | (tp[i] & mask);
- }
- }
-}
diff --git a/gmp/mpn/generic/set_str.c b/gmp/mpn/generic/set_str.c
index 71034e34bf..975cfb0dad 100644
--- a/gmp/mpn/generic/set_str.c
+++ b/gmp/mpn/generic/set_str.c
@@ -9,34 +9,23 @@
FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE
GNU MP RELEASE.
-Copyright 1991-1994, 1996, 2000-2002, 2004, 2006-2008, 2012, 2013 Free
-Software Foundation, Inc.
+Copyright 1991, 1992, 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2006, 2007,
+2008 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
/* TODO:
@@ -80,7 +69,7 @@ mpn_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, int base)
int next_bitpos;
mp_limb_t res_digit;
mp_size_t size;
- int bits_per_indigit = mp_bases[base].big_base;
+ int bits_per_indigit = __mp_bases[base].big_base;
size = 0;
res_digit = 0;
@@ -118,7 +107,7 @@ mpn_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, int base)
TMP_MARK;
- chars_per_limb = mp_bases[base].chars_per_limb;
+ chars_per_limb = __mp_bases[base].chars_per_limb;
un = str_len / chars_per_limb + 1;
@@ -142,15 +131,18 @@ mpn_set_str_compute_powtab (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, i
long i, pi;
mp_size_t n;
mp_ptr p, t;
- mp_limb_t big_base;
+ unsigned normalization_steps;
+ mp_limb_t big_base, big_base_inverted;
int chars_per_limb;
size_t digits_in_base;
mp_size_t shift;
powtab_mem_ptr = powtab_mem;
- chars_per_limb = mp_bases[base].chars_per_limb;
- big_base = mp_bases[base].big_base;
+ chars_per_limb = __mp_bases[base].chars_per_limb;
+ big_base = __mp_bases[base].big_base;
+ big_base_inverted = __mp_bases[base].big_base_inverted;
+ count_leading_zeros (normalization_steps, big_base);
p = powtab_mem_ptr;
powtab_mem_ptr += 1;
@@ -177,7 +169,7 @@ mpn_set_str_compute_powtab (powers_t *powtab, mp_ptr powtab_mem, mp_size_t un, i
ASSERT_ALWAYS (powtab_mem_ptr < powtab_mem + mpn_dc_set_str_powtab_alloc (un));
- mpn_sqr (t, p, n);
+ mpn_sqr_n (t, p, n);
n = 2 * n - 1; n += t[n] != 0;
digits_in_base *= 2;
#if 1
@@ -247,9 +239,7 @@ mpn_dc_set_str (mp_ptr rp, const unsigned char *str, size_t str_len,
if (hn == 0)
{
- /* Zero +1 limb here, to avoid reading an allocated but uninitialised
- limb in mpn_incr_u below. */
- MPN_ZERO (rp, powtab->n + sn + 1);
+ MPN_ZERO (rp, powtab->n + sn);
}
else
{
@@ -288,11 +278,11 @@ mpn_bc_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, int base)
mp_limb_t res_digit;
ASSERT (base >= 2);
- ASSERT (base < numberof (mp_bases));
+ ASSERT (base < numberof (__mp_bases));
ASSERT (str_len >= 1);
- big_base = mp_bases[base].big_base;
- chars_per_limb = mp_bases[base].chars_per_limb;
+ big_base = __mp_bases[base].big_base;
+ chars_per_limb = __mp_bases[base].chars_per_limb;
size = 0;
for (i = chars_per_limb; i < str_len; i += chars_per_limb)
diff --git a/gmp/mpn/generic/sizeinbase.c b/gmp/mpn/generic/sizeinbase.c
index 16633569ec..edd10b544e 100644
--- a/gmp/mpn/generic/sizeinbase.c
+++ b/gmp/mpn/generic/sizeinbase.c
@@ -4,34 +4,22 @@
CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
FUTURE GNU MP RELEASES.
-Copyright 1991, 1993-1995, 2001, 2002, 2011, 2012 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 1995, 2001, 2002 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
@@ -44,7 +32,27 @@ see https://www.gnu.org/licenses/. */
size_t
mpn_sizeinbase (mp_srcptr xp, mp_size_t xsize, int base)
{
- size_t result;
- MPN_SIZEINBASE (result, xp, xsize, base);
- return result;
+ int lb_base, cnt;
+ mp_size_t totbits;
+
+ ASSERT (xsize >= 0);
+ ASSERT (base >= 2);
+ ASSERT (base < numberof (__mp_bases));
+
+ /* Special case for X == 0. */
+ if (xsize == 0)
+ return 1;
+
+ /* Calculate the total number of significant bits of X. */
+ count_leading_zeros (cnt, xp[xsize-1]);
+ totbits = xsize * BITS_PER_MP_LIMB - cnt;
+
+ if (POW2_P (base))
+ {
+ /* Special case for powers of 2, giving exact result. */
+ lb_base = __mp_bases[base].big_base;
+ return (totbits + lb_base - 1) / lb_base;
+ }
+ else
+ return (size_t) (totbits * __mp_bases[base].chars_per_bit_exactly) + 1;
}
diff --git a/gmp/mpn/generic/sqr.c b/gmp/mpn/generic/sqr.c
deleted file mode 100644
index 3743761f78..0000000000
--- a/gmp/mpn/generic/sqr.c
+++ /dev/null
@@ -1,99 +0,0 @@
-/* mpn_sqr -- square natural numbers.
-
-Copyright 1991, 1993, 1994, 1996-2003, 2005, 2008, 2009 Free Software
-Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-void
-mpn_sqr (mp_ptr p, mp_srcptr a, mp_size_t n)
-{
- ASSERT (n >= 1);
- ASSERT (! MPN_OVERLAP_P (p, 2 * n, a, n));
-
- if (BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
- { /* mul_basecase is faster than sqr_basecase on small sizes sometimes */
- mpn_mul_basecase (p, a, n, a, n);
- }
- else if (BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))
- {
- mpn_sqr_basecase (p, a, n);
- }
- else if (BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))
- {
- /* Allocate workspace of fixed size on stack: fast! */
- mp_limb_t ws[mpn_toom2_sqr_itch (SQR_TOOM3_THRESHOLD_LIMIT-1)];
- ASSERT (SQR_TOOM3_THRESHOLD <= SQR_TOOM3_THRESHOLD_LIMIT);
- mpn_toom2_sqr (p, a, n, ws);
- }
- else if (BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))
- {
- mp_ptr ws;
- TMP_SDECL;
- TMP_SMARK;
- ws = TMP_SALLOC_LIMBS (mpn_toom3_sqr_itch (n));
- mpn_toom3_sqr (p, a, n, ws);
- TMP_SFREE;
- }
- else if (BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD))
- {
- mp_ptr ws;
- TMP_SDECL;
- TMP_SMARK;
- ws = TMP_SALLOC_LIMBS (mpn_toom4_sqr_itch (n));
- mpn_toom4_sqr (p, a, n, ws);
- TMP_SFREE;
- }
- else if (BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD))
- {
- mp_ptr ws;
- TMP_SDECL;
- TMP_SMARK;
- ws = TMP_SALLOC_LIMBS (mpn_toom6_sqr_itch (n));
- mpn_toom6_sqr (p, a, n, ws);
- TMP_SFREE;
- }
- else if (BELOW_THRESHOLD (n, SQR_FFT_THRESHOLD))
- {
- mp_ptr ws;
- TMP_DECL;
- TMP_MARK;
- ws = TMP_ALLOC_LIMBS (mpn_toom8_sqr_itch (n));
- mpn_toom8_sqr (p, a, n, ws);
- TMP_FREE;
- }
- else
- {
- /* The current FFT code allocates its own space. That should probably
- change. */
- mpn_fft_mul (p, a, n, a, n);
- }
-}
diff --git a/gmp/mpn/generic/sqr_basecase.c b/gmp/mpn/generic/sqr_basecase.c
index fc6a043a94..56d22216f6 100644
--- a/gmp/mpn/generic/sqr_basecase.c
+++ b/gmp/mpn/generic/sqr_basecase.c
@@ -5,34 +5,23 @@
SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
-Copyright 1991-1994, 1996, 1997, 2000-2005, 2008, 2010, 2011 Free Software
-Foundation, Inc.
+Copyright 1991, 1992, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2003, 2004,
+2005, 2008 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
@@ -56,30 +45,6 @@ see https://www.gnu.org/licenses/. */
} while (0)
#endif
-#if HAVE_NATIVE_mpn_sqr_diag_addlsh1
-#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \
- mpn_sqr_diag_addlsh1 (rp, tp, up, n)
-#else
-#if HAVE_NATIVE_mpn_addlsh1_n
-#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \
- do { \
- mp_limb_t cy; \
- MPN_SQR_DIAGONAL (rp, up, n); \
- cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2); \
- rp[2 * n - 1] += cy; \
- } while (0)
-#else
-#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \
- do { \
- mp_limb_t cy; \
- MPN_SQR_DIAGONAL (rp, up, n); \
- cy = mpn_lshift (tp, tp, 2 * n - 2, 1); \
- cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2); \
- rp[2 * n - 1] += cy; \
- } while (0)
-#endif
-#endif
-
#undef READY_WITH_mpn_sqr_basecase
@@ -89,12 +54,12 @@ void
mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
{
mp_size_t i;
- mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
+ mp_limb_t tarr[2 * SQR_KARATSUBA_THRESHOLD];
mp_ptr tp = tarr;
mp_limb_t cy;
/* must fit 2*n limbs in tarr */
- ASSERT (n <= SQR_TOOM2_THRESHOLD);
+ ASSERT (n <= SQR_KARATSUBA_THRESHOLD);
if ((n & 1) != 0)
{
@@ -119,13 +84,9 @@ mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
{
if (n == 2)
{
-#if HAVE_NATIVE_mpn_mul_2
- rp[3] = mpn_mul_2 (rp, up, 2, up);
-#else
rp[0] = 0;
rp[1] = 0;
rp[3] = mpn_addmul_2 (rp, up, 2, up);
-#endif
return;
}
@@ -140,7 +101,15 @@ mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
tp[2 * n - 3] = cy;
}
- MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
+ MPN_SQR_DIAGONAL (rp, up, n);
+
+#if HAVE_NATIVE_mpn_addlsh1_n
+ cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
+#else
+ cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
+ cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
+#endif
+ rp[2 * n - 1] += cy;
}
#define READY_WITH_mpn_sqr_basecase
#endif
@@ -167,12 +136,12 @@ void
mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
{
mp_size_t i;
- mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
+ mp_limb_t tarr[2 * SQR_KARATSUBA_THRESHOLD];
mp_ptr tp = tarr;
mp_limb_t cy;
/* must fit 2*n limbs in tarr */
- ASSERT (n <= SQR_TOOM2_THRESHOLD);
+ ASSERT (n <= SQR_KARATSUBA_THRESHOLD);
if ((n & 1) != 0)
{
@@ -225,13 +194,9 @@ mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
if (n == 2)
{
-#if HAVE_NATIVE_mpn_mul_2
- rp[3] = mpn_mul_2 (rp, up, 2, up);
-#else
rp[0] = 0;
rp[1] = 0;
rp[3] = mpn_addmul_2 (rp, up, 2, up);
-#endif
return;
}
@@ -303,12 +268,12 @@ mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
}
if (n > 1)
{
- mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
+ mp_limb_t tarr[2 * SQR_KARATSUBA_THRESHOLD];
mp_ptr tp = tarr;
mp_limb_t cy;
/* must fit 2*n limbs in tarr */
- ASSERT (n <= SQR_TOOM2_THRESHOLD);
+ ASSERT (n <= SQR_KARATSUBA_THRESHOLD);
cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
tp[n - 1] = cy;
@@ -318,8 +283,18 @@ mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
tp[n + i - 2] = cy;
}
+ MPN_SQR_DIAGONAL (rp + 2, up + 1, n - 1);
- MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
+ {
+ mp_limb_t cy;
+#if HAVE_NATIVE_mpn_addlsh1_n
+ cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
+#else
+ cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
+ cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
+#endif
+ rp[2 * n - 1] += cy;
+ }
}
}
#endif
diff --git a/gmp/mpn/generic/sqrmod_bnm1.c b/gmp/mpn/generic/sqrmod_bnm1.c
deleted file mode 100644
index fd0868b90b..0000000000
--- a/gmp/mpn/generic/sqrmod_bnm1.c
+++ /dev/null
@@ -1,313 +0,0 @@
-/* sqrmod_bnm1.c -- squaring mod B^n-1.
-
- Contributed to the GNU project by Niels Möller, Torbjorn Granlund and
- Marco Bodrato.
-
- THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-/* Input is {ap,rn}; output is {rp,rn}, computation is
- mod B^rn - 1, and values are semi-normalised; zero is represented
- as either 0 or B^n - 1. Needs a scratch of 2rn limbs at tp.
- tp==rp is allowed. */
-static void
-mpn_bc_sqrmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp)
-{
- mp_limb_t cy;
-
- ASSERT (0 < rn);
-
- mpn_sqr (tp, ap, rn);
- cy = mpn_add_n (rp, tp, tp + rn, rn);
- /* If cy == 1, then the value of rp is at most B^rn - 2, so there can
- * be no overflow when adding in the carry. */
- MPN_INCR_U (rp, rn, cy);
-}
-
-
-/* Input is {ap,rn+1}; output is {rp,rn+1}, in
- semi-normalised representation, computation is mod B^rn + 1. Needs
- a scratch area of 2rn + 2 limbs at tp; tp == rp is allowed.
- Output is normalised. */
-static void
-mpn_bc_sqrmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp)
-{
- mp_limb_t cy;
-
- ASSERT (0 < rn);
-
- mpn_sqr (tp, ap, rn + 1);
- ASSERT (tp[2*rn+1] == 0);
- ASSERT (tp[2*rn] < GMP_NUMB_MAX);
- cy = tp[2*rn] + mpn_sub_n (rp, tp, tp+rn, rn);
- rp[rn] = 0;
- MPN_INCR_U (rp, rn+1, cy );
-}
-
-
-/* Computes {rp,MIN(rn,2an)} <- {ap,an}^2 Mod(B^rn-1)
- *
- * The result is expected to be ZERO if and only if the operand
- * already is. Otherwise the class [0] Mod(B^rn-1) is represented by
- * B^rn-1.
- * It should not be a problem if sqrmod_bnm1 is used to
- * compute the full square with an <= 2*rn, because this condition
- * implies (B^an-1)^2 < (B^rn-1) .
- *
- * Requires rn/4 < an <= rn
- * Scratch need: rn/2 + (need for recursive call OR rn + 3). This gives
- *
- * S(n) <= rn/2 + MAX (rn + 4, S(n/2)) <= 3/2 rn + 4
- */
-void
-mpn_sqrmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_ptr tp)
-{
- ASSERT (0 < an);
- ASSERT (an <= rn);
-
- if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, SQRMOD_BNM1_THRESHOLD))
- {
- if (UNLIKELY (an < rn))
- {
- if (UNLIKELY (2*an <= rn))
- {
- mpn_sqr (rp, ap, an);
- }
- else
- {
- mp_limb_t cy;
- mpn_sqr (tp, ap, an);
- cy = mpn_add (rp, tp, rn, tp + rn, 2*an - rn);
- MPN_INCR_U (rp, rn, cy);
- }
- }
- else
- mpn_bc_sqrmod_bnm1 (rp, ap, rn, tp);
- }
- else
- {
- mp_size_t n;
- mp_limb_t cy;
- mp_limb_t hi;
-
- n = rn >> 1;
-
- ASSERT (2*an > n);
-
- /* Compute xm = a^2 mod (B^n - 1), xp = a^2 mod (B^n + 1)
- and crt together as
-
- x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)]
- */
-
-#define a0 ap
-#define a1 (ap + n)
-
-#define xp tp /* 2n + 2 */
- /* am1 maybe in {xp, n} */
-#define sp1 (tp + 2*n + 2)
- /* ap1 maybe in {sp1, n + 1} */
-
- {
- mp_srcptr am1;
- mp_size_t anm;
- mp_ptr so;
-
- if (LIKELY (an > n))
- {
- so = xp + n;
- am1 = xp;
- cy = mpn_add (xp, a0, n, a1, an - n);
- MPN_INCR_U (xp, n, cy);
- anm = n;
- }
- else
- {
- so = xp;
- am1 = a0;
- anm = an;
- }
-
- mpn_sqrmod_bnm1 (rp, n, am1, anm, so);
- }
-
- {
- int k;
- mp_srcptr ap1;
- mp_size_t anp;
-
- if (LIKELY (an > n)) {
- ap1 = sp1;
- cy = mpn_sub (sp1, a0, n, a1, an - n);
- sp1[n] = 0;
- MPN_INCR_U (sp1, n + 1, cy);
- anp = n + ap1[n];
- } else {
- ap1 = a0;
- anp = an;
- }
-
- if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD))
- k=0;
- else
- {
- int mask;
- k = mpn_fft_best_k (n, 1);
- mask = (1<<k) -1;
- while (n & mask) {k--; mask >>=1;};
- }
- if (k >= FFT_FIRST_K)
- xp[n] = mpn_mul_fft (xp, n, ap1, anp, ap1, anp, k);
- else if (UNLIKELY (ap1 == a0))
- {
- ASSERT (anp <= n);
- ASSERT (2*anp > n);
- mpn_sqr (xp, a0, an);
- anp = 2*an - n;
- cy = mpn_sub (xp, xp, n, xp + n, anp);
- xp[n] = 0;
- MPN_INCR_U (xp, n+1, cy);
- }
- else
- mpn_bc_sqrmod_bnp1 (xp, ap1, n, xp);
- }
-
- /* Here the CRT recomposition begins.
-
- xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1)
- Division by 2 is a bitwise rotation.
-
- Assumes xp normalised mod (B^n+1).
-
- The residue class [0] is represented by [B^n-1]; except when
- both input are ZERO.
- */
-
-#if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc
-#if HAVE_NATIVE_mpn_rsh1add_nc
- cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */
- hi = cy << (GMP_NUMB_BITS - 1);
- cy = 0;
- /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi
- overflows, i.e. a further increment will not overflow again. */
-#else /* ! _nc */
- cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */
- hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
- cy >>= 1;
- /* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that
- the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */
-#endif
-#if GMP_NAIL_BITS == 0
- add_ssaaaa(cy, rp[n-1], cy, rp[n-1], CNST_LIMB(0), hi);
-#else
- cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1);
- rp[n-1] ^= hi;
-#endif
-#else /* ! HAVE_NATIVE_mpn_rsh1add_n */
-#if HAVE_NATIVE_mpn_add_nc
- cy = mpn_add_nc(rp, rp, xp, n, xp[n]);
-#else /* ! _nc */
- cy = xp[n] + mpn_add_n(rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */
-#endif
- cy += (rp[0]&1);
- mpn_rshift(rp, rp, n, 1);
- ASSERT (cy <= 2);
- hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
- cy >>= 1;
- /* We can have cy != 0 only if hi = 0... */
- ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0);
- rp[n-1] |= hi;
- /* ... rp[n-1] + cy can not overflow, the following INCR is correct. */
-#endif
- ASSERT (cy <= 1);
- /* Next increment can not overflow, read the previous comments about cy. */
- ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0));
- MPN_INCR_U(rp, n, cy);
-
- /* Compute the highest half:
- ([(xp + xm)/2 mod (B^n-1)] - xp ) * B^n
- */
- if (UNLIKELY (2*an < rn))
- {
- /* Note that in this case, the only way the result can equal
- zero mod B^{rn} - 1 is if the input is zero, and
- then the output of both the recursive calls and this CRT
- reconstruction is zero, not B^{rn} - 1. */
- cy = mpn_sub_n (rp + n, rp, xp, 2*an - n);
-
- /* FIXME: This subtraction of the high parts is not really
- necessary, we do it to get the carry out, and for sanity
- checking. */
- cy = xp[n] + mpn_sub_nc (xp + 2*an - n, rp + 2*an - n,
- xp + 2*an - n, rn - 2*an, cy);
- ASSERT (mpn_zero_p (xp + 2*an - n+1, rn - 1 - 2*an));
- cy = mpn_sub_1 (rp, rp, 2*an, cy);
- ASSERT (cy == (xp + 2*an - n)[0]);
- }
- else
- {
- cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n);
- /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO.
- DECR will affect _at most_ the lowest n limbs. */
- MPN_DECR_U (rp, 2*n, cy);
- }
-#undef a0
-#undef a1
-#undef xp
-#undef sp1
- }
-}
-
-mp_size_t
-mpn_sqrmod_bnm1_next_size (mp_size_t n)
-{
- mp_size_t nh;
-
- if (BELOW_THRESHOLD (n, SQRMOD_BNM1_THRESHOLD))
- return n;
- if (BELOW_THRESHOLD (n, 4 * (SQRMOD_BNM1_THRESHOLD - 1) + 1))
- return (n + (2-1)) & (-2);
- if (BELOW_THRESHOLD (n, 8 * (SQRMOD_BNM1_THRESHOLD - 1) + 1))
- return (n + (4-1)) & (-4);
-
- nh = (n + 1) >> 1;
-
- if (BELOW_THRESHOLD (nh, SQR_FFT_MODF_THRESHOLD))
- return (n + (8-1)) & (-8);
-
- return 2 * mpn_fft_next_size (nh, mpn_fft_best_k (nh, 1));
-}
diff --git a/gmp/mpn/generic/sqrtrem.c b/gmp/mpn/generic/sqrtrem.c
index 7d0f120001..ac878c5083 100644
--- a/gmp/mpn/generic/sqrtrem.c
+++ b/gmp/mpn/generic/sqrtrem.c
@@ -8,34 +8,23 @@
INTERFACES. IN FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR
DISAPPEAR IN A FUTURE GMP RELEASE.
-Copyright 1999-2002, 2004, 2005, 2008, 2010, 2012 Free Software Foundation,
+Copyright 1999, 2000, 2001, 2002, 2004, 2005, 2008 Free Software Foundation,
Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
/* See "Karatsuba Square Root", reference in gmp.texi. */
@@ -48,64 +37,64 @@ see https://www.gnu.org/licenses/. */
#include "gmp-impl.h"
#include "longlong.h"
-static const unsigned char invsqrttab[384] = /* The common 0x100 was removed */
+static const unsigned short invsqrttab[384] =
{
- 0xff,0xfd,0xfb,0xf9,0xf7,0xf5,0xf3,0xf2, /* sqrt(1/80)..sqrt(1/87) */
- 0xf0,0xee,0xec,0xea,0xe9,0xe7,0xe5,0xe4, /* sqrt(1/88)..sqrt(1/8f) */
- 0xe2,0xe0,0xdf,0xdd,0xdb,0xda,0xd8,0xd7, /* sqrt(1/90)..sqrt(1/97) */
- 0xd5,0xd4,0xd2,0xd1,0xcf,0xce,0xcc,0xcb, /* sqrt(1/98)..sqrt(1/9f) */
- 0xc9,0xc8,0xc6,0xc5,0xc4,0xc2,0xc1,0xc0, /* sqrt(1/a0)..sqrt(1/a7) */
- 0xbe,0xbd,0xbc,0xba,0xb9,0xb8,0xb7,0xb5, /* sqrt(1/a8)..sqrt(1/af) */
- 0xb4,0xb3,0xb2,0xb0,0xaf,0xae,0xad,0xac, /* sqrt(1/b0)..sqrt(1/b7) */
- 0xaa,0xa9,0xa8,0xa7,0xa6,0xa5,0xa4,0xa3, /* sqrt(1/b8)..sqrt(1/bf) */
- 0xa2,0xa0,0x9f,0x9e,0x9d,0x9c,0x9b,0x9a, /* sqrt(1/c0)..sqrt(1/c7) */
- 0x99,0x98,0x97,0x96,0x95,0x94,0x93,0x92, /* sqrt(1/c8)..sqrt(1/cf) */
- 0x91,0x90,0x8f,0x8e,0x8d,0x8c,0x8c,0x8b, /* sqrt(1/d0)..sqrt(1/d7) */
- 0x8a,0x89,0x88,0x87,0x86,0x85,0x84,0x83, /* sqrt(1/d8)..sqrt(1/df) */
- 0x83,0x82,0x81,0x80,0x7f,0x7e,0x7e,0x7d, /* sqrt(1/e0)..sqrt(1/e7) */
- 0x7c,0x7b,0x7a,0x79,0x79,0x78,0x77,0x76, /* sqrt(1/e8)..sqrt(1/ef) */
- 0x76,0x75,0x74,0x73,0x72,0x72,0x71,0x70, /* sqrt(1/f0)..sqrt(1/f7) */
- 0x6f,0x6f,0x6e,0x6d,0x6d,0x6c,0x6b,0x6a, /* sqrt(1/f8)..sqrt(1/ff) */
- 0x6a,0x69,0x68,0x68,0x67,0x66,0x66,0x65, /* sqrt(1/100)..sqrt(1/107) */
- 0x64,0x64,0x63,0x62,0x62,0x61,0x60,0x60, /* sqrt(1/108)..sqrt(1/10f) */
- 0x5f,0x5e,0x5e,0x5d,0x5c,0x5c,0x5b,0x5a, /* sqrt(1/110)..sqrt(1/117) */
- 0x5a,0x59,0x59,0x58,0x57,0x57,0x56,0x56, /* sqrt(1/118)..sqrt(1/11f) */
- 0x55,0x54,0x54,0x53,0x53,0x52,0x52,0x51, /* sqrt(1/120)..sqrt(1/127) */
- 0x50,0x50,0x4f,0x4f,0x4e,0x4e,0x4d,0x4d, /* sqrt(1/128)..sqrt(1/12f) */
- 0x4c,0x4b,0x4b,0x4a,0x4a,0x49,0x49,0x48, /* sqrt(1/130)..sqrt(1/137) */
- 0x48,0x47,0x47,0x46,0x46,0x45,0x45,0x44, /* sqrt(1/138)..sqrt(1/13f) */
- 0x44,0x43,0x43,0x42,0x42,0x41,0x41,0x40, /* sqrt(1/140)..sqrt(1/147) */
- 0x40,0x3f,0x3f,0x3e,0x3e,0x3d,0x3d,0x3c, /* sqrt(1/148)..sqrt(1/14f) */
- 0x3c,0x3b,0x3b,0x3a,0x3a,0x39,0x39,0x39, /* sqrt(1/150)..sqrt(1/157) */
- 0x38,0x38,0x37,0x37,0x36,0x36,0x35,0x35, /* sqrt(1/158)..sqrt(1/15f) */
- 0x35,0x34,0x34,0x33,0x33,0x32,0x32,0x32, /* sqrt(1/160)..sqrt(1/167) */
- 0x31,0x31,0x30,0x30,0x2f,0x2f,0x2f,0x2e, /* sqrt(1/168)..sqrt(1/16f) */
- 0x2e,0x2d,0x2d,0x2d,0x2c,0x2c,0x2b,0x2b, /* sqrt(1/170)..sqrt(1/177) */
- 0x2b,0x2a,0x2a,0x29,0x29,0x29,0x28,0x28, /* sqrt(1/178)..sqrt(1/17f) */
- 0x27,0x27,0x27,0x26,0x26,0x26,0x25,0x25, /* sqrt(1/180)..sqrt(1/187) */
- 0x24,0x24,0x24,0x23,0x23,0x23,0x22,0x22, /* sqrt(1/188)..sqrt(1/18f) */
- 0x21,0x21,0x21,0x20,0x20,0x20,0x1f,0x1f, /* sqrt(1/190)..sqrt(1/197) */
- 0x1f,0x1e,0x1e,0x1e,0x1d,0x1d,0x1d,0x1c, /* sqrt(1/198)..sqrt(1/19f) */
- 0x1c,0x1b,0x1b,0x1b,0x1a,0x1a,0x1a,0x19, /* sqrt(1/1a0)..sqrt(1/1a7) */
- 0x19,0x19,0x18,0x18,0x18,0x18,0x17,0x17, /* sqrt(1/1a8)..sqrt(1/1af) */
- 0x17,0x16,0x16,0x16,0x15,0x15,0x15,0x14, /* sqrt(1/1b0)..sqrt(1/1b7) */
- 0x14,0x14,0x13,0x13,0x13,0x12,0x12,0x12, /* sqrt(1/1b8)..sqrt(1/1bf) */
- 0x12,0x11,0x11,0x11,0x10,0x10,0x10,0x0f, /* sqrt(1/1c0)..sqrt(1/1c7) */
- 0x0f,0x0f,0x0f,0x0e,0x0e,0x0e,0x0d,0x0d, /* sqrt(1/1c8)..sqrt(1/1cf) */
- 0x0d,0x0c,0x0c,0x0c,0x0c,0x0b,0x0b,0x0b, /* sqrt(1/1d0)..sqrt(1/1d7) */
- 0x0a,0x0a,0x0a,0x0a,0x09,0x09,0x09,0x09, /* sqrt(1/1d8)..sqrt(1/1df) */
- 0x08,0x08,0x08,0x07,0x07,0x07,0x07,0x06, /* sqrt(1/1e0)..sqrt(1/1e7) */
- 0x06,0x06,0x06,0x05,0x05,0x05,0x04,0x04, /* sqrt(1/1e8)..sqrt(1/1ef) */
- 0x04,0x04,0x03,0x03,0x03,0x03,0x02,0x02, /* sqrt(1/1f0)..sqrt(1/1f7) */
- 0x02,0x02,0x01,0x01,0x01,0x01,0x00,0x00 /* sqrt(1/1f8)..sqrt(1/1ff) */
+ 0x1ff,0x1fd,0x1fb,0x1f9,0x1f7,0x1f5,0x1f3,0x1f2, /* sqrt(1/80)..sqrt(1/87) */
+ 0x1f0,0x1ee,0x1ec,0x1ea,0x1e9,0x1e7,0x1e5,0x1e4, /* sqrt(1/88)..sqrt(1/8f) */
+ 0x1e2,0x1e0,0x1df,0x1dd,0x1db,0x1da,0x1d8,0x1d7, /* sqrt(1/90)..sqrt(1/97) */
+ 0x1d5,0x1d4,0x1d2,0x1d1,0x1cf,0x1ce,0x1cc,0x1cb, /* sqrt(1/98)..sqrt(1/9f) */
+ 0x1c9,0x1c8,0x1c6,0x1c5,0x1c4,0x1c2,0x1c1,0x1c0, /* sqrt(1/a0)..sqrt(1/a7) */
+ 0x1be,0x1bd,0x1bc,0x1ba,0x1b9,0x1b8,0x1b7,0x1b5, /* sqrt(1/a8)..sqrt(1/af) */
+ 0x1b4,0x1b3,0x1b2,0x1b0,0x1af,0x1ae,0x1ad,0x1ac, /* sqrt(1/b0)..sqrt(1/b7) */
+ 0x1aa,0x1a9,0x1a8,0x1a7,0x1a6,0x1a5,0x1a4,0x1a3, /* sqrt(1/b8)..sqrt(1/bf) */
+ 0x1a2,0x1a0,0x19f,0x19e,0x19d,0x19c,0x19b,0x19a, /* sqrt(1/c0)..sqrt(1/c7) */
+ 0x199,0x198,0x197,0x196,0x195,0x194,0x193,0x192, /* sqrt(1/c8)..sqrt(1/cf) */
+ 0x191,0x190,0x18f,0x18e,0x18d,0x18c,0x18c,0x18b, /* sqrt(1/d0)..sqrt(1/d7) */
+ 0x18a,0x189,0x188,0x187,0x186,0x185,0x184,0x183, /* sqrt(1/d8)..sqrt(1/df) */
+ 0x183,0x182,0x181,0x180,0x17f,0x17e,0x17e,0x17d, /* sqrt(1/e0)..sqrt(1/e7) */
+ 0x17c,0x17b,0x17a,0x179,0x179,0x178,0x177,0x176, /* sqrt(1/e8)..sqrt(1/ef) */
+ 0x176,0x175,0x174,0x173,0x172,0x172,0x171,0x170, /* sqrt(1/f0)..sqrt(1/f7) */
+ 0x16f,0x16f,0x16e,0x16d,0x16d,0x16c,0x16b,0x16a, /* sqrt(1/f8)..sqrt(1/ff) */
+ 0x16a,0x169,0x168,0x168,0x167,0x166,0x166,0x165, /* sqrt(1/100)..sqrt(1/107) */
+ 0x164,0x164,0x163,0x162,0x162,0x161,0x160,0x160, /* sqrt(1/108)..sqrt(1/10f) */
+ 0x15f,0x15e,0x15e,0x15d,0x15c,0x15c,0x15b,0x15a, /* sqrt(1/110)..sqrt(1/117) */
+ 0x15a,0x159,0x159,0x158,0x157,0x157,0x156,0x156, /* sqrt(1/118)..sqrt(1/11f) */
+ 0x155,0x154,0x154,0x153,0x153,0x152,0x152,0x151, /* sqrt(1/120)..sqrt(1/127) */
+ 0x150,0x150,0x14f,0x14f,0x14e,0x14e,0x14d,0x14d, /* sqrt(1/128)..sqrt(1/12f) */
+ 0x14c,0x14b,0x14b,0x14a,0x14a,0x149,0x149,0x148, /* sqrt(1/130)..sqrt(1/137) */
+ 0x148,0x147,0x147,0x146,0x146,0x145,0x145,0x144, /* sqrt(1/138)..sqrt(1/13f) */
+ 0x144,0x143,0x143,0x142,0x142,0x141,0x141,0x140, /* sqrt(1/140)..sqrt(1/147) */
+ 0x140,0x13f,0x13f,0x13e,0x13e,0x13d,0x13d,0x13c, /* sqrt(1/148)..sqrt(1/14f) */
+ 0x13c,0x13b,0x13b,0x13a,0x13a,0x139,0x139,0x139, /* sqrt(1/150)..sqrt(1/157) */
+ 0x138,0x138,0x137,0x137,0x136,0x136,0x135,0x135, /* sqrt(1/158)..sqrt(1/15f) */
+ 0x135,0x134,0x134,0x133,0x133,0x132,0x132,0x132, /* sqrt(1/160)..sqrt(1/167) */
+ 0x131,0x131,0x130,0x130,0x12f,0x12f,0x12f,0x12e, /* sqrt(1/168)..sqrt(1/16f) */
+ 0x12e,0x12d,0x12d,0x12d,0x12c,0x12c,0x12b,0x12b, /* sqrt(1/170)..sqrt(1/177) */
+ 0x12b,0x12a,0x12a,0x129,0x129,0x129,0x128,0x128, /* sqrt(1/178)..sqrt(1/17f) */
+ 0x127,0x127,0x127,0x126,0x126,0x126,0x125,0x125, /* sqrt(1/180)..sqrt(1/187) */
+ 0x124,0x124,0x124,0x123,0x123,0x123,0x122,0x122, /* sqrt(1/188)..sqrt(1/18f) */
+ 0x121,0x121,0x121,0x120,0x120,0x120,0x11f,0x11f, /* sqrt(1/190)..sqrt(1/197) */
+ 0x11f,0x11e,0x11e,0x11e,0x11d,0x11d,0x11d,0x11c, /* sqrt(1/198)..sqrt(1/19f) */
+ 0x11c,0x11b,0x11b,0x11b,0x11a,0x11a,0x11a,0x119, /* sqrt(1/1a0)..sqrt(1/1a7) */
+ 0x119,0x119,0x118,0x118,0x118,0x118,0x117,0x117, /* sqrt(1/1a8)..sqrt(1/1af) */
+ 0x117,0x116,0x116,0x116,0x115,0x115,0x115,0x114, /* sqrt(1/1b0)..sqrt(1/1b7) */
+ 0x114,0x114,0x113,0x113,0x113,0x112,0x112,0x112, /* sqrt(1/1b8)..sqrt(1/1bf) */
+ 0x112,0x111,0x111,0x111,0x110,0x110,0x110,0x10f, /* sqrt(1/1c0)..sqrt(1/1c7) */
+ 0x10f,0x10f,0x10f,0x10e,0x10e,0x10e,0x10d,0x10d, /* sqrt(1/1c8)..sqrt(1/1cf) */
+ 0x10d,0x10c,0x10c,0x10c,0x10c,0x10b,0x10b,0x10b, /* sqrt(1/1d0)..sqrt(1/1d7) */
+ 0x10a,0x10a,0x10a,0x10a,0x109,0x109,0x109,0x109, /* sqrt(1/1d8)..sqrt(1/1df) */
+ 0x108,0x108,0x108,0x107,0x107,0x107,0x107,0x106, /* sqrt(1/1e0)..sqrt(1/1e7) */
+ 0x106,0x106,0x106,0x105,0x105,0x105,0x104,0x104, /* sqrt(1/1e8)..sqrt(1/1ef) */
+ 0x104,0x104,0x103,0x103,0x103,0x103,0x102,0x102, /* sqrt(1/1f0)..sqrt(1/1f7) */
+ 0x102,0x102,0x101,0x101,0x101,0x101,0x100,0x100 /* sqrt(1/1f8)..sqrt(1/1ff) */
};
/* Compute s = floor(sqrt(a0)), and *rp = a0 - s^2. */
#if GMP_NUMB_BITS > 32
-#define MAGIC CNST_LIMB(0x10000000000) /* 0xffe7debbfc < MAGIC < 0x232b1850f410 */
+#define MAGIC 0x10000000000 /* 0xffe7debbfc < MAGIC < 0x232b1850f410 */
#else
-#define MAGIC CNST_LIMB(0x100000) /* 0xfee6f < MAGIC < 0x29cbc8 */
+#define MAGIC 0x100000 /* 0xfee6f < MAGIC < 0x29cbc8 */
#endif
static mp_limb_t
@@ -126,16 +115,16 @@ mpn_sqrtrem1 (mp_ptr rp, mp_limb_t a0)
iteration convert from 1/sqrt(a) to sqrt(a). */
abits = a0 >> (GMP_LIMB_BITS - 1 - 8); /* extract bits for table lookup */
- x0 = 0x100 | invsqrttab[abits - 0x80]; /* initial 1/sqrt(a) */
+ x0 = invsqrttab[abits - 0x80]; /* initial 1/sqrt(a) */
/* x0 is now an 8 bits approximation of 1/sqrt(a0) */
#if GMP_NUMB_BITS > 32
a1 = a0 >> (GMP_LIMB_BITS - 1 - 32);
- t = (mp_limb_signed_t) (CNST_LIMB(0x2000000000000) - 0x30000 - a1 * x0 * x0) >> 16;
+ t = (mp_limb_signed_t) (0x2000000000000l - 0x30000 - a1 * x0 * x0) >> 16;
x0 = (x0 << 16) + ((mp_limb_signed_t) (x0 * t) >> (16+2));
- /* x0 is now a 16 bits approximation of 1/sqrt(a0) */
+ /* x0 is now an 16 bits approximation of 1/sqrt(a0) */
t2 = x0 * (a0 >> (32-8));
t = t2 >> 25;
@@ -250,18 +239,14 @@ mpn_dc_sqrtrem (mp_ptr sp, mp_ptr np, mp_size_t n)
q >>= 1;
if (c != 0)
c = mpn_add_n (np + l, np + l, sp + l, h);
- mpn_sqr (np + n, sp, l);
+ mpn_sqr_n (np + n, sp, l);
b = q + mpn_sub_n (np, np, np + n, 2 * l);
c -= (l == h) ? b : mpn_sub_1 (np + 2 * l, np + 2 * l, 1, (mp_limb_t) b);
q = mpn_add_1 (sp + l, sp + l, h, q);
if (c < 0)
{
-#if HAVE_NATIVE_mpn_addlsh1_n
- c += mpn_addlsh1_n (np, np, sp, n) + 2 * q;
-#else
c += mpn_addmul_1 (np, sp, n, CNST_LIMB(2)) + 2 * q;
-#endif
c -= mpn_sub_1 (np, np, n, CNST_LIMB(1));
q -= mpn_sub_1 (sp, sp, n, CNST_LIMB(1));
}
diff --git a/gmp/mpn/generic/sub.c b/gmp/mpn/generic/sub.c
index 3fbcbbe98b..ada3e91b83 100644
--- a/gmp/mpn/generic/sub.c
+++ b/gmp/mpn/generic/sub.c
@@ -5,28 +5,17 @@ Copyright 2001 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#define __GMP_FORCE_mpn_sub 1
diff --git a/gmp/mpn/generic/sub_1.c b/gmp/mpn/generic/sub_1.c
index db2e6f948f..4ed2eabccb 100644
--- a/gmp/mpn/generic/sub_1.c
+++ b/gmp/mpn/generic/sub_1.c
@@ -5,28 +5,17 @@ Copyright 2001 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#define __GMP_FORCE_mpn_sub_1 1
diff --git a/gmp/mpn/generic/sub_err1_n.c b/gmp/mpn/generic/sub_err1_n.c
deleted file mode 100644
index 340313a323..0000000000
--- a/gmp/mpn/generic/sub_err1_n.c
+++ /dev/null
@@ -1,101 +0,0 @@
-/* mpn_sub_err1_n -- sub_n with one error term
-
- Contributed by David Harvey.
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/*
- Computes:
-
- (1) {rp,n} := {up,n} - {vp,n} (just like mpn_sub_n) with incoming borrow cy,
- return value is borrow out.
-
- (2) Let c[i+1] = borrow from i-th limb subtraction (c[0] = cy).
- Computes c[1]*yp[n-1] + ... + c[n]*yp[0], stores two-limb result at ep.
-
- Requires n >= 1.
-
- None of the outputs may overlap each other or any of the inputs, except
- that {rp,n} may be equal to {up,n} or {vp,n}.
-*/
-mp_limb_t
-mpn_sub_err1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
- mp_ptr ep, mp_srcptr yp,
- mp_size_t n, mp_limb_t cy)
-{
- mp_limb_t el, eh, ul, vl, yl, zl, rl, sl, cy1, cy2;
-
- ASSERT (n >= 1);
- ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
- ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
- ASSERT (! MPN_OVERLAP_P (rp, n, yp, n));
- ASSERT (! MPN_OVERLAP_P (ep, 2, up, n));
- ASSERT (! MPN_OVERLAP_P (ep, 2, vp, n));
- ASSERT (! MPN_OVERLAP_P (ep, 2, yp, n));
- ASSERT (! MPN_OVERLAP_P (ep, 2, rp, n));
-
- yp += n - 1;
- el = eh = 0;
-
- do
- {
- yl = *yp--;
- ul = *up++;
- vl = *vp++;
-
- /* ordinary sub_n */
- SUBC_LIMB (cy1, sl, ul, vl);
- SUBC_LIMB (cy2, rl, sl, cy);
- cy = cy1 | cy2;
- *rp++ = rl;
-
- /* update (eh:el) */
- zl = (-cy) & yl;
- el += zl;
- eh += el < zl;
- }
- while (--n);
-
-#if GMP_NAIL_BITS != 0
- eh = (eh << GMP_NAIL_BITS) + (el >> GMP_NUMB_BITS);
- el &= GMP_NUMB_MASK;
-#endif
-
- ep[0] = el;
- ep[1] = eh;
-
- return cy;
-}
diff --git a/gmp/mpn/generic/sub_err2_n.c b/gmp/mpn/generic/sub_err2_n.c
deleted file mode 100644
index 63ea2451b4..0000000000
--- a/gmp/mpn/generic/sub_err2_n.c
+++ /dev/null
@@ -1,117 +0,0 @@
-/* mpn_sub_err2_n -- sub_n with two error terms
-
- Contributed by David Harvey.
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/*
- Computes:
-
- (1) {rp,n} := {up,n} - {vp,n} (just like mpn_sub_n) with incoming borrow cy,
- return value is borrow out.
-
- (2) Let c[i+1] = borrow from i-th limb subtraction (c[0] = cy).
- Computes c[1]*yp1[n-1] + ... + c[n]*yp1[0],
- c[1]*yp2[n-1] + ... + c[n]*yp2[0],
- stores two-limb results at {ep,2} and {ep+2,2} respectively.
-
- Requires n >= 1.
-
- None of the outputs may overlap each other or any of the inputs, except
- that {rp,n} may be equal to {up,n} or {vp,n}.
-*/
-mp_limb_t
-mpn_sub_err2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
- mp_ptr ep, mp_srcptr yp1, mp_srcptr yp2,
- mp_size_t n, mp_limb_t cy)
-{
- mp_limb_t el1, eh1, el2, eh2, ul, vl, yl1, yl2, zl1, zl2, rl, sl, cy1, cy2;
-
- ASSERT (n >= 1);
- ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
- ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
- ASSERT (! MPN_OVERLAP_P (rp, n, yp1, n));
- ASSERT (! MPN_OVERLAP_P (rp, n, yp2, n));
- ASSERT (! MPN_OVERLAP_P (ep, 4, up, n));
- ASSERT (! MPN_OVERLAP_P (ep, 4, vp, n));
- ASSERT (! MPN_OVERLAP_P (ep, 4, yp1, n));
- ASSERT (! MPN_OVERLAP_P (ep, 4, yp2, n));
- ASSERT (! MPN_OVERLAP_P (ep, 4, rp, n));
-
- yp1 += n - 1;
- yp2 += n - 1;
- el1 = eh1 = 0;
- el2 = eh2 = 0;
-
- do
- {
- yl1 = *yp1--;
- yl2 = *yp2--;
- ul = *up++;
- vl = *vp++;
-
- /* ordinary sub_n */
- SUBC_LIMB (cy1, sl, ul, vl);
- SUBC_LIMB (cy2, rl, sl, cy);
- cy = cy1 | cy2;
- *rp++ = rl;
-
- /* update (eh1:el1) */
- zl1 = (-cy) & yl1;
- el1 += zl1;
- eh1 += el1 < zl1;
-
- /* update (eh2:el2) */
- zl2 = (-cy) & yl2;
- el2 += zl2;
- eh2 += el2 < zl2;
- }
- while (--n);
-
-#if GMP_NAIL_BITS != 0
- eh1 = (eh1 << GMP_NAIL_BITS) + (el1 >> GMP_NUMB_BITS);
- el1 &= GMP_NUMB_MASK;
- eh2 = (eh2 << GMP_NAIL_BITS) + (el2 >> GMP_NUMB_BITS);
- el2 &= GMP_NUMB_MASK;
-#endif
-
- ep[0] = el1;
- ep[1] = eh1;
- ep[2] = el2;
- ep[3] = eh2;
-
- return cy;
-}
diff --git a/gmp/mpn/generic/sub_err3_n.c b/gmp/mpn/generic/sub_err3_n.c
deleted file mode 100644
index a80e05d0d9..0000000000
--- a/gmp/mpn/generic/sub_err3_n.c
+++ /dev/null
@@ -1,132 +0,0 @@
-/* mpn_sub_err3_n -- sub_n with three error terms
-
- Contributed by David Harvey.
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/*
- Computes:
-
- (1) {rp,n} := {up,n} - {vp,n} (just like mpn_sub_n) with incoming borrow cy,
- return value is borrow out.
-
- (2) Let c[i+1] = borrow from i-th limb subtraction (c[0] = cy).
- Computes c[1]*yp1[n-1] + ... + c[n]*yp1[0],
- c[1]*yp2[n-1] + ... + c[n]*yp2[0],
- c[1]*yp3[n-1] + ... + c[n]*yp3[0],
- stores two-limb results at {ep,2}, {ep+2,2} and {ep+4,2} respectively.
-
- Requires n >= 1.
-
- None of the outputs may overlap each other or any of the inputs, except
- that {rp,n} may be equal to {up,n} or {vp,n}.
-*/
-mp_limb_t
-mpn_sub_err3_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
- mp_ptr ep, mp_srcptr yp1, mp_srcptr yp2, mp_srcptr yp3,
- mp_size_t n, mp_limb_t cy)
-{
- mp_limb_t el1, eh1, el2, eh2, el3, eh3, ul, vl, yl1, yl2, yl3, zl1, zl2, zl3, rl, sl, cy1, cy2;
-
- ASSERT (n >= 1);
- ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
- ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
- ASSERT (! MPN_OVERLAP_P (rp, n, yp1, n));
- ASSERT (! MPN_OVERLAP_P (rp, n, yp2, n));
- ASSERT (! MPN_OVERLAP_P (rp, n, yp3, n));
- ASSERT (! MPN_OVERLAP_P (ep, 6, up, n));
- ASSERT (! MPN_OVERLAP_P (ep, 6, vp, n));
- ASSERT (! MPN_OVERLAP_P (ep, 6, yp1, n));
- ASSERT (! MPN_OVERLAP_P (ep, 6, yp2, n));
- ASSERT (! MPN_OVERLAP_P (ep, 6, yp3, n));
- ASSERT (! MPN_OVERLAP_P (ep, 6, rp, n));
-
- yp1 += n - 1;
- yp2 += n - 1;
- yp3 += n - 1;
- el1 = eh1 = 0;
- el2 = eh2 = 0;
- el3 = eh3 = 0;
-
- do
- {
- yl1 = *yp1--;
- yl2 = *yp2--;
- yl3 = *yp3--;
- ul = *up++;
- vl = *vp++;
-
- /* ordinary sub_n */
- SUBC_LIMB (cy1, sl, ul, vl);
- SUBC_LIMB (cy2, rl, sl, cy);
- cy = cy1 | cy2;
- *rp++ = rl;
-
- /* update (eh1:el1) */
- zl1 = (-cy) & yl1;
- el1 += zl1;
- eh1 += el1 < zl1;
-
- /* update (eh2:el2) */
- zl2 = (-cy) & yl2;
- el2 += zl2;
- eh2 += el2 < zl2;
-
- /* update (eh3:el3) */
- zl3 = (-cy) & yl3;
- el3 += zl3;
- eh3 += el3 < zl3;
- }
- while (--n);
-
-#if GMP_NAIL_BITS != 0
- eh1 = (eh1 << GMP_NAIL_BITS) + (el1 >> GMP_NUMB_BITS);
- el1 &= GMP_NUMB_MASK;
- eh2 = (eh2 << GMP_NAIL_BITS) + (el2 >> GMP_NUMB_BITS);
- el2 &= GMP_NUMB_MASK;
- eh3 = (eh3 << GMP_NAIL_BITS) + (el3 >> GMP_NUMB_BITS);
- el3 &= GMP_NUMB_MASK;
-#endif
-
- ep[0] = el1;
- ep[1] = eh1;
- ep[2] = el2;
- ep[3] = eh2;
- ep[4] = el3;
- ep[5] = eh3;
-
- return cy;
-}
diff --git a/gmp/mpn/generic/sub_n.c b/gmp/mpn/generic/sub_n.c
index 29de2d2d89..d33668fa86 100644
--- a/gmp/mpn/generic/sub_n.c
+++ b/gmp/mpn/generic/sub_n.c
@@ -1,32 +1,21 @@
/* mpn_sub_n -- Subtract equal length limb vectors.
-Copyright 1992-1994, 1996, 2000, 2002, 2009 Free Software Foundation, Inc.
+Copyright 1992, 1993, 1994, 1996, 2000, 2002 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
@@ -40,8 +29,8 @@ mpn_sub_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
mp_limb_t ul, vl, sl, rl, cy, cy1, cy2;
ASSERT (n >= 1);
- ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
- ASSERT (MPN_SAME_OR_INCR_P (rp, vp, n));
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
cy = 0;
do
@@ -70,8 +59,8 @@ mpn_sub_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
mp_limb_t ul, vl, rl, cy;
ASSERT (n >= 1);
- ASSERT (MPN_SAME_OR_INCR_P (rp, up, n));
- ASSERT (MPN_SAME_OR_INCR_P (rp, vp, n));
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
cy = 0;
do
diff --git a/gmp/mpn/generic/subcnd_n.c b/gmp/mpn/generic/subcnd_n.c
new file mode 100644
index 0000000000..0dcc45641d
--- /dev/null
+++ b/gmp/mpn/generic/subcnd_n.c
@@ -0,0 +1,85 @@
+/* mpn_subcnd_n -- Compute R = U - V if CND != 0 or R = U if CND == 0.
+
+ THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+Copyright 1992, 1993, 1994, 1996, 2000, 2002, 2008, 2009 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+#if GMP_NAIL_BITS == 0
+
+mp_limb_t
+mpn_subcnd_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t cnd)
+{
+ mp_limb_t ul, vl, sl, rl, cy, cy1, cy2, mask;
+
+ ASSERT (n >= 1);
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+
+ mask = -(mp_limb_t) (cnd != 0);
+ cy = 0;
+ do
+ {
+ ul = *up++;
+ vl = *vp++ & mask;
+ sl = ul - vl;
+ cy1 = sl > ul;
+ rl = sl - cy;
+ cy2 = rl > sl;
+ cy = cy1 | cy2;
+ *rp++ = rl;
+ }
+ while (--n != 0);
+
+ return cy;
+}
+
+#endif
+
+#if GMP_NAIL_BITS >= 1
+
+mp_limb_t
+mpn_subcnd_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t cnd)
+{
+ mp_limb_t ul, vl, rl, cy, mask;
+
+ ASSERT (n >= 1);
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+
+ mask = -(mp_limb_t) (cnd != 0);
+ cy = 0;
+ do
+ {
+ ul = *up++;
+ vl = *vp++ & mask;
+ rl = ul - vl - cy;
+ cy = rl >> (GMP_LIMB_BITS - 1);
+ *rp++ = rl & GMP_NUMB_MASK;
+ }
+ while (--n != 0);
+
+ return cy;
+}
+
+#endif
diff --git a/gmp/mpn/generic/submul_1.c b/gmp/mpn/generic/submul_1.c
index fbc3501389..3e8e74302d 100644
--- a/gmp/mpn/generic/submul_1.c
+++ b/gmp/mpn/generic/submul_1.c
@@ -3,33 +3,23 @@
vector pointed to by RP. Return the most significant limb of the
product, adjusted for carry-out from the subtraction.
-Copyright 1992-1994, 1996, 2000, 2002, 2004 Free Software Foundation, Inc.
+Copyright 1992, 1993, 1994, 1996, 2000, 2002, 2004 Free Software Foundation,
+Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
diff --git a/gmp/mpn/generic/tdiv_qr.c b/gmp/mpn/generic/tdiv_qr.c
index be213b0467..8ac4d38813 100644
--- a/gmp/mpn/generic/tdiv_qr.c
+++ b/gmp/mpn/generic/tdiv_qr.c
@@ -1,43 +1,33 @@
/* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and
write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp. If
qxn is non-zero, generate that many fraction limbs and append them after the
- other quotient limbs, and update the remainder accordingly. The input
+ other quotient limbs, and update the remainder accordningly. The input
operands are unaffected.
Preconditions:
1. The most significant limb of of the divisor must be non-zero.
- 2. nn >= dn, even if qxn is non-zero. (??? relax this ???)
+ 2. No argument overlap is permitted. (??? relax this ???)
+ 3. nn >= dn, even if qxn is non-zero. (??? relax this ???)
The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time
complexity of multiplication.
-Copyright 1997, 2000-2002, 2005, 2009 Free Software Foundation, Inc.
+Copyright 1997, 2000, 2001, 2002, 2005 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
@@ -48,8 +38,13 @@ void
mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
{
+ /* FIXME:
+ 1. qxn
+ 2. pass allocated storage in additional parameter?
+ */
ASSERT_ALWAYS (qxn == 0);
+ ASSERT (qxn >= 0);
ASSERT (nn >= 0);
ASSERT (dn >= 0);
ASSERT (dn == 0 || dp[dn - 1] != 0);
@@ -63,7 +58,7 @@ mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
case 1:
{
- rp[0] = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, dp[0]);
+ rp[0] = mpn_divmod_1 (qp, np, nn, dp[0]);
return;
}
@@ -82,7 +77,7 @@ mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
d2p = dtmp;
d2p[1] = (dp[1] << cnt) | (dp[0] >> (GMP_NUMB_BITS - cnt));
d2p[0] = (dp[0] << cnt) & GMP_NUMB_MASK;
- n2p = TMP_ALLOC_LIMBS (nn + 1);
+ n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
cy = mpn_lshift (n2p, np, nn, cnt);
n2p[nn] = cy;
qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p);
@@ -95,7 +90,7 @@ mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
else
{
d2p = (mp_ptr) dp;
- n2p = TMP_ALLOC_LIMBS (nn);
+ n2p = (mp_ptr) TMP_ALLOC (nn * BYTES_PER_MP_LIMB);
MPN_COPY (n2p, np, nn);
qhl = mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
qp[nn - 2] = qhl; /* always store nn-2+1 quotient limbs */
@@ -109,13 +104,12 @@ mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
default:
{
int adjust;
- gmp_pi1_t dinv;
TMP_DECL;
TMP_MARK;
adjust = np[nn - 1] >= dp[dn - 1]; /* conservative tests for quotient size */
if (nn + adjust >= 2 * dn)
{
- mp_ptr n2p, d2p;
+ mp_ptr n2p, d2p, q2p;
mp_limb_t cy;
int cnt;
@@ -124,9 +118,9 @@ mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
{
count_leading_zeros (cnt, dp[dn - 1]);
cnt -= GMP_NAIL_BITS;
- d2p = TMP_ALLOC_LIMBS (dn);
+ d2p = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
mpn_lshift (d2p, dp, dn, cnt);
- n2p = TMP_ALLOC_LIMBS (nn + 1);
+ n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
cy = mpn_lshift (n2p, np, nn, cnt);
n2p[nn] = cy;
nn += adjust;
@@ -135,28 +129,51 @@ mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
{
cnt = 0;
d2p = (mp_ptr) dp;
- n2p = TMP_ALLOC_LIMBS (nn + 1);
+ n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
MPN_COPY (n2p, np, nn);
n2p[nn] = 0;
nn += adjust;
}
- invert_pi1 (dinv, d2p[dn - 1], d2p[dn - 2]);
- if (BELOW_THRESHOLD (dn, DC_DIV_QR_THRESHOLD))
- mpn_sbpi1_div_qr (qp, n2p, nn, d2p, dn, dinv.inv32);
- else if (BELOW_THRESHOLD (dn, MUPI_DIV_QR_THRESHOLD) || /* fast condition */
- BELOW_THRESHOLD (nn, 2 * MU_DIV_QR_THRESHOLD) || /* fast condition */
- (double) (2 * (MU_DIV_QR_THRESHOLD - MUPI_DIV_QR_THRESHOLD)) * dn /* slow... */
- + (double) MUPI_DIV_QR_THRESHOLD * nn > (double) dn * nn) /* ...condition */
- mpn_dcpi1_div_qr (qp, n2p, nn, d2p, dn, &dinv);
+ if (dn < DIV_DC_THRESHOLD)
+ mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn);
else
{
- mp_size_t itch = mpn_mu_div_qr_itch (nn, dn, 0);
- mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
- mpn_mu_div_qr (qp, rp, n2p, nn, d2p, dn, scratch);
- n2p = rp;
+ /* Divide 2*dn / dn limbs as long as the limbs in np last. */
+ q2p = qp + nn - dn;
+ n2p += nn - dn;
+ do
+ {
+ q2p -= dn; n2p -= dn;
+ mpn_dc_divrem_n (q2p, n2p, d2p, dn);
+ nn -= dn;
+ }
+ while (nn >= 2 * dn);
+
+ if (nn != dn)
+ {
+ mp_limb_t ql;
+ n2p -= nn - dn;
+
+ /* We have now dn < nn - dn < 2dn. Make a recursive call,
+ since falling out to the code below isn't pretty.
+ Unfortunately, mpn_tdiv_qr returns nn-dn+1 quotient
+ limbs, which would overwrite one already generated
+ quotient limbs. Preserve it with an ugly hack. */
+ /* FIXME: This suggests that we should have an
+ mpn_tdiv_qr_internal that instead returns the most
+ significant quotient limb and move the meat of this
+ function there. */
+ /* FIXME: Perhaps call mpn_sb_divrem_mn here for certain
+ operand ranges, to decrease overhead for small
+ operands? */
+ ql = qp[nn - dn]; /* preserve quotient limb... */
+ mpn_tdiv_qr (qp, n2p, 0L, n2p, nn, d2p, dn);
+ qp[nn - dn] = ql; /* ...restore it again */
+ }
}
+
if (cnt != 0)
mpn_rshift (rp, n2p, dn, cnt);
else
@@ -229,11 +246,11 @@ mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
count_leading_zeros (cnt, dp[dn - 1]);
cnt -= GMP_NAIL_BITS;
- d2p = TMP_ALLOC_LIMBS (qn);
+ d2p = (mp_ptr) TMP_ALLOC (qn * BYTES_PER_MP_LIMB);
mpn_lshift (d2p, dp + in, qn, cnt);
d2p[0] |= dp[in - 1] >> (GMP_NUMB_BITS - cnt);
- n2p = TMP_ALLOC_LIMBS (2 * qn + 1);
+ n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB);
cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt);
if (adjust)
{
@@ -250,7 +267,7 @@ mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
cnt = 0;
d2p = (mp_ptr) dp + in;
- n2p = TMP_ALLOC_LIMBS (2 * qn + 1);
+ n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB);
MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn);
if (adjust)
{
@@ -263,30 +280,25 @@ mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
if (qn == 1)
{
mp_limb_t q0, r0;
- udiv_qrnnd (q0, r0, n2p[1], n2p[0] << GMP_NAIL_BITS, d2p[0] << GMP_NAIL_BITS);
- n2p[0] = r0 >> GMP_NAIL_BITS;
+ mp_limb_t gcc272bug_n1, gcc272bug_n0, gcc272bug_d0;
+ /* Due to a gcc 2.7.2.3 reload pass bug, we have to use some
+ temps here. This doesn't hurt code quality on any machines
+ so we do it unconditionally. */
+ gcc272bug_n1 = n2p[1];
+ gcc272bug_n0 = n2p[0];
+ gcc272bug_d0 = d2p[0];
+ udiv_qrnnd (q0, r0, gcc272bug_n1, gcc272bug_n0 << GMP_NAIL_BITS,
+ gcc272bug_d0 << GMP_NAIL_BITS);
+ r0 >>= GMP_NAIL_BITS;
+ n2p[0] = r0;
qp[0] = q0;
}
else if (qn == 2)
- mpn_divrem_2 (qp, 0L, n2p, 4L, d2p); /* FIXME: obsolete function */
+ mpn_divrem_2 (qp, 0L, n2p, 4L, d2p);
+ else if (qn < DIV_DC_THRESHOLD)
+ mpn_sb_divrem_mn (qp, n2p, 2 * qn, d2p, qn);
else
- {
- invert_pi1 (dinv, d2p[qn - 1], d2p[qn - 2]);
- if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
- mpn_sbpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, dinv.inv32);
- else if (BELOW_THRESHOLD (qn, MU_DIV_QR_THRESHOLD))
- mpn_dcpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, &dinv);
- else
- {
- mp_size_t itch = mpn_mu_div_qr_itch (2 * qn, qn, 0);
- mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
- mp_ptr r2p = rp;
- if (np == r2p) /* If N and R share space, put ... */
- r2p += nn - qn; /* intermediate remainder at N's upper end. */
- mpn_mu_div_qr (qp, r2p, n2p, 2 * qn, d2p, qn, scratch);
- MPN_COPY (n2p, r2p, qn);
- }
- }
+ mpn_dc_divrem_n (qp, n2p, d2p, qn);
rn = qn;
/* Multiply the first ignored divisor limb by the most significant
@@ -304,7 +316,7 @@ mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
dl = dp[in - 2];
#if GMP_NAIL_BITS == 0
- x = (dp[in - 1] << cnt) | ((dl >> 1) >> ((~cnt) % GMP_LIMB_BITS));
+ x = (dp[in - 1] << cnt) | ((dl >> 1) >> ((~cnt) % BITS_PER_MP_LIMB));
#else
x = (dp[in - 1] << cnt) & GMP_NUMB_MASK;
if (cnt != 0)
@@ -354,7 +366,7 @@ mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
}
/* True: partial remainder now is neutral, i.e., it is not shifted up. */
- tp = TMP_ALLOC_LIMBS (dn);
+ tp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
if (in < qn)
{
diff --git a/gmp/mpn/generic/toom22_mul.c b/gmp/mpn/generic/toom22_mul.c
index 36ac29b72d..6407bbeb96 100644
--- a/gmp/mpn/generic/toom22_mul.c
+++ b/gmp/mpn/generic/toom22_mul.c
@@ -7,33 +7,22 @@
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2006-2010, 2012 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
@@ -52,7 +41,7 @@ see https://www.gnu.org/licenses/. */
vinf= a1 * b1 # A(inf)*B(inf)
*/
-#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
+#if TUNE_PROGRAM_BUILD
#define MAYBE_mul_toom22 1
#else
#define MAYBE_mul_toom22 \
@@ -62,36 +51,18 @@ see https://www.gnu.org/licenses/. */
#define TOOM22_MUL_N_REC(p, a, b, n, ws) \
do { \
if (! MAYBE_mul_toom22 \
- || BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) \
+ || BELOW_THRESHOLD (n, MUL_KARATSUBA_THRESHOLD)) \
mpn_mul_basecase (p, a, n, b, n); \
else \
mpn_toom22_mul (p, a, n, b, n, ws); \
} while (0)
-/* Normally, this calls mul_basecase or toom22_mul. But when when the fraction
- MUL_TOOM33_THRESHOLD / MUL_TOOM22_THRESHOLD is large, an initially small
- relative unbalance will become a larger and larger relative unbalance with
- each recursion (the difference s-t will be invariant over recursive calls).
- Therefore, we need to call toom32_mul. FIXME: Suppress depending on
- MUL_TOOM33_THRESHOLD / MUL_TOOM22_THRESHOLD and on MUL_TOOM22_THRESHOLD. */
-#define TOOM22_MUL_REC(p, a, an, b, bn, ws) \
- do { \
- if (! MAYBE_mul_toom22 \
- || BELOW_THRESHOLD (bn, MUL_TOOM22_THRESHOLD)) \
- mpn_mul_basecase (p, a, an, b, bn); \
- else if (4 * an < 5 * bn) \
- mpn_toom22_mul (p, a, an, b, bn, ws); \
- else \
- mpn_toom32_mul (p, a, an, b, bn, ws); \
- } while (0)
-
void
mpn_toom22_mul (mp_ptr pp,
mp_srcptr ap, mp_size_t an,
mp_srcptr bp, mp_size_t bn,
mp_ptr scratch)
{
- const int __gmpn_cpuvec_initialized = 1;
mp_size_t n, s, t;
int vm1_neg;
mp_limb_t cy, cy2;
@@ -179,8 +150,8 @@ mpn_toom22_mul (mp_ptr pp,
/* vm1, 2n limbs */
TOOM22_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
- if (s > t) TOOM22_MUL_REC (vinf, a1, s, b1, t, scratch_out);
- else TOOM22_MUL_N_REC (vinf, a1, b1, s, scratch_out);
+ /* vinf, s+t limbs */
+ mpn_mul (vinf, a1, s, b1, t);
/* v0, 2n limbs */
TOOM22_MUL_N_REC (v0, ap, bp, n, scratch_out);
diff --git a/gmp/mpn/generic/toom2_sqr.c b/gmp/mpn/generic/toom2_sqr.c
index 2f2fdaee6f..445cff8f5d 100644
--- a/gmp/mpn/generic/toom2_sqr.c
+++ b/gmp/mpn/generic/toom2_sqr.c
@@ -6,33 +6,22 @@
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2006-2010, 2012 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
@@ -43,23 +32,25 @@ see https://www.gnu.org/licenses/. */
<-s--><--n-->
____ ______
|_a1_|___a0_|
+ |b1_|___b0_|
+ <-t-><--n-->
- v0 = a0 ^2 # A(0)^2
- vm1 = (a0- a1)^2 # A(-1)^2
- vinf= a1 ^2 # A(inf)^2
+ v0 = a0 * b0 # A(0)*B(0)
+ vm1 = (a0- a1)*(b0- b1) # A(-1)*B(-1)
+ vinf= a1 * b1 # A(inf)*B(inf)
*/
-#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
+#if TUNE_PROGRAM_BUILD
#define MAYBE_sqr_toom2 1
#else
#define MAYBE_sqr_toom2 \
(SQR_TOOM3_THRESHOLD >= 2 * SQR_TOOM2_THRESHOLD)
#endif
-#define TOOM2_SQR_REC(p, a, n, ws) \
+#define TOOM2_SQR_N_REC(p, a, n, ws) \
do { \
if (! MAYBE_sqr_toom2 \
- || BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD)) \
+ || BELOW_THRESHOLD (n, SQR_KARATSUBA_THRESHOLD)) \
mpn_sqr_basecase (p, a, n); \
else \
mpn_toom2_sqr (p, a, n, ws); \
@@ -70,7 +61,6 @@ mpn_toom2_sqr (mp_ptr pp,
mp_srcptr ap, mp_size_t an,
mp_ptr scratch)
{
- const int __gmpn_cpuvec_initialized = 1;
mp_size_t n, s;
mp_limb_t cy, cy2;
mp_ptr asm1;
@@ -113,16 +103,15 @@ mpn_toom2_sqr (mp_ptr pp,
#define v0 pp /* 2n */
#define vinf (pp + 2 * n) /* s+s */
#define vm1 scratch /* 2n */
-#define scratch_out scratch + 2 * n
/* vm1, 2n limbs */
- TOOM2_SQR_REC (vm1, asm1, n, scratch_out);
+ TOOM2_SQR_N_REC (vm1, asm1, n, scratch);
/* vinf, s+s limbs */
- TOOM2_SQR_REC (vinf, a1, s, scratch_out);
+ TOOM2_SQR_N_REC (vinf, a1, s, scratch);
/* v0, 2n limbs */
- TOOM2_SQR_REC (v0, ap, n, scratch_out);
+ TOOM2_SQR_N_REC (v0, ap, n, scratch);
/* H(v0) + L(vinf) */
cy = mpn_add_n (pp + 2 * n, v0 + n, vinf, n);
diff --git a/gmp/mpn/generic/toom32_mul.c b/gmp/mpn/generic/toom32_mul.c
index 0b05669cc4..7bdd688a53 100644
--- a/gmp/mpn/generic/toom32_mul.c
+++ b/gmp/mpn/generic/toom32_mul.c
@@ -2,7 +2,6 @@
times as large as bn. Or more accurately, bn < an < 3bn.
Contributed to the GNU project by Torbjorn Granlund.
- Improvements by Marco Bodrato and Niels Möller.
The idea of applying toom to unbalanced multiplication is due to Marco
Bodrato and Alberto Zanoni.
@@ -11,34 +10,32 @@
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2006-2010 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-or both in parallel, as here.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+/*
+ Things to work on:
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+ 1. Trim allocation. The allocations for as1, asm1, bs1, and bsm1 could be
+ avoided by instead reusing the pp area and the scratch allocation.
+ 2. Apply optimizations also to mul_toom42.c.
+*/
#include "gmp.h"
#include "gmp-impl.h"
@@ -57,9 +54,20 @@ see https://www.gnu.org/licenses/. */
vinf= a2 * b1 # A(inf)*B(inf)
*/
-#define TOOM32_MUL_N_REC(p, a, b, n, ws) \
+#if TUNE_PROGRAM_BUILD
+#define MAYBE_mul_toom22 1
+#else
+#define MAYBE_mul_toom22 \
+ (MUL_TOOM33_THRESHOLD >= 2 * MUL_TOOM22_THRESHOLD)
+#endif
+
+#define TOOM22_MUL_N_REC(p, a, b, n, ws) \
do { \
- mpn_mul_n (p, a, b, n); \
+ if (! MAYBE_mul_toom22 \
+ || BELOW_THRESHOLD (n, MUL_KARATSUBA_THRESHOLD)) \
+ mpn_mul_basecase (p, a, n, b, n); \
+ else \
+ mpn_toom22_mul (p, a, n, b, n, ws); \
} while (0)
void
@@ -70,9 +78,15 @@ mpn_toom32_mul (mp_ptr pp,
{
mp_size_t n, s, t;
int vm1_neg;
+#if HAVE_NATIVE_mpn_add_nc
mp_limb_t cy;
- mp_limb_signed_t hi;
- mp_limb_t ap1_hi, bp1_hi;
+#else
+ mp_limb_t cy, cy2;
+#endif
+ mp_ptr a0_a2;
+ mp_ptr as1, asm1;
+ mp_ptr bs1, bsm1;
+ TMP_DECL;
#define a0 ap
#define a1 (ap + n)
@@ -80,9 +94,6 @@ mpn_toom32_mul (mp_ptr pp,
#define b0 bp
#define b1 (bp + n)
- /* Required, to ensure that s + t >= n. */
- ASSERT (bn + 2 <= an && an + 6 <= 3*bn);
-
n = 1 + (2 * an >= 3 * bn ? (an - 1) / (size_t) 3 : (bn - 1) >> 1);
s = an - 2 * n;
@@ -90,234 +101,191 @@ mpn_toom32_mul (mp_ptr pp,
ASSERT (0 < s && s <= n);
ASSERT (0 < t && t <= n);
- ASSERT (s + t >= n);
- /* Product area of size an + bn = 3*n + s + t >= 4*n + 2. */
-#define ap1 (pp) /* n, most significant limb in ap1_hi */
-#define bp1 (pp + n) /* n, most significant bit in bp1_hi */
-#define am1 (pp + 2*n) /* n, most significant bit in hi */
-#define bm1 (pp + 3*n) /* n */
-#define v1 (scratch) /* 2n + 1 */
-#define vm1 (pp) /* 2n + 1 */
-#define scratch_out (scratch + 2*n + 1) /* Currently unused. */
+ TMP_MARK;
+
+ as1 = TMP_SALLOC_LIMBS (n + 1);
+ asm1 = TMP_SALLOC_LIMBS (n + 1);
- /* Scratch need: 2*n + 1 + scratch for the recursive multiplications. */
+ bs1 = TMP_SALLOC_LIMBS (n + 1);
+ bsm1 = TMP_SALLOC_LIMBS (n);
- /* FIXME: Keep v1[2*n] and vm1[2*n] in scalar variables? */
+ a0_a2 = pp;
- /* Compute ap1 = a0 + a1 + a3, am1 = a0 - a1 + a3 */
- ap1_hi = mpn_add (ap1, a0, n, a2, s);
-#if HAVE_NATIVE_mpn_add_n_sub_n
- if (ap1_hi == 0 && mpn_cmp (ap1, a1, n) < 0)
+ /* Compute as1 and asm1. */
+ a0_a2[n] = mpn_add (a0_a2, a0, n, a2, s);
+#if HAVE_NATIVE_mpn_addsub_n
+ if (a0_a2[n] == 0 && mpn_cmp (a0_a2, a1, n) < 0)
{
- ap1_hi = mpn_add_n_sub_n (ap1, am1, a1, ap1, n) >> 1;
- hi = 0;
+ cy = mpn_addsub_n (as1, asm1, a1, a0_a2, n);
+ as1[n] = cy >> 1;
+ asm1[n] = 0;
vm1_neg = 1;
}
else
{
- cy = mpn_add_n_sub_n (ap1, am1, ap1, a1, n);
- hi = ap1_hi - (cy & 1);
- ap1_hi += (cy >> 1);
+ cy = mpn_addsub_n (as1, asm1, a0_a2, a1, n);
+ as1[n] = a0_a2[n] + (cy >> 1);
+ asm1[n] = a0_a2[n] - (cy & 1);
vm1_neg = 0;
}
#else
- if (ap1_hi == 0 && mpn_cmp (ap1, a1, n) < 0)
+ as1[n] = a0_a2[n] + mpn_add_n (as1, a0_a2, a1, n);
+ if (a0_a2[n] == 0 && mpn_cmp (a0_a2, a1, n) < 0)
{
- ASSERT_NOCARRY (mpn_sub_n (am1, a1, ap1, n));
- hi = 0;
+ mpn_sub_n (asm1, a1, a0_a2, n);
+ asm1[n] = 0;
vm1_neg = 1;
}
else
{
- hi = ap1_hi - mpn_sub_n (am1, ap1, a1, n);
+ cy = mpn_sub_n (asm1, a0_a2, a1, n);
+ asm1[n] = a0_a2[n] - cy;
vm1_neg = 0;
}
- ap1_hi += mpn_add_n (ap1, ap1, a1, n);
#endif
- /* Compute bp1 = b0 + b1 and bm1 = b0 - b1. */
+ /* Compute bs1 and bsm1. */
if (t == n)
{
-#if HAVE_NATIVE_mpn_add_n_sub_n
+#if HAVE_NATIVE_mpn_addsub_n
if (mpn_cmp (b0, b1, n) < 0)
{
- cy = mpn_add_n_sub_n (bp1, bm1, b1, b0, n);
+ cy = mpn_addsub_n (bs1, bsm1, b1, b0, n);
vm1_neg ^= 1;
}
else
{
- cy = mpn_add_n_sub_n (bp1, bm1, b0, b1, n);
+ cy = mpn_addsub_n (bs1, bsm1, b0, b1, n);
}
- bp1_hi = cy >> 1;
+ bs1[n] = cy >> 1;
#else
- bp1_hi = mpn_add_n (bp1, b0, b1, n);
+ bs1[n] = mpn_add_n (bs1, b0, b1, n);
if (mpn_cmp (b0, b1, n) < 0)
{
- ASSERT_NOCARRY (mpn_sub_n (bm1, b1, b0, n));
+ mpn_sub_n (bsm1, b1, b0, n);
vm1_neg ^= 1;
}
else
{
- ASSERT_NOCARRY (mpn_sub_n (bm1, b0, b1, n));
+ mpn_sub_n (bsm1, b0, b1, n);
}
#endif
}
else
{
- /* FIXME: Should still use mpn_add_n_sub_n for the main part. */
- bp1_hi = mpn_add (bp1, b0, n, b1, t);
+ bs1[n] = mpn_add (bs1, b0, n, b1, t);
if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
{
- ASSERT_NOCARRY (mpn_sub_n (bm1, b1, b0, t));
- MPN_ZERO (bm1 + t, n - t);
+ mpn_sub_n (bsm1, b1, b0, t);
+ MPN_ZERO (bsm1 + t, n - t);
vm1_neg ^= 1;
}
else
{
- ASSERT_NOCARRY (mpn_sub (bm1, b0, n, b1, t));
+ mpn_sub (bsm1, b0, n, b1, t);
}
}
- TOOM32_MUL_N_REC (v1, ap1, bp1, n, scratch_out);
- if (ap1_hi == 1)
+ ASSERT (as1[n] <= 2);
+ ASSERT (bs1[n] <= 1);
+ ASSERT (asm1[n] <= 1);
+/*ASSERT (bsm1[n] == 0); */
+
+#define v0 pp /* 2n */
+#define v1 (scratch) /* 2n+1 */
+#define vinf (pp + 3 * n) /* s+t */
+#define vm1 (scratch + 2 * n + 1) /* 2n+1 */
+#define scratch_out scratch + 4 * n + 2
+
+ /* vm1, 2n+1 limbs */
+ TOOM22_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
+ cy = 0;
+ if (asm1[n] != 0)
+ cy = mpn_add_n (vm1 + n, vm1 + n, bsm1, n);
+ vm1[2 * n] = cy;
+
+ /* vinf, s+t limbs */
+ if (s > t) mpn_mul (vinf, a2, s, b1, t);
+ else mpn_mul (vinf, b1, t, a2, s);
+
+ /* v1, 2n+1 limbs */
+ TOOM22_MUL_N_REC (v1, as1, bs1, n, scratch_out);
+ if (as1[n] == 1)
{
- cy = bp1_hi + mpn_add_n (v1 + n, v1 + n, bp1, n);
+ cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n);
}
- else if (ap1_hi == 2)
+ else if (as1[n] == 2)
{
#if HAVE_NATIVE_mpn_addlsh1_n
- cy = 2 * bp1_hi + mpn_addlsh1_n (v1 + n, v1 + n, bp1, n);
+ cy = 2 * bs1[n] + mpn_addlsh1_n (v1 + n, v1 + n, bs1, n);
#else
- cy = 2 * bp1_hi + mpn_addmul_1 (v1 + n, bp1, n, CNST_LIMB(2));
+ cy = 2 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2));
#endif
}
else
cy = 0;
- if (bp1_hi != 0)
- cy += mpn_add_n (v1 + n, v1 + n, ap1, n);
+ if (bs1[n] != 0)
+ cy += mpn_add_n (v1 + n, v1 + n, as1, n);
v1[2 * n] = cy;
- TOOM32_MUL_N_REC (vm1, am1, bm1, n, scratch_out);
- if (hi)
- hi = mpn_add_n (vm1+n, vm1+n, bm1, n);
+ mpn_mul_n (v0, ap, bp, n); /* v0, 2n limbs */
- vm1[2*n] = hi;
+ /* Interpolate */
- /* v1 <-- (v1 + vm1) / 2 = x0 + x2 */
if (vm1_neg)
{
-#if HAVE_NATIVE_mpn_rsh1sub_n
- mpn_rsh1sub_n (v1, v1, vm1, 2*n+1);
+#if HAVE_NATIVE_mpn_rsh1add_n
+ mpn_rsh1add_n (vm1, v1, vm1, 2 * n + 1);
#else
- mpn_sub_n (v1, v1, vm1, 2*n+1);
- ASSERT_NOCARRY (mpn_rshift (v1, v1, 2*n+1, 1));
+ mpn_add_n (vm1, v1, vm1, 2 * n + 1);
+ mpn_rshift (vm1, vm1, 2 * n + 1, 1);
#endif
}
else
{
-#if HAVE_NATIVE_mpn_rsh1add_n
- mpn_rsh1add_n (v1, v1, vm1, 2*n+1);
+#if HAVE_NATIVE_mpn_rsh1sub_n
+ mpn_rsh1sub_n (vm1, v1, vm1, 2 * n + 1);
#else
- mpn_add_n (v1, v1, vm1, 2*n+1);
- ASSERT_NOCARRY (mpn_rshift (v1, v1, 2*n+1, 1));
+ mpn_sub_n (vm1, v1, vm1, 2 * n + 1);
+ mpn_rshift (vm1, vm1, 2 * n + 1, 1);
#endif
}
- /* We get x1 + x3 = (x0 + x2) - (x0 - x1 + x2 - x3), and hence
-
- y = x1 + x3 + (x0 + x2) * B
- = (x0 + x2) * B + (x0 + x2) - vm1.
-
- y is 3*n + 1 limbs, y = y0 + y1 B + y2 B^2. We store them as
- follows: y0 at scratch, y1 at pp + 2*n, and y2 at scratch + n
- (already in place, except for carry propagation).
+ mpn_sub_n (v1, v1, vm1, 2 * n + 1);
+ v1[2 * n] -= mpn_sub_n (v1, v1, v0, 2 * n);
- We thus add
+ /*
+ pp[] prior to operations:
+ |_H vinf|_L vinf|_______|_______|_______|
- B^3 B^2 B 1
- | | | |
- +-----+----+
- + | x0 + x2 |
- +----+-----+----+
- + | x0 + x2 |
- +----------+
- - | vm1 |
- --+----++----+----+-
- | y2 | y1 | y0 |
- +-----+----+----+
-
- Since we store y0 at the same location as the low half of x0 + x2, we
- need to do the middle sum first. */
-
- hi = vm1[2*n];
- cy = mpn_add_n (pp + 2*n, v1, v1 + n, n);
- MPN_INCR_U (v1 + n, n + 1, cy + v1[2*n]);
-
- /* FIXME: Can we get rid of this second vm1_neg conditional by
- swapping the location of +1 and -1 values? */
- if (vm1_neg)
- {
- cy = mpn_add_n (v1, v1, vm1, n);
- hi += mpn_add_nc (pp + 2*n, pp + 2*n, vm1 + n, n, cy);
- MPN_INCR_U (v1 + n, n+1, hi);
- }
- else
- {
- cy = mpn_sub_n (v1, v1, vm1, n);
- hi += mpn_sub_nc (pp + 2*n, pp + 2*n, vm1 + n, n, cy);
- MPN_DECR_U (v1 + n, n+1, hi);
- }
-
- TOOM32_MUL_N_REC (pp, a0, b0, n, scratch_out);
- /* vinf, s+t limbs. Use mpn_mul for now, to handle unbalanced operands */
- if (s > t) mpn_mul (pp+3*n, a2, s, b1, t);
- else mpn_mul (pp+3*n, b1, t, a2, s);
-
- /* Remaining interpolation.
-
- y * B + x0 + x3 B^3 - x0 B^2 - x3 B
- = (x1 + x3) B + (x0 + x2) B^2 + x0 + x3 B^3 - x0 B^2 - x3 B
- = y0 B + y1 B^2 + y3 B^3 + Lx0 + H x0 B
- + L x3 B^3 + H x3 B^4 - Lx0 B^2 - H x0 B^3 - L x3 B - H x3 B^2
- = L x0 + (y0 + H x0 - L x3) B + (y1 - L x0 - H x3) B^2
- + (y2 - (H x0 - L x3)) B^3 + H x3 B^4
-
- B^4 B^3 B^2 B 1
- | | | | | |
- +-------+ +---------+---------+
- | Hx3 | | Hx0-Lx3 | Lx0 |
- +------+----------+---------+---------+---------+
- | y2 | y1 | y0 |
- ++---------+---------+---------+
- -| Hx0-Lx3 | - Lx0 |
- +---------+---------+
- | - Hx3 |
- +--------+
-
- We must take into account the carry from Hx0 - Lx3.
+ summation scheme for remaining operations:
+ |_______|_______|_______|_______|_______|
+ |_Hvinf_|_Lvinf_| |_H v0__|_L v0__|
+ | H vm1 | L vm1 |
+ |-H vinf|-L vinf|
+ | H v1 | L v1 |
*/
- cy = mpn_sub_n (pp + n, pp + n, pp+3*n, n);
- hi = scratch[2*n] + cy;
-
- cy = mpn_sub_nc (pp + 2*n, pp + 2*n, pp, n, cy);
- hi -= mpn_sub_nc (pp + 3*n, scratch + n, pp + n, n, cy);
-
- hi += mpn_add (pp + n, pp + n, 3*n, scratch, n);
-
- /* FIXME: Is support for s + t == n needed? */
- if (LIKELY (s + t > n))
- {
- hi -= mpn_sub (pp + 2*n, pp + 2*n, 2*n, pp + 4*n, s+t-n);
+ mpn_sub (vm1, vm1, 2 * n + 1, vinf, s + t);
+#if HAVE_NATIVE_mpn_add_nc
+ cy = mpn_add_n (pp + n, pp + n, vm1, n);
+ cy = mpn_add_nc (pp + 2 * n, v1, vm1 + n, n, cy);
+ cy = mpn_add_nc (pp + 3 * n, pp + 3 * n, v1 + n, n, cy);
+ mpn_incr_u (pp + 3 * n, vm1[2 * n]);
+ if (LIKELY (n != s + t)) /* FIXME: Limit operand range to avoid condition */
+ mpn_incr_u (pp + 4 * n, cy + v1[2 * n]);
+#else
+ cy2 = mpn_add_n (pp + n, pp + n, vm1, n);
+ cy = mpn_add_n (pp + 2 * n, v1, vm1 + n, n);
+ mpn_incr_u (pp + 2 * n, cy2);
+ mpn_incr_u (pp + 3 * n, cy + vm1[2 * n]);
+ cy = mpn_add_n (pp + 3 * n, pp + 3 * n, v1 + n, n);
+ if (LIKELY (n != s + t)) /* FIXME: Limit operand range to avoid condition */
+ mpn_incr_u (pp + 4 * n, cy + v1[2 * n]);
+#endif
- if (hi < 0)
- MPN_DECR_U (pp + 4*n, s+t-n, -hi);
- else
- MPN_INCR_U (pp + 4*n, s+t-n, hi);
- }
- else
- ASSERT (hi == 0);
+ TMP_FREE;
}
diff --git a/gmp/mpn/generic/toom33_mul.c b/gmp/mpn/generic/toom33_mul.c
index 655355c39a..5fa2813c31 100644
--- a/gmp/mpn/generic/toom33_mul.c
+++ b/gmp/mpn/generic/toom33_mul.c
@@ -1,52 +1,48 @@
-/* mpn_toom33_mul -- Multiply {ap,an} and {p,bn} where an and bn are close in
+/* mpn_toom33_mul -- Multiply {ap,an} and {bp,bn} where an and bn are close in
size. Or more accurately, bn <= an < (3/2)bn.
Contributed to the GNU project by Torbjorn Granlund.
- Additional improvements by Marco Bodrato.
THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2006-2008, 2010, 2012 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+/*
+ Things to work on:
+
+ 1. Trim allocation. The allocations for as1, asm1, bs1, and bsm1 could be
+ avoided by instead reusing the pp area and the scratch area.
+ 2. Use new toom functions for the recursive calls.
+*/
#include "gmp.h"
#include "gmp-impl.h"
/* Evaluate in: -1, 0, +1, +2, +inf
- <-s--><--n--><--n-->
- ____ ______ ______
- |_a2_|___a1_|___a0_|
- |b2_|___b1_|___b0_|
- <-t-><--n--><--n-->
+ <-s-><--n--><--n--><--n-->
+ ___ ______ ______ ______
+ |a3_|___a2_|___a1_|___a0_|
+ |_b1_|___b0_|
+ <-t--><--n-->
v0 = a0 * b0 # A(0)*B(0)
v1 = (a0+ a1+ a2)*(b0+ b1+ b2) # A(1)*B(1) ah <= 2 bh <= 2
@@ -55,33 +51,26 @@ see https://www.gnu.org/licenses/. */
vinf= a2 * b2 # A(inf)*B(inf)
*/
-#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
+#if TUNE_PROGRAM_BUILD
#define MAYBE_mul_basecase 1
#define MAYBE_mul_toom33 1
#else
#define MAYBE_mul_basecase \
- (MUL_TOOM33_THRESHOLD < 3 * MUL_TOOM22_THRESHOLD)
+ (MUL_TOOM33_THRESHOLD < 3 * MUL_KARATSUBA_THRESHOLD)
#define MAYBE_mul_toom33 \
(MUL_TOOM44_THRESHOLD >= 3 * MUL_TOOM33_THRESHOLD)
#endif
-/* FIXME: TOOM33_MUL_N_REC is not quite right for a balanced
- multiplication at the infinity point. We may have
- MAYBE_mul_basecase == 0, and still get s just below
- MUL_TOOM22_THRESHOLD. If MUL_TOOM33_THRESHOLD == 7, we can even get
- s == 1 and mpn_toom22_mul will crash.
-*/
-
#define TOOM33_MUL_N_REC(p, a, b, n, ws) \
do { \
if (MAYBE_mul_basecase \
- && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) \
+ && BELOW_THRESHOLD (n, MUL_KARATSUBA_THRESHOLD)) \
mpn_mul_basecase (p, a, n, b, n); \
else if (! MAYBE_mul_toom33 \
|| BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) \
- mpn_toom22_mul (p, a, n, b, n, ws); \
+ mpn_kara_mul_n (p, a, b, n, ws); \
else \
- mpn_toom33_mul (p, a, n, b, n, ws); \
+ mpn_toom3_mul_n (p, a, b, n, ws); \
} while (0)
void
@@ -90,13 +79,13 @@ mpn_toom33_mul (mp_ptr pp,
mp_srcptr bp, mp_size_t bn,
mp_ptr scratch)
{
- const int __gmpn_cpuvec_initialized = 1;
mp_size_t n, s, t;
int vm1_neg;
mp_limb_t cy, vinf0;
mp_ptr gp;
mp_ptr as1, asm1, as2;
mp_ptr bs1, bsm1, bs2;
+ TMP_DECL;
#define a0 ap
#define a1 (ap + n)
@@ -115,34 +104,35 @@ mpn_toom33_mul (mp_ptr pp,
ASSERT (0 < s && s <= n);
ASSERT (0 < t && t <= n);
- as1 = scratch + 4 * n + 4;
- asm1 = scratch + 2 * n + 2;
- as2 = pp + n + 1;
+ TMP_MARK;
+
+ as1 = TMP_SALLOC_LIMBS (n + 1);
+ asm1 = TMP_SALLOC_LIMBS (n + 1);
+ as2 = TMP_SALLOC_LIMBS (n + 1);
- bs1 = pp;
- bsm1 = scratch + 3 * n + 3; /* we need 4n+4 <= 4n+s+t */
- bs2 = pp + 2 * n + 2;
+ bs1 = TMP_SALLOC_LIMBS (n + 1);
+ bsm1 = TMP_SALLOC_LIMBS (n + 1);
+ bs2 = TMP_SALLOC_LIMBS (n + 1);
- gp = scratch;
+ gp = pp;
vm1_neg = 0;
/* Compute as1 and asm1. */
cy = mpn_add (gp, a0, n, a2, s);
-#if HAVE_NATIVE_mpn_add_n_sub_n
+#if HAVE_NATIVE_mpn_addsub_n
if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
{
- cy = mpn_add_n_sub_n (as1, asm1, a1, gp, n);
- as1[n] = cy >> 1;
+ cy = mpn_addsub_n (as1, asm1, a1, gp, n);
+ as1[n] = 0;
asm1[n] = 0;
vm1_neg = 1;
}
else
{
- mp_limb_t cy2;
- cy2 = mpn_add_n_sub_n (as1, asm1, gp, a1, n);
+ cy2 = mpn_addsub_n (as1, asm1, gp, a1, n);
as1[n] = cy + (cy2 >> 1);
- asm1[n] = cy - (cy2 & 1);
+ asm1[n] = cy - (cy & 1);
}
#else
as1[n] = cy + mpn_add_n (as1, gp, a1, n);
@@ -160,45 +150,36 @@ mpn_toom33_mul (mp_ptr pp,
#endif
/* Compute as2. */
-#if HAVE_NATIVE_mpn_rsblsh1_n
- cy = mpn_add_n (as2, a2, as1, s);
- if (s != n)
- cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
- cy += as1[n];
- cy = 2 * cy + mpn_rsblsh1_n (as2, a0, as2, n);
-#else
#if HAVE_NATIVE_mpn_addlsh1_n
cy = mpn_addlsh1_n (as2, a1, a2, s);
if (s != n)
cy = mpn_add_1 (as2 + s, a1 + s, n - s, cy);
cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
#else
- cy = mpn_add_n (as2, a2, as1, s);
+ cy = mpn_lshift (as2, a2, s, 1);
+ cy += mpn_add_n (as2, a1, as2, s);
if (s != n)
- cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
- cy += as1[n];
+ cy = mpn_add_1 (as2 + s, a1 + s, n - s, cy);
cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
- cy -= mpn_sub_n (as2, as2, a0, n);
-#endif
+ cy += mpn_add_n (as2, a0, as2, n);
#endif
as2[n] = cy;
/* Compute bs1 and bsm1. */
cy = mpn_add (gp, b0, n, b2, t);
-#if HAVE_NATIVE_mpn_add_n_sub_n
+#if HAVE_NATIVE_mpn_addsub_n
if (cy == 0 && mpn_cmp (gp, b1, n) < 0)
{
- cy = mpn_add_n_sub_n (bs1, bsm1, b1, gp, n);
- bs1[n] = cy >> 1;
+ cy = mpn_addsub_n (bs1, bsm1, b1, gp, n);
+ bs1[n] = 0;
bsm1[n] = 0;
vm1_neg ^= 1;
}
else
{
- mp_limb_t cy2;
- cy2 = mpn_add_n_sub_n (bs1, bsm1, gp, b1, n);
+ cy2 = mpn_addsub_n (bs1, bsm1, gp, b1, n);
bs1[n] = cy + (cy2 >> 1);
- bsm1[n] = cy - (cy2 & 1);
+ bsm1[n] = cy - (cy & 1);
}
#else
bs1[n] = cy + mpn_add_n (bs1, gp, b1, n);
@@ -216,26 +197,18 @@ mpn_toom33_mul (mp_ptr pp,
#endif
/* Compute bs2. */
-#if HAVE_NATIVE_mpn_rsblsh1_n
- cy = mpn_add_n (bs2, b2, bs1, t);
- if (t != n)
- cy = mpn_add_1 (bs2 + t, bs1 + t, n - t, cy);
- cy += bs1[n];
- cy = 2 * cy + mpn_rsblsh1_n (bs2, b0, bs2, n);
-#else
#if HAVE_NATIVE_mpn_addlsh1_n
cy = mpn_addlsh1_n (bs2, b1, b2, t);
if (t != n)
cy = mpn_add_1 (bs2 + t, b1 + t, n - t, cy);
cy = 2 * cy + mpn_addlsh1_n (bs2, b0, bs2, n);
#else
- cy = mpn_add_n (bs2, bs1, b2, t);
+ cy = mpn_lshift (bs2, b2, t, 1);
+ cy += mpn_add_n (bs2, b1, bs2, t);
if (t != n)
- cy = mpn_add_1 (bs2 + t, bs1 + t, n - t, cy);
- cy += bs1[n];
+ cy = mpn_add_1 (bs2 + t, b1 + t, n - t, cy);
cy = 2 * cy + mpn_lshift (bs2, bs2, n, 1);
- cy -= mpn_sub_n (bs2, bs2, b0, n);
-#endif
+ cy += mpn_add_n (bs2, b0, bs2, n);
#endif
bs2[n] = cy;
@@ -251,7 +224,7 @@ mpn_toom33_mul (mp_ptr pp,
#define vinf (pp + 4 * n) /* s+t */
#define vm1 scratch /* 2n+1 */
#define v2 (scratch + 2 * n + 1) /* 2n+2 */
-#define scratch_out (scratch + 5 * n + 5)
+#define scratch_out (scratch + 4 * n + 4)
/* vm1, 2n+1 limbs */
#ifdef SMALLER_RECURSION
@@ -312,5 +285,7 @@ mpn_toom33_mul (mp_ptr pp,
TOOM33_MUL_N_REC (v0, ap, bp, n, scratch_out); /* v0, 2n limbs */
- mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + t, vm1_neg, vinf0);
+ mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + t, 1^vm1_neg, vinf0, scratch_out);
+
+ TMP_FREE;
}
diff --git a/gmp/mpn/generic/toom3_sqr.c b/gmp/mpn/generic/toom3_sqr.c
index 6117c67ca6..0c8a4ff74d 100644
--- a/gmp/mpn/generic/toom3_sqr.c
+++ b/gmp/mpn/generic/toom3_sqr.c
@@ -1,77 +1,75 @@
/* mpn_toom3_sqr -- Square {ap,an}.
Contributed to the GNU project by Torbjorn Granlund.
- Additional improvements by Marco Bodrato.
THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2006-2010, 2012 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+/*
+ Things to work on:
+
+ 1. Trim allocation. The allocations for as1 and asm1 could be
+ avoided by instead reusing the pp area and the scratch area.
+ 2. Use new toom functions for the recursive calls.
+*/
+
#include "gmp.h"
#include "gmp-impl.h"
/* Evaluate in: -1, 0, +1, +2, +inf
- <-s--><--n--><--n-->
- ____ ______ ______
- |_a2_|___a1_|___a0_|
-
- v0 = a0 ^2 # A(0)^2
- v1 = (a0+ a1+ a2)^2 # A(1)^2 ah <= 2
- vm1 = (a0- a1+ a2)^2 # A(-1)^2 |ah| <= 1
- v2 = (a0+2a1+4a2)^2 # A(2)^2 ah <= 6
- vinf= a2 ^2 # A(inf)^2
+ <-s-><--n--><--n--><--n-->
+ ___ ______ ______ ______
+ |a3_|___a2_|___a1_|___a0_|
+ |_b1_|___b0_|
+ <-t--><--n-->
+
+ v0 = a0 * b0 # A(0)*B(0)
+ v1 = (a0+ a1+ a2)*(b0+ b1+ b2) # A(1)*B(1) ah <= 2 bh <= 2
+ vm1 = (a0- a1+ a2)*(b0- b1+ b2) # A(-1)*B(-1) |ah| <= 1 bh <= 1
+ v2 = (a0+2a1+4a2)*(b0+2b1+4b2) # A(2)*B(2) ah <= 6 bh <= 6
+ vinf= a2 * b2 # A(inf)*B(inf)
*/
-#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
+#if TUNE_PROGRAM_BUILD
#define MAYBE_sqr_basecase 1
#define MAYBE_sqr_toom3 1
#else
#define MAYBE_sqr_basecase \
- (SQR_TOOM3_THRESHOLD < 3 * SQR_TOOM2_THRESHOLD)
+ (SQR_TOOM3_THRESHOLD < 3 * SQR_KARATSUBA_THRESHOLD)
#define MAYBE_sqr_toom3 \
(SQR_TOOM4_THRESHOLD >= 3 * SQR_TOOM3_THRESHOLD)
#endif
-#define TOOM3_SQR_REC(p, a, n, ws) \
+#define TOOM3_SQR_N_REC(p, a, n, ws) \
do { \
if (MAYBE_sqr_basecase \
- && BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD)) \
+ && BELOW_THRESHOLD (n, SQR_KARATSUBA_THRESHOLD)) \
mpn_sqr_basecase (p, a, n); \
else if (! MAYBE_sqr_toom3 \
|| BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD)) \
- mpn_toom2_sqr (p, a, n, ws); \
+ mpn_kara_sqr_n (p, a, n, ws); \
else \
- mpn_toom3_sqr (p, a, n, ws); \
+ mpn_toom3_sqr_n (p, a, n, ws); \
} while (0)
void
@@ -79,11 +77,11 @@ mpn_toom3_sqr (mp_ptr pp,
mp_srcptr ap, mp_size_t an,
mp_ptr scratch)
{
- const int __gmpn_cpuvec_initialized = 1;
mp_size_t n, s;
mp_limb_t cy, vinf0;
mp_ptr gp;
mp_ptr as1, asm1, as2;
+ TMP_DECL;
#define a0 ap
#define a1 (ap + n)
@@ -95,27 +93,28 @@ mpn_toom3_sqr (mp_ptr pp,
ASSERT (0 < s && s <= n);
- as1 = scratch + 4 * n + 4;
- asm1 = scratch + 2 * n + 2;
- as2 = pp + n + 1;
+ TMP_MARK;
+
+ as1 = TMP_SALLOC_LIMBS (n + 1);
+ asm1 = TMP_SALLOC_LIMBS (n + 1);
+ as2 = TMP_SALLOC_LIMBS (n + 1);
- gp = scratch;
+ gp = pp;
/* Compute as1 and asm1. */
cy = mpn_add (gp, a0, n, a2, s);
-#if HAVE_NATIVE_mpn_add_n_sub_n
+#if HAVE_NATIVE_mpn_addsub_n
if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
{
- cy = mpn_add_n_sub_n (as1, asm1, a1, gp, n);
- as1[n] = cy >> 1;
+ cy = mpn_addsub_n (as1, asm1, a1, gp, n);
+ as1[n] = 0;
asm1[n] = 0;
}
else
{
- mp_limb_t cy2;
- cy2 = mpn_add_n_sub_n (as1, asm1, gp, a1, n);
+ cy2 = mpn_addsub_n (as1, asm1, gp, a1, n);
as1[n] = cy + (cy2 >> 1);
- asm1[n] = cy - (cy2 & 1);
+ asm1[n] = cy - (cy & 1);
}
#else
as1[n] = cy + mpn_add_n (as1, gp, a1, n);
@@ -132,26 +131,18 @@ mpn_toom3_sqr (mp_ptr pp,
#endif
/* Compute as2. */
-#if HAVE_NATIVE_mpn_rsblsh1_n
- cy = mpn_add_n (as2, a2, as1, s);
- if (s != n)
- cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
- cy += as1[n];
- cy = 2 * cy + mpn_rsblsh1_n (as2, a0, as2, n);
-#else
#if HAVE_NATIVE_mpn_addlsh1_n
cy = mpn_addlsh1_n (as2, a1, a2, s);
if (s != n)
cy = mpn_add_1 (as2 + s, a1 + s, n - s, cy);
cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
#else
- cy = mpn_add_n (as2, a2, as1, s);
+ cy = mpn_lshift (as2, a2, s, 1);
+ cy += mpn_add_n (as2, a1, as2, s);
if (s != n)
- cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
- cy += as1[n];
+ cy = mpn_add_1 (as2 + s, a1 + s, n - s, cy);
cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
- cy -= mpn_sub_n (as2, as2, a0, n);
-#endif
+ cy += mpn_add_n (as2, a0, as2, n);
#endif
as2[n] = cy;
@@ -163,11 +154,11 @@ mpn_toom3_sqr (mp_ptr pp,
#define vinf (pp + 4 * n) /* s+s */
#define vm1 scratch /* 2n+1 */
#define v2 (scratch + 2 * n + 1) /* 2n+2 */
-#define scratch_out (scratch + 5 * n + 5)
+#define scratch_out (scratch + 4 * n + 4)
/* vm1, 2n+1 limbs */
#ifdef SMALLER_RECURSION
- TOOM3_SQR_REC (vm1, asm1, n, scratch_out);
+ TOOM3_SQR_N_REC (vm1, asm1, n, scratch_out);
cy = 0;
if (asm1[n] != 0)
cy = asm1[n] + mpn_add_n (vm1 + n, vm1 + n, asm1, n);
@@ -175,18 +166,18 @@ mpn_toom3_sqr (mp_ptr pp,
cy += mpn_add_n (vm1 + n, vm1 + n, asm1, n);
vm1[2 * n] = cy;
#else
- TOOM3_SQR_REC (vm1, asm1, n + 1, scratch_out);
+ TOOM3_SQR_N_REC (vm1, asm1, n + 1, scratch_out);
#endif
- TOOM3_SQR_REC (v2, as2, n + 1, scratch_out); /* v2, 2n+1 limbs */
+ TOOM3_SQR_N_REC (v2, as2, n + 1, scratch_out); /* v2, 2n+1 limbs */
- TOOM3_SQR_REC (vinf, a2, s, scratch_out); /* vinf, s+s limbs */
+ TOOM3_SQR_N_REC (vinf, a2, s, scratch_out); /* vinf, s+s limbs */
vinf0 = vinf[0]; /* v1 overlaps with this */
#ifdef SMALLER_RECURSION
/* v1, 2n+1 limbs */
- TOOM3_SQR_REC (v1, as1, n, scratch_out);
+ TOOM3_SQR_N_REC (v1, as1, n, scratch_out);
if (as1[n] == 1)
{
cy = as1[n] + mpn_add_n (v1 + n, v1 + n, as1, n);
@@ -216,11 +207,13 @@ mpn_toom3_sqr (mp_ptr pp,
v1[2 * n] = cy;
#else
cy = vinf[1];
- TOOM3_SQR_REC (v1, as1, n + 1, scratch_out);
+ TOOM3_SQR_N_REC (v1, as1, n + 1, scratch_out);
vinf[1] = cy;
#endif
- TOOM3_SQR_REC (v0, ap, n, scratch_out); /* v0, 2n limbs */
+ TOOM3_SQR_N_REC (v0, ap, n, scratch_out); /* v0, 2n limbs */
+
+ mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + s, 1, vinf0, scratch_out);
- mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + s, 0, vinf0);
+ TMP_FREE;
}
diff --git a/gmp/mpn/generic/toom42_mul.c b/gmp/mpn/generic/toom42_mul.c
index 9b1e7d491b..981b45df83 100644
--- a/gmp/mpn/generic/toom42_mul.c
+++ b/gmp/mpn/generic/toom42_mul.c
@@ -11,34 +11,32 @@
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2006-2008, 2012 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-or both in parallel, as here.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+/*
+ Things to work on:
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+ 1. Trim allocation. The allocations for as1, asm1, bs1, and bsm1 could be
+ avoided by instead reusing the pp area and the scratch allocation.
+ 2. Apply optimizations also to mul_toom32.c.
+*/
#include "gmp.h"
#include "gmp-impl.h"
@@ -58,9 +56,20 @@ see https://www.gnu.org/licenses/. */
vinf= a3 * b1 # A(inf)*B(inf)
*/
-#define TOOM42_MUL_N_REC(p, a, b, n, ws) \
+#if TUNE_PROGRAM_BUILD
+#define MAYBE_mul_toom22 1
+#else
+#define MAYBE_mul_toom22 \
+ (MUL_TOOM33_THRESHOLD >= 2 * MUL_TOOM22_THRESHOLD)
+#endif
+
+#define TOOM22_MUL_N_REC(p, a, b, n, ws) \
do { \
- mpn_mul_n (p, a, b, n); \
+ if (! MAYBE_mul_toom22 \
+ || BELOW_THRESHOLD (n, MUL_KARATSUBA_THRESHOLD)) \
+ mpn_mul_basecase (p, a, n, b, n); \
+ else \
+ mpn_toom22_mul (p, a, n, b, n, ws); \
} while (0)
void
@@ -72,7 +81,7 @@ mpn_toom42_mul (mp_ptr pp,
mp_size_t n, s, t;
int vm1_neg;
mp_limb_t cy, vinf0;
- mp_ptr a0_a2;
+ mp_ptr a0_a2, a1_a3;
mp_ptr as1, asm1, as2;
mp_ptr bs1, bsm1, bs2;
TMP_DECL;
@@ -103,9 +112,35 @@ mpn_toom42_mul (mp_ptr pp,
bs2 = TMP_SALLOC_LIMBS (n + 1);
a0_a2 = pp;
+ a1_a3 = pp + n + 1;
/* Compute as1 and asm1. */
- vm1_neg = mpn_toom_eval_dgr3_pm1 (as1, asm1, ap, n, s, a0_a2) & 1;
+ a0_a2[n] = mpn_add_n (a0_a2, a0, a2, n);
+ a1_a3[n] = mpn_add (a1_a3, a1, n, a3, s);
+#if HAVE_NATIVE_mpn_addsub_n
+ if (mpn_cmp (a0_a2, a1_a3, n + 1) < 0)
+ {
+ mpn_addsub_n (as1, asm1, a1_a3, a0_a2, n + 1);
+ vm1_neg = 1;
+ }
+ else
+ {
+ mpn_addsub_n (as1, asm1, a0_a2, a1_a3, n + 1);
+ vm1_neg = 0;
+ }
+#else
+ mpn_add_n (as1, a0_a2, a1_a3, n + 1);
+ if (mpn_cmp (a0_a2, a1_a3, n + 1) < 0)
+ {
+ mpn_sub_n (asm1, a1_a3, a0_a2, n + 1);
+ vm1_neg = 1;
+ }
+ else
+ {
+ mpn_sub_n (asm1, a0_a2, a1_a3, n + 1);
+ vm1_neg = 0;
+ }
+#endif
/* Compute as2. */
#if HAVE_NATIVE_mpn_addlsh1_n
@@ -129,15 +164,15 @@ mpn_toom42_mul (mp_ptr pp,
/* Compute bs1 and bsm1. */
if (t == n)
{
-#if HAVE_NATIVE_mpn_add_n_sub_n
+#if HAVE_NATIVE_mpn_addsub_n
if (mpn_cmp (b0, b1, n) < 0)
{
- cy = mpn_add_n_sub_n (bs1, bsm1, b1, b0, n);
+ cy = mpn_addsub_n (bs1, bsm1, b1, b0, n);
vm1_neg ^= 1;
}
else
{
- cy = mpn_add_n_sub_n (bs1, bsm1, b0, b1, n);
+ cy = mpn_addsub_n (bs1, bsm1, b0, b1, n);
}
bs1[n] = cy >> 1;
#else
@@ -185,16 +220,16 @@ mpn_toom42_mul (mp_ptr pp,
#define vinf (pp + 4 * n) /* s+t */
#define vm1 scratch /* 2n+1 */
#define v2 (scratch + 2 * n + 1) /* 2n+2 */
-#define scratch_out scratch + 4 * n + 4 /* Currently unused. */
+#define scratch_out scratch + 4 * n + 4
/* vm1, 2n+1 limbs */
- TOOM42_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
+ TOOM22_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
cy = 0;
if (asm1[n] != 0)
cy = mpn_add_n (vm1 + n, vm1 + n, bsm1, n);
vm1[2 * n] = cy;
- TOOM42_MUL_N_REC (v2, as2, bs2, n + 1, scratch_out); /* v2, 2n+1 limbs */
+ TOOM22_MUL_N_REC (v2, as2, bs2, n + 1, scratch_out); /* v2, 2n+1 limbs */
/* vinf, s+t limbs */
if (s > t) mpn_mul (vinf, a3, s, b1, t);
@@ -203,7 +238,7 @@ mpn_toom42_mul (mp_ptr pp,
vinf0 = vinf[0]; /* v1 overlaps with this */
/* v1, 2n+1 limbs */
- TOOM42_MUL_N_REC (v1, as1, bs1, n, scratch_out);
+ TOOM22_MUL_N_REC (v1, as1, bs1, n, scratch_out);
if (as1[n] == 1)
{
cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n);
@@ -226,9 +261,9 @@ mpn_toom42_mul (mp_ptr pp,
cy += mpn_add_n (v1 + n, v1 + n, as1, n);
v1[2 * n] = cy;
- TOOM42_MUL_N_REC (v0, ap, bp, n, scratch_out); /* v0, 2n limbs */
+ TOOM22_MUL_N_REC (v0, ap, bp, n, scratch_out); /* v0, 2n limbs */
- mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + t, vm1_neg, vinf0);
+ mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + t, 1^vm1_neg, vinf0, scratch + 4 * n + 4);
TMP_FREE;
}
diff --git a/gmp/mpn/generic/toom42_mulmid.c b/gmp/mpn/generic/toom42_mulmid.c
deleted file mode 100644
index 0251a6d7ed..0000000000
--- a/gmp/mpn/generic/toom42_mulmid.c
+++ /dev/null
@@ -1,238 +0,0 @@
-/* mpn_toom42_mulmid -- toom42 middle product
-
- Contributed by David Harvey.
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-
-/*
- Middle product of {ap,2n-1} and {bp,n}, output written to {rp,n+2}.
-
- Neither ap nor bp may overlap rp.
-
- Must have n >= 4.
-
- Amount of scratch space required is given by mpn_toom42_mulmid_itch().
-
- FIXME: this code assumes that n is small compared to GMP_NUMB_MAX. The exact
- requirements should be clarified.
-*/
-void
-mpn_toom42_mulmid (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n,
- mp_ptr scratch)
-{
- mp_limb_t cy, e[12], zh, zl;
- mp_size_t m;
- int neg;
-
- ASSERT (n >= 4);
- ASSERT (! MPN_OVERLAP_P (rp, n + 2, ap, 2*n - 1));
- ASSERT (! MPN_OVERLAP_P (rp, n + 2, bp, n));
-
- ap += n & 1; /* handle odd row and diagonal later */
- m = n / 2;
-
- /* (e0h:e0l) etc are correction terms, in 2's complement */
-#define e0l (e[0])
-#define e0h (e[1])
-#define e1l (e[2])
-#define e1h (e[3])
-#define e2l (e[4])
-#define e2h (e[5])
-#define e3l (e[6])
-#define e3h (e[7])
-#define e4l (e[8])
-#define e4h (e[9])
-#define e5l (e[10])
-#define e5h (e[11])
-
-#define s (scratch + 2)
-#define t (rp + m + 2)
-#define p0 rp
-#define p1 scratch
-#define p2 (rp + m)
-#define next_scratch (scratch + 3*m + 1)
-
- /*
- rp scratch
- |---------|-----------| |---------|---------|----------|
- 0 m 2m+2 0 m 2m 3m+1
- <----p2----> <-------------s------------->
- <----p0----><---t----> <----p1---->
- */
-
- /* compute {s,3m-1} = {a,3m-1} + {a+m,3m-1} and error terms e0, e1, e2, e3 */
- cy = mpn_add_err1_n (s, ap, ap + m, &e0l, bp + m, m - 1, 0);
- cy = mpn_add_err2_n (s + m - 1, ap + m - 1, ap + 2*m - 1, &e1l,
- bp + m, bp, m, cy);
- mpn_add_err1_n (s + 2*m - 1, ap + 2*m - 1, ap + 3*m - 1, &e3l, bp, m, cy);
-
- /* compute t = (-1)^neg * ({b,m} - {b+m,m}) and error terms e4, e5 */
- if (mpn_cmp (bp + m, bp, m) < 0)
- {
- ASSERT_NOCARRY (mpn_sub_err2_n (t, bp, bp + m, &e4l,
- ap + m - 1, ap + 2*m - 1, m, 0));
- neg = 1;
- }
- else
- {
- ASSERT_NOCARRY (mpn_sub_err2_n (t, bp + m, bp, &e4l,
- ap + m - 1, ap + 2*m - 1, m, 0));
- neg = 0;
- }
-
- /* recursive middle products. The picture is:
-
- b[2m-1] A A A B B B - - - - -
- ... - A A A B B B - - - -
- b[m] - - A A A B B B - - -
- b[m-1] - - - C C C D D D - -
- ... - - - - C C C D D D -
- b[0] - - - - - C C C D D D
- a[0] ... a[m] ... a[2m] ... a[4m-2]
- */
-
- if (m < MULMID_TOOM42_THRESHOLD)
- {
- /* A + B */
- mpn_mulmid_basecase (p0, s, 2*m - 1, bp + m, m);
- /* accumulate high limbs of p0 into e1 */
- ADDC_LIMB (cy, e1l, e1l, p0[m]);
- e1h += p0[m + 1] + cy;
- /* (-1)^neg * (B - C) (overwrites first m limbs of s) */
- mpn_mulmid_basecase (p1, ap + m, 2*m - 1, t, m);
- /* C + D (overwrites t) */
- mpn_mulmid_basecase (p2, s + m, 2*m - 1, bp, m);
- }
- else
- {
- /* as above, but use toom42 instead */
- mpn_toom42_mulmid (p0, s, bp + m, m, next_scratch);
- ADDC_LIMB (cy, e1l, e1l, p0[m]);
- e1h += p0[m + 1] + cy;
- mpn_toom42_mulmid (p1, ap + m, t, m, next_scratch);
- mpn_toom42_mulmid (p2, s + m, bp, m, next_scratch);
- }
-
- /* apply error terms */
-
- /* -e0 at rp[0] */
- SUBC_LIMB (cy, rp[0], rp[0], e0l);
- SUBC_LIMB (cy, rp[1], rp[1], e0h + cy);
- if (UNLIKELY (cy))
- {
- cy = (m > 2) ? mpn_sub_1 (rp + 2, rp + 2, m - 2, 1) : 1;
- SUBC_LIMB (cy, e1l, e1l, cy);
- e1h -= cy;
- }
-
- /* z = e1 - e2 + high(p0) */
- SUBC_LIMB (cy, zl, e1l, e2l);
- zh = e1h - e2h - cy;
-
- /* z at rp[m] */
- ADDC_LIMB (cy, rp[m], rp[m], zl);
- zh = (zh + cy) & GMP_NUMB_MASK;
- ADDC_LIMB (cy, rp[m + 1], rp[m + 1], zh);
- cy -= (zh >> (GMP_NUMB_BITS - 1));
- if (UNLIKELY (cy))
- {
- if (cy == 1)
- mpn_add_1 (rp + m + 2, rp + m + 2, m, 1);
- else /* cy == -1 */
- mpn_sub_1 (rp + m + 2, rp + m + 2, m, 1);
- }
-
- /* e3 at rp[2*m] */
- ADDC_LIMB (cy, rp[2*m], rp[2*m], e3l);
- rp[2*m + 1] = (rp[2*m + 1] + e3h + cy) & GMP_NUMB_MASK;
-
- /* e4 at p1[0] */
- ADDC_LIMB (cy, p1[0], p1[0], e4l);
- ADDC_LIMB (cy, p1[1], p1[1], e4h + cy);
- if (UNLIKELY (cy))
- mpn_add_1 (p1 + 2, p1 + 2, m, 1);
-
- /* -e5 at p1[m] */
- SUBC_LIMB (cy, p1[m], p1[m], e5l);
- p1[m + 1] = (p1[m + 1] - e5h - cy) & GMP_NUMB_MASK;
-
- /* adjustment if p1 ends up negative */
- cy = (p1[m + 1] >> (GMP_NUMB_BITS - 1));
-
- /* add (-1)^neg * (p1 - B^m * p1) to output */
- if (neg)
- {
- mpn_sub_1 (rp + m + 2, rp + m + 2, m, cy);
- mpn_add (rp, rp, 2*m + 2, p1, m + 2); /* A + C */
- mpn_sub_n (rp + m, rp + m, p1, m + 2); /* B + D */
- }
- else
- {
- mpn_add_1 (rp + m + 2, rp + m + 2, m, cy);
- mpn_sub (rp, rp, 2*m + 2, p1, m + 2); /* A + C */
- mpn_add_n (rp + m, rp + m, p1, m + 2); /* B + D */
- }
-
- /* odd row and diagonal */
- if (n & 1)
- {
- /*
- Products marked E are already done. We need to do products marked O.
-
- OOOOO----
- -EEEEO---
- --EEEEO--
- ---EEEEO-
- ----EEEEO
- */
-
- /* first row of O's */
- cy = mpn_addmul_1 (rp, ap - 1, n, bp[n - 1]);
- ADDC_LIMB (rp[n + 1], rp[n], rp[n], cy);
-
- /* O's on diagonal */
- /* FIXME: should probably define an interface "mpn_mulmid_diag_1"
- that can handle the sum below. Currently we're relying on
- mulmid_basecase being pretty fast for a diagonal sum like this,
- which is true at least for the K8 asm version, but surely false
- for the generic version. */
- mpn_mulmid_basecase (e, ap + n - 1, n - 1, bp, n - 1);
- mpn_add_n (rp + n - 1, rp + n - 1, e, 3);
- }
-}
diff --git a/gmp/mpn/generic/toom43_mul.c b/gmp/mpn/generic/toom43_mul.c
deleted file mode 100644
index 59d45576b8..0000000000
--- a/gmp/mpn/generic/toom43_mul.c
+++ /dev/null
@@ -1,234 +0,0 @@
-/* mpn_toom43_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 4/3
- times as large as bn. Or more accurately, bn < an < 2 bn.
-
- Contributed to the GNU project by Marco Bodrato.
-
- The idea of applying toom to unbalanced multiplication is due to Marco
- Bodrato and Alberto Zanoni.
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/* Evaluate in: -2, -1, 0, +1, +2, +inf
-
- <-s-><--n--><--n--><--n-->
- ___ ______ ______ ______
- |a3_|___a2_|___a1_|___a0_|
- |_b2_|___b1_|___b0_|
- <-t--><--n--><--n-->
-
- v0 = a0 * b0 # A(0)*B(0)
- v1 = (a0+ a1+ a2+ a3)*(b0+ b1+ b2) # A(1)*B(1) ah <= 3 bh <= 2
- vm1 = (a0- a1+ a2- a3)*(b0- b1+ b2) # A(-1)*B(-1) |ah| <= 1 |bh|<= 1
- v2 = (a0+2a1+4a2+8a3)*(b0+2b1+4b2) # A(2)*B(2) ah <= 14 bh <= 6
- vm2 = (a0-2a1+4a2-8a3)*(b0-2b1+4b2) # A(-2)*B(-2) |ah| <= 9 |bh|<= 4
- vinf= a3 * b2 # A(inf)*B(inf)
-*/
-
-void
-mpn_toom43_mul (mp_ptr pp,
- mp_srcptr ap, mp_size_t an,
- mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
-{
- mp_size_t n, s, t;
- enum toom6_flags flags;
- mp_limb_t cy;
-
-#define a0 ap
-#define a1 (ap + n)
-#define a2 (ap + 2 * n)
-#define a3 (ap + 3 * n)
-#define b0 bp
-#define b1 (bp + n)
-#define b2 (bp + 2 * n)
-
- n = 1 + (3 * an >= 4 * bn ? (an - 1) >> 2 : (bn - 1) / (size_t) 3);
-
- s = an - 3 * n;
- t = bn - 2 * n;
-
- ASSERT (0 < s && s <= n);
- ASSERT (0 < t && t <= n);
-
- /* This is true whenever an >= 25 or bn >= 19, I think. It
- guarantees that we can fit 5 values of size n+1 in the product
- area. */
- ASSERT (s+t >= 5);
-
-#define v0 pp /* 2n */
-#define vm1 (scratch) /* 2n+1 */
-#define v1 (pp + 2*n) /* 2n+1 */
-#define vm2 (scratch + 2 * n + 1) /* 2n+1 */
-#define v2 (scratch + 4 * n + 2) /* 2n+1 */
-#define vinf (pp + 5 * n) /* s+t */
-#define bs1 pp /* n+1 */
-#define bsm1 (scratch + 2 * n + 2) /* n+1 */
-#define asm1 (scratch + 3 * n + 3) /* n+1 */
-#define asm2 (scratch + 4 * n + 4) /* n+1 */
-#define bsm2 (pp + n + 1) /* n+1 */
-#define bs2 (pp + 2 * n + 2) /* n+1 */
-#define as2 (pp + 3 * n + 3) /* n+1 */
-#define as1 (pp + 4 * n + 4) /* n+1 */
-
- /* Total sccratch need is 6 * n + 3 + 1; we allocate one extra
- limb, because products will overwrite 2n+2 limbs. */
-
-#define a0a2 scratch
-#define b0b2 scratch
-#define a1a3 asm1
-#define b1d bsm1
-
- /* Compute as2 and asm2. */
- flags = (enum toom6_flags) (toom6_vm2_neg & mpn_toom_eval_dgr3_pm2 (as2, asm2, ap, n, s, a1a3));
-
- /* Compute bs2 and bsm2. */
- b1d[n] = mpn_lshift (b1d, b1, n, 1); /* 2b1 */
- cy = mpn_lshift (b0b2, b2, t, 2); /* 4b2 */
- cy += mpn_add_n (b0b2, b0b2, b0, t); /* 4b2 + b0 */
- if (t != n)
- cy = mpn_add_1 (b0b2 + t, b0 + t, n - t, cy);
- b0b2[n] = cy;
-
-#if HAVE_NATIVE_mpn_add_n_sub_n
- if (mpn_cmp (b0b2, b1d, n+1) < 0)
- {
- mpn_add_n_sub_n (bs2, bsm2, b1d, b0b2, n+1);
- flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
- }
- else
- {
- mpn_add_n_sub_n (bs2, bsm2, b0b2, b1d, n+1);
- }
-#else
- mpn_add_n (bs2, b0b2, b1d, n+1);
- if (mpn_cmp (b0b2, b1d, n+1) < 0)
- {
- mpn_sub_n (bsm2, b1d, b0b2, n+1);
- flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
- }
- else
- {
- mpn_sub_n (bsm2, b0b2, b1d, n+1);
- }
-#endif
-
- /* Compute as1 and asm1. */
- flags = (enum toom6_flags) (flags ^ toom6_vm1_neg & mpn_toom_eval_dgr3_pm1 (as1, asm1, ap, n, s, a0a2));
-
- /* Compute bs1 and bsm1. */
- bsm1[n] = mpn_add (bsm1, b0, n, b2, t);
-#if HAVE_NATIVE_mpn_add_n_sub_n
- if (bsm1[n] == 0 && mpn_cmp (bsm1, b1, n) < 0)
- {
- cy = mpn_add_n_sub_n (bs1, bsm1, b1, bsm1, n);
- bs1[n] = cy >> 1;
- flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
- }
- else
- {
- cy = mpn_add_n_sub_n (bs1, bsm1, bsm1, b1, n);
- bs1[n] = bsm1[n] + (cy >> 1);
- bsm1[n]-= cy & 1;
- }
-#else
- bs1[n] = bsm1[n] + mpn_add_n (bs1, bsm1, b1, n);
- if (bsm1[n] == 0 && mpn_cmp (bsm1, b1, n) < 0)
- {
- mpn_sub_n (bsm1, b1, bsm1, n);
- flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
- }
- else
- {
- bsm1[n] -= mpn_sub_n (bsm1, bsm1, b1, n);
- }
-#endif
-
- ASSERT (as1[n] <= 3);
- ASSERT (bs1[n] <= 2);
- ASSERT (asm1[n] <= 1);
- ASSERT (bsm1[n] <= 1);
- ASSERT (as2[n] <=14);
- ASSERT (bs2[n] <= 6);
- ASSERT (asm2[n] <= 9);
- ASSERT (bsm2[n] <= 4);
-
- /* vm1, 2n+1 limbs */
- mpn_mul_n (vm1, asm1, bsm1, n+1); /* W4 */
-
- /* vm2, 2n+1 limbs */
- mpn_mul_n (vm2, asm2, bsm2, n+1); /* W2 */
-
- /* v2, 2n+1 limbs */
- mpn_mul_n (v2, as2, bs2, n+1); /* W1 */
-
- /* v1, 2n+1 limbs */
- mpn_mul_n (v1, as1, bs1, n+1); /* W3 */
-
- /* vinf, s+t limbs */ /* W0 */
- if (s > t) mpn_mul (vinf, a3, s, b2, t);
- else mpn_mul (vinf, b2, t, a3, s);
-
- /* v0, 2n limbs */
- mpn_mul_n (v0, ap, bp, n); /* W5 */
-
- mpn_toom_interpolate_6pts (pp, n, flags, vm1, vm2, v2, t + s);
-
-#undef v0
-#undef vm1
-#undef v1
-#undef vm2
-#undef v2
-#undef vinf
-#undef bs1
-#undef bs2
-#undef bsm1
-#undef bsm2
-#undef asm1
-#undef asm2
-/* #undef as1 */
-/* #undef as2 */
-#undef a0a2
-#undef b0b2
-#undef a1a3
-#undef b1d
-#undef a0
-#undef a1
-#undef a2
-#undef a3
-#undef b0
-#undef b1
-#undef b2
-}
diff --git a/gmp/mpn/generic/toom44_mul.c b/gmp/mpn/generic/toom44_mul.c
index 5abf2d14a9..37ff45279d 100644
--- a/gmp/mpn/generic/toom44_mul.c
+++ b/gmp/mpn/generic/toom44_mul.c
@@ -7,39 +7,36 @@
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2006-2008, 2013 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+/*
+ Things to work on:
+
+ 1. Trim allocation. The allocations for as1, asm1, bs1, and bsm1 could be
+ avoided by instead reusing the pp area and the scratch area.
+ 2. Use new toom functions for the recursive calls.
+*/
#include "gmp.h"
#include "gmp-impl.h"
-/* Evaluate in: 0, +1, -1, +2, -2, 1/2, +inf
+/* Evaluate in: -1, -1/2, 0, +1/2, +1, +2, +inf
<-s--><--n--><--n--><--n-->
____ ______ ______ ______
@@ -51,8 +48,8 @@ see https://www.gnu.org/licenses/. */
v1 = ( a0+ a1+ a2+ a3)*( b0+ b1+ b2+ b3) # A(1)*B(1) ah <= 3 bh <= 3
vm1 = ( a0- a1+ a2- a3)*( b0- b1+ b2- b3) # A(-1)*B(-1) |ah| <= 1 |bh| <= 1
v2 = ( a0+2a1+4a2+8a3)*( b0+2b1+4b2+8b3) # A(2)*B(2) ah <= 14 bh <= 14
- vm2 = ( a0-2a1+4a2-8a3)*( b0-2b1+4b2-8b3) # A(2)*B(2) ah <= 9 |bh| <= 9
vh = (8a0+4a1+2a2+ a3)*(8b0+4b1+2b2+ b3) # A(1/2)*B(1/2) ah <= 14 bh <= 14
+ vmh = (8a0-4a1+2a2- a3)*(8b0-4b1+2b2- b3) # A(-1/2)*B(-1/2) -4<=ah<=9 -4<=bh<=9
vinf= a3 * b2 # A(inf)*B(inf)
*/
@@ -62,51 +59,28 @@ see https://www.gnu.org/licenses/. */
#define MAYBE_mul_toom44 1
#else
#define MAYBE_mul_basecase \
- (MUL_TOOM44_THRESHOLD < 4 * MUL_TOOM22_THRESHOLD)
+ (MUL_TOOM44_THRESHOLD < 4 * MUL_KARATSUBA_THRESHOLD)
#define MAYBE_mul_toom22 \
(MUL_TOOM44_THRESHOLD < 4 * MUL_TOOM33_THRESHOLD)
#define MAYBE_mul_toom44 \
- (MUL_TOOM6H_THRESHOLD >= 4 * MUL_TOOM44_THRESHOLD)
+ (MUL_FFT_THRESHOLD >= 4 * MUL_TOOM44_THRESHOLD)
#endif
#define TOOM44_MUL_N_REC(p, a, b, n, ws) \
do { \
if (MAYBE_mul_basecase \
- && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) \
+ && BELOW_THRESHOLD (n, MUL_KARATSUBA_THRESHOLD)) \
mpn_mul_basecase (p, a, n, b, n); \
else if (MAYBE_mul_toom22 \
&& BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) \
- mpn_toom22_mul (p, a, n, b, n, ws); \
+ mpn_kara_mul_n (p, a, b, n, ws); \
else if (! MAYBE_mul_toom44 \
|| BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) \
- mpn_toom33_mul (p, a, n, b, n, ws); \
+ mpn_toom3_mul_n (p, a, b, n, ws); \
else \
mpn_toom44_mul (p, a, n, b, n, ws); \
} while (0)
-/* Use of scratch space. In the product area, we store
-
- ___________________
- |vinf|____|_v1_|_v0_|
- s+t 2n-1 2n+1 2n
-
- The other recursive products, vm1, v2, vm2, vh are stored in the
- scratch area. When computing them, we use the product area for
- intermediate values.
-
- Next, we compute v1. We can store the intermediate factors at v0
- and at vh + 2n + 2.
-
- Finally, for v0 and vinf, factors are parts of the input operands,
- and we need scratch space only for the recursive multiplication.
-
- In all, if S(an) is the scratch need, the needed space is bounded by
-
- S(an) <= 4 (2*ceil(an/4) + 1) + 1 + S(ceil(an/4) + 1)
-
- which should give S(n) = 8 n/3 + c log(n) for some constant c.
-*/
-
void
mpn_toom44_mul (mp_ptr pp,
mp_srcptr ap, mp_size_t an,
@@ -115,7 +89,11 @@ mpn_toom44_mul (mp_ptr pp,
{
mp_size_t n, s, t;
mp_limb_t cy;
- enum toom7_flags flags;
+ mp_ptr gp, hp;
+ mp_ptr as1, asm1, as2, ash, asmh;
+ mp_ptr bs1, bsm1, bs2, bsh, bsmh;
+ enum toom4_flags flags;
+ TMP_DECL;
#define a0 ap
#define a1 (ap + n)
@@ -126,111 +104,227 @@ mpn_toom44_mul (mp_ptr pp,
#define b2 (bp + 2*n)
#define b3 (bp + 3*n)
- ASSERT (an >= bn);
-
n = (an + 3) >> 2;
s = an - 3 * n;
t = bn - 3 * n;
+ ASSERT (an >= bn);
+
ASSERT (0 < s && s <= n);
ASSERT (0 < t && t <= n);
- ASSERT (s >= t);
-
- /* NOTE: The multiplications to v2, vm2, vh and vm1 overwrites the
- * following limb, so these must be computed in order, and we need a
- * one limb gap to tp. */
-#define v0 pp /* 2n */
-#define v1 (pp + 2 * n) /* 2n+1 */
-#define vinf (pp + 6 * n) /* s+t */
-#define v2 scratch /* 2n+1 */
-#define vm2 (scratch + 2 * n + 1) /* 2n+1 */
-#define vh (scratch + 4 * n + 2) /* 2n+1 */
-#define vm1 (scratch + 6 * n + 3) /* 2n+1 */
-#define tp (scratch + 8*n + 5)
-
- /* apx and bpx must not overlap with v1 */
-#define apx pp /* n+1 */
-#define amx (pp + n + 1) /* n+1 */
-#define bmx (pp + 2*n + 2) /* n+1 */
-#define bpx (pp + 4*n + 2) /* n+1 */
- /* Total scratch need: 8*n + 5 + scratch for recursive calls. This
- gives roughly 32 n/3 + log term. */
-
- /* Compute apx = a0 + 2 a1 + 4 a2 + 8 a3 and amx = a0 - 2 a1 + 4 a2 - 8 a3. */
- flags = (enum toom7_flags) (toom7_w1_neg & mpn_toom_eval_dgr3_pm2 (apx, amx, ap, n, s, tp));
-
- /* Compute bpx = b0 + 2 b1 + 4 b2 + 8 b3 and bmx = b0 - 2 b1 + 4 b2 - 8 b3. */
- flags = (enum toom7_flags) (flags ^ toom7_w1_neg & mpn_toom_eval_dgr3_pm2 (bpx, bmx, bp, n, t, tp));
+ TMP_MARK;
+
+ as1 = TMP_ALLOC_LIMBS (10 * n + 10);
+ asm1 = as1 + n + 1;
+ as2 = asm1 + n + 1;
+ ash = as2 + n + 1;
+ asmh = ash + n + 1;
+ bs1 = asmh + n + 1;
+ bsm1 = bs1 + n + 1;
+ bs2 = bsm1 + n + 1;
+ bsh = bs2 + n + 1;
+ bsmh = bsh + n + 1;
+
+ gp = pp;
+ hp = pp + n + 1;
+
+ flags = 0;
+
+ /* Compute as1 and asm1. */
+ gp[n] = mpn_add_n (gp, a0, a2, n);
+ hp[n] = mpn_add (hp, a1, n, a3, s);
+#if HAVE_NATIVE_mpn_addsub_n
+ if (mpn_cmp (gp, hp, n + 1) < 0)
+ {
+ mpn_addsub_n (as1, asm1, hp, gp, n + 1);
+ flags ^= toom4_w3_neg;
+ }
+ else
+ {
+ mpn_addsub_n (as1, asm1, gp, hp, n + 1);
+ }
+#else
+ mpn_add_n (as1, gp, hp, n + 1);
+ if (mpn_cmp (gp, hp, n + 1) < 0)
+ {
+ mpn_sub_n (asm1, hp, gp, n + 1);
+ flags ^= toom4_w3_neg;
+ }
+ else
+ {
+ mpn_sub_n (asm1, gp, hp, n + 1);
+ }
+#endif
- TOOM44_MUL_N_REC (v2, apx, bpx, n + 1, tp); /* v2, 2n+1 limbs */
- TOOM44_MUL_N_REC (vm2, amx, bmx, n + 1, tp); /* vm2, 2n+1 limbs */
+ /* Compute as2. */
+#if HAVE_NATIVE_mpn_addlsh1_n
+ cy = mpn_addlsh1_n (as2, a2, a3, s);
+ if (s != n)
+ cy = mpn_add_1 (as2 + s, a2 + s, n - s, cy);
+ cy = 2 * cy + mpn_addlsh1_n (as2, a1, as2, n);
+ cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
+#else
+ cy = mpn_lshift (as2, a3, s, 1);
+ cy += mpn_add_n (as2, a2, as2, s);
+ if (s != n)
+ cy = mpn_add_1 (as2 + s, a2 + s, n - s, cy);
+ cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
+ cy += mpn_add_n (as2, a1, as2, n);
+ cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
+ cy += mpn_add_n (as2, a0, as2, n);
+#endif
+ as2[n] = cy;
- /* Compute apx = 8 a0 + 4 a1 + 2 a2 + a3 = (((2*a0 + a1) * 2 + a2) * 2 + a3 */
+ /* Compute ash and asmh. */
+ cy = mpn_lshift (gp, a0, n, 3); /* 8a0 */
#if HAVE_NATIVE_mpn_addlsh1_n
- cy = mpn_addlsh1_n (apx, a1, a0, n);
- cy = 2*cy + mpn_addlsh1_n (apx, a2, apx, n);
- if (s < n)
+ gp[n] = cy + mpn_addlsh1_n (gp, gp, a2, n); /* 8a0 + 2a2 */
+#else
+ cy += mpn_lshift (hp, a2, n, 1); /* 2a2 */
+ gp[n] = cy + mpn_add_n (gp, gp, hp, n); /* 8a0 + 2a2 */
+#endif
+ cy = mpn_lshift (hp, a1, n, 2); /* 4a1 */
+ hp[n] = cy + mpn_add (hp, hp, n, a3, s); /* 4a1 + a3 */
+#if HAVE_NATIVE_mpn_addsub_n
+ if (mpn_cmp (gp, hp, n + 1) < 0)
{
- mp_limb_t cy2;
- cy2 = mpn_addlsh1_n (apx, a3, apx, s);
- apx[n] = 2*cy + mpn_lshift (apx + s, apx + s, n - s, 1);
- MPN_INCR_U (apx + s, n+1-s, cy2);
+ mpn_addsub_n (ash, asmh, hp, gp, n + 1);
+ flags ^= toom4_w1_neg;
}
else
- apx[n] = 2*cy + mpn_addlsh1_n (apx, a3, apx, n);
+ {
+ mpn_addsub_n (ash, asmh, gp, hp, n + 1);
+ }
#else
- cy = mpn_lshift (apx, a0, n, 1);
- cy += mpn_add_n (apx, apx, a1, n);
- cy = 2*cy + mpn_lshift (apx, apx, n, 1);
- cy += mpn_add_n (apx, apx, a2, n);
- cy = 2*cy + mpn_lshift (apx, apx, n, 1);
- apx[n] = cy + mpn_add (apx, apx, n, a3, s);
+ mpn_add_n (ash, gp, hp, n + 1);
+ if (mpn_cmp (gp, hp, n + 1) < 0)
+ {
+ mpn_sub_n (asmh, hp, gp, n + 1);
+ flags ^= toom4_w1_neg;
+ }
+ else
+ {
+ mpn_sub_n (asmh, gp, hp, n + 1);
+ }
+#endif
+
+ /* Compute bs1 and bsm1. */
+ gp[n] = mpn_add_n (gp, b0, b2, n);
+ hp[n] = mpn_add (hp, b1, n, b3, t);
+#if HAVE_NATIVE_mpn_addsub_n
+ if (mpn_cmp (gp, hp, n + 1) < 0)
+ {
+ mpn_addsub_n (bs1, bsm1, hp, gp, n + 1);
+ flags ^= toom4_w3_neg;
+ }
+ else
+ {
+ mpn_addsub_n (bs1, bsm1, gp, hp, n + 1);
+ }
+#else
+ mpn_add_n (bs1, gp, hp, n + 1);
+ if (mpn_cmp (gp, hp, n + 1) < 0)
+ {
+ mpn_sub_n (bsm1, hp, gp, n + 1);
+ flags ^= toom4_w3_neg;
+ }
+ else
+ {
+ mpn_sub_n (bsm1, gp, hp, n + 1);
+ }
#endif
- /* Compute bpx = 8 b0 + 4 b1 + 2 b2 + b3 = (((2*b0 + b1) * 2 + b2) * 2 + b3 */
+ /* Compute bs2. */
#if HAVE_NATIVE_mpn_addlsh1_n
- cy = mpn_addlsh1_n (bpx, b1, b0, n);
- cy = 2*cy + mpn_addlsh1_n (bpx, b2, bpx, n);
- if (t < n)
+ cy = mpn_addlsh1_n (bs2, b2, b3, t);
+ if (t != n)
+ cy = mpn_add_1 (bs2 + t, b2 + t, n - t, cy);
+ cy = 2 * cy + mpn_addlsh1_n (bs2, b1, bs2, n);
+ cy = 2 * cy + mpn_addlsh1_n (bs2, b0, bs2, n);
+#else
+ cy = mpn_lshift (bs2, b3, t, 1);
+ cy += mpn_add_n (bs2, b2, bs2, t);
+ if (t != n)
+ cy = mpn_add_1 (bs2 + t, b2 + t, n - t, cy);
+ cy = 2 * cy + mpn_lshift (bs2, bs2, n, 1);
+ cy += mpn_add_n (bs2, b1, bs2, n);
+ cy = 2 * cy + mpn_lshift (bs2, bs2, n, 1);
+ cy += mpn_add_n (bs2, b0, bs2, n);
+#endif
+ bs2[n] = cy;
+
+ /* Compute bsh and bsmh. */
+ cy = mpn_lshift (gp, b0, n, 3); /* 8b0 */
+#if HAVE_NATIVE_mpn_addlsh1_n
+ gp[n] = cy + mpn_addlsh1_n (gp, gp, b2, n); /* 8b0 + 2b2 */
+#else
+ cy += mpn_lshift (hp, b2, n, 1); /* 2b2 */
+ gp[n] = cy + mpn_add_n (gp, gp, hp, n); /* 8b0 + 2b2 */
+#endif
+ cy = mpn_lshift (hp, b1, n, 2); /* 4b1 */
+ hp[n] = cy + mpn_add (hp, hp, n, b3, t); /* 4b1 + b3 */
+#if HAVE_NATIVE_mpn_addsub_n
+ if (mpn_cmp (gp, hp, n + 1) < 0)
{
- mp_limb_t cy2;
- cy2 = mpn_addlsh1_n (bpx, b3, bpx, t);
- bpx[n] = 2*cy + mpn_lshift (bpx + t, bpx + t, n - t, 1);
- MPN_INCR_U (bpx + t, n+1-t, cy2);
+ mpn_addsub_n (bsh, bsmh, hp, gp, n + 1);
+ flags ^= toom4_w1_neg;
}
else
- bpx[n] = 2*cy + mpn_addlsh1_n (bpx, b3, bpx, n);
+ {
+ mpn_addsub_n (bsh, bsmh, gp, hp, n + 1);
+ }
#else
- cy = mpn_lshift (bpx, b0, n, 1);
- cy += mpn_add_n (bpx, bpx, b1, n);
- cy = 2*cy + mpn_lshift (bpx, bpx, n, 1);
- cy += mpn_add_n (bpx, bpx, b2, n);
- cy = 2*cy + mpn_lshift (bpx, bpx, n, 1);
- bpx[n] = cy + mpn_add (bpx, bpx, n, b3, t);
+ mpn_add_n (bsh, gp, hp, n + 1);
+ if (mpn_cmp (gp, hp, n + 1) < 0)
+ {
+ mpn_sub_n (bsmh, hp, gp, n + 1);
+ flags ^= toom4_w1_neg;
+ }
+ else
+ {
+ mpn_sub_n (bsmh, gp, hp, n + 1);
+ }
#endif
- ASSERT (apx[n] < 15);
- ASSERT (bpx[n] < 15);
+ ASSERT (as1[n] <= 3);
+ ASSERT (bs1[n] <= 3);
+ ASSERT (asm1[n] <= 1);
+ ASSERT (bsm1[n] <= 1);
+ ASSERT (as2[n] <= 14);
+ ASSERT (bs2[n] <= 14);
+ ASSERT (ash[n] <= 14);
+ ASSERT (bsh[n] <= 14);
+ ASSERT (asmh[n] <= 9);
+ ASSERT (bsmh[n] <= 9);
+
+#define v0 pp /* 2n */
+#define v1 (scratch + 6 * n + 6) /* 2n+1 */
+#define vm1 scratch /* 2n+1 */
+#define v2 (scratch + 2 * n + 2) /* 2n+1 */
+#define vinf (pp + 6 * n) /* s+t */
+#define vh (pp + 2 * n) /* 2n+1 */
+#define vmh (scratch + 4 * n + 4)
+#define scratch_out (scratch + 8 * n + 8)
+
+ /* vm1, 2n+1 limbs */
+ TOOM44_MUL_N_REC (vm1, asm1, bsm1, n + 1, scratch_out); /* vm1, 2n+1 limbs */
- TOOM44_MUL_N_REC (vh, apx, bpx, n + 1, tp); /* vh, 2n+1 limbs */
+ TOOM44_MUL_N_REC (v2 , as2 , bs2 , n + 1, scratch_out); /* v2, 2n+1 limbs */
- /* Compute apx = a0 + a1 + a2 + a3 and amx = a0 - a1 + a2 - a3. */
- flags = (enum toom7_flags) (flags | toom7_w3_neg & mpn_toom_eval_dgr3_pm1 (apx, amx, ap, n, s, tp));
+ if (s > t) mpn_mul (vinf, a3, s, b3, t);
+ else TOOM44_MUL_N_REC (vinf, a3, b3, s, scratch_out); /* vinf, s+t limbs */
- /* Compute bpx = b0 + b1 + b2 + b3 bnd bmx = b0 - b1 + b2 - b3. */
- flags = (enum toom7_flags) (flags ^ toom7_w3_neg & mpn_toom_eval_dgr3_pm1 (bpx, bmx, bp, n, t, tp));
+ TOOM44_MUL_N_REC (v1 , as1 , bs1 , n + 1, scratch_out); /* v1, 2n+1 limbs */
- TOOM44_MUL_N_REC (vm1, amx, bmx, n + 1, tp); /* vm1, 2n+1 limbs */
- /* Clobbers amx, bmx. */
- TOOM44_MUL_N_REC (v1, apx, bpx, n + 1, tp); /* v1, 2n+1 limbs */
+ TOOM44_MUL_N_REC (vh , ash , bsh , n + 1, scratch_out);
- TOOM44_MUL_N_REC (v0, a0, b0, n, tp);
- if (s > t)
- mpn_mul (vinf, a3, s, b3, t);
- else
- TOOM44_MUL_N_REC (vinf, a3, b3, s, tp); /* vinf, s+t limbs */
+ TOOM44_MUL_N_REC (vmh, asmh, bsmh, n + 1, scratch_out);
+
+ TOOM44_MUL_N_REC (v0 , ap , bp , n , scratch_out); /* v0, 2n limbs */
+
+ mpn_toom_interpolate_7pts (pp, n, flags, vmh, vm1, v1, v2, s + t, scratch_out);
- mpn_toom_interpolate_7pts (pp, n, flags, vm2, vm1, v2, vh, s + t, tp);
+ TMP_FREE;
}
diff --git a/gmp/mpn/generic/toom4_sqr.c b/gmp/mpn/generic/toom4_sqr.c
index b4154ba83f..911b5548d7 100644
--- a/gmp/mpn/generic/toom4_sqr.c
+++ b/gmp/mpn/generic/toom4_sqr.c
@@ -6,34 +6,31 @@
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2006-2010, 2013 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+/*
+ Things to work on:
+
+ 1. Trim allocation. The allocations for as1, asm1, bs1, and bsm1 could be
+ avoided by instead reusing the pp area and the scratch area.
+ 2. Use new toom functions for the recursive calls.
+*/
#include "gmp.h"
#include "gmp-impl.h"
@@ -43,14 +40,16 @@ see https://www.gnu.org/licenses/. */
<-s--><--n--><--n--><--n-->
____ ______ ______ ______
|_a3_|___a2_|___a1_|___a0_|
-
- v0 = a0 ^2 # A(0)^2
- v1 = ( a0+ a1+ a2+ a3)^2 # A(1)^2 ah <= 3
- vm1 = ( a0- a1+ a2- a3)^2 # A(-1)^2 |ah| <= 1
- v2 = ( a0+2a1+4a2+8a3)^2 # A(2)^2 ah <= 14
- vh = (8a0+4a1+2a2+ a3)^2 # A(1/2)^2 ah <= 14
- vmh = (8a0-4a1+2a2- a3)^2 # A(-1/2)^2 -4<=ah<=9
- vinf= a3 ^2 # A(inf)^2
+ |b3_|___b2_|___b1_|___b0_|
+ <-t-><--n--><--n--><--n-->
+
+ v0 = a0 * b0 # A(0)*B(0)
+ v1 = ( a0+ a1+ a2+ a3)*( b0+ b1+ b2+ b3) # A(1)*B(1) ah <= 3 bh <= 3
+ vm1 = ( a0- a1+ a2- a3)*( b0- b1+ b2- b3) # A(-1)*B(-1) |ah| <= 1 |bh| <= 1
+ v2 = ( a0+2a1+4a2+8a3)*( b0+2b1+4b2+8b3) # A(2)*B(2) ah <= 14 bh <= 14
+ vh = (8a0+4a1+2a2+ a3)*(8b0+4b1+2b2+ b3) # A(1/2)*B(1/2) ah <= 14 bh <= 14
+ vmh = (8a0-4a1+2a2- a3)*(8b0-4b1+2b2- b3) # A(-1/2)*B(-1/2) -4<=ah<=9 -4<=bh<=9
+ vinf= a3 * b2 # A(inf)*B(inf)
*/
#if TUNE_PROGRAM_BUILD
@@ -59,24 +58,24 @@ see https://www.gnu.org/licenses/. */
#define MAYBE_sqr_toom4 1
#else
#define MAYBE_sqr_basecase \
- (SQR_TOOM4_THRESHOLD < 4 * SQR_TOOM2_THRESHOLD)
+ (SQR_TOOM4_THRESHOLD < 4 * SQR_KARATSUBA_THRESHOLD)
#define MAYBE_sqr_toom2 \
(SQR_TOOM4_THRESHOLD < 4 * SQR_TOOM3_THRESHOLD)
#define MAYBE_sqr_toom4 \
- (SQR_TOOM6_THRESHOLD >= 4 * SQR_TOOM4_THRESHOLD)
+ (SQR_FFT_THRESHOLD >= 4 * SQR_TOOM4_THRESHOLD)
#endif
-#define TOOM4_SQR_REC(p, a, n, ws) \
+#define TOOM4_SQR_N_REC(p, a, n, ws) \
do { \
if (MAYBE_sqr_basecase \
- && BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD)) \
+ && BELOW_THRESHOLD (n, SQR_KARATSUBA_THRESHOLD)) \
mpn_sqr_basecase (p, a, n); \
else if (MAYBE_sqr_toom2 \
&& BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD)) \
- mpn_toom2_sqr (p, a, n, ws); \
+ mpn_kara_sqr_n (p, a, n, ws); \
else if (! MAYBE_sqr_toom4 \
|| BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD)) \
- mpn_toom3_sqr (p, a, n, ws); \
+ mpn_toom3_sqr_n (p, a, n, ws); \
else \
mpn_toom4_sqr (p, a, n, ws); \
} while (0)
@@ -88,6 +87,9 @@ mpn_toom4_sqr (mp_ptr pp,
{
mp_size_t n, s;
mp_limb_t cy;
+ mp_ptr gp, hp;
+ mp_ptr as1, asm1, as2, ash, asmh;
+ TMP_DECL;
#define a0 ap
#define a1 (ap + n)
@@ -100,65 +102,122 @@ mpn_toom4_sqr (mp_ptr pp,
ASSERT (0 < s && s <= n);
- /* NOTE: The multiplications to v2, vm2, vh and vm1 overwrites the
- * following limb, so these must be computed in order, and we need a
- * one limb gap to tp. */
-#define v0 pp /* 2n */
-#define v1 (pp + 2 * n) /* 2n+1 */
-#define vinf (pp + 6 * n) /* s+t */
-#define v2 scratch /* 2n+1 */
-#define vm2 (scratch + 2 * n + 1) /* 2n+1 */
-#define vh (scratch + 4 * n + 2) /* 2n+1 */
-#define vm1 (scratch + 6 * n + 3) /* 2n+1 */
-#define tp (scratch + 8*n + 5)
+ TMP_MARK;
- /* No overlap with v1 */
-#define apx pp /* n+1 */
-#define amx (pp + 4*n + 2) /* n+1 */
+ as1 = TMP_SALLOC_LIMBS (n + 1);
+ asm1 = TMP_SALLOC_LIMBS (n + 1);
+ as2 = TMP_SALLOC_LIMBS (n + 1);
+ ash = TMP_SALLOC_LIMBS (n + 1);
+ asmh = TMP_SALLOC_LIMBS (n + 1);
- /* Total scratch need: 8*n + 5 + scratch for recursive calls. This
- gives roughly 32 n/3 + log term. */
+ gp = pp;
+ hp = pp + n + 1;
- /* Compute apx = a0 + 2 a1 + 4 a2 + 8 a3 and amx = a0 - 2 a1 + 4 a2 - 8 a3. */
- mpn_toom_eval_dgr3_pm2 (apx, amx, ap, n, s, tp);
+ /* Compute as1 and asm1. */
+ gp[n] = mpn_add_n (gp, a0, a2, n);
+ hp[n] = mpn_add (hp, a1, n, a3, s);
+#if HAVE_NATIVE_mpn_addsub_n
+ if (mpn_cmp (gp, hp, n + 1) < 0)
+ {
+ mpn_addsub_n (as1, asm1, hp, gp, n + 1);
+ }
+ else
+ {
+ mpn_addsub_n (as1, asm1, gp, hp, n + 1);
+ }
+#else
+ mpn_add_n (as1, gp, hp, n + 1);
+ if (mpn_cmp (gp, hp, n + 1) < 0)
+ {
+ mpn_sub_n (asm1, hp, gp, n + 1);
+ }
+ else
+ {
+ mpn_sub_n (asm1, gp, hp, n + 1);
+ }
+#endif
- TOOM4_SQR_REC (v2, apx, n + 1, tp); /* v2, 2n+1 limbs */
- TOOM4_SQR_REC (vm2, amx, n + 1, tp); /* vm2, 2n+1 limbs */
+ /* Compute as2. */
+#if HAVE_NATIVE_mpn_addlsh1_n
+ cy = mpn_addlsh1_n (as2, a2, a3, s);
+ if (s != n)
+ cy = mpn_add_1 (as2 + s, a2 + s, n - s, cy);
+ cy = 2 * cy + mpn_addlsh1_n (as2, a1, as2, n);
+ cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
+#else
+ cy = mpn_lshift (as2, a3, s, 1);
+ cy += mpn_add_n (as2, a2, as2, s);
+ if (s != n)
+ cy = mpn_add_1 (as2 + s, a2 + s, n - s, cy);
+ cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
+ cy += mpn_add_n (as2, a1, as2, n);
+ cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
+ cy += mpn_add_n (as2, a0, as2, n);
+#endif
+ as2[n] = cy;
- /* Compute apx = 8 a0 + 4 a1 + 2 a2 + a3 = (((2*a0 + a1) * 2 + a2) * 2 + a3 */
+ /* Compute ash and asmh. */
+ cy = mpn_lshift (gp, a0, n, 3); /* 8a0 */
#if HAVE_NATIVE_mpn_addlsh1_n
- cy = mpn_addlsh1_n (apx, a1, a0, n);
- cy = 2*cy + mpn_addlsh1_n (apx, a2, apx, n);
- if (s < n)
+ gp[n] = cy + mpn_addlsh1_n (gp, gp, a2, n); /* 8a0 + 2a2 */
+#else
+ cy += mpn_lshift (hp, a2, n, 1); /* 2a2 */
+ gp[n] = cy + mpn_add_n (gp, gp, hp, n); /* 8a0 + 2a2 */
+#endif
+ cy = mpn_lshift (hp, a1, n, 2); /* 4a1 */
+ hp[n] = cy + mpn_add (hp, hp, n, a3, s); /* 4a1 + a3 */
+#if HAVE_NATIVE_mpn_addsub_n
+ if (mpn_cmp (gp, hp, n + 1) < 0)
{
- mp_limb_t cy2;
- cy2 = mpn_addlsh1_n (apx, a3, apx, s);
- apx[n] = 2*cy + mpn_lshift (apx + s, apx + s, n - s, 1);
- MPN_INCR_U (apx + s, n+1-s, cy2);
+ mpn_addsub_n (ash, asmh, hp, gp, n + 1);
}
else
- apx[n] = 2*cy + mpn_addlsh1_n (apx, a3, apx, n);
+ {
+ mpn_addsub_n (ash, asmh, gp, hp, n + 1);
+ }
#else
- cy = mpn_lshift (apx, a0, n, 1);
- cy += mpn_add_n (apx, apx, a1, n);
- cy = 2*cy + mpn_lshift (apx, apx, n, 1);
- cy += mpn_add_n (apx, apx, a2, n);
- cy = 2*cy + mpn_lshift (apx, apx, n, 1);
- apx[n] = cy + mpn_add (apx, apx, n, a3, s);
+ mpn_add_n (ash, gp, hp, n + 1);
+ if (mpn_cmp (gp, hp, n + 1) < 0)
+ {
+ mpn_sub_n (asmh, hp, gp, n + 1);
+ }
+ else
+ {
+ mpn_sub_n (asmh, gp, hp, n + 1);
+ }
#endif
- ASSERT (apx[n] < 15);
+ ASSERT (as1[n] <= 3);
+ ASSERT (asm1[n] <= 1);
+ ASSERT (as2[n] <= 14);
+ ASSERT (ash[n] <= 14);
+ ASSERT (asmh[n] <= 9);
+
+#define v0 pp /* 2n */
+#define v1 (scratch + 6 * n + 6) /* 2n+1 */
+#define vm1 scratch /* 2n+1 */
+#define v2 (scratch + 2 * n + 2) /* 2n+1 */
+#define vinf (pp + 6 * n) /* s+t */
+#define vh (pp + 2 * n) /* 2n+1 */
+#define vmh (scratch + 4 * n + 4)
+#define scratch_out (scratch + 8 * n + 8)
+
+ /* vm1, 2n+1 limbs */
+ TOOM4_SQR_N_REC (vm1, asm1, n + 1, scratch_out); /* vm1, 2n+1 limbs */
+
+ TOOM4_SQR_N_REC (v2 , as2 , n + 1, scratch_out); /* v2, 2n+1 limbs */
+
+ TOOM4_SQR_N_REC (vinf, a3 , s, scratch_out); /* vinf, 2s limbs */
+
+ TOOM4_SQR_N_REC (v1 , as1 , n + 1, scratch_out); /* v1, 2n+1 limbs */
- TOOM4_SQR_REC (vh, apx, n + 1, tp); /* vh, 2n+1 limbs */
+ TOOM4_SQR_N_REC (vh , ash , n + 1, scratch_out);
- /* Compute apx = a0 + a1 + a2 + a3 and amx = a0 - a1 + a2 - a3. */
- mpn_toom_eval_dgr3_pm1 (apx, amx, ap, n, s, tp);
+ TOOM4_SQR_N_REC (vmh, asmh, n + 1, scratch_out);
- TOOM4_SQR_REC (v1, apx, n + 1, tp); /* v1, 2n+1 limbs */
- TOOM4_SQR_REC (vm1, amx, n + 1, tp); /* vm1, 2n+1 limbs */
+ TOOM4_SQR_N_REC (v0 , ap , n , scratch_out); /* v0, 2n limbs */
- TOOM4_SQR_REC (v0, a0, n, tp);
- TOOM4_SQR_REC (vinf, a3, s, tp); /* vinf, 2s limbs */
+ mpn_toom_interpolate_7pts (pp, n, 0, vmh, vm1, v1, v2, s + s, scratch_out);
- mpn_toom_interpolate_7pts (pp, n, (enum toom7_flags) 0, vm2, vm1, v2, vh, 2*s, tp);
+ TMP_FREE;
}
diff --git a/gmp/mpn/generic/toom52_mul.c b/gmp/mpn/generic/toom52_mul.c
deleted file mode 100644
index e15b5833aa..0000000000
--- a/gmp/mpn/generic/toom52_mul.c
+++ /dev/null
@@ -1,257 +0,0 @@
-/* mpn_toom52_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 4/3
- times as large as bn. Or more accurately, bn < an < 2 bn.
-
- Contributed to the GNU project by Marco Bodrato.
-
- The idea of applying toom to unbalanced multiplication is due to Marco
- Bodrato and Alberto Zanoni.
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/* Evaluate in: -2, -1, 0, +1, +2, +inf
-
- <-s-><--n--><--n--><--n--><--n-->
- ___ ______ ______ ______ ______
- |a4_|___a3_|___a2_|___a1_|___a0_|
- |b1|___b0_|
- <t-><--n-->
-
- v0 = a0 * b0 # A(0)*B(0)
- v1 = (a0+ a1+ a2+ a3+ a4)*(b0+ b1) # A(1)*B(1) ah <= 4 bh <= 1
- vm1 = (a0- a1+ a2- a3+ a4)*(b0- b1) # A(-1)*B(-1) |ah| <= 2 bh = 0
- v2 = (a0+2a1+4a2+8a3+16a4)*(b0+2b1) # A(2)*B(2) ah <= 30 bh <= 2
- vm2 = (a0-2a1+4a2-8a3+16a4)*(b0-2b1) # A(-2)*B(-2) |ah| <= 20 |bh|<= 1
- vinf= a4 * b1 # A(inf)*B(inf)
-
- Some slight optimization in evaluation are taken from the paper:
- "Towards Optimal Toom-Cook Multiplication for Univariate and
- Multivariate Polynomials in Characteristic 2 and 0."
-*/
-
-void
-mpn_toom52_mul (mp_ptr pp,
- mp_srcptr ap, mp_size_t an,
- mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
-{
- mp_size_t n, s, t;
- enum toom6_flags flags;
-
-#define a0 ap
-#define a1 (ap + n)
-#define a2 (ap + 2 * n)
-#define a3 (ap + 3 * n)
-#define a4 (ap + 4 * n)
-#define b0 bp
-#define b1 (bp + n)
-
- n = 1 + (2 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) >> 1);
-
- s = an - 4 * n;
- t = bn - n;
-
- ASSERT (0 < s && s <= n);
- ASSERT (0 < t && t <= n);
-
- /* Ensures that 5 values of n+1 limbs each fits in the product area.
- Borderline cases are an = 32, bn = 8, n = 7, and an = 36, bn = 9,
- n = 8. */
- ASSERT (s+t >= 5);
-
-#define v0 pp /* 2n */
-#define vm1 (scratch) /* 2n+1 */
-#define v1 (pp + 2 * n) /* 2n+1 */
-#define vm2 (scratch + 2 * n + 1) /* 2n+1 */
-#define v2 (scratch + 4 * n + 2) /* 2n+1 */
-#define vinf (pp + 5 * n) /* s+t */
-#define bs1 pp /* n+1 */
-#define bsm1 (scratch + 2 * n + 2) /* n */
-#define asm1 (scratch + 3 * n + 3) /* n+1 */
-#define asm2 (scratch + 4 * n + 4) /* n+1 */
-#define bsm2 (pp + n + 1) /* n+1 */
-#define bs2 (pp + 2 * n + 2) /* n+1 */
-#define as2 (pp + 3 * n + 3) /* n+1 */
-#define as1 (pp + 4 * n + 4) /* n+1 */
-
- /* Scratch need is 6 * n + 3 + 1. We need one extra limb, because
- products will overwrite 2n+2 limbs. */
-
-#define a0a2 scratch
-#define a1a3 asm1
-
- /* Compute as2 and asm2. */
- flags = (enum toom6_flags) (toom6_vm2_neg & mpn_toom_eval_pm2 (as2, asm2, 4, ap, n, s, a1a3));
-
- /* Compute bs1 and bsm1. */
- if (t == n)
- {
-#if HAVE_NATIVE_mpn_add_n_sub_n
- mp_limb_t cy;
-
- if (mpn_cmp (b0, b1, n) < 0)
- {
- cy = mpn_add_n_sub_n (bs1, bsm1, b1, b0, n);
- flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
- }
- else
- {
- cy = mpn_add_n_sub_n (bs1, bsm1, b0, b1, n);
- }
- bs1[n] = cy >> 1;
-#else
- bs1[n] = mpn_add_n (bs1, b0, b1, n);
- if (mpn_cmp (b0, b1, n) < 0)
- {
- mpn_sub_n (bsm1, b1, b0, n);
- flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
- }
- else
- {
- mpn_sub_n (bsm1, b0, b1, n);
- }
-#endif
- }
- else
- {
- bs1[n] = mpn_add (bs1, b0, n, b1, t);
- if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
- {
- mpn_sub_n (bsm1, b1, b0, t);
- MPN_ZERO (bsm1 + t, n - t);
- flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
- }
- else
- {
- mpn_sub (bsm1, b0, n, b1, t);
- }
- }
-
- /* Compute bs2 and bsm2, recycling bs1 and bsm1. bs2=bs1+b1; bsm2=bsm1-b1 */
- mpn_add (bs2, bs1, n+1, b1, t);
- if (flags & toom6_vm1_neg )
- {
- bsm2[n] = mpn_add (bsm2, bsm1, n, b1, t);
- flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
- }
- else
- {
- bsm2[n] = 0;
- if (t == n)
- {
- if (mpn_cmp (bsm1, b1, n) < 0)
- {
- mpn_sub_n (bsm2, b1, bsm1, n);
- flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
- }
- else
- {
- mpn_sub_n (bsm2, bsm1, b1, n);
- }
- }
- else
- {
- if (mpn_zero_p (bsm1 + t, n - t) && mpn_cmp (bsm1, b1, t) < 0)
- {
- mpn_sub_n (bsm2, b1, bsm1, t);
- MPN_ZERO (bsm2 + t, n - t);
- flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
- }
- else
- {
- mpn_sub (bsm2, bsm1, n, b1, t);
- }
- }
- }
-
- /* Compute as1 and asm1. */
- flags = (enum toom6_flags) (flags ^ toom6_vm1_neg & mpn_toom_eval_pm1 (as1, asm1, 4, ap, n, s, a0a2));
-
- ASSERT (as1[n] <= 4);
- ASSERT (bs1[n] <= 1);
- ASSERT (asm1[n] <= 2);
-/* ASSERT (bsm1[n] <= 1); */
- ASSERT (as2[n] <=30);
- ASSERT (bs2[n] <= 2);
- ASSERT (asm2[n] <= 20);
- ASSERT (bsm2[n] <= 1);
-
- /* vm1, 2n+1 limbs */
- mpn_mul (vm1, asm1, n+1, bsm1, n); /* W4 */
-
- /* vm2, 2n+1 limbs */
- mpn_mul_n (vm2, asm2, bsm2, n+1); /* W2 */
-
- /* v2, 2n+1 limbs */
- mpn_mul_n (v2, as2, bs2, n+1); /* W1 */
-
- /* v1, 2n+1 limbs */
- mpn_mul_n (v1, as1, bs1, n+1); /* W3 */
-
- /* vinf, s+t limbs */ /* W0 */
- if (s > t) mpn_mul (vinf, a4, s, b1, t);
- else mpn_mul (vinf, b1, t, a4, s);
-
- /* v0, 2n limbs */
- mpn_mul_n (v0, ap, bp, n); /* W5 */
-
- mpn_toom_interpolate_6pts (pp, n, flags, vm1, vm2, v2, t + s);
-
-#undef v0
-#undef vm1
-#undef v1
-#undef vm2
-#undef v2
-#undef vinf
-#undef bs1
-#undef bs2
-#undef bsm1
-#undef bsm2
-#undef asm1
-#undef asm2
-#undef as1
-#undef as2
-#undef a0a2
-#undef b0b2
-#undef a1a3
-#undef a0
-#undef a1
-#undef a2
-#undef a3
-#undef b0
-#undef b1
-#undef b2
-
-}
diff --git a/gmp/mpn/generic/toom53_mul.c b/gmp/mpn/generic/toom53_mul.c
index 41274d48e0..4483d4dfb7 100644
--- a/gmp/mpn/generic/toom53_mul.c
+++ b/gmp/mpn/generic/toom53_mul.c
@@ -10,39 +10,35 @@
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2006-2008, 2012 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+/*
+ Things to work on:
+
+ 1. Trim allocation. The allocations for as1, asm1, bs1, and bsm1 could be
+ avoided by instead reusing the pp area and the scratch allocation.
+*/
+
#include "gmp.h"
#include "gmp-impl.h"
-/* Evaluate in: 0, +1, -1, +2, -2, 1/2, +inf
+/* Evaluate in: -1, -1/2, 0, +1/2, +1, +2, +inf
<-s-><--n--><--n--><--n--><--n-->
___ ______ ______ ______ ______
@@ -54,8 +50,8 @@ see https://www.gnu.org/licenses/. */
v1 = ( a0+ a1+ a2+ a3+ a4)*( b0+ b1+ b2) # A(1)*B(1) ah <= 4 bh <= 2
vm1 = ( a0- a1+ a2- a3+ a4)*( b0- b1+ b2) # A(-1)*B(-1) |ah| <= 2 bh <= 1
v2 = ( a0+2a1+4a2+8a3+16a4)*( b0+2b1+4b2) # A(2)*B(2) ah <= 30 bh <= 6
- vm2 = ( a0-2a1+4a2-8a3+16a4)*( b0-2b1+4b2) # A(2)*B(2) -9<=ah<=20 -1<=bh<=4
vh = (16a0+8a1+4a2+2a3+ a4)*(4b0+2b1+ b2) # A(1/2)*B(1/2) ah <= 30 bh <= 6
+ vmh = (16a0-8a1+4a2-2a3+ a4)*(4b0-2b1+ b2) # A(-1/2)*B(-1/2) -9<=ah<=20 -1<=bh<=4
vinf= a4 * b2 # A(inf)*B(inf)
*/
@@ -66,11 +62,12 @@ mpn_toom53_mul (mp_ptr pp,
mp_ptr scratch)
{
mp_size_t n, s, t;
+ int vm1_neg, vmh_neg;
mp_limb_t cy;
- mp_ptr gp;
- mp_ptr as1, asm1, as2, asm2, ash;
- mp_ptr bs1, bsm1, bs2, bsm2, bsh;
- enum toom7_flags flags;
+ mp_ptr gp, hp;
+ mp_ptr as1, asm1, as2, ash, asmh;
+ mp_ptr bs1, bsm1, bs2, bsh, bsmh;
+ enum toom4_flags flags;
TMP_DECL;
#define a0 ap
@@ -95,61 +92,124 @@ mpn_toom53_mul (mp_ptr pp,
as1 = TMP_SALLOC_LIMBS (n + 1);
asm1 = TMP_SALLOC_LIMBS (n + 1);
as2 = TMP_SALLOC_LIMBS (n + 1);
- asm2 = TMP_SALLOC_LIMBS (n + 1);
ash = TMP_SALLOC_LIMBS (n + 1);
+ asmh = TMP_SALLOC_LIMBS (n + 1);
bs1 = TMP_SALLOC_LIMBS (n + 1);
bsm1 = TMP_SALLOC_LIMBS (n + 1);
bs2 = TMP_SALLOC_LIMBS (n + 1);
- bsm2 = TMP_SALLOC_LIMBS (n + 1);
bsh = TMP_SALLOC_LIMBS (n + 1);
+ bsmh = TMP_SALLOC_LIMBS (n + 1);
gp = pp;
+ hp = pp + n + 1;
/* Compute as1 and asm1. */
- flags = (enum toom7_flags) (toom7_w3_neg & mpn_toom_eval_pm1 (as1, asm1, 4, ap, n, s, gp));
-
- /* Compute as2 and asm2. */
- flags = (enum toom7_flags) (flags | toom7_w1_neg & mpn_toom_eval_pm2 (as2, asm2, 4, ap, n, s, gp));
+ gp[n] = mpn_add_n (gp, a0, a2, n);
+ gp[n] += mpn_add (gp, gp, n, a4, s);
+ hp[n] = mpn_add_n (hp, a1, a3, n);
+#if HAVE_NATIVE_mpn_addsub_n
+ if (mpn_cmp (gp, hp, n + 1) < 0)
+ {
+ mpn_addsub_n (as1, asm1, hp, gp, n + 1);
+ vm1_neg = 1;
+ }
+ else
+ {
+ mpn_addsub_n (as1, asm1, gp, hp, n + 1);
+ vm1_neg = 0;
+ }
+#else
+ mpn_add_n (as1, gp, hp, n + 1);
+ if (mpn_cmp (gp, hp, n + 1) < 0)
+ {
+ mpn_sub_n (asm1, hp, gp, n + 1);
+ vm1_neg = 1;
+ }
+ else
+ {
+ mpn_sub_n (asm1, gp, hp, n + 1);
+ vm1_neg = 0;
+ }
+#endif
- /* Compute ash = 16 a0 + 8 a1 + 4 a2 + 2 a3 + a4
- = 2*(2*(2*(2*a0 + a1) + a2) + a3) + a4 */
+ /* Compute as2. */
+#if !HAVE_NATIVE_mpn_addlsh_n
+ ash[n] = mpn_lshift (ash, a2, n, 2); /* 4a2 */
+#endif
#if HAVE_NATIVE_mpn_addlsh1_n
- cy = mpn_addlsh1_n (ash, a1, a0, n);
- cy = 2*cy + mpn_addlsh1_n (ash, a2, ash, n);
- cy = 2*cy + mpn_addlsh1_n (ash, a3, ash, n);
- if (s < n)
+ cy = mpn_addlsh1_n (as2, a3, a4, s);
+ if (s != n)
+ cy = mpn_add_1 (as2 + s, a3 + s, n - s, cy);
+ cy = 2 * cy + mpn_addlsh1_n (as2, a2, as2, n);
+ cy = 2 * cy + mpn_addlsh1_n (as2, a1, as2, n);
+ as2[n] = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
+#else
+ cy = mpn_lshift (as2, a4, s, 1);
+ cy += mpn_add_n (as2, a3, as2, s);
+ if (s != n)
+ cy = mpn_add_1 (as2 + s, a3 + s, n - s, cy);
+ cy = 4 * cy + mpn_lshift (as2, as2, n, 2);
+ cy += mpn_add_n (as2, a1, as2, n);
+ cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
+ as2[n] = cy + mpn_add_n (as2, a0, as2, n);
+ mpn_add_n (as2, ash, as2, n + 1);
+#endif
+
+ /* Compute ash and asmh. */
+#if HAVE_NATIVE_mpn_addlsh_n
+ cy = mpn_addlsh_n (gp, a2, a0, n, 2); /* 4a0 + a2 */
+ cy = 4 * cy + mpn_addlsh_n (gp, a4, gp, n, 2); /* 16a0 + 4a2 + a4 */ /* FIXME s */
+ gp[n] = cy;
+ cy = mpn_addlsh_n (hp, a3, a1, n, 2); /* 4a1 + a3 */
+ cy = 2 * cy + mpn_lshift (hp, hp, n, 1); /* 8a1 + 2a3 */
+ hp[n] = cy;
+#else
+ gp[n] = mpn_lshift (gp, a0, n, 4); /* 16a0 */
+ mpn_add (gp, gp, n + 1, a4, s); /* 16a0 + a4 */
+ mpn_add_n (gp, ash, gp, n+1); /* 16a0 + 4a2 + a4 */
+ cy = mpn_lshift (hp, a1, n, 3); /* 8a1 */
+ cy += mpn_lshift (ash, a3, n, 1); /* 2a3 */
+ cy += mpn_add_n (hp, ash, hp, n); /* 8a1 + 2a3 */
+ hp[n] = cy;
+#endif
+#if HAVE_NATIVE_mpn_addsub_n
+ if (mpn_cmp (gp, hp, n + 1) < 0)
{
- mp_limb_t cy2;
- cy2 = mpn_addlsh1_n (ash, a4, ash, s);
- ash[n] = 2*cy + mpn_lshift (ash + s, ash + s, n - s, 1);
- MPN_INCR_U (ash + s, n+1-s, cy2);
+ mpn_addsub_n (ash, asmh, hp, gp, n + 1);
+ vmh_neg = 1;
}
else
- ash[n] = 2*cy + mpn_addlsh1_n (ash, a4, ash, n);
+ {
+ mpn_addsub_n (ash, asmh, gp, hp, n + 1);
+ vmh_neg = 0;
+ }
#else
- cy = mpn_lshift (ash, a0, n, 1);
- cy += mpn_add_n (ash, ash, a1, n);
- cy = 2*cy + mpn_lshift (ash, ash, n, 1);
- cy += mpn_add_n (ash, ash, a2, n);
- cy = 2*cy + mpn_lshift (ash, ash, n, 1);
- cy += mpn_add_n (ash, ash, a3, n);
- cy = 2*cy + mpn_lshift (ash, ash, n, 1);
- ash[n] = cy + mpn_add (ash, ash, n, a4, s);
+ mpn_add_n (ash, gp, hp, n + 1);
+ if (mpn_cmp (gp, hp, n + 1) < 0)
+ {
+ mpn_sub_n (asmh, hp, gp, n + 1);
+ vmh_neg = 1;
+ }
+ else
+ {
+ mpn_sub_n (asmh, gp, hp, n + 1);
+ vmh_neg = 0;
+ }
#endif
/* Compute bs1 and bsm1. */
bs1[n] = mpn_add (bs1, b0, n, b2, t); /* b0 + b2 */
-#if HAVE_NATIVE_mpn_add_n_sub_n
+#if HAVE_NATIVE_mpn_addsub_n
if (bs1[n] == 0 && mpn_cmp (bs1, b1, n) < 0)
{
- bs1[n] = mpn_add_n_sub_n (bs1, bsm1, b1, bs1, n) >> 1;
+ bs1[n] = mpn_addsub_n (bs1, bsm1, b1, bs1, n) >> 1;
bsm1[n] = 0;
- flags = (enum toom7_flags) (flags ^ toom7_w3_neg);
+ vm1_neg ^= 1;
}
else
{
- cy = mpn_add_n_sub_n (bs1, bsm1, bs1, b1, n);
+ cy = mpn_addsub_n (bs1, bsm1, bs1, b1, n);
bsm1[n] = bs1[n] - (cy & 1);
bs1[n] += (cy >> 1);
}
@@ -158,7 +218,7 @@ mpn_toom53_mul (mp_ptr pp,
{
mpn_sub_n (bsm1, b1, bs1, n);
bsm1[n] = 0;
- flags = (enum toom7_flags) (flags ^ toom7_w3_neg);
+ vm1_neg ^= 1;
}
else
{
@@ -167,64 +227,46 @@ mpn_toom53_mul (mp_ptr pp,
bs1[n] += mpn_add_n (bs1, bs1, b1, n); /* b0+b1+b2 */
#endif
- /* Compute bs2 and bsm2. */
-#if HAVE_NATIVE_mpn_addlsh_n || HAVE_NATIVE_mpn_addlsh2_n
-#if HAVE_NATIVE_mpn_addlsh2_n
- cy = mpn_addlsh2_n (bs2, b0, b2, t);
-#else /* HAVE_NATIVE_mpn_addlsh_n */
- cy = mpn_addlsh_n (bs2, b0, b2, t, 2);
-#endif
- if (t < n)
- cy = mpn_add_1 (bs2 + t, b0 + t, n - t, cy);
- bs2[n] = cy;
+ /* Compute bs2 */
+ hp[n] = mpn_lshift (hp, b1, n, 1); /* 2b1 */
+
+#ifdef HAVE_NATIVE_mpn_addlsh1_n
+ cy = mpn_addlsh1_n (bs2, b1, b2, t);
+ if (t != n)
+ cy = mpn_add_1 (bs2 + t, b1 + t, n - t, cy);
+ bs2[n] = 2 * cy + mpn_addlsh1_n (bs2, b0, bs2, n);
#else
- cy = mpn_lshift (gp, b2, t, 2);
- bs2[n] = mpn_add (bs2, b0, n, gp, t);
- MPN_INCR_U (bs2 + t, n+1-t, cy);
+ bs2[t] = mpn_lshift (bs2, b2, t, 2);
+ mpn_add (bs2, hp, n + 1, bs2, t + 1);
+ bs2[n] += mpn_add_n (bs2, bs2, b0, n);
#endif
- gp[n] = mpn_lshift (gp, b1, n, 1);
-
-#if HAVE_NATIVE_mpn_add_n_sub_n
- if (mpn_cmp (bs2, gp, n+1) < 0)
+ /* Compute bsh and bsmh. */
+#if HAVE_NATIVE_mpn_addlsh_n
+ gp[n] = mpn_addlsh_n (gp, b2, b0, n, 2); /* 4a0 + a2 */
+#else
+ cy = mpn_lshift (gp, b0, n, 2); /* 4b0 */
+ gp[n] = cy + mpn_add (gp, gp, n, b2, t); /* 4b0 + b2 */
+#endif
+#if HAVE_NATIVE_mpn_addsub_n
+ if (mpn_cmp (gp, hp, n + 1) < 0)
{
- ASSERT_NOCARRY (mpn_add_n_sub_n (bs2, bsm2, gp, bs2, n+1));
- flags = (enum toom7_flags) (flags ^ toom7_w1_neg);
+ mpn_addsub_n (bsh, bsmh, hp, gp, n + 1);
+ vmh_neg^= 1;
}
else
- {
- ASSERT_NOCARRY (mpn_add_n_sub_n (bs2, bsm2, bs2, gp, n+1));
- }
+ mpn_addsub_n (bsh, bsmh, gp, hp, n + 1);
#else
- if (mpn_cmp (bs2, gp, n+1) < 0)
+ mpn_add_n (bsh, gp, hp, n + 1); /* 4b0 + 2b1 + b2 */
+ if (mpn_cmp (gp, hp, n + 1) < 0)
{
- ASSERT_NOCARRY (mpn_sub_n (bsm2, gp, bs2, n+1));
- flags = (enum toom7_flags) (flags ^ toom7_w1_neg);
+ mpn_sub_n (bsmh, hp, gp, n + 1);
+ vmh_neg ^= 1;
}
else
{
- ASSERT_NOCARRY (mpn_sub_n (bsm2, bs2, gp, n+1));
+ mpn_sub_n (bsmh, gp, hp, n + 1);
}
- mpn_add_n (bs2, bs2, gp, n+1);
-#endif
-
- /* Compute bsh = 4 b0 + 2 b1 + b2 = 2*(2*b0 + b1)+b2. */
-#if HAVE_NATIVE_mpn_addlsh1_n
- cy = mpn_addlsh1_n (bsh, b1, b0, n);
- if (t < n)
- {
- mp_limb_t cy2;
- cy2 = mpn_addlsh1_n (bsh, b2, bsh, t);
- bsh[n] = 2*cy + mpn_lshift (bsh + t, bsh + t, n - t, 1);
- MPN_INCR_U (bsh + t, n+1-t, cy2);
- }
- else
- bsh[n] = 2*cy + mpn_addlsh1_n (bsh, b2, bsh, n);
-#else
- cy = mpn_lshift (bsh, b0, n, 1);
- cy += mpn_add_n (bsh, bsh, b1, n);
- cy = 2*cy + mpn_lshift (bsh, bsh, n, 1);
- bsh[n] = cy + mpn_add (bsh, bsh, n, b2, t);
#endif
ASSERT (as1[n] <= 4);
@@ -233,26 +275,18 @@ mpn_toom53_mul (mp_ptr pp,
ASSERT (bsm1[n] <= 1);
ASSERT (as2[n] <= 30);
ASSERT (bs2[n] <= 6);
- ASSERT (asm2[n] <= 20);
- ASSERT (bsm2[n] <= 4);
ASSERT (ash[n] <= 30);
ASSERT (bsh[n] <= 6);
+ ASSERT (asmh[n] <= 20);
+ ASSERT (bsmh[n] <= 4);
#define v0 pp /* 2n */
-#define v1 (pp + 2 * n) /* 2n+1 */
+#define v1 (scratch + 6 * n + 6) /* 2n+1 */
+#define vm1 scratch /* 2n+1 */
+#define v2 (scratch + 2 * n + 2) /* 2n+1 */
#define vinf (pp + 6 * n) /* s+t */
-#define v2 scratch /* 2n+1 */
-#define vm2 (scratch + 2 * n + 1) /* 2n+1 */
-#define vh (scratch + 4 * n + 2) /* 2n+1 */
-#define vm1 (scratch + 6 * n + 3) /* 2n+1 */
-#define scratch_out (scratch + 8 * n + 4) /* 2n+1 */
- /* Total scratch need: 10*n+5 */
-
- /* Must be in allocation order, as they overwrite one limb beyond
- * 2n+1. */
- mpn_mul_n (v2, as2, bs2, n + 1); /* v2, 2n+1 limbs */
- mpn_mul_n (vm2, asm2, bsm2, n + 1); /* vm2, 2n+1 limbs */
- mpn_mul_n (vh, ash, bsh, n + 1); /* vh, 2n+1 limbs */
+#define vh (pp + 2 * n) /* 2n+1 */
+#define vmh (scratch + 4 * n + 4)
/* vm1, 2n+1 limbs */
#ifdef SMALLER_RECURSION
@@ -279,6 +313,12 @@ mpn_toom53_mul (mp_ptr pp,
mpn_mul_n (vm1, asm1, bsm1, n + ((asm1[n] | bsm1[n]) != 0));
#endif /* SMALLER_RECURSION */
+ mpn_mul_n (v2, as2, bs2, n + 1); /* v2, 2n+1 limbs */
+
+ /* vinf, s+t limbs */
+ if (s > t) mpn_mul (vinf, a4, s, b2, t);
+ else mpn_mul (vinf, b2, t, a4, s);
+
/* v1, 2n+1 limbs */
#ifdef SMALLER_RECURSION
mpn_mul_n (v1, as1, bs1, n);
@@ -318,14 +358,16 @@ mpn_toom53_mul (mp_ptr pp,
mpn_mul_n (v1, as1, bs1, n + ((as1[n] | bs1[n]) != 0));
#endif /* SMALLER_RECURSION */
- mpn_mul_n (v0, a0, b0, n); /* v0, 2n limbs */
+ mpn_mul_n (vh, ash, bsh, n + 1);
- /* vinf, s+t limbs */
- if (s > t) mpn_mul (vinf, a4, s, b2, t);
- else mpn_mul (vinf, b2, t, a4, s);
+ mpn_mul_n (vmh, asmh, bsmh, n + 1);
+
+ mpn_mul_n (v0, ap, bp, n); /* v0, 2n limbs */
+
+ flags = vm1_neg ? toom4_w3_neg : 0;
+ flags |= vmh_neg ? toom4_w1_neg : 0;
- mpn_toom_interpolate_7pts (pp, n, flags, vm2, vm1, v2, vh, s + t,
- scratch_out);
+ mpn_toom_interpolate_7pts (pp, n, flags, vmh, vm1, v1, v2, s + t, scratch + 8 * n + 8);
TMP_FREE;
}
diff --git a/gmp/mpn/generic/toom54_mul.c b/gmp/mpn/generic/toom54_mul.c
deleted file mode 100644
index 939bb53ab6..0000000000
--- a/gmp/mpn/generic/toom54_mul.c
+++ /dev/null
@@ -1,143 +0,0 @@
-/* Implementation of the algorithm for Toom-Cook 4.5-way.
-
- Contributed to the GNU project by Marco Bodrato.
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-/* Toom-4.5, the splitting 5x4 unbalanced version.
- Evaluate in: infinity, +4, -4, +2, -2, +1, -1, 0.
-
- <--s-><--n--><--n--><--n--><--n-->
- ____ ______ ______ ______ ______
- |_a4_|__a3__|__a2__|__a1__|__a0__|
- |b3_|__b2__|__b1__|__b0__|
- <-t-><--n--><--n--><--n-->
-
-*/
-#define TOOM_54_MUL_N_REC(p, a, b, n, ws) \
- do { mpn_mul_n (p, a, b, n); \
- } while (0)
-
-#define TOOM_54_MUL_REC(p, a, na, b, nb, ws) \
- do { mpn_mul (p, a, na, b, nb); \
- } while (0)
-
-void
-mpn_toom54_mul (mp_ptr pp,
- mp_srcptr ap, mp_size_t an,
- mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
-{
- mp_size_t n, s, t;
- int sign;
-
- /***************************** decomposition *******************************/
-#define a4 (ap + 4 * n)
-#define b3 (bp + 3 * n)
-
- ASSERT (an >= bn);
- n = 1 + (4 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 4);
-
- s = an - 4 * n;
- t = bn - 3 * n;
-
- ASSERT (0 < s && s <= n);
- ASSERT (0 < t && t <= n);
- /* Required by mpn_toom_interpolate_8pts. */
- ASSERT ( s + t >= n );
- ASSERT ( s + t > 4);
- ASSERT ( n > 2);
-
-#define r8 pp /* 2n */
-#define r7 scratch /* 3n+1 */
-#define r5 (pp + 3*n) /* 3n+1 */
-#define v0 (pp + 3*n) /* n+1 */
-#define v1 (pp + 4*n+1) /* n+1 */
-#define v2 (pp + 5*n+2) /* n+1 */
-#define v3 (pp + 6*n+3) /* n+1 */
-#define r3 (scratch + 3 * n + 1) /* 3n+1 */
-#define r1 (pp + 7*n) /* s+t <= 2*n */
-#define ws (scratch + 6 * n + 2) /* ??? */
-
- /* Alloc also 3n+1 limbs for ws... mpn_toom_interpolate_8pts may
- need all of them, when DO_mpn_sublsh_n usea a scratch */
- /********************** evaluation and recursive calls *********************/
- /* $\pm4$ */
- sign = mpn_toom_eval_pm2exp (v2, v0, 4, ap, n, s, 2, pp)
- ^ mpn_toom_eval_pm2exp (v3, v1, 3, bp, n, t, 2, pp);
- TOOM_54_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-4)*B(-4) */
- TOOM_54_MUL_N_REC(r3, v2, v3, n + 1, ws); /* A(+4)*B(+4) */
- mpn_toom_couple_handling (r3, 2*n+1, pp, sign, n, 2, 4);
-
- /* $\pm1$ */
- sign = mpn_toom_eval_pm1 (v2, v0, 4, ap, n, s, pp)
- ^ mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t, pp);
- TOOM_54_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-1)*B(-1) */
- TOOM_54_MUL_N_REC(r7, v2, v3, n + 1, ws); /* A(1)*B(1) */
- mpn_toom_couple_handling (r7, 2*n+1, pp, sign, n, 0, 0);
-
- /* $\pm2$ */
- sign = mpn_toom_eval_pm2 (v2, v0, 4, ap, n, s, pp)
- ^ mpn_toom_eval_dgr3_pm2 (v3, v1, bp, n, t, pp);
- TOOM_54_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-2)*B(-2) */
- TOOM_54_MUL_N_REC(r5, v2, v3, n + 1, ws); /* A(+2)*B(+2) */
- mpn_toom_couple_handling (r5, 2*n+1, pp, sign, n, 1, 2);
-
- /* A(0)*B(0) */
- TOOM_54_MUL_N_REC(pp, ap, bp, n, ws);
-
- /* Infinity */
- if (s > t) {
- TOOM_54_MUL_REC(r1, a4, s, b3, t, ws);
- } else {
- TOOM_54_MUL_REC(r1, b3, t, a4, s, ws);
- };
-
- mpn_toom_interpolate_8pts (pp, n, r3, r7, s + t, ws);
-
-#undef a4
-#undef b3
-#undef r1
-#undef r3
-#undef r5
-#undef v0
-#undef v1
-#undef v2
-#undef v3
-#undef r7
-#undef r8
-#undef ws
-}
diff --git a/gmp/mpn/generic/toom62_mul.c b/gmp/mpn/generic/toom62_mul.c
index 3759e3cb3c..944b3feffd 100644
--- a/gmp/mpn/generic/toom62_mul.c
+++ b/gmp/mpn/generic/toom62_mul.c
@@ -10,42 +10,38 @@
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2006-2008, 2012 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+/*
+ Things to work on:
+
+ 1. Trim allocation. The allocations for as1, asm1, bs1, and bsm1 could be
+ avoided by instead reusing the pp area and the scratch allocation.
+*/
+
#include "gmp.h"
#include "gmp-impl.h"
-/* Evaluate in:
- 0, +1, -1, +2, -2, 1/2, +inf
- <-s-><--n--><--n--><--n--><--n--><--n-->
+/* Evaluate in: -1, -1/2, 0, +1/2, +1, +2, +inf
+
+ <-s-><--n--><--n--><--n-->
___ ______ ______ ______ ______ ______
|a5_|___a4_|___a3_|___a2_|___a1_|___a0_|
|_b1_|___b0_|
@@ -55,8 +51,8 @@ see https://www.gnu.org/licenses/. */
v1 = ( a0+ a1+ a2+ a3+ a4+ a5)*( b0+ b1) # A(1)*B(1) ah <= 5 bh <= 1
vm1 = ( a0- a1+ a2- a3+ a4- a5)*( b0- b1) # A(-1)*B(-1) |ah| <= 2 bh = 0
v2 = ( a0+ 2a1+4a2+8a3+16a4+32a5)*( b0+2b1) # A(2)*B(2) ah <= 62 bh <= 2
- vm2 = ( a0- 2a1+4a2-8a3+16a4-32a5)*( b0-2b1) # A(-2)*B(-2) -41<=ah<=20 -1<=bh<=0
vh = (32a0+16a1+8a2+4a3+ 2a4+ a5)*(2b0+ b1) # A(1/2)*B(1/2) ah <= 62 bh <= 2
+ vmh = (32a0-16a1+8a2-4a3+ 2a4- a5)*(2b0- b1) # A(-1/2)*B(-1/2) -20<=ah<=41 0<=bh<=1
vinf= a5 * b1 # A(inf)*B(inf)
*/
@@ -67,11 +63,12 @@ mpn_toom62_mul (mp_ptr pp,
mp_ptr scratch)
{
mp_size_t n, s, t;
+ int vm1_neg, vmh_neg, bsm_neg;
mp_limb_t cy;
- mp_ptr as1, asm1, as2, asm2, ash;
- mp_ptr bs1, bsm1, bs2, bsm2, bsh;
- mp_ptr gp;
- enum toom7_flags aflags, bflags;
+ mp_ptr a0_a2, a1_a3;
+ mp_ptr as1, asm1, as2, ash, asmh;
+ mp_ptr bs1, bsm1, bs2, bsh, bsmh;
+ enum toom4_flags flags;
TMP_DECL;
#define a0 ap
@@ -83,7 +80,7 @@ mpn_toom62_mul (mp_ptr pp,
#define b0 bp
#define b1 (bp + n)
- n = 1 + (an >= 3 * bn ? (an - 1) / (size_t) 6 : (bn - 1) >> 1);
+ n = 1 + (an >= 3 * bn ? (an - 1) / (unsigned long) 6 : (bn - 1) >> 1);
s = an - 5 * n;
t = bn - n;
@@ -96,66 +93,133 @@ mpn_toom62_mul (mp_ptr pp,
as1 = TMP_SALLOC_LIMBS (n + 1);
asm1 = TMP_SALLOC_LIMBS (n + 1);
as2 = TMP_SALLOC_LIMBS (n + 1);
- asm2 = TMP_SALLOC_LIMBS (n + 1);
ash = TMP_SALLOC_LIMBS (n + 1);
+ asmh = TMP_SALLOC_LIMBS (n + 1);
bs1 = TMP_SALLOC_LIMBS (n + 1);
bsm1 = TMP_SALLOC_LIMBS (n);
bs2 = TMP_SALLOC_LIMBS (n + 1);
- bsm2 = TMP_SALLOC_LIMBS (n + 1);
bsh = TMP_SALLOC_LIMBS (n + 1);
+ bsmh = TMP_SALLOC_LIMBS (n + 1);
- gp = pp;
+ a0_a2 = pp;
+ a1_a3 = pp + n + 1;
/* Compute as1 and asm1. */
- aflags = (enum toom7_flags) (toom7_w3_neg & mpn_toom_eval_pm1 (as1, asm1, 5, ap, n, s, gp));
-
- /* Compute as2 and asm2. */
- aflags = (enum toom7_flags) (aflags | toom7_w1_neg & mpn_toom_eval_pm2 (as2, asm2, 5, ap, n, s, gp));
-
- /* Compute ash = 32 a0 + 16 a1 + 8 a2 + 4 a3 + 2 a4 + a5
- = 2*(2*(2*(2*(2*a0 + a1) + a2) + a3) + a4) + a5 */
+ a0_a2[n] = mpn_add_n (a0_a2, a0, a2, n);
+ a0_a2[n] += mpn_add_n (a0_a2, a0_a2, a4, n);
+ a1_a3[n] = mpn_add_n (a1_a3, a1, a3, n);
+ a1_a3[n] += mpn_add (a1_a3, a1_a3, n, a5, s);
+#if HAVE_NATIVE_mpn_addsub_n
+ if (mpn_cmp (a0_a2, a1_a3, n + 1) < 0)
+ {
+ mpn_addsub_n (as1, asm1, a1_a3, a0_a2, n + 1);
+ vm1_neg = 1;
+ }
+ else
+ {
+ mpn_addsub_n (as1, asm1, a0_a2, a1_a3, n + 1);
+ vm1_neg = 0;
+ }
+#else
+ mpn_add_n (as1, a0_a2, a1_a3, n + 1);
+ if (mpn_cmp (a0_a2, a1_a3, n + 1) < 0)
+ {
+ mpn_sub_n (asm1, a1_a3, a0_a2, n + 1);
+ vm1_neg = 1;
+ }
+ else
+ {
+ mpn_sub_n (asm1, a0_a2, a1_a3, n + 1);
+ vm1_neg = 0;
+ }
+#endif
+ /* Compute as2. */
#if HAVE_NATIVE_mpn_addlsh1_n
- cy = mpn_addlsh1_n (ash, a1, a0, n);
- cy = 2*cy + mpn_addlsh1_n (ash, a2, ash, n);
- cy = 2*cy + mpn_addlsh1_n (ash, a3, ash, n);
- cy = 2*cy + mpn_addlsh1_n (ash, a4, ash, n);
- if (s < n)
+ cy = mpn_addlsh1_n (as2, a4, a5, s);
+ if (s != n)
+ cy = mpn_add_1 (as2 + s, a4 + s, n - s, cy);
+ cy = 2 * cy + mpn_addlsh1_n (as2, a3, as2, n);
+ cy = 2 * cy + mpn_addlsh1_n (as2, a2, as2, n);
+ cy = 2 * cy + mpn_addlsh1_n (as2, a1, as2, n);
+ cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
+#else
+ cy = mpn_lshift (as2, a5, s, 1);
+ cy += mpn_add_n (as2, a4, as2, s);
+ if (s != n)
+ cy = mpn_add_1 (as2 + s, a4 + s, n - s, cy);
+ cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
+ cy += mpn_add_n (as2, a3, as2, n);
+ cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
+ cy += mpn_add_n (as2, a2, as2, n);
+ cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
+ cy += mpn_add_n (as2, a1, as2, n);
+ cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
+ cy += mpn_add_n (as2, a0, as2, n);
+#endif
+ as2[n] = cy;
+
+ /* Compute ash and asmh. */
+#if HAVE_NATIVE_mpn_addlsh_n
+ cy = mpn_addlsh_n (a0_a2, a2, a0, n, 2); /* 4a0 + a2 */
+ cy = 4 * cy + mpn_addlsh_n (a0_a2, a4, a0_a2, n, 2); /* 16a0 + 4a2 + a4 */
+ cy = 2 * cy + mpn_lshift (a0_a2, a0_a2, n, 1); /* 32a0 + 8a2 + 2a4 */
+ a0_a2[n] = cy;
+ cy = mpn_addlsh_n (a1_a3, a3, a1, n, 2); /* 4a1 */
+ cy = 4 * cy + mpn_addlsh_n (a1_a3, a5, a1_a3, n, 2); /* 16a1 + 4a3 */
+ a1_a3[n] = cy;
+#else
+ cy = mpn_lshift (a0_a2, a0, n, 2); /* 4a0 */
+ cy += mpn_add_n (a0_a2, a2, a0_a2, n); /* 4a0 + a2 */
+ cy = 4 * cy + mpn_lshift (a0_a2, a0_a2, n, 2); /* 16a0 + 4a2 */
+ cy += mpn_add_n (a0_a2, a4, a0_a2, n); /* 16a0 + 4a2 + a4 */
+ cy = 2 * cy + mpn_lshift (a0_a2, a0_a2, n, 1); /* 32a0 + 8a2 + 2a4 */
+ a0_a2[n] = cy;
+ cy = mpn_lshift (a1_a3, a1, n, 2); /* 4a1 */
+ cy += mpn_add_n (a1_a3, a3, a1_a3, n); /* 4a1 + a3 */
+ cy = 4 * cy + mpn_lshift (a1_a3, a1_a3, n, 2); /* 16a1 + 4a3 */
+ cy += mpn_add (a1_a3, a1_a3, n, a5, s); /* 16a1 + 4a3 + a5 */
+ a1_a3[n] = cy;
+#endif
+#if HAVE_NATIVE_mpn_addsub_n
+ if (mpn_cmp (a0_a2, a1_a3, n + 1) < 0)
{
- mp_limb_t cy2;
- cy2 = mpn_addlsh1_n (ash, a5, ash, s);
- ash[n] = 2*cy + mpn_lshift (ash + s, ash + s, n - s, 1);
- MPN_INCR_U (ash + s, n+1-s, cy2);
+ mpn_addsub_n (ash, asmh, a1_a3, a0_a2, n + 1);
+ vmh_neg = 1;
}
else
- ash[n] = 2*cy + mpn_addlsh1_n (ash, a5, ash, n);
+ {
+ mpn_addsub_n (ash, asmh, a0_a2, a1_a3, n + 1);
+ vmh_neg = 0;
+ }
#else
- cy = mpn_lshift (ash, a0, n, 1);
- cy += mpn_add_n (ash, ash, a1, n);
- cy = 2*cy + mpn_lshift (ash, ash, n, 1);
- cy += mpn_add_n (ash, ash, a2, n);
- cy = 2*cy + mpn_lshift (ash, ash, n, 1);
- cy += mpn_add_n (ash, ash, a3, n);
- cy = 2*cy + mpn_lshift (ash, ash, n, 1);
- cy += mpn_add_n (ash, ash, a4, n);
- cy = 2*cy + mpn_lshift (ash, ash, n, 1);
- ash[n] = cy + mpn_add (ash, ash, n, a5, s);
+ mpn_add_n (ash, a0_a2, a1_a3, n + 1);
+ if (mpn_cmp (a0_a2, a1_a3, n + 1) < 0)
+ {
+ mpn_sub_n (asmh, a1_a3, a0_a2, n + 1);
+ vmh_neg = 1;
+ }
+ else
+ {
+ mpn_sub_n (asmh, a0_a2, a1_a3, n + 1);
+ vmh_neg = 0;
+ }
#endif
/* Compute bs1 and bsm1. */
if (t == n)
{
-#if HAVE_NATIVE_mpn_add_n_sub_n
+#if HAVE_NATIVE_mpn_addsub_n
if (mpn_cmp (b0, b1, n) < 0)
{
- cy = mpn_add_n_sub_n (bs1, bsm1, b1, b0, n);
- bflags = toom7_w3_neg;
+ cy = mpn_addsub_n (bs1, bsm1, b1, b0, n);
+ bsm_neg = 1;
}
else
{
- cy = mpn_add_n_sub_n (bs1, bsm1, b0, b1, n);
- bflags = (enum toom7_flags) 0;
+ cy = mpn_addsub_n (bs1, bsm1, b0, b1, n);
+ bsm_neg = 0;
}
bs1[n] = cy >> 1;
#else
@@ -163,12 +227,12 @@ mpn_toom62_mul (mp_ptr pp,
if (mpn_cmp (b0, b1, n) < 0)
{
mpn_sub_n (bsm1, b1, b0, n);
- bflags = toom7_w3_neg;
+ bsm_neg = 1;
}
else
{
mpn_sub_n (bsm1, b0, b1, n);
- bflags = (enum toom7_flags) 0;
+ bsm_neg = 0;
}
#endif
}
@@ -179,83 +243,56 @@ mpn_toom62_mul (mp_ptr pp,
{
mpn_sub_n (bsm1, b1, b0, t);
MPN_ZERO (bsm1 + t, n - t);
- bflags = toom7_w3_neg;
+ bsm_neg = 1;
}
else
{
mpn_sub (bsm1, b0, n, b1, t);
- bflags = (enum toom7_flags) 0;
+ bsm_neg = 0;
}
}
- /* Compute bs2 and bsm2. Recycling bs1 and bsm1; bs2=bs1+b1, bsm2 =
- bsm1 - b1 */
+ vm1_neg ^= bsm_neg;
+
+ /* Compute bs2, recycling bs1. bs2=bs1+b1 */
mpn_add (bs2, bs1, n + 1, b1, t);
- if (bflags & toom7_w3_neg)
- {
- bsm2[n] = mpn_add (bsm2, bsm1, n, b1, t);
- bflags = (enum toom7_flags) (bflags | toom7_w1_neg);
- }
- else
+
+ /* Compute bsh and bsmh, recycling bs1 and bsm1. bsh=bs1+b0; bsmh=bsmh+b0 */
+ if (bsm_neg == 1)
{
- /* FIXME: Simplify this logic? */
- if (t < n)
+ bsmh[n] = 0;
+ if (mpn_cmp (bsm1, b0, n) < 0)
{
- if (mpn_zero_p (bsm1 + t, n - t) && mpn_cmp (bsm1, b1, t) < 0)
- {
- ASSERT_NOCARRY (mpn_sub_n (bsm2, b1, bsm1, t));
- MPN_ZERO (bsm2 + t, n + 1 - t);
- bflags = (enum toom7_flags) (bflags | toom7_w1_neg);
- }
- else
- {
- ASSERT_NOCARRY (mpn_sub (bsm2, bsm1, n, b1, t));
- bsm2[n] = 0;
- }
+ bsm_neg = 0;
+ mpn_sub_n (bsmh, b0, bsm1, n);
}
else
- {
- if (mpn_cmp (bsm1, b1, n) < 0)
- {
- ASSERT_NOCARRY (mpn_sub_n (bsm2, b1, bsm1, n));
- bflags = (enum toom7_flags) (bflags | toom7_w1_neg);
- }
- else
- {
- ASSERT_NOCARRY (mpn_sub_n (bsm2, bsm1, b1, n));
- }
- bsm2[n] = 0;
- }
+ mpn_sub_n (bsmh, bsm1, b0, n);
}
+ else
+ bsmh[n] = mpn_add_n (bsmh, bsm1, b0, n);
+ mpn_add (bsh, bs1, n + 1, b0, n);
+ vmh_neg ^= bsm_neg;
- /* Compute bsh, recycling bs1. bsh=bs1+b0; */
- bsh[n] = bs1[n] + mpn_add_n (bsh, bs1, b0, n);
ASSERT (as1[n] <= 5);
ASSERT (bs1[n] <= 1);
ASSERT (asm1[n] <= 2);
+/*ASSERT (bsm1[n] == 0);*/
ASSERT (as2[n] <= 62);
ASSERT (bs2[n] <= 2);
- ASSERT (asm2[n] <= 41);
- ASSERT (bsm2[n] <= 1);
ASSERT (ash[n] <= 62);
ASSERT (bsh[n] <= 2);
+ ASSERT (asmh[n] <= 41);
+ ASSERT (bsmh[n] <= 1);
#define v0 pp /* 2n */
-#define v1 (pp + 2 * n) /* 2n+1 */
+#define v1 (scratch + 6 * n + 6) /* 2n+1 */
#define vinf (pp + 6 * n) /* s+t */
-#define v2 scratch /* 2n+1 */
-#define vm2 (scratch + 2 * n + 1) /* 2n+1 */
-#define vh (scratch + 4 * n + 2) /* 2n+1 */
-#define vm1 (scratch + 6 * n + 3) /* 2n+1 */
-#define scratch_out (scratch + 8 * n + 4) /* 2n+1 */
- /* Total scratch need: 10*n+5 */
-
- /* Must be in allocation order, as they overwrite one limb beyond
- * 2n+1. */
- mpn_mul_n (v2, as2, bs2, n + 1); /* v2, 2n+1 limbs */
- mpn_mul_n (vm2, asm2, bsm2, n + 1); /* vm2, 2n+1 limbs */
- mpn_mul_n (vh, ash, bsh, n + 1); /* vh, 2n+1 limbs */
+#define vm1 scratch /* 2n+1 */
+#define v2 (scratch + 2 * n + 2) /* 2n+1 */
+#define vh (pp + 2 * n) /* 2n+1 */
+#define vmh (scratch + 4 * n + 4)
/* vm1, 2n+1 limbs */
mpn_mul_n (vm1, asm1, bsm1, n);
@@ -274,6 +311,12 @@ mpn_toom62_mul (mp_ptr pp,
}
vm1[2 * n] = cy;
+ mpn_mul_n (v2, as2, bs2, n + 1); /* v2, 2n+1 limbs */
+
+ /* vinf, s+t limbs */
+ if (s > t) mpn_mul (vinf, a5, s, b1, t);
+ else mpn_mul (vinf, b1, t, a5, s);
+
/* v1, 2n+1 limbs */
mpn_mul_n (v1, as1, bs1, n);
if (as1[n] == 1)
@@ -298,14 +341,16 @@ mpn_toom62_mul (mp_ptr pp,
cy += mpn_add_n (v1 + n, v1 + n, as1, n);
v1[2 * n] = cy;
- mpn_mul_n (v0, a0, b0, n); /* v0, 2n limbs */
+ mpn_mul_n (vh, ash, bsh, n + 1);
- /* vinf, s+t limbs */
- if (s > t) mpn_mul (vinf, a5, s, b1, t);
- else mpn_mul (vinf, b1, t, a5, s);
+ mpn_mul_n (vmh, asmh, bsmh, n + 1);
+
+ mpn_mul_n (v0, ap, bp, n); /* v0, 2n limbs */
+
+ flags = vm1_neg ? toom4_w3_neg : 0;
+ flags |= vmh_neg ? toom4_w1_neg : 0;
- mpn_toom_interpolate_7pts (pp, n, (enum toom7_flags) (aflags ^ bflags),
- vm2, vm1, v2, vh, s + t, scratch_out);
+ mpn_toom_interpolate_7pts (pp, n, flags, vmh, vm1, v1, v2, s + t, scratch + 8 * n + 8);
TMP_FREE;
}
diff --git a/gmp/mpn/generic/toom63_mul.c b/gmp/mpn/generic/toom63_mul.c
deleted file mode 100644
index 57c5d3e3dd..0000000000
--- a/gmp/mpn/generic/toom63_mul.c
+++ /dev/null
@@ -1,232 +0,0 @@
-/* Implementation of the algorithm for Toom-Cook 4.5-way.
-
- Contributed to the GNU project by Marco Bodrato.
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/* Stores |{ap,n}-{bp,n}| in {rp,n}, returns the sign. */
-static int
-abs_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
-{
- mp_limb_t x, y;
- while (--n >= 0)
- {
- x = ap[n];
- y = bp[n];
- if (x != y)
- {
- n++;
- if (x > y)
- {
- mpn_sub_n (rp, ap, bp, n);
- return 0;
- }
- else
- {
- mpn_sub_n (rp, bp, ap, n);
- return ~0;
- }
- }
- rp[n] = 0;
- }
- return 0;
-}
-
-static int
-abs_sub_add_n (mp_ptr rm, mp_ptr rp, mp_srcptr rs, mp_size_t n) {
- int result;
- result = abs_sub_n (rm, rp, rs, n);
- ASSERT_NOCARRY(mpn_add_n (rp, rp, rs, n));
- return result;
-}
-
-
-/* Toom-4.5, the splitting 6x3 unbalanced version.
- Evaluate in: infinity, +4, -4, +2, -2, +1, -1, 0.
-
- <--s-><--n--><--n--><--n--><--n--><--n-->
- ____ ______ ______ ______ ______ ______
- |_a5_|__a4__|__a3__|__a2__|__a1__|__a0__|
- |b2_|__b1__|__b0__|
- <-t-><--n--><--n-->
-
-*/
-#define TOOM_63_MUL_N_REC(p, a, b, n, ws) \
- do { mpn_mul_n (p, a, b, n); \
- } while (0)
-
-#define TOOM_63_MUL_REC(p, a, na, b, nb, ws) \
- do { mpn_mul (p, a, na, b, nb); \
- } while (0)
-
-void
-mpn_toom63_mul (mp_ptr pp,
- mp_srcptr ap, mp_size_t an,
- mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
-{
- mp_size_t n, s, t;
- mp_limb_t cy;
- int sign;
-
- /***************************** decomposition *******************************/
-#define a5 (ap + 5 * n)
-#define b0 (bp + 0 * n)
-#define b1 (bp + 1 * n)
-#define b2 (bp + 2 * n)
-
- ASSERT (an >= bn);
- n = 1 + (an >= 2 * bn ? (an - 1) / (size_t) 6 : (bn - 1) / (size_t) 3);
-
- s = an - 5 * n;
- t = bn - 2 * n;
-
- ASSERT (0 < s && s <= n);
- ASSERT (0 < t && t <= n);
- /* WARNING! it assumes s+t>=n */
- ASSERT ( s + t >= n );
- ASSERT ( s + t > 4);
- /* WARNING! it assumes n>1 */
- ASSERT ( n > 2);
-
-#define r8 pp /* 2n */
-#define r7 scratch /* 3n+1 */
-#define r5 (pp + 3*n) /* 3n+1 */
-#define v0 (pp + 3*n) /* n+1 */
-#define v1 (pp + 4*n+1) /* n+1 */
-#define v2 (pp + 5*n+2) /* n+1 */
-#define v3 (pp + 6*n+3) /* n+1 */
-#define r3 (scratch + 3 * n + 1) /* 3n+1 */
-#define r1 (pp + 7*n) /* s+t <= 2*n */
-#define ws (scratch + 6 * n + 2) /* ??? */
-
- /* Alloc also 3n+1 limbs for ws... mpn_toom_interpolate_8pts may
- need all of them, when DO_mpn_sublsh_n usea a scratch */
-/* if (scratch == NULL) scratch = TMP_SALLOC_LIMBS (9 * n + 3); */
-
- /********************** evaluation and recursive calls *********************/
- /* $\pm4$ */
- sign = mpn_toom_eval_pm2exp (v2, v0, 5, ap, n, s, 2, pp);
- pp[n] = mpn_lshift (pp, b1, n, 2); /* 4b1 */
- /* FIXME: use addlsh */
- v3[t] = mpn_lshift (v3, b2, t, 4);/* 16b2 */
- if ( n == t )
- v3[n]+= mpn_add_n (v3, v3, b0, n); /* 16b2+b0 */
- else
- v3[n] = mpn_add (v3, b0, n, v3, t+1); /* 16b2+b0 */
- sign ^= abs_sub_add_n (v1, v3, pp, n + 1);
- TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-4)*B(-4) */
- TOOM_63_MUL_N_REC(r3, v2, v3, n + 1, ws); /* A(+4)*B(+4) */
- mpn_toom_couple_handling (r3, 2*n+1, pp, sign, n, 2, 4);
-
- /* $\pm1$ */
- sign = mpn_toom_eval_pm1 (v2, v0, 5, ap, n, s, pp);
- /* Compute bs1 and bsm1. Code taken from toom33 */
- cy = mpn_add (ws, b0, n, b2, t);
-#if HAVE_NATIVE_mpn_add_n_sub_n
- if (cy == 0 && mpn_cmp (ws, b1, n) < 0)
- {
- cy = mpn_add_n_sub_n (v3, v1, b1, ws, n);
- v3[n] = cy >> 1;
- v1[n] = 0;
- sign = ~sign;
- }
- else
- {
- mp_limb_t cy2;
- cy2 = mpn_add_n_sub_n (v3, v1, ws, b1, n);
- v3[n] = cy + (cy2 >> 1);
- v1[n] = cy - (cy2 & 1);
- }
-#else
- v3[n] = cy + mpn_add_n (v3, ws, b1, n);
- if (cy == 0 && mpn_cmp (ws, b1, n) < 0)
- {
- mpn_sub_n (v1, b1, ws, n);
- v1[n] = 0;
- sign = ~sign;
- }
- else
- {
- cy -= mpn_sub_n (v1, ws, b1, n);
- v1[n] = cy;
- }
-#endif
- TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-1)*B(-1) */
- TOOM_63_MUL_N_REC(r7, v2, v3, n + 1, ws); /* A(1)*B(1) */
- mpn_toom_couple_handling (r7, 2*n+1, pp, sign, n, 0, 0);
-
- /* $\pm2$ */
- sign = mpn_toom_eval_pm2 (v2, v0, 5, ap, n, s, pp);
- pp[n] = mpn_lshift (pp, b1, n, 1); /* 2b1 */
- /* FIXME: use addlsh or addlsh2 */
- v3[t] = mpn_lshift (v3, b2, t, 2);/* 4b2 */
- if ( n == t )
- v3[n]+= mpn_add_n (v3, v3, b0, n); /* 4b2+b0 */
- else
- v3[n] = mpn_add (v3, b0, n, v3, t+1); /* 4b2+b0 */
- sign ^= abs_sub_add_n (v1, v3, pp, n + 1);
- TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-2)*B(-2) */
- TOOM_63_MUL_N_REC(r5, v2, v3, n + 1, ws); /* A(+2)*B(+2) */
- mpn_toom_couple_handling (r5, 2*n+1, pp, sign, n, 1, 2);
-
- /* A(0)*B(0) */
- TOOM_63_MUL_N_REC(pp, ap, bp, n, ws);
-
- /* Infinity */
- if (s > t) {
- TOOM_63_MUL_REC(r1, a5, s, b2, t, ws);
- } else {
- TOOM_63_MUL_REC(r1, b2, t, a5, s, ws);
- };
-
- mpn_toom_interpolate_8pts (pp, n, r3, r7, s + t, ws);
-
-#undef a5
-#undef b0
-#undef b1
-#undef b2
-#undef r1
-#undef r3
-#undef r5
-#undef v0
-#undef v1
-#undef v2
-#undef v3
-#undef r7
-#undef r8
-#undef ws
-}
diff --git a/gmp/mpn/generic/toom6_sqr.c b/gmp/mpn/generic/toom6_sqr.c
deleted file mode 100644
index e5ab7dcd1d..0000000000
--- a/gmp/mpn/generic/toom6_sqr.c
+++ /dev/null
@@ -1,182 +0,0 @@
-/* Implementation of the squaring algorithm with Toom-Cook 6.5-way.
-
- Contributed to the GNU project by Marco Bodrato.
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-#if GMP_NUMB_BITS < 21
-#error Not implemented.
-#endif
-
-
-#if TUNE_PROGRAM_BUILD
-#define MAYBE_sqr_basecase 1
-#define MAYBE_sqr_above_basecase 1
-#define MAYBE_sqr_toom2 1
-#define MAYBE_sqr_above_toom2 1
-#define MAYBE_sqr_toom3 1
-#define MAYBE_sqr_above_toom3 1
-#define MAYBE_sqr_above_toom4 1
-#else
-#ifdef SQR_TOOM8_THRESHOLD
-#define SQR_TOOM6_MAX ((SQR_TOOM8_THRESHOLD+6*2-1+5)/6)
-#else
-#define SQR_TOOM6_MAX \
- ((SQR_FFT_THRESHOLD <= MP_SIZE_T_MAX - (6*2-1+5)) ? \
- ((SQR_FFT_THRESHOLD+6*2-1+5)/6) \
- : MP_SIZE_T_MAX )
-#endif
-#define MAYBE_sqr_basecase \
- (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM2_THRESHOLD)
-#define MAYBE_sqr_above_basecase \
- (SQR_TOOM6_MAX >= SQR_TOOM2_THRESHOLD)
-#define MAYBE_sqr_toom2 \
- (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM3_THRESHOLD)
-#define MAYBE_sqr_above_toom2 \
- (SQR_TOOM6_MAX >= SQR_TOOM3_THRESHOLD)
-#define MAYBE_sqr_toom3 \
- (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM4_THRESHOLD)
-#define MAYBE_sqr_above_toom3 \
- (SQR_TOOM6_MAX >= SQR_TOOM4_THRESHOLD)
-#define MAYBE_sqr_above_toom4 \
- (SQR_TOOM6_MAX >= SQR_TOOM6_THRESHOLD)
-#endif
-
-#define TOOM6_SQR_REC(p, a, n, ws) \
- do { \
- if (MAYBE_sqr_basecase && ( !MAYBE_sqr_above_basecase \
- || BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))) \
- mpn_sqr_basecase (p, a, n); \
- else if (MAYBE_sqr_toom2 && ( !MAYBE_sqr_above_toom2 \
- || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))) \
- mpn_toom2_sqr (p, a, n, ws); \
- else if (MAYBE_sqr_toom3 && ( !MAYBE_sqr_above_toom3 \
- || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))) \
- mpn_toom3_sqr (p, a, n, ws); \
- else if (! MAYBE_sqr_above_toom4 \
- || BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD)) \
- mpn_toom4_sqr (p, a, n, ws); \
- else \
- mpn_toom6_sqr (p, a, n, ws); \
- } while (0)
-
-void
-mpn_toom6_sqr (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_ptr scratch)
-{
- mp_size_t n, s;
-
- /***************************** decomposition *******************************/
-
- ASSERT( an >= 18 );
-
- n = 1 + (an - 1) / (size_t) 6;
-
- s = an - 5 * n;
-
- ASSERT (0 < s && s <= n);
-
-#define r4 (pp + 3 * n) /* 3n+1 */
-#define r2 (pp + 7 * n) /* 3n+1 */
-#define r0 (pp +11 * n) /* s+t <= 2*n */
-#define r5 (scratch) /* 3n+1 */
-#define r3 (scratch + 3 * n + 1) /* 3n+1 */
-#define r1 (scratch + 6 * n + 2) /* 3n+1 */
-#define v0 (pp + 7 * n) /* n+1 */
-#define v2 (pp + 9 * n+2) /* n+1 */
-#define wse (scratch + 9 * n + 3) /* 3n+1 */
-
- /* Alloc also 3n+1 limbs for ws... toom_interpolate_12pts may
- need all of them, when DO_mpn_sublsh_n usea a scratch */
-/* if (scratch== NULL) */
-/* scratch = TMP_SALLOC_LIMBS (12 * n + 6); */
-
- /********************** evaluation and recursive calls *********************/
- /* $\pm1/2$ */
- mpn_toom_eval_pm2rexp (v2, v0, 5, ap, n, s, 1, pp);
- TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1/2)*B(-1/2)*2^. */
- TOOM6_SQR_REC(r5, v2, n + 1, wse); /* A(+1/2)*B(+1/2)*2^. */
- mpn_toom_couple_handling (r5, 2 * n + 1, pp, 0, n, 1, 0);
-
- /* $\pm1$ */
- mpn_toom_eval_pm1 (v2, v0, 5, ap, n, s, pp);
- TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1)*B(-1) */
- TOOM6_SQR_REC(r3, v2, n + 1, wse); /* A(1)*B(1) */
- mpn_toom_couple_handling (r3, 2 * n + 1, pp, 0, n, 0, 0);
-
- /* $\pm4$ */
- mpn_toom_eval_pm2exp (v2, v0, 5, ap, n, s, 2, pp);
- TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-4)*B(-4) */
- TOOM6_SQR_REC(r1, v2, n + 1, wse); /* A(+4)*B(+4) */
- mpn_toom_couple_handling (r1, 2 * n + 1, pp, 0, n, 2, 4);
-
- /* $\pm1/4$ */
- mpn_toom_eval_pm2rexp (v2, v0, 5, ap, n, s, 2, pp);
- TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1/4)*B(-1/4)*4^. */
- TOOM6_SQR_REC(r4, v2, n + 1, wse); /* A(+1/4)*B(+1/4)*4^. */
- mpn_toom_couple_handling (r4, 2 * n + 1, pp, 0, n, 2, 0);
-
- /* $\pm2$ */
- mpn_toom_eval_pm2 (v2, v0, 5, ap, n, s, pp);
- TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-2)*B(-2) */
- TOOM6_SQR_REC(r2, v2, n + 1, wse); /* A(+2)*B(+2) */
- mpn_toom_couple_handling (r2, 2 * n + 1, pp, 0, n, 1, 2);
-
-#undef v0
-#undef v2
-
- /* A(0)*B(0) */
- TOOM6_SQR_REC(pp, ap, n, wse);
-
- mpn_toom_interpolate_12pts (pp, r1, r3, r5, n, 2 * s, 0, wse);
-
-#undef r0
-#undef r1
-#undef r2
-#undef r3
-#undef r4
-#undef r5
-
-}
-#undef TOOM6_SQR_REC
-#undef MAYBE_sqr_basecase
-#undef MAYBE_sqr_above_basecase
-#undef MAYBE_sqr_toom2
-#undef MAYBE_sqr_above_toom2
-#undef MAYBE_sqr_toom3
-#undef MAYBE_sqr_above_toom3
-#undef MAYBE_sqr_above_toom4
diff --git a/gmp/mpn/generic/toom6h_mul.c b/gmp/mpn/generic/toom6h_mul.c
deleted file mode 100644
index 420895be8f..0000000000
--- a/gmp/mpn/generic/toom6h_mul.c
+++ /dev/null
@@ -1,263 +0,0 @@
-/* Implementation of the multiplication algorithm for Toom-Cook 6.5-way.
-
- Contributed to the GNU project by Marco Bodrato.
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-#if GMP_NUMB_BITS < 21
-#error Not implemented.
-#endif
-
-#if TUNE_PROGRAM_BUILD
-#define MAYBE_mul_basecase 1
-#define MAYBE_mul_toom22 1
-#define MAYBE_mul_toom33 1
-#define MAYBE_mul_toom6h 1
-#else
-#define MAYBE_mul_basecase \
- (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM22_THRESHOLD)
-#define MAYBE_mul_toom22 \
- (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM33_THRESHOLD)
-#define MAYBE_mul_toom33 \
- (MUL_TOOM6H_THRESHOLD < 6 * MUL_TOOM44_THRESHOLD)
-#define MAYBE_mul_toom6h \
- (MUL_FFT_THRESHOLD >= 6 * MUL_TOOM6H_THRESHOLD)
-#endif
-
-#define TOOM6H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws) \
- do { \
- if (MAYBE_mul_basecase \
- && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) { \
- mpn_mul_basecase (p, a, n, b, n); \
- if (f) \
- mpn_mul_basecase (p2, a2, n, b2, n); \
- } else if (MAYBE_mul_toom22 \
- && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) { \
- mpn_toom22_mul (p, a, n, b, n, ws); \
- if (f) \
- mpn_toom22_mul (p2, a2, n, b2, n, ws); \
- } else if (MAYBE_mul_toom33 \
- && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) { \
- mpn_toom33_mul (p, a, n, b, n, ws); \
- if (f) \
- mpn_toom33_mul (p2, a2, n, b2, n, ws); \
- } else if (! MAYBE_mul_toom6h \
- || BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) { \
- mpn_toom44_mul (p, a, n, b, n, ws); \
- if (f) \
- mpn_toom44_mul (p2, a2, n, b2, n, ws); \
- } else { \
- mpn_toom6h_mul (p, a, n, b, n, ws); \
- if (f) \
- mpn_toom6h_mul (p2, a2, n, b2, n, ws); \
- } \
- } while (0)
-
-#define TOOM6H_MUL_REC(p, a, na, b, nb, ws) \
- do { mpn_mul (p, a, na, b, nb); \
- } while (0)
-
-/* Toom-6.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
- With: an >= bn >= 46, an*6 < bn * 17.
- It _may_ work with bn<=46 and bn*17 < an*6 < bn*18
-
- Evaluate in: infinity, +4, -4, +2, -2, +1, -1, +1/2, -1/2, +1/4, -1/4, 0.
-*/
-/* Estimate on needed scratch:
- S(n) <= (n+5)\6*10+4+MAX(S((n+5)\6),1+2*(n+5)\6),
- since n>42; S(n) <= ceil(log(n)/log(6))*(10+4)+n*12\6 < n*2 + lg2(n)*6
- */
-
-void
-mpn_toom6h_mul (mp_ptr pp,
- mp_srcptr ap, mp_size_t an,
- mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
-{
- mp_size_t n, s, t;
- int p, q, half;
- int sign;
-
- /***************************** decomposition *******************************/
-
- ASSERT (an >= bn);
- /* Can not handle too much unbalancement */
- ASSERT (bn >= 42);
- /* Can not handle too much unbalancement */
- ASSERT ((an*3 < bn * 8) || (bn >= 46 && an * 6 < bn * 17));
-
- /* Limit num/den is a rational number between
- (12/11)^(log(4)/log(2*4-1)) and (12/11)^(log(6)/log(2*6-1)) */
-#define LIMIT_numerator (18)
-#define LIMIT_denominat (17)
-
- if (LIKELY (an * LIMIT_denominat < LIMIT_numerator * bn)) /* is 6*... < 6*... */
- {
- n = 1 + (an - 1) / (size_t) 6;
- p = q = 5;
- half = 0;
-
- s = an - 5 * n;
- t = bn - 5 * n;
- }
- else {
- if (an * 5 * LIMIT_numerator < LIMIT_denominat * 7 * bn)
- { p = 7; q = 6; }
- else if (an * 5 * LIMIT_denominat < LIMIT_numerator * 7 * bn)
- { p = 7; q = 5; }
- else if (an * LIMIT_numerator < LIMIT_denominat * 2 * bn) /* is 4*... < 8*... */
- { p = 8; q = 5; }
- else if (an * LIMIT_denominat < LIMIT_numerator * 2 * bn) /* is 4*... < 8*... */
- { p = 8; q = 4; }
- else
- { p = 9; q = 4; }
-
- half = (p ^ q) & 1;
- n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
- p--; q--;
-
- s = an - p * n;
- t = bn - q * n;
-
- /* With LIMIT = 16/15, the following recover is needed only if bn<=73*/
- if (half) { /* Recover from badly chosen splitting */
- if (UNLIKELY (s<1)) {p--; s+=n; half=0;}
- else if (UNLIKELY (t<1)) {q--; t+=n; half=0;}
- }
- }
-#undef LIMIT_numerator
-#undef LIMIT_denominat
-
- ASSERT (0 < s && s <= n);
- ASSERT (0 < t && t <= n);
- ASSERT (half || s + t > 3);
- ASSERT (n > 2);
-
-#define r4 (pp + 3 * n) /* 3n+1 */
-#define r2 (pp + 7 * n) /* 3n+1 */
-#define r0 (pp +11 * n) /* s+t <= 2*n */
-#define r5 (scratch) /* 3n+1 */
-#define r3 (scratch + 3 * n + 1) /* 3n+1 */
-#define r1 (scratch + 6 * n + 2) /* 3n+1 */
-#define v0 (pp + 7 * n) /* n+1 */
-#define v1 (pp + 8 * n+1) /* n+1 */
-#define v2 (pp + 9 * n+2) /* n+1 */
-#define v3 (scratch + 9 * n + 3) /* n+1 */
-#define wsi (scratch + 9 * n + 3) /* 3n+1 */
-#define wse (scratch +10 * n + 4) /* 2n+1 */
-
- /* Alloc also 3n+1 limbs for wsi... toom_interpolate_12pts may
- need all of them */
-/* if (scratch == NULL) */
-/* scratch = TMP_SALLOC_LIMBS(mpn_toom6_sqr_itch(n * 6)); */
- ASSERT (12 * n + 6 <= mpn_toom6h_mul_itch(an,bn));
- ASSERT (12 * n + 6 <= mpn_toom6_sqr_itch(n * 6));
-
- /********************** evaluation and recursive calls *********************/
- /* $\pm1/2$ */
- sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
- mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
- /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
- TOOM6H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse);
- mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 1+half , half);
-
- /* $\pm1$ */
- sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s, pp);
- if (UNLIKELY (q == 3))
- sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t, pp);
- else
- sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t, pp);
- /* A(-1)*B(-1) */ /* A(1)*B(1) */
- TOOM6H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse);
- mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 0, 0);
-
- /* $\pm4$ */
- sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
- mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
- /* A(-4)*B(-4) */
- TOOM6H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse); /* A(+4)*B(+4) */
- mpn_toom_couple_handling (r1, 2 * n + 1, pp, sign, n, 2, 4);
-
- /* $\pm1/4$ */
- sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
- mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
- /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
- TOOM6H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse);
- mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
-
- /* $\pm2$ */
- sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
- mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
- /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
- TOOM6H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse);
- mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 1, 2);
-
-#undef v0
-#undef v1
-#undef v2
-#undef v3
-#undef wse
-
- /* A(0)*B(0) */
- TOOM6H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi);
-
- /* Infinity */
- if (UNLIKELY (half != 0)) {
- if (s > t) {
- TOOM6H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
- } else {
- TOOM6H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
- };
- };
-
- mpn_toom_interpolate_12pts (pp, r1, r3, r5, n, s+t, half, wsi);
-
-#undef r0
-#undef r1
-#undef r2
-#undef r3
-#undef r4
-#undef r5
-#undef wsi
-}
-
-#undef TOOM6H_MUL_N_REC
-#undef TOOM6H_MUL_REC
-#undef MAYBE_mul_basecase
-#undef MAYBE_mul_toom22
-#undef MAYBE_mul_toom33
-#undef MAYBE_mul_toom6h
diff --git a/gmp/mpn/generic/toom8_sqr.c b/gmp/mpn/generic/toom8_sqr.c
deleted file mode 100644
index 0c93678815..0000000000
--- a/gmp/mpn/generic/toom8_sqr.c
+++ /dev/null
@@ -1,226 +0,0 @@
-/* Implementation of the squaring algorithm with Toom-Cook 8.5-way.
-
- Contributed to the GNU project by Marco Bodrato.
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#if GMP_NUMB_BITS < 29
-#error Not implemented.
-#endif
-
-#if GMP_NUMB_BITS < 43
-#define BIT_CORRECTION 1
-#define CORRECTION_BITS GMP_NUMB_BITS
-#else
-#define BIT_CORRECTION 0
-#define CORRECTION_BITS 0
-#endif
-
-#ifndef SQR_TOOM8_THRESHOLD
-#define SQR_TOOM8_THRESHOLD MUL_TOOM8H_THRESHOLD
-#endif
-
-#ifndef SQR_TOOM6_THRESHOLD
-#define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD
-#endif
-
-#if TUNE_PROGRAM_BUILD
-#define MAYBE_sqr_basecase 1
-#define MAYBE_sqr_above_basecase 1
-#define MAYBE_sqr_toom2 1
-#define MAYBE_sqr_above_toom2 1
-#define MAYBE_sqr_toom3 1
-#define MAYBE_sqr_above_toom3 1
-#define MAYBE_sqr_toom4 1
-#define MAYBE_sqr_above_toom4 1
-#define MAYBE_sqr_above_toom6 1
-#else
-#define SQR_TOOM8_MAX \
- ((SQR_FFT_THRESHOLD <= MP_SIZE_T_MAX - (8*2-1+7)) ? \
- ((SQR_FFT_THRESHOLD+8*2-1+7)/8) \
- : MP_SIZE_T_MAX )
-#define MAYBE_sqr_basecase \
- (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM2_THRESHOLD)
-#define MAYBE_sqr_above_basecase \
- (SQR_TOOM8_MAX >= SQR_TOOM2_THRESHOLD)
-#define MAYBE_sqr_toom2 \
- (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM3_THRESHOLD)
-#define MAYBE_sqr_above_toom2 \
- (SQR_TOOM8_MAX >= SQR_TOOM3_THRESHOLD)
-#define MAYBE_sqr_toom3 \
- (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM4_THRESHOLD)
-#define MAYBE_sqr_above_toom3 \
- (SQR_TOOM8_MAX >= SQR_TOOM4_THRESHOLD)
-#define MAYBE_sqr_toom4 \
- (SQR_TOOM8_THRESHOLD < 8 * SQR_TOOM6_THRESHOLD)
-#define MAYBE_sqr_above_toom4 \
- (SQR_TOOM8_MAX >= SQR_TOOM6_THRESHOLD)
-#define MAYBE_sqr_above_toom6 \
- (SQR_TOOM8_MAX >= SQR_TOOM8_THRESHOLD)
-#endif
-
-#define TOOM8_SQR_REC(p, a, f, p2, a2, n, ws) \
- do { \
- if (MAYBE_sqr_basecase && ( !MAYBE_sqr_above_basecase \
- || BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))) { \
- mpn_sqr_basecase (p, a, n); \
- if (f) mpn_sqr_basecase (p2, a2, n); \
- } else if (MAYBE_sqr_toom2 && ( !MAYBE_sqr_above_toom2 \
- || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))) { \
- mpn_toom2_sqr (p, a, n, ws); \
- if (f) mpn_toom2_sqr (p2, a2, n, ws); \
- } else if (MAYBE_sqr_toom3 && ( !MAYBE_sqr_above_toom3 \
- || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))) { \
- mpn_toom3_sqr (p, a, n, ws); \
- if (f) mpn_toom3_sqr (p2, a2, n, ws); \
- } else if (MAYBE_sqr_toom4 && ( !MAYBE_sqr_above_toom4 \
- || BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD))) { \
- mpn_toom4_sqr (p, a, n, ws); \
- if (f) mpn_toom4_sqr (p2, a2, n, ws); \
- } else if (! MAYBE_sqr_above_toom6 \
- || BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD)) { \
- mpn_toom6_sqr (p, a, n, ws); \
- if (f) mpn_toom6_sqr (p2, a2, n, ws); \
- } else { \
- mpn_toom8_sqr (p, a, n, ws); \
- if (f) mpn_toom8_sqr (p2, a2, n, ws); \
- } \
- } while (0)
-
-void
-mpn_toom8_sqr (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_ptr scratch)
-{
- mp_size_t n, s;
-
- /***************************** decomposition *******************************/
-
- ASSERT ( an >= 40 );
-
- n = 1 + ((an - 1)>>3);
-
- s = an - 7 * n;
-
- ASSERT (0 < s && s <= n);
- ASSERT ( s + s > 3 );
-
-#define r6 (pp + 3 * n) /* 3n+1 */
-#define r4 (pp + 7 * n) /* 3n+1 */
-#define r2 (pp +11 * n) /* 3n+1 */
-#define r0 (pp +15 * n) /* s+t <= 2*n */
-#define r7 (scratch) /* 3n+1 */
-#define r5 (scratch + 3 * n + 1) /* 3n+1 */
-#define r3 (scratch + 6 * n + 2) /* 3n+1 */
-#define r1 (scratch + 9 * n + 3) /* 3n+1 */
-#define v0 (pp +11 * n) /* n+1 */
-#define v2 (pp +13 * n+2) /* n+1 */
-#define wse (scratch +12 * n + 4) /* 3n+1 */
-
- /* Alloc also 3n+1 limbs for ws... toom_interpolate_16pts may
- need all of them, when DO_mpn_sublsh_n usea a scratch */
-/* if (scratch == NULL) */
-/* scratch = TMP_SALLOC_LIMBS (30 * n + 6); */
-
- /********************** evaluation and recursive calls *********************/
- /* $\pm1/8$ */
- mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 3, pp);
- /* A(-1/8)*B(-1/8)*8^. */ /* A(+1/8)*B(+1/8)*8^. */
- TOOM8_SQR_REC(pp, v0, 2, r7, v2, n + 1, wse);
- mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 0);
-
- /* $\pm1/4$ */
- mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 2, pp);
- /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
- TOOM8_SQR_REC(pp, v0, 2, r5, v2, n + 1, wse);
- mpn_toom_couple_handling (r5, 2 * n + 1, pp, 0, n, 2, 0);
-
- /* $\pm2$ */
- mpn_toom_eval_pm2 (v2, v0, 7, ap, n, s, pp);
- /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
- TOOM8_SQR_REC(pp, v0, 2, r3, v2, n + 1, wse);
- mpn_toom_couple_handling (r3, 2 * n + 1, pp, 0, n, 1, 2);
-
- /* $\pm8$ */
- mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 3, pp);
- /* A(-8)*B(-8) */ /* A(+8)*B(+8) */
- TOOM8_SQR_REC(pp, v0, 2, r1, v2, n + 1, wse);
- mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 6);
-
- /* $\pm1/2$ */
- mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 1, pp);
- /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
- TOOM8_SQR_REC(pp, v0, 2, r6, v2, n + 1, wse);
- mpn_toom_couple_handling (r6, 2 * n + 1, pp, 0, n, 1, 0);
-
- /* $\pm1$ */
- mpn_toom_eval_pm1 (v2, v0, 7, ap, n, s, pp);
- /* A(-1)*B(-1) */ /* A(1)*B(1) */
- TOOM8_SQR_REC(pp, v0, 2, r4, v2, n + 1, wse);
- mpn_toom_couple_handling (r4, 2 * n + 1, pp, 0, n, 0, 0);
-
- /* $\pm4$ */
- mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 2, pp);
- /* A(-4)*B(-4) */ /* A(+4)*B(+4) */
- TOOM8_SQR_REC(pp, v0, 2, r2, v2, n + 1, wse);
- mpn_toom_couple_handling (r2, 2 * n + 1, pp, 0, n, 2, 4);
-
-#undef v0
-#undef v2
-
- /* A(0)*B(0) */
- TOOM8_SQR_REC(pp, ap, 0, pp, ap, n, wse);
-
- mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, 2 * s, 0, wse);
-
-#undef r0
-#undef r1
-#undef r2
-#undef r3
-#undef r4
-#undef r5
-#undef r6
-#undef wse
-
-}
-
-#undef TOOM8_SQR_REC
-#undef MAYBE_sqr_basecase
-#undef MAYBE_sqr_above_basecase
-#undef MAYBE_sqr_toom2
-#undef MAYBE_sqr_above_toom2
-#undef MAYBE_sqr_toom3
-#undef MAYBE_sqr_above_toom3
-#undef MAYBE_sqr_above_toom4
diff --git a/gmp/mpn/generic/toom8h_mul.c b/gmp/mpn/generic/toom8h_mul.c
deleted file mode 100644
index 8f593903f5..0000000000
--- a/gmp/mpn/generic/toom8h_mul.c
+++ /dev/null
@@ -1,306 +0,0 @@
-/* Implementation of the multiplication algorithm for Toom-Cook 8.5-way.
-
- Contributed to the GNU project by Marco Bodrato.
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-#if GMP_NUMB_BITS < 29
-#error Not implemented.
-#endif
-
-#if GMP_NUMB_BITS < 43
-#define BIT_CORRECTION 1
-#define CORRECTION_BITS GMP_NUMB_BITS
-#else
-#define BIT_CORRECTION 0
-#define CORRECTION_BITS 0
-#endif
-
-
-#if TUNE_PROGRAM_BUILD
-#define MAYBE_mul_basecase 1
-#define MAYBE_mul_toom22 1
-#define MAYBE_mul_toom33 1
-#define MAYBE_mul_toom44 1
-#define MAYBE_mul_toom8h 1
-#else
-#define MAYBE_mul_basecase \
- (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM22_THRESHOLD)
-#define MAYBE_mul_toom22 \
- (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM33_THRESHOLD)
-#define MAYBE_mul_toom33 \
- (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM44_THRESHOLD)
-#define MAYBE_mul_toom44 \
- (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM6H_THRESHOLD)
-#define MAYBE_mul_toom8h \
- (MUL_FFT_THRESHOLD >= 8 * MUL_TOOM8H_THRESHOLD)
-#endif
-
-#define TOOM8H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws) \
- do { \
- if (MAYBE_mul_basecase \
- && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) { \
- mpn_mul_basecase (p, a, n, b, n); \
- if (f) mpn_mul_basecase (p2, a2, n, b2, n); \
- } else if (MAYBE_mul_toom22 \
- && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) { \
- mpn_toom22_mul (p, a, n, b, n, ws); \
- if (f) mpn_toom22_mul (p2, a2, n, b2, n, ws); \
- } else if (MAYBE_mul_toom33 \
- && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) { \
- mpn_toom33_mul (p, a, n, b, n, ws); \
- if (f) mpn_toom33_mul (p2, a2, n, b2, n, ws); \
- } else if (MAYBE_mul_toom44 \
- && BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) { \
- mpn_toom44_mul (p, a, n, b, n, ws); \
- if (f) mpn_toom44_mul (p2, a2, n, b2, n, ws); \
- } else if (! MAYBE_mul_toom8h \
- || BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD)) { \
- mpn_toom6h_mul (p, a, n, b, n, ws); \
- if (f) mpn_toom6h_mul (p2, a2, n, b2, n, ws); \
- } else { \
- mpn_toom8h_mul (p, a, n, b, n, ws); \
- if (f) mpn_toom8h_mul (p2, a2, n, b2, n, ws); \
- } \
- } while (0)
-
-#define TOOM8H_MUL_REC(p, a, na, b, nb, ws) \
- do { mpn_mul (p, a, na, b, nb); } while (0)
-
-/* Toom-8.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
- With: an >= bn >= 86, an*5 < bn * 11.
- It _may_ work with bn<=?? and bn*?? < an*? < bn*??
-
- Evaluate in: infinity, +8,-8,+4,-4,+2,-2,+1,-1,+1/2,-1/2,+1/4,-1/4,+1/8,-1/8,0.
-*/
-/* Estimate on needed scratch:
- S(n) <= (n+7)\8*13+5+MAX(S((n+7)\8),1+2*(n+7)\8),
- since n>80; S(n) <= ceil(log(n/10)/log(8))*(13+5)+n*15\8 < n*15\8 + lg2(n)*6
- */
-
-void
-mpn_toom8h_mul (mp_ptr pp,
- mp_srcptr ap, mp_size_t an,
- mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
-{
- mp_size_t n, s, t;
- int p, q, half;
- int sign;
-
- /***************************** decomposition *******************************/
-
- ASSERT (an >= bn);
- /* Can not handle too small operands */
- ASSERT (bn >= 86);
- /* Can not handle too much unbalancement */
- ASSERT (an <= bn*4);
- ASSERT (GMP_NUMB_BITS > 11*3 || an*4 <= bn*11);
- ASSERT (GMP_NUMB_BITS > 10*3 || an*1 <= bn* 2);
- ASSERT (GMP_NUMB_BITS > 9*3 || an*2 <= bn* 3);
-
- /* Limit num/den is a rational number between
- (16/15)^(log(6)/log(2*6-1)) and (16/15)^(log(8)/log(2*8-1)) */
-#define LIMIT_numerator (21)
-#define LIMIT_denominat (20)
-
- if (LIKELY (an == bn) || an * (LIMIT_denominat>>1) < LIMIT_numerator * (bn>>1) ) /* is 8*... < 8*... */
- {
- half = 0;
- n = 1 + ((an - 1)>>3);
- p = q = 7;
- s = an - 7 * n;
- t = bn - 7 * n;
- }
- else
- {
- if (an * 13 < 16 * bn) /* (an*7*LIMIT_numerator<LIMIT_denominat*9*bn) */
- { p = 9; q = 8; }
- else if (GMP_NUMB_BITS <= 9*3 ||
- an *(LIMIT_denominat>>1) < (LIMIT_numerator/7*9) * (bn>>1))
- { p = 9; q = 7; }
- else if (an * 10 < 33 * (bn>>1)) /* (an*3*LIMIT_numerator<LIMIT_denominat*5*bn) */
- { p =10; q = 7; }
- else if (GMP_NUMB_BITS <= 10*3 ||
- an * (LIMIT_denominat/5) < (LIMIT_numerator/3) * bn)
- { p =10; q = 6; }
- else if (an * 6 < 13 * bn) /*(an * 5 * LIMIT_numerator < LIMIT_denominat *11 * bn)*/
- { p =11; q = 6; }
- else if (GMP_NUMB_BITS <= 11*3 ||
- an * 4 < 9 * bn)
- { p =11; q = 5; }
- else if (an *(LIMIT_numerator/3) < LIMIT_denominat * bn) /* is 4*... <12*... */
- { p =12; q = 5; }
- else if (GMP_NUMB_BITS <= 12*3 ||
- an * 9 < 28 * bn ) /* is 4*... <12*... */
- { p =12; q = 4; }
- else
- { p =13; q = 4; }
-
- half = (p+q)&1;
- n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
- p--; q--;
-
- s = an - p * n;
- t = bn - q * n;
-
- if(half) { /* Recover from badly chosen splitting */
- if (UNLIKELY (s<1)) {p--; s+=n; half=0;}
- else if (UNLIKELY (t<1)) {q--; t+=n; half=0;}
- }
- }
-#undef LIMIT_numerator
-#undef LIMIT_denominat
-
- ASSERT (0 < s && s <= n);
- ASSERT (0 < t && t <= n);
- ASSERT (half || s + t > 3);
- ASSERT (n > 2);
-
-#define r6 (pp + 3 * n) /* 3n+1 */
-#define r4 (pp + 7 * n) /* 3n+1 */
-#define r2 (pp +11 * n) /* 3n+1 */
-#define r0 (pp +15 * n) /* s+t <= 2*n */
-#define r7 (scratch) /* 3n+1 */
-#define r5 (scratch + 3 * n + 1) /* 3n+1 */
-#define r3 (scratch + 6 * n + 2) /* 3n+1 */
-#define r1 (scratch + 9 * n + 3) /* 3n+1 */
-#define v0 (pp +11 * n) /* n+1 */
-#define v1 (pp +12 * n+1) /* n+1 */
-#define v2 (pp +13 * n+2) /* n+1 */
-#define v3 (scratch +12 * n + 4) /* n+1 */
-#define wsi (scratch +12 * n + 4) /* 3n+1 */
-#define wse (scratch +13 * n + 5) /* 2n+1 */
-
- /* Alloc also 3n+1 limbs for wsi... toom_interpolate_16pts may
- need all of them */
-/* if (scratch == NULL) */
-/* scratch = TMP_SALLOC_LIMBS(mpn_toom8_sqr_itch(n * 8)); */
- ASSERT (15 * n + 6 <= mpn_toom8h_mul_itch (an, bn));
- ASSERT (15 * n + 6 <= mpn_toom8_sqr_itch (n * 8));
-
- /********************** evaluation and recursive calls *********************/
-
- /* $\pm1/8$ */
- sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 3, pp) ^
- mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 3, pp);
- /* A(-1/8)*B(-1/8)*8^. */ /* A(+1/8)*B(+1/8)*8^. */
- TOOM8H_MUL_N_REC(pp, v0, v1, 2, r7, v2, v3, n + 1, wse);
- mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3*(1+half), 3*(half));
-
- /* $\pm1/4$ */
- sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
- mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
- /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
- TOOM8H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse);
- mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
-
- /* $\pm2$ */
- sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
- mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
- /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
- TOOM8H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse);
- mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 1, 2);
-
- /* $\pm8$ */
- sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 3, pp) ^
- mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 3, pp);
- /* A(-8)*B(-8) */ /* A(+8)*B(+8) */
- TOOM8H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse);
- mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3, 6);
-
- /* $\pm1/2$ */
- sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
- mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
- /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
- TOOM8H_MUL_N_REC(pp, v0, v1, 2, r6, v2, v3, n + 1, wse);
- mpn_toom_couple_handling (r6, 2 * n + 1, pp, sign, n, 1+half, half);
-
- /* $\pm1$ */
- sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s, pp);
- if (GMP_NUMB_BITS > 12*3 && UNLIKELY (q == 3))
- sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t, pp);
- else
- sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t, pp);
- /* A(-1)*B(-1) */ /* A(1)*B(1) */
- TOOM8H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse);
- mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 0, 0);
-
- /* $\pm4$ */
- sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
- mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
- /* A(-4)*B(-4) */ /* A(+4)*B(+4) */
- TOOM8H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse);
- mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 2, 4);
-
-#undef v0
-#undef v1
-#undef v2
-#undef v3
-#undef wse
-
- /* A(0)*B(0) */
- TOOM8H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi);
-
- /* Infinity */
- if (UNLIKELY (half != 0)) {
- if (s > t) {
- TOOM8H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
- } else {
- TOOM8H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
- };
- };
-
- mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, s+t, half, wsi);
-
-#undef r0
-#undef r1
-#undef r2
-#undef r3
-#undef r4
-#undef r5
-#undef r6
-#undef wsi
-}
-
-#undef TOOM8H_MUL_N_REC
-#undef TOOM8H_MUL_REC
-#undef MAYBE_mul_basecase
-#undef MAYBE_mul_toom22
-#undef MAYBE_mul_toom33
-#undef MAYBE_mul_toom44
-#undef MAYBE_mul_toom8h
diff --git a/gmp/mpn/generic/toom_couple_handling.c b/gmp/mpn/generic/toom_couple_handling.c
deleted file mode 100644
index 9e62bcba1c..0000000000
--- a/gmp/mpn/generic/toom_couple_handling.c
+++ /dev/null
@@ -1,81 +0,0 @@
-/* Helper function for high degree Toom-Cook algorithms.
-
- Contributed to the GNU project by Marco Bodrato.
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009, 2010 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/* Gets {pp,n} and (sign?-1:1)*{np,n}. Computes at once:
- {pp,n} <- ({pp,n}+{np,n})/2^{ps+1}
- {pn,n} <- ({pp,n}-{np,n})/2^{ns+1}
- Finally recompose them obtaining:
- {pp,n+off} <- {pp,n}+{np,n}*2^{off*GMP_NUMB_BITS}
-*/
-void
-mpn_toom_couple_handling (mp_ptr pp, mp_size_t n, mp_ptr np,
- int nsign, mp_size_t off, int ps, int ns)
-{
- if (nsign) {
-#ifdef HAVE_NATIVE_mpn_rsh1sub_n
- mpn_rsh1sub_n (np, pp, np, n);
-#else
- mpn_sub_n (np, pp, np, n);
- mpn_rshift (np, np, n, 1);
-#endif
- } else {
-#ifdef HAVE_NATIVE_mpn_rsh1add_n
- mpn_rsh1add_n (np, pp, np, n);
-#else
- mpn_add_n (np, pp, np, n);
- mpn_rshift (np, np, n, 1);
-#endif
- }
-
-#ifdef HAVE_NATIVE_mpn_rsh1sub_n
- if (ps == 1)
- mpn_rsh1sub_n (pp, pp, np, n);
- else
-#endif
- {
- mpn_sub_n (pp, pp, np, n);
- if (ps > 0)
- mpn_rshift (pp, pp, n, ps);
- }
- if (ns > 0)
- mpn_rshift (np, np, n, ns);
- pp[n] = mpn_add_n (pp+off, pp+off, np, n-off);
- ASSERT_NOCARRY (mpn_add_1(pp+n, np+n-off, off, pp[n]) );
-}
diff --git a/gmp/mpn/generic/toom_eval_dgr3_pm1.c b/gmp/mpn/generic/toom_eval_dgr3_pm1.c
deleted file mode 100644
index 50411bd3ca..0000000000
--- a/gmp/mpn/generic/toom_eval_dgr3_pm1.c
+++ /dev/null
@@ -1,73 +0,0 @@
-/* mpn_toom_eval_dgr3_pm1 -- Evaluate a degree 3 polynomial in +1 and -1
-
- Contributed to the GNU project by Niels Möller
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-int
-mpn_toom_eval_dgr3_pm1 (mp_ptr xp1, mp_ptr xm1,
- mp_srcptr xp, mp_size_t n, mp_size_t x3n, mp_ptr tp)
-{
- int neg;
-
- ASSERT (x3n > 0);
- ASSERT (x3n <= n);
-
- xp1[n] = mpn_add_n (xp1, xp, xp + 2*n, n);
- tp[n] = mpn_add (tp, xp + n, n, xp + 3*n, x3n);
-
- neg = (mpn_cmp (xp1, tp, n + 1) < 0) ? ~0 : 0;
-
-#if HAVE_NATIVE_mpn_add_n_sub_n
- if (neg)
- mpn_add_n_sub_n (xp1, xm1, tp, xp1, n + 1);
- else
- mpn_add_n_sub_n (xp1, xm1, xp1, tp, n + 1);
-#else
- if (neg)
- mpn_sub_n (xm1, tp, xp1, n + 1);
- else
- mpn_sub_n (xm1, xp1, tp, n + 1);
-
- mpn_add_n (xp1, xp1, tp, n + 1);
-#endif
-
- ASSERT (xp1[n] <= 3);
- ASSERT (xm1[n] <= 1);
-
- return neg;
-}
diff --git a/gmp/mpn/generic/toom_eval_dgr3_pm2.c b/gmp/mpn/generic/toom_eval_dgr3_pm2.c
deleted file mode 100644
index 3ba6d15f3d..0000000000
--- a/gmp/mpn/generic/toom_eval_dgr3_pm2.c
+++ /dev/null
@@ -1,98 +0,0 @@
-/* mpn_toom_eval_dgr3_pm2 -- Evaluate a degree 3 polynomial in +2 and -2
-
- Contributed to the GNU project by Niels Möller
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/* Needs n+1 limbs of temporary storage. */
-int
-mpn_toom_eval_dgr3_pm2 (mp_ptr xp2, mp_ptr xm2,
- mp_srcptr xp, mp_size_t n, mp_size_t x3n, mp_ptr tp)
-{
- mp_limb_t cy;
- int neg;
-
- ASSERT (x3n > 0);
- ASSERT (x3n <= n);
-
- /* (x0 + 4 * x2) +/- (2 x1 + 8 x_3) */
-#if HAVE_NATIVE_mpn_addlsh_n || HAVE_NATIVE_mpn_addlsh2_n
-#if HAVE_NATIVE_mpn_addlsh2_n
- xp2[n] = mpn_addlsh2_n (xp2, xp, xp + 2*n, n);
-
- cy = mpn_addlsh2_n (tp, xp + n, xp + 3*n, x3n);
-#else /* HAVE_NATIVE_mpn_addlsh_n */
- xp2[n] = mpn_addlsh_n (xp2, xp, xp + 2*n, n, 2);
-
- cy = mpn_addlsh_n (tp, xp + n, xp + 3*n, x3n, 2);
-#endif
- if (x3n < n)
- cy = mpn_add_1 (tp + x3n, xp + n + x3n, n - x3n, cy);
- tp[n] = cy;
-#else
- cy = mpn_lshift (tp, xp + 2*n, n, 2);
- xp2[n] = cy + mpn_add_n (xp2, tp, xp, n);
-
- tp[x3n] = mpn_lshift (tp, xp + 3*n, x3n, 2);
- if (x3n < n)
- tp[n] = mpn_add (tp, xp + n, n, tp, x3n + 1);
- else
- tp[n] += mpn_add_n (tp, xp + n, tp, n);
-#endif
- mpn_lshift (tp, tp, n+1, 1);
-
- neg = (mpn_cmp (xp2, tp, n + 1) < 0) ? ~0 : 0;
-
-#if HAVE_NATIVE_mpn_add_n_sub_n
- if (neg)
- mpn_add_n_sub_n (xp2, xm2, tp, xp2, n + 1);
- else
- mpn_add_n_sub_n (xp2, xm2, xp2, tp, n + 1);
-#else
- if (neg)
- mpn_sub_n (xm2, tp, xp2, n + 1);
- else
- mpn_sub_n (xm2, xp2, tp, n + 1);
-
- mpn_add_n (xp2, xp2, tp, n + 1);
-#endif
-
- ASSERT (xp2[n] < 15);
- ASSERT (xm2[n] < 10);
-
- return neg;
-}
diff --git a/gmp/mpn/generic/toom_eval_pm1.c b/gmp/mpn/generic/toom_eval_pm1.c
deleted file mode 100644
index 2334b0aff4..0000000000
--- a/gmp/mpn/generic/toom_eval_pm1.c
+++ /dev/null
@@ -1,90 +0,0 @@
-/* mpn_toom_eval_pm1 -- Evaluate a polynomial in +1 and -1
-
- Contributed to the GNU project by Niels Möller
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/* Evaluates a polynomial of degree k > 3, in the points +1 and -1. */
-int
-mpn_toom_eval_pm1 (mp_ptr xp1, mp_ptr xm1, unsigned k,
- mp_srcptr xp, mp_size_t n, mp_size_t hn, mp_ptr tp)
-{
- unsigned i;
- int neg;
-
- ASSERT (k >= 4);
-
- ASSERT (hn > 0);
- ASSERT (hn <= n);
-
- /* The degree k is also the number of full-size coefficients, so
- * that last coefficient, of size hn, starts at xp + k*n. */
-
- xp1[n] = mpn_add_n (xp1, xp, xp + 2*n, n);
- for (i = 4; i < k; i += 2)
- ASSERT_NOCARRY (mpn_add (xp1, xp1, n+1, xp+i*n, n));
-
- tp[n] = mpn_add_n (tp, xp + n, xp + 3*n, n);
- for (i = 5; i < k; i += 2)
- ASSERT_NOCARRY (mpn_add (tp, tp, n+1, xp+i*n, n));
-
- if (k & 1)
- ASSERT_NOCARRY (mpn_add (tp, tp, n+1, xp+k*n, hn));
- else
- ASSERT_NOCARRY (mpn_add (xp1, xp1, n+1, xp+k*n, hn));
-
- neg = (mpn_cmp (xp1, tp, n + 1) < 0) ? ~0 : 0;
-
-#if HAVE_NATIVE_mpn_add_n_sub_n
- if (neg)
- mpn_add_n_sub_n (xp1, xm1, tp, xp1, n + 1);
- else
- mpn_add_n_sub_n (xp1, xm1, xp1, tp, n + 1);
-#else
- if (neg)
- mpn_sub_n (xm1, tp, xp1, n + 1);
- else
- mpn_sub_n (xm1, xp1, tp, n + 1);
-
- mpn_add_n (xp1, xp1, tp, n + 1);
-#endif
-
- ASSERT (xp1[n] <= k);
- ASSERT (xm1[n] <= k/2 + 1);
-
- return neg;
-}
diff --git a/gmp/mpn/generic/toom_eval_pm2.c b/gmp/mpn/generic/toom_eval_pm2.c
deleted file mode 100644
index 67afcc638e..0000000000
--- a/gmp/mpn/generic/toom_eval_pm2.c
+++ /dev/null
@@ -1,131 +0,0 @@
-/* mpn_toom_eval_pm2 -- Evaluate a polynomial in +2 and -2
-
- Contributed to the GNU project by Niels Möller and Marco Bodrato
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/* DO_addlsh2(d,a,b,n,cy) computes cy,{d,n} <- {a,n} + 4*(cy,{b,n}), it
- can be used as DO_addlsh2(d,a,d,n,d[n]), for accumulation on {d,n+1}. */
-#if HAVE_NATIVE_mpn_addlsh2_n
-#define DO_addlsh2(d, a, b, n, cy) \
-do { \
- (cy) <<= 2; \
- (cy) += mpn_addlsh2_n(d, a, b, n); \
-} while (0)
-#else
-#if HAVE_NATIVE_mpn_addlsh_n
-#define DO_addlsh2(d, a, b, n, cy) \
-do { \
- (cy) <<= 2; \
- (cy) += mpn_addlsh_n(d, a, b, n, 2); \
-} while (0)
-#else
-/* The following is not a general substitute for addlsh2.
- It is correct if d == b, but it is not if d == a. */
-#define DO_addlsh2(d, a, b, n, cy) \
-do { \
- (cy) <<= 2; \
- (cy) += mpn_lshift(d, b, n, 2); \
- (cy) += mpn_add_n(d, d, a, n); \
-} while (0)
-#endif
-#endif
-
-/* Evaluates a polynomial of degree 2 < k < GMP_NUMB_BITS, in the
- points +2 and -2. */
-int
-mpn_toom_eval_pm2 (mp_ptr xp2, mp_ptr xm2, unsigned k,
- mp_srcptr xp, mp_size_t n, mp_size_t hn, mp_ptr tp)
-{
- int i;
- int neg;
- mp_limb_t cy;
-
- ASSERT (k >= 3);
- ASSERT (k < GMP_NUMB_BITS);
-
- ASSERT (hn > 0);
- ASSERT (hn <= n);
-
- /* The degree k is also the number of full-size coefficients, so
- * that last coefficient, of size hn, starts at xp + k*n. */
-
- cy = 0;
- DO_addlsh2 (xp2, xp + (k-2) * n, xp + k * n, hn, cy);
- if (hn != n)
- cy = mpn_add_1 (xp2 + hn, xp + (k-2) * n + hn, n - hn, cy);
- for (i = k - 4; i >= 0; i -= 2)
- DO_addlsh2 (xp2, xp + i * n, xp2, n, cy);
- xp2[n] = cy;
-
- k--;
-
- cy = 0;
- DO_addlsh2 (tp, xp + (k-2) * n, xp + k * n, n, cy);
- for (i = k - 4; i >= 0; i -= 2)
- DO_addlsh2 (tp, xp + i * n, tp, n, cy);
- tp[n] = cy;
-
- if (k & 1)
- ASSERT_NOCARRY(mpn_lshift (tp , tp , n + 1, 1));
- else
- ASSERT_NOCARRY(mpn_lshift (xp2, xp2, n + 1, 1));
-
- neg = (mpn_cmp (xp2, tp, n + 1) < 0) ? ~0 : 0;
-
-#if HAVE_NATIVE_mpn_add_n_sub_n
- if (neg)
- mpn_add_n_sub_n (xp2, xm2, tp, xp2, n + 1);
- else
- mpn_add_n_sub_n (xp2, xm2, xp2, tp, n + 1);
-#else /* !HAVE_NATIVE_mpn_add_n_sub_n */
- if (neg)
- mpn_sub_n (xm2, tp, xp2, n + 1);
- else
- mpn_sub_n (xm2, xp2, tp, n + 1);
-
- mpn_add_n (xp2, xp2, tp, n + 1);
-#endif /* !HAVE_NATIVE_mpn_add_n_sub_n */
-
- ASSERT (xp2[n] < (1<<(k+2))-1);
- ASSERT (xm2[n] < ((1<<(k+3))-1 - (1^k&1))/3);
-
- neg ^= ((k & 1) - 1);
-
- return neg;
-}
-
-#undef DO_addlsh2
diff --git a/gmp/mpn/generic/toom_eval_pm2exp.c b/gmp/mpn/generic/toom_eval_pm2exp.c
deleted file mode 100644
index b178fcac24..0000000000
--- a/gmp/mpn/generic/toom_eval_pm2exp.c
+++ /dev/null
@@ -1,128 +0,0 @@
-/* mpn_toom_eval_pm2exp -- Evaluate a polynomial in +2^k and -2^k
-
- Contributed to the GNU project by Niels Möller
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/* Evaluates a polynomial of degree k > 2, in the points +2^shift and -2^shift. */
-int
-mpn_toom_eval_pm2exp (mp_ptr xp2, mp_ptr xm2, unsigned k,
- mp_srcptr xp, mp_size_t n, mp_size_t hn, unsigned shift,
- mp_ptr tp)
-{
- unsigned i;
- int neg;
-#if HAVE_NATIVE_mpn_addlsh_n
- mp_limb_t cy;
-#endif
-
- ASSERT (k >= 3);
- ASSERT (shift*k < GMP_NUMB_BITS);
-
- ASSERT (hn > 0);
- ASSERT (hn <= n);
-
- /* The degree k is also the number of full-size coefficients, so
- * that last coefficient, of size hn, starts at xp + k*n. */
-
-#if HAVE_NATIVE_mpn_addlsh_n
- xp2[n] = mpn_addlsh_n (xp2, xp, xp + 2*n, n, 2*shift);
- for (i = 4; i < k; i += 2)
- xp2[n] += mpn_addlsh_n (xp2, xp2, xp + i*n, n, i*shift);
-
- tp[n] = mpn_lshift (tp, xp+n, n, shift);
- for (i = 3; i < k; i+= 2)
- tp[n] += mpn_addlsh_n (tp, tp, xp+i*n, n, i*shift);
-
- if (k & 1)
- {
- cy = mpn_addlsh_n (tp, tp, xp+k*n, hn, k*shift);
- MPN_INCR_U (tp + hn, n+1 - hn, cy);
- }
- else
- {
- cy = mpn_addlsh_n (xp2, xp2, xp+k*n, hn, k*shift);
- MPN_INCR_U (xp2 + hn, n+1 - hn, cy);
- }
-
-#else /* !HAVE_NATIVE_mpn_addlsh_n */
- xp2[n] = mpn_lshift (tp, xp+2*n, n, 2*shift);
- xp2[n] += mpn_add_n (xp2, xp, tp, n);
- for (i = 4; i < k; i += 2)
- {
- xp2[n] += mpn_lshift (tp, xp + i*n, n, i*shift);
- xp2[n] += mpn_add_n (xp2, xp2, tp, n);
- }
-
- tp[n] = mpn_lshift (tp, xp+n, n, shift);
- for (i = 3; i < k; i+= 2)
- {
- tp[n] += mpn_lshift (xm2, xp + i*n, n, i*shift);
- tp[n] += mpn_add_n (tp, tp, xm2, n);
- }
-
- xm2[hn] = mpn_lshift (xm2, xp + k*n, hn, k*shift);
- if (k & 1)
- mpn_add (tp, tp, n+1, xm2, hn+1);
- else
- mpn_add (xp2, xp2, n+1, xm2, hn+1);
-#endif /* !HAVE_NATIVE_mpn_addlsh_n */
-
- neg = (mpn_cmp (xp2, tp, n + 1) < 0) ? ~0 : 0;
-
-#if HAVE_NATIVE_mpn_add_n_sub_n
- if (neg)
- mpn_add_n_sub_n (xp2, xm2, tp, xp2, n + 1);
- else
- mpn_add_n_sub_n (xp2, xm2, xp2, tp, n + 1);
-#else /* !HAVE_NATIVE_mpn_add_n_sub_n */
- if (neg)
- mpn_sub_n (xm2, tp, xp2, n + 1);
- else
- mpn_sub_n (xm2, xp2, tp, n + 1);
-
- mpn_add_n (xp2, xp2, tp, n + 1);
-#endif /* !HAVE_NATIVE_mpn_add_n_sub_n */
-
- /* FIXME: the following asserts are useless if (k+1)*shift >= GMP_LIMB_BITS */
- ASSERT ((k+1)*shift >= GMP_LIMB_BITS ||
- xp2[n] < ((CNST_LIMB(1)<<((k+1)*shift))-1)/((CNST_LIMB(1)<<shift)-1));
- ASSERT ((k+2)*shift >= GMP_LIMB_BITS ||
- xm2[n] < ((CNST_LIMB(1)<<((k+2)*shift))-((k&1)?(CNST_LIMB(1)<<shift):1))/((CNST_LIMB(1)<<(2*shift))-1));
-
- return neg;
-}
diff --git a/gmp/mpn/generic/toom_eval_pm2rexp.c b/gmp/mpn/generic/toom_eval_pm2rexp.c
deleted file mode 100644
index 3cac46bd90..0000000000
--- a/gmp/mpn/generic/toom_eval_pm2rexp.c
+++ /dev/null
@@ -1,102 +0,0 @@
-/* mpn_toom_eval_pm2rexp -- Evaluate a polynomial in +2^-k and -2^-k
-
- Contributed to the GNU project by Marco Bodrato
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#if HAVE_NATIVE_mpn_addlsh_n
-#define DO_mpn_addlsh_n(dst,src,n,s,ws) mpn_addlsh_n(dst,dst,src,n,s)
-#else
-static mp_limb_t
-DO_mpn_addlsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
-{
-#if USE_MUL_1 && 0
- return mpn_addmul_1(dst,src,n,CNST_LIMB(1) <<(s));
-#else
- mp_limb_t __cy;
- __cy = mpn_lshift(ws,src,n,s);
- return __cy + mpn_add_n(dst,dst,ws,n);
-#endif
-}
-#endif
-
-/* Evaluates a polynomial of degree k >= 3. */
-int
-mpn_toom_eval_pm2rexp (mp_ptr rp, mp_ptr rm,
- unsigned int q, mp_srcptr ap, mp_size_t n, mp_size_t t,
- unsigned int s, mp_ptr ws)
-{
- unsigned int i;
- int neg;
- /* {ap,q*n+t} -> {rp,n+1} {rm,n+1} , with {ws, n+1}*/
- ASSERT (n >= t);
- ASSERT (s != 0); /* or _eval_pm1 should be used */
- ASSERT (q > 1);
- ASSERT (s*q < GMP_NUMB_BITS);
- rp[n] = mpn_lshift(rp, ap, n, s*q);
- ws[n] = mpn_lshift(ws, ap+n, n, s*(q-1));
- if( (q & 1) != 0) {
- ASSERT_NOCARRY(mpn_add(ws,ws,n+1,ap+n*q,t));
- rp[n] += DO_mpn_addlsh_n(rp, ap+n*(q-1), n, s, rm);
- } else {
- ASSERT_NOCARRY(mpn_add(rp,rp,n+1,ap+n*q,t));
- }
- for(i=2; i<q-1; i++)
- {
- rp[n] += DO_mpn_addlsh_n(rp, ap+n*i, n, s*(q-i), rm);
- i++;
- ws[n] += DO_mpn_addlsh_n(ws, ap+n*i, n, s*(q-i), rm);
- };
-
- neg = (mpn_cmp (rp, ws, n + 1) < 0) ? ~0 : 0;
-
-#if HAVE_NATIVE_mpn_add_n_sub_n
- if (neg)
- mpn_add_n_sub_n (rp, rm, ws, rp, n + 1);
- else
- mpn_add_n_sub_n (rp, rm, rp, ws, n + 1);
-#else /* !HAVE_NATIVE_mpn_add_n_sub_n */
- if (neg)
- mpn_sub_n (rm, ws, rp, n + 1);
- else
- mpn_sub_n (rm, rp, ws, n + 1);
-
- ASSERT_NOCARRY (mpn_add_n (rp, rp, ws, n + 1));
-#endif /* !HAVE_NATIVE_mpn_add_n_sub_n */
-
- return neg;
-}
diff --git a/gmp/mpn/generic/toom_interpolate_12pts.c b/gmp/mpn/generic/toom_interpolate_12pts.c
deleted file mode 100644
index 180b0329a3..0000000000
--- a/gmp/mpn/generic/toom_interpolate_12pts.c
+++ /dev/null
@@ -1,361 +0,0 @@
-/* Interpolation for the algorithm Toom-Cook 6.5-way.
-
- Contributed to the GNU project by Marco Bodrato.
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-#if HAVE_NATIVE_mpn_sublsh_n
-#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n(dst,dst,src,n,s)
-#else
-static mp_limb_t
-DO_mpn_sublsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
-{
-#if USE_MUL_1 && 0
- return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
-#else
- mp_limb_t __cy;
- __cy = mpn_lshift(ws,src,n,s);
- return __cy + mpn_sub_n(dst,dst,ws,n);
-#endif
-}
-#endif
-
-#if HAVE_NATIVE_mpn_addlsh_n
-#define DO_mpn_addlsh_n(dst,src,n,s,ws) mpn_addlsh_n(dst,dst,src,n,s)
-#else
-static mp_limb_t
-DO_mpn_addlsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
-{
-#if USE_MUL_1 && 0
- return mpn_addmul_1(dst,src,n,CNST_LIMB(1) <<(s));
-#else
- mp_limb_t __cy;
- __cy = mpn_lshift(ws,src,n,s);
- return __cy + mpn_add_n(dst,dst,ws,n);
-#endif
-}
-#endif
-
-#if HAVE_NATIVE_mpn_subrsh
-#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh(dst,nd,src,ns,s)
-#else
-/* FIXME: This is not a correct definition, it assumes no carry */
-#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) \
-do { \
- mp_limb_t __cy; \
- MPN_DECR_U (dst, nd, src[0] >> s); \
- __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws); \
- MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy); \
-} while (0)
-#endif
-
-
-#if GMP_NUMB_BITS < 21
-#error Not implemented: Both sublsh_n(,,,20) should be corrected.
-#endif
-
-#if GMP_NUMB_BITS < 16
-#error Not implemented: divexact_by42525 needs splitting.
-#endif
-
-#if GMP_NUMB_BITS < 12
-#error Not implemented: Hard to adapt...
-#endif
-
-/* FIXME: tuneup should decide the best variant */
-#ifndef AORSMUL_FASTER_AORS_AORSLSH
-#define AORSMUL_FASTER_AORS_AORSLSH 1
-#endif
-#ifndef AORSMUL_FASTER_AORS_2AORSLSH
-#define AORSMUL_FASTER_AORS_2AORSLSH 1
-#endif
-#ifndef AORSMUL_FASTER_2AORSLSH
-#define AORSMUL_FASTER_2AORSLSH 1
-#endif
-#ifndef AORSMUL_FASTER_3AORSLSH
-#define AORSMUL_FASTER_3AORSLSH 1
-#endif
-
-#define BINVERT_9 \
- ((((GMP_NUMB_MAX / 9) << (6 - GMP_NUMB_BITS % 6)) * 8 & GMP_NUMB_MAX) | 0x39)
-
-#define BINVERT_255 \
- (GMP_NUMB_MAX - ((GMP_NUMB_MAX / 255) << (8 - GMP_NUMB_BITS % 8)))
-
- /* FIXME: find some more general expressions for 2835^-1, 42525^-1 */
-#if GMP_LIMB_BITS == 32
-#define BINVERT_2835 (GMP_NUMB_MASK & CNST_LIMB(0x53E3771B))
-#define BINVERT_42525 (GMP_NUMB_MASK & CNST_LIMB(0x9F314C35))
-#else
-#if GMP_LIMB_BITS == 64
-#define BINVERT_2835 (GMP_NUMB_MASK & CNST_LIMB(0x938CC70553E3771B))
-#define BINVERT_42525 (GMP_NUMB_MASK & CNST_LIMB(0xE7B40D449F314C35))
-#endif
-#endif
-
-#ifndef mpn_divexact_by255
-#if GMP_NUMB_BITS % 8 == 0
-#define mpn_divexact_by255(dst,src,size) \
- (255 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 255)))
-#else
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
-#define mpn_divexact_by255(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,0)
-#else
-#define mpn_divexact_by255(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255))
-#endif
-#endif
-#endif
-
-#ifndef mpn_divexact_by9x4
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
-#define mpn_divexact_by9x4(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(9),BINVERT_9,2)
-#else
-#define mpn_divexact_by9x4(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(9)<<2)
-#endif
-#endif
-
-#ifndef mpn_divexact_by42525
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_42525)
-#define mpn_divexact_by42525(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(42525),BINVERT_42525,0)
-#else
-#define mpn_divexact_by42525(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(42525))
-#endif
-#endif
-
-#ifndef mpn_divexact_by2835x4
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_2835)
-#define mpn_divexact_by2835x4(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(2835),BINVERT_2835,2)
-#else
-#define mpn_divexact_by2835x4(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(2835)<<2)
-#endif
-#endif
-
-/* Interpolation for Toom-6.5 (or Toom-6), using the evaluation
- points: infinity(6.5 only), +-4, +-2, +-1, +-1/4, +-1/2, 0. More precisely,
- we want to compute f(2^(GMP_NUMB_BITS * n)) for a polynomial f of
- degree 11 (or 10), given the 12 (rsp. 11) values:
-
- r0 = limit at infinity of f(x) / x^7,
- r1 = f(4),f(-4),
- r2 = f(2),f(-2),
- r3 = f(1),f(-1),
- r4 = f(1/4),f(-1/4),
- r5 = f(1/2),f(-1/2),
- r6 = f(0).
-
- All couples of the form f(n),f(-n) must be already mixed with
- toom_couple_handling(f(n),...,f(-n),...)
-
- The result is stored in {pp, spt + 7*n (or 6*n)}.
- At entry, r6 is stored at {pp, 2n},
- r4 is stored at {pp + 3n, 3n + 1}.
- r2 is stored at {pp + 7n, 3n + 1}.
- r0 is stored at {pp +11n, spt}.
-
- The other values are 3n+1 limbs each (with most significant limbs small).
-
- Negative intermediate results are stored two-complemented.
- Inputs are destroyed.
-*/
-
-void
-mpn_toom_interpolate_12pts (mp_ptr pp, mp_ptr r1, mp_ptr r3, mp_ptr r5,
- mp_size_t n, mp_size_t spt, int half, mp_ptr wsi)
-{
- mp_limb_t cy;
- mp_size_t n3;
- mp_size_t n3p1;
- n3 = 3 * n;
- n3p1 = n3 + 1;
-
-#define r4 (pp + n3) /* 3n+1 */
-#define r2 (pp + 7 * n) /* 3n+1 */
-#define r0 (pp +11 * n) /* s+t <= 2*n */
-
- /******************************* interpolation *****************************/
- if (half != 0) {
- cy = mpn_sub_n (r3, r3, r0, spt);
- MPN_DECR_U (r3 + spt, n3p1 - spt, cy);
-
- cy = DO_mpn_sublsh_n (r2, r0, spt, 10, wsi);
- MPN_DECR_U (r2 + spt, n3p1 - spt, cy);
- DO_mpn_subrsh(r5, n3p1, r0, spt, 2, wsi);
-
- cy = DO_mpn_sublsh_n (r1, r0, spt, 20, wsi);
- MPN_DECR_U (r1 + spt, n3p1 - spt, cy);
- DO_mpn_subrsh(r4, n3p1, r0, spt, 4, wsi);
- };
-
- r4[n3] -= DO_mpn_sublsh_n (r4 + n, pp, 2 * n, 20, wsi);
- DO_mpn_subrsh(r1 + n, 2 * n + 1, pp, 2 * n, 4, wsi);
-
-#if HAVE_NATIVE_mpn_add_n_sub_n
- mpn_add_n_sub_n (r1, r4, r4, r1, n3p1);
-#else
- ASSERT_NOCARRY(mpn_add_n (wsi, r1, r4, n3p1));
- mpn_sub_n (r4, r4, r1, n3p1); /* can be negative */
- MP_PTR_SWAP(r1, wsi);
-#endif
-
- r5[n3] -= DO_mpn_sublsh_n (r5 + n, pp, 2 * n, 10, wsi);
- DO_mpn_subrsh(r2 + n, 2 * n + 1, pp, 2 * n, 2, wsi);
-
-#if HAVE_NATIVE_mpn_add_n_sub_n
- mpn_add_n_sub_n (r2, r5, r5, r2, n3p1);
-#else
- mpn_sub_n (wsi, r5, r2, n3p1); /* can be negative */
- ASSERT_NOCARRY(mpn_add_n (r2, r2, r5, n3p1));
- MP_PTR_SWAP(r5, wsi);
-#endif
-
- r3[n3] -= mpn_sub_n (r3+n, r3+n, pp, 2 * n);
-
-#if AORSMUL_FASTER_AORS_AORSLSH
- mpn_submul_1 (r4, r5, n3p1, 257); /* can be negative */
-#else
- mpn_sub_n (r4, r4, r5, n3p1); /* can be negative */
- DO_mpn_sublsh_n (r4, r5, n3p1, 8, wsi); /* can be negative */
-#endif
- /* A division by 2835x4 follows. Warning: the operand can be negative! */
- mpn_divexact_by2835x4(r4, r4, n3p1);
- if ((r4[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-3))) != 0)
- r4[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-2));
-
-#if AORSMUL_FASTER_2AORSLSH
- mpn_addmul_1 (r5, r4, n3p1, 60); /* can be negative */
-#else
- DO_mpn_sublsh_n (r5, r4, n3p1, 2, wsi); /* can be negative */
- DO_mpn_addlsh_n (r5, r4, n3p1, 6, wsi); /* can give a carry */
-#endif
- mpn_divexact_by255(r5, r5, n3p1);
-
- ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r3, n3p1, 5, wsi));
-
-#if AORSMUL_FASTER_3AORSLSH
- ASSERT_NOCARRY(mpn_submul_1 (r1, r2, n3p1, 100));
-#else
- ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r2, n3p1, 6, wsi));
- ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r2, n3p1, 5, wsi));
- ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r2, n3p1, 2, wsi));
-#endif
- ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r3, n3p1, 9, wsi));
- mpn_divexact_by42525(r1, r1, n3p1);
-
-#if AORSMUL_FASTER_AORS_2AORSLSH
- ASSERT_NOCARRY(mpn_submul_1 (r2, r1, n3p1, 225));
-#else
- ASSERT_NOCARRY(mpn_sub_n (r2, r2, r1, n3p1));
- ASSERT_NOCARRY(DO_mpn_addlsh_n (r2, r1, n3p1, 5, wsi));
- ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r1, n3p1, 8, wsi));
-#endif
- mpn_divexact_by9x4(r2, r2, n3p1);
-
- ASSERT_NOCARRY(mpn_sub_n (r3, r3, r2, n3p1));
-
- mpn_sub_n (r4, r2, r4, n3p1);
- ASSERT_NOCARRY(mpn_rshift(r4, r4, n3p1, 1));
- ASSERT_NOCARRY(mpn_sub_n (r2, r2, r4, n3p1));
-
- mpn_add_n (r5, r5, r1, n3p1);
- ASSERT_NOCARRY(mpn_rshift(r5, r5, n3p1, 1));
-
- /* last interpolation steps... */
- ASSERT_NOCARRY(mpn_sub_n (r3, r3, r1, n3p1));
- ASSERT_NOCARRY(mpn_sub_n (r1, r1, r5, n3p1));
- /* ... could be mixed with recomposition
- ||H-r5|M-r5|L-r5| ||H-r1|M-r1|L-r1|
- */
-
- /***************************** recomposition *******************************/
- /*
- pp[] prior to operations:
- |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|____|H_r6|L r6|pp
-
- summation scheme for remaining operations:
- |__12|n_11|n_10|n__9|n__8|n__7|n__6|n__5|n__4|n__3|n__2|n___|n___|pp
- |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|____|H_r6|L r6|pp
- ||H r1|M r1|L r1| ||H r3|M r3|L r3| ||H_r5|M_r5|L_r5|
- */
-
- cy = mpn_add_n (pp + n, pp + n, r5, n);
- cy = mpn_add_1 (pp + 2 * n, r5 + n, n, cy);
-#if HAVE_NATIVE_mpn_add_nc
- cy = r5[n3] + mpn_add_nc(pp + n3, pp + n3, r5 + 2 * n, n, cy);
-#else
- MPN_INCR_U (r5 + 2 * n, n + 1, cy);
- cy = r5[n3] + mpn_add_n (pp + n3, pp + n3, r5 + 2 * n, n);
-#endif
- MPN_INCR_U (pp + n3 + n, 2 * n + 1, cy);
-
- pp[2 * n3]+= mpn_add_n (pp + 5 * n, pp + 5 * n, r3, n);
- cy = mpn_add_1 (pp + 2 * n3, r3 + n, n, pp[2 * n3]);
-#if HAVE_NATIVE_mpn_add_nc
- cy = r3[n3] + mpn_add_nc(pp + 7 * n, pp + 7 * n, r3 + 2 * n, n, cy);
-#else
- MPN_INCR_U (r3 + 2 * n, n + 1, cy);
- cy = r3[n3] + mpn_add_n (pp + 7 * n, pp + 7 * n, r3 + 2 * n, n);
-#endif
- MPN_INCR_U (pp + 8 * n, 2 * n + 1, cy);
-
- pp[10*n]+=mpn_add_n (pp + 9 * n, pp + 9 * n, r1, n);
- if (half) {
- cy = mpn_add_1 (pp + 10 * n, r1 + n, n, pp[10 * n]);
-#if HAVE_NATIVE_mpn_add_nc
- if (LIKELY (spt > n)) {
- cy = r1[n3] + mpn_add_nc(pp + 11 * n, pp + 11 * n, r1 + 2 * n, n, cy);
- MPN_INCR_U (pp + 4 * n3, spt - n, cy);
- } else {
- ASSERT_NOCARRY(mpn_add_nc(pp + 11 * n, pp + 11 * n, r1 + 2 * n, spt, cy));
- }
-#else
- MPN_INCR_U (r1 + 2 * n, n + 1, cy);
- if (LIKELY (spt > n)) {
- cy = r1[n3] + mpn_add_n (pp + 11 * n, pp + 11 * n, r1 + 2 * n, n);
- MPN_INCR_U (pp + 4 * n3, spt - n, cy);
- } else {
- ASSERT_NOCARRY(mpn_add_n (pp + 11 * n, pp + 11 * n, r1 + 2 * n, spt));
- }
-#endif
- } else {
- ASSERT_NOCARRY(mpn_add_1 (pp + 10 * n, r1 + n, spt, pp[10 * n]));
- }
-
-#undef r0
-#undef r2
-#undef r4
-}
diff --git a/gmp/mpn/generic/toom_interpolate_16pts.c b/gmp/mpn/generic/toom_interpolate_16pts.c
deleted file mode 100644
index 5afe6641f6..0000000000
--- a/gmp/mpn/generic/toom_interpolate_16pts.c
+++ /dev/null
@@ -1,527 +0,0 @@
-/* Interpolation for the algorithm Toom-Cook 8.5-way.
-
- Contributed to the GNU project by Marco Bodrato.
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#if GMP_NUMB_BITS < 29
-#error Not implemented: Both sublsh_n(,,,28) should be corrected; r2 and r5 need one more LIMB.
-#endif
-
-#if GMP_NUMB_BITS < 28
-#error Not implemented: divexact_by188513325 and _by182712915 will not work.
-#endif
-
-
-#if HAVE_NATIVE_mpn_sublsh_n
-#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n(dst,dst,src,n,s)
-#else
-static mp_limb_t
-DO_mpn_sublsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
-{
-#if USE_MUL_1 && 0
- return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
-#else
- mp_limb_t __cy;
- __cy = mpn_lshift(ws,src,n,s);
- return __cy + mpn_sub_n(dst,dst,ws,n);
-#endif
-}
-#endif
-
-#if HAVE_NATIVE_mpn_addlsh_n
-#define DO_mpn_addlsh_n(dst,src,n,s,ws) mpn_addlsh_n(dst,dst,src,n,s)
-#else
-static mp_limb_t
-DO_mpn_addlsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
-{
-#if USE_MUL_1 && 0
- return mpn_addmul_1(dst,src,n,CNST_LIMB(1) <<(s));
-#else
- mp_limb_t __cy;
- __cy = mpn_lshift(ws,src,n,s);
- return __cy + mpn_add_n(dst,dst,ws,n);
-#endif
-}
-#endif
-
-#if HAVE_NATIVE_mpn_subrsh
-#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh(dst,nd,src,ns,s)
-#else
-/* FIXME: This is not a correct definition, it assumes no carry */
-#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) \
-do { \
- mp_limb_t __cy; \
- MPN_DECR_U (dst, nd, src[0] >> s); \
- __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws); \
- MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy); \
-} while (0)
-#endif
-
-
-/* FIXME: tuneup should decide the best variant */
-#ifndef AORSMUL_FASTER_AORS_AORSLSH
-#define AORSMUL_FASTER_AORS_AORSLSH 1
-#endif
-#ifndef AORSMUL_FASTER_AORS_2AORSLSH
-#define AORSMUL_FASTER_AORS_2AORSLSH 1
-#endif
-#ifndef AORSMUL_FASTER_2AORSLSH
-#define AORSMUL_FASTER_2AORSLSH 1
-#endif
-#ifndef AORSMUL_FASTER_3AORSLSH
-#define AORSMUL_FASTER_3AORSLSH 1
-#endif
-
-#if GMP_NUMB_BITS < 43
-#define BIT_CORRECTION 1
-#define CORRECTION_BITS GMP_NUMB_BITS
-#else
-#define BIT_CORRECTION 0
-#define CORRECTION_BITS 0
-#endif
-
-#define BINVERT_9 \
- ((((GMP_NUMB_MAX / 9) << (6 - GMP_NUMB_BITS % 6)) * 8 & GMP_NUMB_MAX) | 0x39)
-
-#define BINVERT_255 \
- (GMP_NUMB_MAX - ((GMP_NUMB_MAX / 255) << (8 - GMP_NUMB_BITS % 8)))
-
- /* FIXME: find some more general expressions for inverses */
-#if GMP_LIMB_BITS == 32
-#define BINVERT_2835 (GMP_NUMB_MASK & CNST_LIMB(0x53E3771B))
-#define BINVERT_42525 (GMP_NUMB_MASK & CNST_LIMB(0x9F314C35))
-#define BINVERT_182712915 (GMP_NUMB_MASK & CNST_LIMB(0x550659DB))
-#define BINVERT_188513325 (GMP_NUMB_MASK & CNST_LIMB(0xFBC333A5))
-#define BINVERT_255x182712915L (GMP_NUMB_MASK & CNST_LIMB(0x6FC4CB25))
-#define BINVERT_255x188513325L (GMP_NUMB_MASK & CNST_LIMB(0x6864275B))
-#if GMP_NAIL_BITS == 0
-#define BINVERT_255x182712915H CNST_LIMB(0x1B649A07)
-#define BINVERT_255x188513325H CNST_LIMB(0x06DB993A)
-#else /* GMP_NAIL_BITS != 0 */
-#define BINVERT_255x182712915H \
- (GMP_NUMB_MASK & CNST_LIMB((0x1B649A07<<GMP_NAIL_BITS) | (0x6FC4CB25>>GMP_NUMB_BITS)))
-#define BINVERT_255x188513325H \
- (GMP_NUMB_MASK & CNST_LIMB((0x06DB993A<<GMP_NAIL_BITS) | (0x6864275B>>GMP_NUMB_BITS)))
-#endif
-#else
-#if GMP_LIMB_BITS == 64
-#define BINVERT_2835 (GMP_NUMB_MASK & CNST_LIMB(0x938CC70553E3771B))
-#define BINVERT_42525 (GMP_NUMB_MASK & CNST_LIMB(0xE7B40D449F314C35))
-#define BINVERT_255x182712915 (GMP_NUMB_MASK & CNST_LIMB(0x1B649A076FC4CB25))
-#define BINVERT_255x188513325 (GMP_NUMB_MASK & CNST_LIMB(0x06DB993A6864275B))
-#endif
-#endif
-
-#ifndef mpn_divexact_by255
-#if GMP_NUMB_BITS % 8 == 0
-#define mpn_divexact_by255(dst,src,size) \
- (255 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 255)))
-#else
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
-#define mpn_divexact_by255(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,0)
-#else
-#define mpn_divexact_by255(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255))
-#endif
-#endif
-#endif
-
-#ifndef mpn_divexact_by255x4
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
-#define mpn_divexact_by255x4(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,2)
-#else
-#define mpn_divexact_by255x4(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255)<<2)
-#endif
-#endif
-
-#ifndef mpn_divexact_by9x16
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
-#define mpn_divexact_by9x16(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(9),BINVERT_9,4)
-#else
-#define mpn_divexact_by9x16(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(9)<<4)
-#endif
-#endif
-
-#ifndef mpn_divexact_by42525x16
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_42525)
-#define mpn_divexact_by42525x16(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(42525),BINVERT_42525,4)
-#else
-#define mpn_divexact_by42525x16(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(42525)<<4)
-#endif
-#endif
-
-#ifndef mpn_divexact_by2835x64
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_2835)
-#define mpn_divexact_by2835x64(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(2835),BINVERT_2835,6)
-#else
-#define mpn_divexact_by2835x64(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(2835)<<6)
-#endif
-#endif
-
-#ifndef mpn_divexact_by255x182712915
-#if GMP_NUMB_BITS < 36
-#if HAVE_NATIVE_mpn_bdiv_q_2_pi2 && defined(BINVERT_255x182712915H)
-/* FIXME: use mpn_bdiv_q_2_pi2 */
-#endif
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_182712915)
-#define mpn_divexact_by255x182712915(dst,src,size) \
- do { \
- mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(182712915),BINVERT_182712915,0); \
- mpn_divexact_by255(dst,dst,size); \
- } while(0)
-#else
-#define mpn_divexact_by255x182712915(dst,src,size) \
- do { \
- mpn_divexact_1(dst,src,size,CNST_LIMB(182712915)); \
- mpn_divexact_by255(dst,dst,size); \
- } while(0)
-#endif
-#else /* GMP_NUMB_BITS > 35 */
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_255x182712915)
-#define mpn_divexact_by255x182712915(dst,src,size) \
- mpn_pi1_bdiv_q_1(dst,src,size,255*CNST_LIMB(182712915),BINVERT_255x182712915,0)
-#else
-#define mpn_divexact_by255x182712915(dst,src,size) mpn_divexact_1(dst,src,size,255*CNST_LIMB(182712915))
-#endif
-#endif /* GMP_NUMB_BITS >?< 36 */
-#endif
-
-#ifndef mpn_divexact_by255x188513325
-#if GMP_NUMB_BITS < 36
-#if HAVE_NATIVE_mpn_bdiv_q_1_pi2 && defined(BINVERT_255x188513325H)
-/* FIXME: use mpn_bdiv_q_1_pi2 */
-#endif
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_188513325)
-#define mpn_divexact_by255x188513325(dst,src,size) \
- do { \
- mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(188513325),BINVERT_188513325,0); \
- mpn_divexact_by255(dst,dst,size); \
- } while(0)
-#else
-#define mpn_divexact_by255x188513325(dst,src,size) \
- do { \
- mpn_divexact_1(dst,src,size,CNST_LIMB(188513325)); \
- mpn_divexact_by255(dst,dst,size); \
- } while(0)
-#endif
-#else /* GMP_NUMB_BITS > 35 */
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_255x188513325)
-#define mpn_divexact_by255x188513325(dst,src,size) \
- mpn_pi1_bdiv_q_1(dst,src,size,255*CNST_LIMB(188513325),BINVERT_255x188513325,0)
-#else
-#define mpn_divexact_by255x188513325(dst,src,size) mpn_divexact_1(dst,src,size,255*CNST_LIMB(188513325))
-#endif
-#endif /* GMP_NUMB_BITS >?< 36 */
-#endif
-
-/* Interpolation for Toom-8.5 (or Toom-8), using the evaluation
- points: infinity(8.5 only), +-8, +-4, +-2, +-1, +-1/4, +-1/2,
- +-1/8, 0. More precisely, we want to compute
- f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 15 (or
- 14), given the 16 (rsp. 15) values:
-
- r0 = limit at infinity of f(x) / x^7,
- r1 = f(8),f(-8),
- r2 = f(4),f(-4),
- r3 = f(2),f(-2),
- r4 = f(1),f(-1),
- r5 = f(1/4),f(-1/4),
- r6 = f(1/2),f(-1/2),
- r7 = f(1/8),f(-1/8),
- r8 = f(0).
-
- All couples of the form f(n),f(-n) must be already mixed with
- toom_couple_handling(f(n),...,f(-n),...)
-
- The result is stored in {pp, spt + 7*n (or 8*n)}.
- At entry, r8 is stored at {pp, 2n},
- r6 is stored at {pp + 3n, 3n + 1}.
- r4 is stored at {pp + 7n, 3n + 1}.
- r2 is stored at {pp +11n, 3n + 1}.
- r0 is stored at {pp +15n, spt}.
-
- The other values are 3n+1 limbs each (with most significant limbs small).
-
- Negative intermediate results are stored two-complemented.
- Inputs are destroyed.
-*/
-
-void
-mpn_toom_interpolate_16pts (mp_ptr pp, mp_ptr r1, mp_ptr r3, mp_ptr r5, mp_ptr r7,
- mp_size_t n, mp_size_t spt, int half, mp_ptr wsi)
-{
- mp_limb_t cy;
- mp_size_t n3;
- mp_size_t n3p1;
- n3 = 3 * n;
- n3p1 = n3 + 1;
-
-#define r6 (pp + n3) /* 3n+1 */
-#define r4 (pp + 7 * n) /* 3n+1 */
-#define r2 (pp +11 * n) /* 3n+1 */
-#define r0 (pp +15 * n) /* s+t <= 2*n */
-
- ASSERT( spt <= 2 * n );
- /******************************* interpolation *****************************/
- if( half != 0) {
- cy = mpn_sub_n (r4, r4, r0, spt);
- MPN_DECR_U (r4 + spt, n3p1 - spt, cy);
-
- cy = DO_mpn_sublsh_n (r3, r0, spt, 14, wsi);
- MPN_DECR_U (r3 + spt, n3p1 - spt, cy);
- DO_mpn_subrsh(r6, n3p1, r0, spt, 2, wsi);
-
- cy = DO_mpn_sublsh_n (r2, r0, spt, 28, wsi);
- MPN_DECR_U (r2 + spt, n3p1 - spt, cy);
- DO_mpn_subrsh(r5, n3p1, r0, spt, 4, wsi);
-
- cy = DO_mpn_sublsh_n (r1 + BIT_CORRECTION, r0, spt, 42 - CORRECTION_BITS, wsi);
-#if BIT_CORRECTION
- cy = mpn_sub_1 (r1 + spt + BIT_CORRECTION, r1 + spt + BIT_CORRECTION,
- n3p1 - spt - BIT_CORRECTION, cy);
- ASSERT (BIT_CORRECTION > 0 || cy == 0);
- /* FIXME: assumes r7[n3p1] is writable (it is if r5 follows). */
- cy = r7[n3p1];
- r7[n3p1] = 0x80;
-#else
- MPN_DECR_U (r1 + spt + BIT_CORRECTION, n3p1 - spt - BIT_CORRECTION, cy);
-#endif
- DO_mpn_subrsh(r7, n3p1 + BIT_CORRECTION, r0, spt, 6, wsi);
-#if BIT_CORRECTION
- /* FIXME: assumes r7[n3p1] is writable. */
- ASSERT ( BIT_CORRECTION > 0 || r7[n3p1] == 0x80 );
- r7[n3p1] = cy;
-#endif
- };
-
- r5[n3] -= DO_mpn_sublsh_n (r5 + n, pp, 2 * n, 28, wsi);
- DO_mpn_subrsh(r2 + n, 2 * n + 1, pp, 2 * n, 4, wsi);
-
-#if HAVE_NATIVE_mpn_add_n_sub_n
- mpn_add_n_sub_n (r2, r5, r5, r2, n3p1);
-#else
- mpn_sub_n (wsi, r5, r2, n3p1); /* can be negative */
- ASSERT_NOCARRY(mpn_add_n (r2, r2, r5, n3p1));
- MP_PTR_SWAP(r5, wsi);
-#endif
-
- r6[n3] -= DO_mpn_sublsh_n (r6 + n, pp, 2 * n, 14, wsi);
- DO_mpn_subrsh(r3 + n, 2 * n + 1, pp, 2 * n, 2, wsi);
-
-#if HAVE_NATIVE_mpn_add_n_sub_n
- mpn_add_n_sub_n (r3, r6, r6, r3, n3p1);
-#else
- ASSERT_NOCARRY(mpn_add_n (wsi, r3, r6, n3p1));
- mpn_sub_n (r6, r6, r3, n3p1); /* can be negative */
- MP_PTR_SWAP(r3, wsi);
-#endif
-
- cy = DO_mpn_sublsh_n (r7 + n + BIT_CORRECTION, pp, 2 * n, 42 - CORRECTION_BITS, wsi);
-#if BIT_CORRECTION
- MPN_DECR_U (r1 + n, 2 * n + 1, pp[0] >> 6);
- cy = DO_mpn_sublsh_n (r1 + n, pp + 1, 2 * n - 1, GMP_NUMB_BITS - 6, wsi);
- cy = mpn_sub_1(r1 + 3 * n - 1, r1 + 3 * n - 1, 2, cy);
- ASSERT ( BIT_CORRECTION > 0 || cy != 0 );
-#else
- r7[n3] -= cy;
- DO_mpn_subrsh(r1 + n, 2 * n + 1, pp, 2 * n, 6, wsi);
-#endif
-
-#if HAVE_NATIVE_mpn_add_n_sub_n
- mpn_add_n_sub_n (r1, r7, r7, r1, n3p1);
-#else
- mpn_sub_n (wsi, r7, r1, n3p1); /* can be negative */
- mpn_add_n (r1, r1, r7, n3p1); /* if BIT_CORRECTION != 0, can give a carry. */
- MP_PTR_SWAP(r7, wsi);
-#endif
-
- r4[n3] -= mpn_sub_n (r4+n, r4+n, pp, 2 * n);
-
-#if AORSMUL_FASTER_2AORSLSH
- mpn_submul_1 (r5, r6, n3p1, 1028); /* can be negative */
-#else
- DO_mpn_sublsh_n (r5, r6, n3p1, 2, wsi); /* can be negative */
- DO_mpn_sublsh_n (r5, r6, n3p1,10, wsi); /* can be negative */
-#endif
-
- mpn_submul_1 (r7, r5, n3p1, 1300); /* can be negative */
-#if AORSMUL_FASTER_3AORSLSH
- mpn_submul_1 (r7, r6, n3p1, 1052688); /* can be negative */
-#else
- DO_mpn_sublsh_n (r7, r6, n3p1, 4, wsi); /* can be negative */
- DO_mpn_sublsh_n (r7, r6, n3p1,12, wsi); /* can be negative */
- DO_mpn_sublsh_n (r7, r6, n3p1,20, wsi); /* can be negative */
-#endif
- mpn_divexact_by255x188513325(r7, r7, n3p1);
-
- mpn_submul_1 (r5, r7, n3p1, 12567555); /* can be negative */
- /* A division by 2835x64 follows. Warning: the operand can be negative! */
- mpn_divexact_by2835x64(r5, r5, n3p1);
- if ((r5[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-7))) != 0)
- r5[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-6));
-
-#if AORSMUL_FASTER_AORS_AORSLSH
- mpn_submul_1 (r6, r7, n3p1, 4095); /* can be negative */
-#else
- mpn_add_n (r6, r6, r7, n3p1); /* can give a carry */
- DO_mpn_sublsh_n (r6, r7, n3p1, 12, wsi); /* can be negative */
-#endif
-#if AORSMUL_FASTER_2AORSLSH
- mpn_addmul_1 (r6, r5, n3p1, 240); /* can be negative */
-#else
- DO_mpn_addlsh_n (r6, r5, n3p1, 8, wsi); /* can give a carry */
- DO_mpn_sublsh_n (r6, r5, n3p1, 4, wsi); /* can be negative */
-#endif
- /* A division by 255x4 follows. Warning: the operand can be negative! */
- mpn_divexact_by255x4(r6, r6, n3p1);
- if ((r6[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-3))) != 0)
- r6[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-2));
-
- ASSERT_NOCARRY(DO_mpn_sublsh_n (r3, r4, n3p1, 7, wsi));
-
- ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r4, n3p1, 13, wsi));
- ASSERT_NOCARRY(mpn_submul_1 (r2, r3, n3p1, 400));
-
- /* If GMP_NUMB_BITS < 42 next operations on r1 can give a carry!*/
- DO_mpn_sublsh_n (r1, r4, n3p1, 19, wsi);
- mpn_submul_1 (r1, r2, n3p1, 1428);
- mpn_submul_1 (r1, r3, n3p1, 112896);
- mpn_divexact_by255x182712915(r1, r1, n3p1);
-
- ASSERT_NOCARRY(mpn_submul_1 (r2, r1, n3p1, 15181425));
- mpn_divexact_by42525x16(r2, r2, n3p1);
-
-#if AORSMUL_FASTER_AORS_2AORSLSH
- ASSERT_NOCARRY(mpn_submul_1 (r3, r1, n3p1, 3969));
-#else
- ASSERT_NOCARRY(mpn_sub_n (r3, r3, r1, n3p1));
- ASSERT_NOCARRY(DO_mpn_addlsh_n (r3, r1, n3p1, 7, wsi));
- ASSERT_NOCARRY(DO_mpn_sublsh_n (r3, r1, n3p1, 12, wsi));
-#endif
- ASSERT_NOCARRY(mpn_submul_1 (r3, r2, n3p1, 900));
- mpn_divexact_by9x16(r3, r3, n3p1);
-
- ASSERT_NOCARRY(mpn_sub_n (r4, r4, r1, n3p1));
- ASSERT_NOCARRY(mpn_sub_n (r4, r4, r3, n3p1));
- ASSERT_NOCARRY(mpn_sub_n (r4, r4, r2, n3p1));
-
- mpn_add_n (r6, r2, r6, n3p1);
- ASSERT_NOCARRY(mpn_rshift(r6, r6, n3p1, 1));
- ASSERT_NOCARRY(mpn_sub_n (r2, r2, r6, n3p1));
-
- mpn_sub_n (r5, r3, r5, n3p1);
- ASSERT_NOCARRY(mpn_rshift(r5, r5, n3p1, 1));
- ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, n3p1));
-
- mpn_add_n (r7, r1, r7, n3p1);
- ASSERT_NOCARRY(mpn_rshift(r7, r7, n3p1, 1));
- ASSERT_NOCARRY(mpn_sub_n (r1, r1, r7, n3p1));
-
- /* last interpolation steps... */
- /* ... could be mixed with recomposition
- ||H-r7|M-r7|L-r7| ||H-r5|M-r5|L-r5|
- */
-
- /***************************** recomposition *******************************/
- /*
- pp[] prior to operations:
- |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|___||H r6|M r6|L r6|____|H_r8|L r8|pp
-
- summation scheme for remaining operations:
- |__16|n_15|n_14|n_13|n_12|n_11|n_10|n__9|n__8|n__7|n__6|n__5|n__4|n__3|n__2|n___|n___|pp
- |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|___||H r6|M r6|L r6|____|H_r8|L r8|pp
- ||H r1|M r1|L r1| ||H r3|M r3|L r3| ||H_r5|M_r5|L_r5| ||H r7|M r7|L r7|
- */
-
- cy = mpn_add_n (pp + n, pp + n, r7, n);
- cy = mpn_add_1 (pp + 2 * n, r7 + n, n, cy);
-#if HAVE_NATIVE_mpn_add_nc
- cy = r7[n3] + mpn_add_nc(pp + n3, pp + n3, r7 + 2 * n, n, cy);
-#else
- MPN_INCR_U (r7 + 2 * n, n + 1, cy);
- cy = r7[n3] + mpn_add_n (pp + n3, pp + n3, r7 + 2 * n, n);
-#endif
- MPN_INCR_U (pp + 4 * n, 2 * n + 1, cy);
-
- pp[2 * n3]+= mpn_add_n (pp + 5 * n, pp + 5 * n, r5, n);
- cy = mpn_add_1 (pp + 2 * n3, r5 + n, n, pp[2 * n3]);
-#if HAVE_NATIVE_mpn_add_nc
- cy = r5[n3] + mpn_add_nc(pp + 7 * n, pp + 7 * n, r5 + 2 * n, n, cy);
-#else
- MPN_INCR_U (r5 + 2 * n, n + 1, cy);
- cy = r5[n3] + mpn_add_n (pp + 7 * n, pp + 7 * n, r5 + 2 * n, n);
-#endif
- MPN_INCR_U (pp + 8 * n, 2 * n + 1, cy);
-
- pp[10 * n]+= mpn_add_n (pp + 9 * n, pp + 9 * n, r3, n);
- cy = mpn_add_1 (pp + 10 * n, r3 + n, n, pp[10 * n]);
-#if HAVE_NATIVE_mpn_add_nc
- cy = r3[n3] + mpn_add_nc(pp +11 * n, pp +11 * n, r3 + 2 * n, n, cy);
-#else
- MPN_INCR_U (r3 + 2 * n, n + 1, cy);
- cy = r3[n3] + mpn_add_n (pp +11 * n, pp +11 * n, r3 + 2 * n, n);
-#endif
- MPN_INCR_U (pp +12 * n, 2 * n + 1, cy);
-
- pp[14 * n]+=mpn_add_n (pp +13 * n, pp +13 * n, r1, n);
- if ( half ) {
- cy = mpn_add_1 (pp + 14 * n, r1 + n, n, pp[14 * n]);
-#if HAVE_NATIVE_mpn_add_nc
- if(LIKELY(spt > n)) {
- cy = r1[n3] + mpn_add_nc(pp + 15 * n, pp + 15 * n, r1 + 2 * n, n, cy);
- MPN_INCR_U (pp + 16 * n, spt - n, cy);
- } else {
- ASSERT_NOCARRY(mpn_add_nc(pp + 15 * n, pp + 15 * n, r1 + 2 * n, spt, cy));
- }
-#else
- MPN_INCR_U (r1 + 2 * n, n + 1, cy);
- if(LIKELY(spt > n)) {
- cy = r1[n3] + mpn_add_n (pp + 15 * n, pp + 15 * n, r1 + 2 * n, n);
- MPN_INCR_U (pp + 16 * n, spt - n, cy);
- } else {
- ASSERT_NOCARRY(mpn_add_n (pp + 15 * n, pp + 15 * n, r1 + 2 * n, spt));
- }
-#endif
- } else {
- ASSERT_NOCARRY(mpn_add_1 (pp + 14 * n, r1 + n, spt, pp[14 * n]));
- }
-
-#undef r0
-#undef r2
-#undef r4
-#undef r6
-}
diff --git a/gmp/mpn/generic/toom_interpolate_5pts.c b/gmp/mpn/generic/toom_interpolate_5pts.c
index 9fa5f0b7a6..67260cc3d5 100644
--- a/gmp/mpn/generic/toom_interpolate_5pts.c
+++ b/gmp/mpn/generic/toom_interpolate_5pts.c
@@ -7,33 +7,23 @@
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2000-2003, 2005-2007, 2009 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002, 2003, 2005, 2006, 2007 Free Software Foundation,
+Inc.
This file is part of the GNU MP Library.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+The GNU MP Library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
@@ -41,29 +31,28 @@ see https://www.gnu.org/licenses/. */
void
mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1,
mp_size_t k, mp_size_t twor, int sa,
- mp_limb_t vinf0)
+ mp_limb_t vinf0, mp_ptr ws)
{
mp_limb_t cy, saved;
- mp_size_t twok;
- mp_size_t kk1;
- mp_ptr c1, v1, c3, vinf;
-
- twok = k + k;
- kk1 = twok + 1;
+ mp_size_t twok = k + k;
+ mp_size_t kk1 = twok + 1;
+ mp_ptr c1, v1, c3, vinf, c5;
+ mp_limb_t cout; /* final carry, should be zero at the end */
c1 = c + k;
v1 = c1 + k;
c3 = v1 + k;
vinf = c3 + k;
+ c5 = vinf + k;
#define v0 (c)
/* (1) v2 <- v2-vm1 < v2+|vm1|, (16 8 4 2 1) - (1 -1 1 -1 1) =
thus 0 <= v2 < 50*B^(2k) < 2^6*B^(2k) (15 9 3 3 0)
*/
- if (sa)
- ASSERT_NOCARRY (mpn_add_n (v2, v2, vm1, kk1));
+ if (sa <= 0)
+ mpn_add_n (v2, v2, vm1, kk1);
else
- ASSERT_NOCARRY (mpn_sub_n (v2, v2, vm1, kk1));
+ mpn_sub_n (v2, v2, vm1, kk1);
/* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
v0 v1 hi(vinf) |vm1| v2-vm1 EMPTY */
@@ -74,18 +63,17 @@ mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1,
/* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
v0 v1 hi(vinf) |vm1| (v2-vm1)/3 EMPTY */
- /* (2) vm1 <- tm1 := (v1 - vm1) / 2 [(1 1 1 1 1) - (1 -1 1 -1 1)] / 2 =
- tm1 >= 0 (0 1 0 1 0)
+ /* (2) vm1 <- tm1 := (v1 - sa*vm1) / 2 [(1 1 1 1 1) - (1 -1 1 -1 1)] / 2 =
+ tm1 >= 0 (0 1 0 1 0)
No carry comes out from {v1, kk1} +/- {vm1, kk1},
- and the division by two is exact.
- If (sa!=0) the sign of vm1 is negative */
- if (sa)
+ and the division by two is exact */
+ if (sa <= 0)
{
#ifdef HAVE_NATIVE_mpn_rsh1add_n
mpn_rsh1add_n (vm1, v1, vm1, kk1);
#else
- ASSERT_NOCARRY (mpn_add_n (vm1, v1, vm1, kk1));
- ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
+ mpn_add_n (vm1, v1, vm1, kk1);
+ mpn_rshift (vm1, vm1, kk1, 1);
#endif
}
else
@@ -93,8 +81,8 @@ mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1,
#ifdef HAVE_NATIVE_mpn_rsh1sub_n
mpn_rsh1sub_n (vm1, v1, vm1, kk1);
#else
- ASSERT_NOCARRY (mpn_sub_n (vm1, v1, vm1, kk1));
- ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
+ mpn_sub_n (vm1, v1, vm1, kk1);
+ mpn_rshift (vm1, vm1, kk1, 1);
#endif
}
@@ -115,8 +103,8 @@ mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1,
#ifdef HAVE_NATIVE_mpn_rsh1sub_n
mpn_rsh1sub_n (v2, v2, v1, kk1);
#else
- ASSERT_NOCARRY (mpn_sub_n (v2, v2, v1, kk1));
- ASSERT_NOCARRY (mpn_rshift (v2, v2, kk1, 1));
+ mpn_sub_n (v2, v2, v1, kk1);
+ mpn_rshift (v2, v2, kk1, 1);
#endif
/* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
@@ -125,75 +113,58 @@ mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1,
/* (5) v1 <- t1-tm1 (1 1 1 1 0) - (0 1 0 1 0) = (1 0 1 0 0)
result is v1 >= 0
*/
- ASSERT_NOCARRY (mpn_sub_n (v1, v1, vm1, kk1));
+ mpn_sub_n (v1, v1, vm1, kk1);
- /* We do not need to read the value in vm1, so we add it in {c+k, ...} */
- cy = mpn_add_n (c1, c1, vm1, kk1);
- MPN_INCR_U (c3 + 1, twor + k - 1, cy); /* 2n-(3k+1) = 2r+k-1 */
- /* Memory allocated for vm1 is now free, it can be recycled ...*/
+ /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+ v0 v1-v0-tm1 hi(vinf) tm1 (v2-vm1-3t1)/6 EMPTY */
/* (6) v2 <- v2 - 2*vinf, (2 1 0 0 0) - 2*(1 0 0 0 0) = (0 1 0 0 0)
result is v2 >= 0 */
saved = vinf[0]; /* Remember v1's highest byte (will be overwritten). */
vinf[0] = vinf0; /* Set the right value for vinf0 */
-#ifdef HAVE_NATIVE_mpn_sublsh1_n_ip1
- cy = mpn_sublsh1_n_ip1 (v2, vinf, twor);
+#ifdef HAVE_NATIVE_mpn_sublsh1_n
+ cy = mpn_sublsh1_n (v2, v2, vinf, twor);
#else
- /* Overwrite unused vm1 */
- cy = mpn_lshift (vm1, vinf, twor, 1);
- cy += mpn_sub_n (v2, v2, vm1, twor);
+ cy = mpn_lshift (ws, vinf, twor, 1);
+ cy += mpn_sub_n (v2, v2, ws, twor);
#endif
MPN_DECR_U (v2 + twor, kk1 - twor, cy);
- /* Current matrix is
- [1 0 0 0 0; vinf
- 0 1 0 0 0; v2
- 1 0 1 0 0; v1
- 0 1 0 1 0; vm1
- 0 0 0 0 1] v0
- Some values already are in-place (we added vm1 in the correct position)
- | vinf| v1 | v0 |
- | vm1 |
- One still is in a separated area
- | +v2 |
- We have to compute v1-=vinf; vm1 -= v2,
- |-vinf|
- | -v2 |
- Carefully reordering operations we can avoid to compute twice the sum
- of the high half of v2 plus the low half of vinf.
- */
-
- /* Add the high half of t2 in {vinf} */
- if ( LIKELY(twor > k + 1) ) { /* This is the expected flow */
- cy = mpn_add_n (vinf, vinf, v2 + k, k + 1);
- MPN_INCR_U (c3 + kk1, twor - k - 1, cy); /* 2n-(5k+1) = 2r-k-1 */
- } else { /* triggered only by very unbalanced cases like
- (k+k+(k-2))x(k+k+1) , should be handled by toom32 */
- ASSERT_NOCARRY (mpn_add_n (vinf, vinf, v2 + k, twor));
- }
/* (7) v1 <- v1 - vinf, (1 0 1 0 0) - (1 0 0 0 0) = (0 0 1 0 0)
result is >= 0 */
- /* Side effect: we also subtracted (high half) vm1 -= v2 */
cy = mpn_sub_n (v1, v1, vinf, twor); /* vinf is at most twor long. */
- vinf0 = vinf[0]; /* Save again the right value for vinf0 */
vinf[0] = saved;
MPN_DECR_U (v1 + twor, kk1 - twor, cy); /* Treat the last bytes. */
+ __GMPN_ADD_1 (cout, vinf, vinf, twor, vinf0); /* Add vinf0, propagate carry. */
- /* (8) vm1 <- vm1-v2 (0 1 0 1 0) - (0 1 0 0 0) = (0 0 0 1 0)
- Operate only on the low half.
+ /* (8) vm1 <- vm1-t2 (0 1 0 1 0) - (0 1 0 0 0) = (0 0 0 1 0)
+ vm1 >= 0
*/
- cy = mpn_sub_n (c1, c1, v2, k);
- MPN_DECR_U (v1, kk1, cy);
+ mpn_sub_n (vm1, vm1, v2, kk1); /* No overlapping here. */
/********************* Beginning the final phase **********************/
- /* Most of the recomposition was done */
+ /* {c,2k} {c+2k,2k } {c+4k ,2r } {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+ v0 t1 hi(t1)+vinf tm1 (v2-vm1-3t1)/6 EMPTY */
+
+ /* (9) add t2 in {c+3k, ...} */
+ cy = mpn_add_n (c3, c3, v2, kk1);
+ __GMPN_ADD_1 (cout, c5 + 1, c5 + 1, twor - k - 1, cy); /* 2n-(5k+1) = 2r-k-1 */
+
+ /* {c,2k} {c+2k,2k } {c+4k ,2r } {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
+ v0 t1 hi(t1)+vinf tm1 (v2-vm1-3t1)/6 EMPTY */
+ /* c c+k c+2k c+3k c+4k t t+2k+1 t+4k+2
+ v0 t1 vinf tm1 t2
+ +t2 */
+
+ /* add vm1 in {c+k, ...} */
+ cy = mpn_add_n (c1, c1, vm1, kk1);
+ __GMPN_ADD_1 (cout, c3 + 1, c3 + 1, twor + k - 1, cy); /* 2n-(3k+1) = 2r+k-1 */
- /* add t2 in {c+3k, ...}, but only the low half */
- cy = mpn_add_n (c3, c3, v2, k);
- vinf[0] += cy;
- ASSERT(vinf[0] >= cy); /* No carry */
- MPN_INCR_U (vinf, twor, vinf0); /* Add vinf0, propagate carry. */
+ /* c c+k c+2k c+3k c+4k t t+2k+1 t+4k+2
+ v0 t1 vinf tm1 t2
+ +tm1 +t2 */
#undef v0
+#undef t2
}
diff --git a/gmp/mpn/generic/toom_interpolate_6pts.c b/gmp/mpn/generic/toom_interpolate_6pts.c
deleted file mode 100644
index bdb2e95b89..0000000000
--- a/gmp/mpn/generic/toom_interpolate_6pts.c
+++ /dev/null
@@ -1,240 +0,0 @@
-/* mpn_toom_interpolate_6pts -- Interpolate for toom43, 52
-
- Contributed to the GNU project by Marco Bodrato.
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-/* For odd divisors, mpn_divexact_1 works fine with two's complement. */
-#ifndef mpn_divexact_by3
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && MODLIMB_INVERSE_3
-#define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,MODLIMB_INVERSE_3,0)
-#else
-#define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
-#endif
-#endif
-
-/* Interpolation for Toom-3.5, using the evaluation points: infinity,
- 1, -1, 2, -2. More precisely, we want to compute
- f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 5, given the
- six values
-
- w5 = f(0),
- w4 = f(-1),
- w3 = f(1)
- w2 = f(-2),
- w1 = f(2),
- w0 = limit at infinity of f(x) / x^5,
-
- The result is stored in {pp, 5*n + w0n}. At entry, w5 is stored at
- {pp, 2n}, w3 is stored at {pp + 2n, 2n+1}, and w0 is stored at
- {pp + 5n, w0n}. The other values are 2n + 1 limbs each (with most
- significant limbs small). f(-1) and f(-2) may be negative, signs
- determined by the flag bits. All intermediate results are positive.
- Inputs are destroyed.
-
- Interpolation sequence was taken from the paper: "Integer and
- Polynomial Multiplication: Towards Optimal Toom-Cook Matrices".
- Some slight variations were introduced: adaptation to "gmp
- instruction set", and a final saving of an operation by interlacing
- interpolation and recomposition phases.
-*/
-
-void
-mpn_toom_interpolate_6pts (mp_ptr pp, mp_size_t n, enum toom6_flags flags,
- mp_ptr w4, mp_ptr w2, mp_ptr w1,
- mp_size_t w0n)
-{
- mp_limb_t cy;
- /* cy6 can be stored in w1[2*n], cy4 in w4[0], embankment in w2[0] */
- mp_limb_t cy4, cy6, embankment;
-
- ASSERT( n > 0 );
- ASSERT( 2*n >= w0n && w0n > 0 );
-
-#define w5 pp /* 2n */
-#define w3 (pp + 2 * n) /* 2n+1 */
-#define w0 (pp + 5 * n) /* w0n */
-
- /* Interpolate with sequence:
- W2 =(W1 - W2)>>2
- W1 =(W1 - W5)>>1
- W1 =(W1 - W2)>>1
- W4 =(W3 - W4)>>1
- W2 =(W2 - W4)/3
- W3 = W3 - W4 - W5
- W1 =(W1 - W3)/3
- // Last steps are mixed with recomposition...
- W2 = W2 - W0<<2
- W4 = W4 - W2
- W3 = W3 - W1
- W2 = W2 - W0
- */
-
- /* W2 =(W1 - W2)>>2 */
- if (flags & toom6_vm2_neg)
- mpn_add_n (w2, w1, w2, 2 * n + 1);
- else
- mpn_sub_n (w2, w1, w2, 2 * n + 1);
- mpn_rshift (w2, w2, 2 * n + 1, 2);
-
- /* W1 =(W1 - W5)>>1 */
- w1[2*n] -= mpn_sub_n (w1, w1, w5, 2*n);
- mpn_rshift (w1, w1, 2 * n + 1, 1);
-
- /* W1 =(W1 - W2)>>1 */
-#if HAVE_NATIVE_mpn_rsh1sub_n
- mpn_rsh1sub_n (w1, w1, w2, 2 * n + 1);
-#else
- mpn_sub_n (w1, w1, w2, 2 * n + 1);
- mpn_rshift (w1, w1, 2 * n + 1, 1);
-#endif
-
- /* W4 =(W3 - W4)>>1 */
- if (flags & toom6_vm1_neg)
- {
-#if HAVE_NATIVE_mpn_rsh1add_n
- mpn_rsh1add_n (w4, w3, w4, 2 * n + 1);
-#else
- mpn_add_n (w4, w3, w4, 2 * n + 1);
- mpn_rshift (w4, w4, 2 * n + 1, 1);
-#endif
- }
- else
- {
-#if HAVE_NATIVE_mpn_rsh1sub_n
- mpn_rsh1sub_n (w4, w3, w4, 2 * n + 1);
-#else
- mpn_sub_n (w4, w3, w4, 2 * n + 1);
- mpn_rshift (w4, w4, 2 * n + 1, 1);
-#endif
- }
-
- /* W2 =(W2 - W4)/3 */
- mpn_sub_n (w2, w2, w4, 2 * n + 1);
- mpn_divexact_by3 (w2, w2, 2 * n + 1);
-
- /* W3 = W3 - W4 - W5 */
- mpn_sub_n (w3, w3, w4, 2 * n + 1);
- w3[2 * n] -= mpn_sub_n (w3, w3, w5, 2 * n);
-
- /* W1 =(W1 - W3)/3 */
- mpn_sub_n (w1, w1, w3, 2 * n + 1);
- mpn_divexact_by3 (w1, w1, 2 * n + 1);
-
- /*
- [1 0 0 0 0 0;
- 0 1 0 0 0 0;
- 1 0 1 0 0 0;
- 0 1 0 1 0 0;
- 1 0 1 0 1 0;
- 0 0 0 0 0 1]
-
- pp[] prior to operations:
- |_H w0__|_L w0__|______||_H w3__|_L w3__|_H w5__|_L w5__|
-
- summation scheme for remaining operations:
- |______________5|n_____4|n_____3|n_____2|n______|n______|pp
- |_H w0__|_L w0__|______||_H w3__|_L w3__|_H w5__|_L w5__|
- || H w4 | L w4 |
- || H w2 | L w2 |
- || H w1 | L w1 |
- ||-H w1 |-L w1 |
- |-H w0 |-L w0 ||-H w2 |-L w2 |
- */
- cy = mpn_add_n (pp + n, pp + n, w4, 2 * n + 1);
- MPN_INCR_U (pp + 3 * n + 1, n, cy);
-
- /* W2 -= W0<<2 */
-#if HAVE_NATIVE_mpn_sublsh_n || HAVE_NATIVE_mpn_sublsh2_n_ip1
-#if HAVE_NATIVE_mpn_sublsh2_n_ip1
- cy = mpn_sublsh2_n_ip1 (w2, w0, w0n);
-#else
- cy = mpn_sublsh_n (w2, w2, w0, w0n, 2);
-#endif
-#else
- /* {W4,2*n+1} is now free and can be overwritten. */
- cy = mpn_lshift(w4, w0, w0n, 2);
- cy+= mpn_sub_n(w2, w2, w4, w0n);
-#endif
- MPN_DECR_U (w2 + w0n, 2 * n + 1 - w0n, cy);
-
- /* W4L = W4L - W2L */
- cy = mpn_sub_n (pp + n, pp + n, w2, n);
- MPN_DECR_U (w3, 2 * n + 1, cy);
-
- /* W3H = W3H + W2L */
- cy4 = w3[2 * n] + mpn_add_n (pp + 3 * n, pp + 3 * n, w2, n);
- /* W1L + W2H */
- cy = w2[2 * n] + mpn_add_n (pp + 4 * n, w1, w2 + n, n);
- MPN_INCR_U (w1 + n, n + 1, cy);
-
- /* W0 = W0 + W1H */
- if (LIKELY (w0n > n))
- cy6 = w1[2 * n] + mpn_add_n (w0, w0, w1 + n, n);
- else
- cy6 = mpn_add_n (w0, w0, w1 + n, w0n);
-
- /*
- summation scheme for the next operation:
- |...____5|n_____4|n_____3|n_____2|n______|n______|pp
- |...w0___|_w1_w2_|_H w3__|_L w3__|_H w5__|_L w5__|
- ...-w0___|-w1_w2 |
- */
- /* if(LIKELY(w0n>n)) the two operands below DO overlap! */
- cy = mpn_sub_n (pp + 2 * n, pp + 2 * n, pp + 4 * n, n + w0n);
-
- /* embankment is a "dirty trick" to avoid carry/borrow propagation
- beyond allocated memory */
- embankment = w0[w0n - 1] - 1;
- w0[w0n - 1] = 1;
- if (LIKELY (w0n > n)) {
- if (cy4 > cy6)
- MPN_INCR_U (pp + 4 * n, w0n + n, cy4 - cy6);
- else
- MPN_DECR_U (pp + 4 * n, w0n + n, cy6 - cy4);
- MPN_DECR_U (pp + 3 * n + w0n, 2 * n, cy);
- MPN_INCR_U (w0 + n, w0n - n, cy6);
- } else {
- MPN_INCR_U (pp + 4 * n, w0n + n, cy4);
- MPN_DECR_U (pp + 3 * n + w0n, 2 * n, cy + cy6);
- }
- w0[w0n - 1] += embankment;
-
-#undef w5
-#undef w3
-#undef w0
-
-}
diff --git a/gmp/mpn/generic/toom_interpolate_7pts.c b/gmp/mpn/generic/toom_interpolate_7pts.c
index 2a67dba82f..872da26309 100644
--- a/gmp/mpn/generic/toom_interpolate_7pts.c
+++ b/gmp/mpn/generic/toom_interpolate_7pts.c
@@ -1,7 +1,6 @@
/* mpn_toom_interpolate_7pts -- Interpolate for toom44, 53, 62.
- Contributed to the GNU project by Niels Möller.
- Improvements by Marco Bodrato.
+ Contributed to the GNU project by Niels Möller.
THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
@@ -12,216 +11,149 @@ Copyright 2006, 2007, 2009 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
-#define BINVERT_3 MODLIMB_INVERSE_3
-
-#define BINVERT_9 \
- ((((GMP_NUMB_MAX / 9) << (6 - GMP_NUMB_BITS % 6)) * 8 & GMP_NUMB_MAX) | 0x39)
-
-#define BINVERT_15 \
- ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 15) * 14 * 16 & GMP_NUMB_MAX) + 15)
-
-/* For the various mpn_divexact_byN here, fall back to using either
- mpn_pi1_bdiv_q_1 or mpn_divexact_1. The former has less overhead and is
- many faster if it is native. For now, since mpn_divexact_1 is native on
- several platforms where mpn_pi1_bdiv_q_1 does not yet exist, do not use
- mpn_pi1_bdiv_q_1 unconditionally. FIXME. */
+/* Arithmetic right shift, requiring that the shifted out bits are zero. */
+static inline void
+divexact_2exp (mp_ptr rp, mp_srcptr sp, mp_size_t n, unsigned shift)
+{
+ mp_limb_t sign;
+ sign = LIMB_HIGHBIT_TO_MASK (sp[n-1] << GMP_NAIL_BITS) << (GMP_NUMB_BITS - shift);
+ ASSERT_NOCARRY (mpn_rshift (rp, sp, n, shift));
+ rp[n-1] |= sign & GMP_NUMB_MASK;
+}
/* For odd divisors, mpn_divexact_1 works fine with two's complement. */
#ifndef mpn_divexact_by3
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
-#define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)
-#else
#define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
#endif
-#endif
-
#ifndef mpn_divexact_by9
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
-#define mpn_divexact_by9(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,9,BINVERT_9,0)
-#else
#define mpn_divexact_by9(dst,src,size) mpn_divexact_1(dst,src,size,9)
#endif
-#endif
-
#ifndef mpn_divexact_by15
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
-#define mpn_divexact_by15(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,15,BINVERT_15,0)
-#else
#define mpn_divexact_by15(dst,src,size) mpn_divexact_1(dst,src,size,15)
#endif
-#endif
-/* Interpolation for toom4, using the evaluation points 0, infinity,
- 1, -1, 2, -2, 1/2. More precisely, we want to compute
+/* Interpolation for toom4, using the evaluation points infinity, 2,
+ 1, -1, 1/2, -1/2. More precisely, we want to compute
f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 6, given the
seven values
w0 = f(0),
- w1 = f(-2),
- w2 = f(1),
+ w1 = 64 f(-1/2),
+ w2 = 64 f(1/2),
w3 = f(-1),
- w4 = f(2)
- w5 = 64 * f(1/2)
+ w4 = f(1)
+ w5 = f(2)
w6 = limit at infinity of f(x) / x^6,
The result is 6*n + w6n limbs. At entry, w0 is stored at {rp, 2n },
w2 is stored at { rp + 2n, 2n+1 }, and w6 is stored at { rp + 6n,
w6n }. The other values are 2n + 1 limbs each (with most
significant limbs small). f(-1) and f(-1/2) may be negative, signs
- determined by the flag bits. Inputs are destroyed.
+ determined by the flag bits. All intermediate results are
+ represented in two's complement. Inputs are destroyed.
Needs (2*n + 1) limbs of temporary storage.
*/
void
-mpn_toom_interpolate_7pts (mp_ptr rp, mp_size_t n, enum toom7_flags flags,
+mpn_toom_interpolate_7pts (mp_ptr rp, mp_size_t n, enum toom4_flags flags,
mp_ptr w1, mp_ptr w3, mp_ptr w4, mp_ptr w5,
mp_size_t w6n, mp_ptr tp)
{
- mp_size_t m;
+ mp_size_t m = 2*n + 1;
+ mp_ptr w2 = rp + 2*n;
+ mp_ptr w6 = rp + 6*n;
mp_limb_t cy;
- m = 2*n + 1;
-#define w0 rp
-#define w2 (rp + 2*n)
-#define w6 (rp + 6*n)
-
ASSERT (w6n > 0);
ASSERT (w6n <= 2*n);
- /* Using formulas similar to Marco Bodrato's
+ /* Using Marco Bodrato's formulas
- W5 = W5 + W4
- W1 =(W4 - W1)/2
- W4 = W4 - W0
- W4 =(W4 - W1)/4 - W6*16
- W3 =(W2 - W3)/2
- W2 = W2 - W3
+ W5 = W5 + W2
+ W3 =(W3 + W4)/2
+ W1 = W1 + W2
+ W2 = W2 - W6 - W0*64
+ W2 =(W2*2 - W1)/8
+ W4 = W4 - W3
- W5 = W5 - W2*65 May be negative.
- W2 = W2 - W6 - W0
- W5 =(W5 + W2*45)/2 Now >= 0 again.
- W4 =(W4 - W2)/3
- W2 = W2 - W4
+ W5 = W5 - W4*65
+ W4 = W4 - W6 - W0
+ W5 = W5 + W4*45
+ W2 =(W2 - W4)/3
+ W4 = W4 - W2
- W1 = W5 - W1 May be negative.
- W5 =(W5 - W3*8)/9
+ W1 = W1 - W5
+ W5 =(W5 - W3*16)/ 18
W3 = W3 - W5
- W1 =(W1/15 + W5)/2 Now >= 0 again.
+ W1 =(W1/30 + W5)/ 2
W5 = W5 - W1
- where W0 = f(0), W1 = f(-2), W2 = f(1), W3 = f(-1),
- W4 = f(2), W5 = f(1/2), W6 = f(oo),
-
- Note that most intermediate results are positive; the ones that
- may be negative are represented in two's complement. We must
- never shift right a value that may be negative, since that would
- invalidate the sign bit. On the other hand, divexact by odd
- numbers work fine with two's complement.
+ where W0 = f(0), W1 = 64 f(-1/2), W2 = 64 f(1/2), W3 = f(-1),
+ W4 = f(1), W5 = f(2), W6 = f(oo),
*/
- mpn_add_n (w5, w5, w4, m);
- if (flags & toom7_w1_neg)
- {
-#ifdef HAVE_NATIVE_mpn_rsh1add_n
- mpn_rsh1add_n (w1, w1, w4, m);
-#else
- mpn_add_n (w1, w1, w4, m); ASSERT (!(w1[0] & 1));
- mpn_rshift (w1, w1, m, 1);
-#endif
- }
+ mpn_add_n (w5, w5, w2, m);
+ if (flags & toom4_w3_neg)
+ mpn_add_n (w3, w3, w4, m);
else
- {
-#ifdef HAVE_NATIVE_mpn_rsh1sub_n
- mpn_rsh1sub_n (w1, w4, w1, m);
-#else
- mpn_sub_n (w1, w4, w1, m); ASSERT (!(w1[0] & 1));
- mpn_rshift (w1, w1, m, 1);
-#endif
- }
- mpn_sub (w4, w4, m, w0, 2*n);
- mpn_sub_n (w4, w4, w1, m); ASSERT (!(w4[0] & 3));
- mpn_rshift (w4, w4, m, 2); /* w4>=0 */
-
- tp[w6n] = mpn_lshift (tp, w6, w6n, 4);
- mpn_sub (w4, w4, m, tp, w6n+1);
-
- if (flags & toom7_w3_neg)
- {
-#ifdef HAVE_NATIVE_mpn_rsh1add_n
- mpn_rsh1add_n (w3, w3, w2, m);
-#else
- mpn_add_n (w3, w3, w2, m); ASSERT (!(w3[0] & 1));
- mpn_rshift (w3, w3, m, 1);
-#endif
- }
+ mpn_sub_n (w3, w4, w3, m);
+ divexact_2exp (w3, w3, m, 1);
+ if (flags & toom4_w1_neg)
+ mpn_add_n (w1, w1, w2, m);
else
- {
-#ifdef HAVE_NATIVE_mpn_rsh1sub_n
- mpn_rsh1sub_n (w3, w2, w3, m);
-#else
- mpn_sub_n (w3, w2, w3, m); ASSERT (!(w3[0] & 1));
- mpn_rshift (w3, w3, m, 1);
-#endif
- }
-
- mpn_sub_n (w2, w2, w3, m);
-
- mpn_submul_1 (w5, w2, m, 65);
+ mpn_sub_n (w1, w2, w1, m);
mpn_sub (w2, w2, m, w6, w6n);
- mpn_sub (w2, w2, m, w0, 2*n);
-
- mpn_addmul_1 (w5, w2, m, 45); ASSERT (!(w5[0] & 1));
- mpn_rshift (w5, w5, m, 1);
- mpn_sub_n (w4, w4, w2, m);
-
- mpn_divexact_by3 (w4, w4, m);
+ tp[2*n] = mpn_lshift (tp, rp, 2*n, 6);
+ mpn_sub_n (w2, w2, tp, m);
+ mpn_lshift (w2, w2, m, 1);
+ mpn_sub_n (w2, w2, w1, m);
+ divexact_2exp (w2, w2, m, 3);
+ mpn_sub_n (w4, w4, w3, m);
+
+ mpn_submul_1 (w5, w4, m, 65);
+ mpn_sub (w4, w4, m, w6, w6n);
+ mpn_sub (w4, w4, m, rp, 2*n);
+ mpn_addmul_1 (w5, w4, m, 45);
mpn_sub_n (w2, w2, w4, m);
+ /* Rely on divexact working with two's complement */
+ mpn_divexact_by3 (w2, w2, m);
+ mpn_sub_n (w4, w4, w2, m);
- mpn_sub_n (w1, w5, w1, m);
- mpn_lshift (tp, w3, m, 3);
+ mpn_sub_n (w1, w1, w5, m);
+ mpn_lshift (tp, w3, m, 4);
mpn_sub_n (w5, w5, tp, m);
+ divexact_2exp (w5, w5, m, 1);
mpn_divexact_by9 (w5, w5, m);
mpn_sub_n (w3, w3, w5, m);
-
+ divexact_2exp (w1, w1, m, 1);
mpn_divexact_by15 (w1, w1, m);
- mpn_add_n (w1, w1, w5, m); ASSERT (!(w1[0] & 1));
- mpn_rshift (w1, w1, m, 1); /* w1>=0 now */
+ mpn_add_n (w1, w1, w5, m);
+ divexact_2exp (w1, w1, m, 1);
mpn_sub_n (w5, w5, w1, m);
- /* These bounds are valid for the 4x4 polynomial product of toom44,
- * and they are conservative for toom53 and toom62. */
- ASSERT (w1[2*n] < 2);
- ASSERT (w2[2*n] < 3);
- ASSERT (w3[2*n] < 4);
- ASSERT (w4[2*n] < 3);
- ASSERT (w5[2*n] < 2);
+ /* Two's complement coefficients must be non-negative at the end of
+ this procedure. */
+ ASSERT ( !(w1[2*n] & GMP_LIMB_HIGHBIT));
+ ASSERT ( !(w2[2*n] & GMP_LIMB_HIGHBIT));
+ ASSERT ( !(w3[2*n] & GMP_LIMB_HIGHBIT));
+ ASSERT ( !(w4[2*n] & GMP_LIMB_HIGHBIT));
+ ASSERT ( !(w5[2*n] & GMP_LIMB_HIGHBIT));
/* Addition chain. Note carries and the 2n'th limbs that need to be
* added in.
@@ -242,8 +174,8 @@ mpn_toom_interpolate_7pts (mp_ptr rp, mp_size_t n, enum toom7_flags flags,
* c7 c6 c5 c4 c3 Carries to propagate
*/
- cy = mpn_add_n (rp + n, rp + n, w1, m);
- MPN_INCR_U (w2 + n + 1, n , cy);
+ cy = mpn_add_n (rp + n, rp + n, w1, 2*n);
+ MPN_INCR_U (w2 + n, n + 1, w1[2*n] + cy);
cy = mpn_add_n (rp + 3*n, rp + 3*n, w3, n);
MPN_INCR_U (w3 + n, n + 1, w2[2*n] + cy);
cy = mpn_add_n (rp + 4*n, w3 + n, w4, n);
@@ -251,7 +183,10 @@ mpn_toom_interpolate_7pts (mp_ptr rp, mp_size_t n, enum toom7_flags flags,
cy = mpn_add_n (rp + 5*n, w4 + n, w5, n);
MPN_INCR_U (w5 + n, n + 1, w4[2*n] + cy);
if (w6n > n + 1)
- ASSERT_NOCARRY (mpn_add (rp + 6*n, rp + 6*n, w6n, w5 + n, n + 1));
+ {
+ mp_limb_t c7 = mpn_add_n (rp + 6*n, rp + 6*n, w5 + n, n + 1);
+ MPN_INCR_U (rp + 7*n + 1, w6n - n - 1, c7);
+ }
else
{
ASSERT_NOCARRY (mpn_add_n (rp + 6*n, rp + 6*n, w5 + n, w6n));
diff --git a/gmp/mpn/generic/toom_interpolate_8pts.c b/gmp/mpn/generic/toom_interpolate_8pts.c
deleted file mode 100644
index 9e8808334e..0000000000
--- a/gmp/mpn/generic/toom_interpolate_8pts.c
+++ /dev/null
@@ -1,212 +0,0 @@
-/* mpn_toom_interpolate_8pts -- Interpolate for toom54, 63, 72.
-
- Contributed to the GNU project by Marco Bodrato.
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009, 2011, 2012 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#define BINVERT_3 MODLIMB_INVERSE_3
-
-#define BINVERT_15 \
- ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 15) * 14 * 16 & GMP_NUMB_MAX) + 15)
-
-#define BINVERT_45 ((BINVERT_15 * BINVERT_3) & GMP_NUMB_MASK)
-
-#ifndef mpn_divexact_by3
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
-#define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)
-#else
-#define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
-#endif
-#endif
-
-#ifndef mpn_divexact_by45
-#if GMP_NUMB_BITS % 12 == 0
-#define mpn_divexact_by45(dst,src,size) \
- (63 & 19 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 45)))
-#else
-#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
-#define mpn_divexact_by45(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,45,BINVERT_45,0)
-#else
-#define mpn_divexact_by45(dst,src,size) mpn_divexact_1(dst,src,size,45)
-#endif
-#endif
-#endif
-
-#if HAVE_NATIVE_mpn_sublsh2_n_ip1
-#define DO_mpn_sublsh2_n(dst,src,n,ws) mpn_sublsh2_n_ip1(dst,src,n)
-#else
-#define DO_mpn_sublsh2_n(dst,src,n,ws) DO_mpn_sublsh_n(dst,src,n,2,ws)
-#endif
-
-#if HAVE_NATIVE_mpn_sublsh_n
-#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n (dst,dst,src,n,s)
-#else
-static mp_limb_t
-DO_mpn_sublsh_n (mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
-{
-#if USE_MUL_1 && 0
- return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
-#else
- mp_limb_t __cy;
- __cy = mpn_lshift (ws,src,n,s);
- return __cy + mpn_sub_n (dst,dst,ws,n);
-#endif
-}
-#endif
-
-
-#if HAVE_NATIVE_mpn_subrsh
-#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh (dst,nd,src,ns,s)
-#else
-/* This is not a correct definition, it assumes no carry */
-#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) \
-do { \
- mp_limb_t __cy; \
- MPN_DECR_U (dst, nd, src[0] >> s); \
- __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws); \
- MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy); \
-} while (0)
-#endif
-
-/* Interpolation for Toom-4.5 (or Toom-4), using the evaluation
- points: infinity(4.5 only), 4, -4, 2, -2, 1, -1, 0. More precisely,
- we want to compute f(2^(GMP_NUMB_BITS * n)) for a polynomial f of
- degree 7 (or 6), given the 8 (rsp. 7) values:
-
- r1 = limit at infinity of f(x) / x^7,
- r2 = f(4),
- r3 = f(-4),
- r4 = f(2),
- r5 = f(-2),
- r6 = f(1),
- r7 = f(-1),
- r8 = f(0).
-
- All couples of the form f(n),f(-n) must be already mixed with
- toom_couple_handling(f(n),...,f(-n),...)
-
- The result is stored in {pp, spt + 7*n (or 6*n)}.
- At entry, r8 is stored at {pp, 2n},
- r5 is stored at {pp + 3n, 3n + 1}.
-
- The other values are 2n+... limbs each (with most significant limbs small).
-
- All intermediate results are positive.
- Inputs are destroyed.
-*/
-
-void
-mpn_toom_interpolate_8pts (mp_ptr pp, mp_size_t n,
- mp_ptr r3, mp_ptr r7,
- mp_size_t spt, mp_ptr ws)
-{
- mp_limb_signed_t cy;
- mp_ptr r5, r1;
- r5 = (pp + 3 * n); /* 3n+1 */
- r1 = (pp + 7 * n); /* spt */
-
- /******************************* interpolation *****************************/
-
- DO_mpn_subrsh(r3+n, 2 * n + 1, pp, 2 * n, 4, ws);
- cy = DO_mpn_sublsh_n (r3, r1, spt, 12, ws);
- MPN_DECR_U (r3 + spt, 3 * n + 1 - spt, cy);
-
- DO_mpn_subrsh(r5+n, 2 * n + 1, pp, 2 * n, 2, ws);
- cy = DO_mpn_sublsh_n (r5, r1, spt, 6, ws);
- MPN_DECR_U (r5 + spt, 3 * n + 1 - spt, cy);
-
- r7[3*n] -= mpn_sub_n (r7+n, r7+n, pp, 2 * n);
- cy = mpn_sub_n (r7, r7, r1, spt);
- MPN_DECR_U (r7 + spt, 3 * n + 1 - spt, cy);
-
- ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
- ASSERT_NOCARRY(mpn_rshift(r3, r3, 3 * n + 1, 2));
-
- ASSERT_NOCARRY(mpn_sub_n (r5, r5, r7, 3 * n + 1));
-
- ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
-
- mpn_divexact_by45 (r3, r3, 3 * n + 1);
-
- ASSERT_NOCARRY(mpn_divexact_by3 (r5, r5, 3 * n + 1));
-
- ASSERT_NOCARRY(DO_mpn_sublsh2_n (r5, r3, 3 * n + 1, ws));
-
- /* last interpolation steps... */
- /* ... are mixed with recomposition */
-
- /***************************** recomposition *******************************/
- /*
- pp[] prior to operations:
- |_H r1|_L r1|____||_H r5|_M_r5|_L r5|_____|_H r8|_L r8|pp
-
- summation scheme for remaining operations:
- |____8|n___7|n___6|n___5|n___4|n___3|n___2|n____|n____|pp
- |_H r1|_L r1|____||_H*r5|_M r5|_L r5|_____|_H_r8|_L r8|pp
- ||_H r3|_M r3|_L*r3|
- ||_H_r7|_M_r7|_L_r7|
- ||-H r3|-M r3|-L*r3|
- ||-H*r5|-M_r5|-L_r5|
- */
-
- cy = mpn_add_n (pp + n, pp + n, r7, n); /* Hr8+Lr7-Lr5 */
- cy-= mpn_sub_n (pp + n, pp + n, r5, n);
- if (0 > cy)
- MPN_DECR_U (r7 + n, 2*n + 1, 1);
- else
- MPN_INCR_U (r7 + n, 2*n + 1, cy);
-
- cy = mpn_sub_n (pp + 2*n, r7 + n, r5 + n, n); /* Mr7-Mr5 */
- MPN_DECR_U (r7 + 2*n, n + 1, cy);
-
- cy = mpn_add_n (pp + 3*n, r5, r7+ 2*n, n+1); /* Hr7+Lr5 */
- r5[3*n]+= mpn_add_n (r5 + 2*n, r5 + 2*n, r3, n); /* Hr5+Lr3 */
- cy-= mpn_sub_n (pp + 3*n, pp + 3*n, r5 + 2*n, n+1); /* Hr7-Hr5+Lr5-Lr3 */
- if (UNLIKELY(0 > cy))
- MPN_DECR_U (r5 + n + 1, 2*n, 1);
- else
- MPN_INCR_U (r5 + n + 1, 2*n, cy);
-
- ASSERT_NOCARRY(mpn_sub_n(pp + 4*n, r5 + n, r3 + n, 2*n +1)); /* Mr5-Mr3,Hr5-Hr3 */
-
- cy = mpn_add_1 (pp + 6*n, r3 + n, n, pp[6*n]);
- MPN_INCR_U (r3 + 2*n, n + 1, cy);
- cy = mpn_add_n (pp + 7*n, pp + 7*n, r3 + 2*n, n);
- if (LIKELY(spt != n))
- MPN_INCR_U (pp + 8*n, spt - n, cy + r3[3*n]);
- else
- ASSERT (r3[3*n] | cy == 0);
-}
diff --git a/gmp/mpn/generic/trialdiv.c b/gmp/mpn/generic/trialdiv.c
deleted file mode 100644
index cad159c3a0..0000000000
--- a/gmp/mpn/generic/trialdiv.c
+++ /dev/null
@@ -1,132 +0,0 @@
-/* mpn_trialdiv -- find small factors of an mpn number using trial division.
-
- Contributed to the GNU project by Torbjorn Granlund.
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2009, 2010, 2012, 2013 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-/*
- This function finds the first (smallest) factor represented in
- trialdivtab.h. It does not stop the factoring effort just because it has
- reached some sensible limit, such as the square root of the input number.
-
- The caller can limit the factoring effort by passing NPRIMES. The function
- will then divide until that limit, or perhaps a few primes more. A position
- which only mpn_trialdiv can make sense of is returned in the WHERE
- parameter. It can be used for restarting the factoring effort; the first
- call should pass 0 here.
-
- Input: 1. A non-negative number T = {tp,tn}
- 2. NPRIMES as described above,
- 3. *WHERE as described above.
- Output: 1. *WHERE updated as described above.
- 2. Return value is non-zero if we found a factor, else zero
- To get the actual prime factor, compute the mod B inverse
- of the return value.
-*/
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-struct gmp_primes_dtab {
- mp_limb_t binv;
- mp_limb_t lim;
-};
-
-struct gmp_primes_ptab {
- mp_limb_t ppp; /* primes, multiplied together */
- mp_limb_t cps[7]; /* ppp values pre-computed for mpn_mod_1s_4p */
- unsigned int idx:24; /* index of first primes in dtab */
- unsigned int np :8; /* number of primes related to this entry */
-};
-
-
-static const struct gmp_primes_dtab gmp_primes_dtab[] =
-{
-#define WANT_dtab
-#define P(p,inv,lim) {inv,lim}
-#include "trialdivtab.h"
-#undef WANT_dtab
-#undef P
- {0,0}
-};
-
-static const struct gmp_primes_ptab gmp_primes_ptab[] =
-{
-#define WANT_ptab
-#include "trialdivtab.h"
-#undef WANT_ptab
-};
-
-#define PTAB_LINES (sizeof (gmp_primes_ptab) / sizeof (gmp_primes_ptab[0]))
-
-/* FIXME: We could optimize out one of the outer loop conditions if we
- had a final ptab entry with a huge nd field. */
-mp_limb_t
-mpn_trialdiv (mp_srcptr tp, mp_size_t tn, mp_size_t nprimes, int *where)
-{
- mp_limb_t ppp;
- const mp_limb_t *cps;
- const struct gmp_primes_dtab *dp;
- long i, j, idx, np;
- mp_limb_t r, q;
-
- ASSERT (tn >= 1);
-
- for (i = *where; i < PTAB_LINES; i++)
- {
- ppp = gmp_primes_ptab[i].ppp;
- cps = gmp_primes_ptab[i].cps;
-
- r = mpn_mod_1s_4p (tp, tn, ppp << cps[1], cps);
-
- idx = gmp_primes_ptab[i].idx;
- np = gmp_primes_ptab[i].np;
-
- /* Check divisibility by individual primes. */
- dp = &gmp_primes_dtab[idx] + np;
- for (j = -np; j < 0; j++)
- {
- q = r * dp[j].binv;
- if (q <= dp[j].lim)
- {
- *where = i;
- return dp[j].binv;
- }
- }
-
- nprimes -= np;
- if (nprimes <= 0)
- return 0;
- }
- return 0;
-}
diff --git a/gmp/mpn/generic/udiv_w_sdiv.c b/gmp/mpn/generic/udiv_w_sdiv.c
index 7136429f0f..f876aa5734 100644
--- a/gmp/mpn/generic/udiv_w_sdiv.c
+++ b/gmp/mpn/generic/udiv_w_sdiv.c
@@ -9,40 +9,30 @@
GNU MP RELEASE.
-Copyright 1992, 1994, 1996, 2000, 2011, 2012 Free Software Foundation, Inc.
+Copyright 1992, 1994, 1996, 2000 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
mp_limb_t
-mpn_udiv_w_sdiv (mp_limb_t *rp, mp_limb_t a1, mp_limb_t a0, mp_limb_t d)
+mpn_udiv_w_sdiv (rp, a1, a0, d)
+ mp_limb_t *rp, a1, a0, d;
{
mp_limb_t q, r;
mp_limb_t c0, c1, b1;
@@ -52,7 +42,7 @@ mpn_udiv_w_sdiv (mp_limb_t *rp, mp_limb_t a1, mp_limb_t a0, mp_limb_t d)
if ((mp_limb_signed_t) d >= 0)
{
- if (a1 < d - a1 - (a0 >> (GMP_LIMB_BITS - 1)))
+ if (a1 < d - a1 - (a0 >> (BITS_PER_MP_LIMB - 1)))
{
/* dividend, divisor, and quotient are nonnegative */
sdiv_qrnnd (q, r, a1, a0, d);
@@ -60,18 +50,18 @@ mpn_udiv_w_sdiv (mp_limb_t *rp, mp_limb_t a1, mp_limb_t a0, mp_limb_t d)
else
{
/* Compute c1*2^32 + c0 = a1*2^32 + a0 - 2^31*d */
- sub_ddmmss (c1, c0, a1, a0, d >> 1, d << (GMP_LIMB_BITS - 1));
+ sub_ddmmss (c1, c0, a1, a0, d >> 1, d << (BITS_PER_MP_LIMB - 1));
/* Divide (c1*2^32 + c0) by d */
sdiv_qrnnd (q, r, c1, c0, d);
/* Add 2^31 to quotient */
- q += (mp_limb_t) 1 << (GMP_LIMB_BITS - 1);
+ q += (mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1);
}
}
else
{
b1 = d >> 1; /* d/2, between 2^30 and 2^31 - 1 */
c1 = a1 >> 1; /* A/2 */
- c0 = (a1 << (GMP_LIMB_BITS - 1)) + (a0 >> 1);
+ c0 = (a1 << (BITS_PER_MP_LIMB - 1)) + (a0 >> 1);
if (a1 < b1) /* A < 2^32*b1, so A/2 < 2^31*b1 */
{
@@ -126,12 +116,12 @@ mpn_udiv_w_sdiv (mp_limb_t *rp, mp_limb_t a1, mp_limb_t a0, mp_limb_t d)
{ /* Hence a1 = d - 1 = 2*b1 - 1 */
if (a0 >= -d)
{
- q = -CNST_LIMB(1);
+ q = -1;
r = a0 + d;
}
else
{
- q = -CNST_LIMB(2);
+ q = -2;
r = a0 + 2*d;
}
}
diff --git a/gmp/mpn/generic/zero.c b/gmp/mpn/generic/zero.c
deleted file mode 100644
index e6e7fd3101..0000000000
--- a/gmp/mpn/generic/zero.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/* mpn_zero
-
-Copyright 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
-or
-
- * the GNU General Public License as published by the Free Software
- Foundation; either version 2 of the License, or (at your option) any
- later version.
-
-or both in parallel, as here.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received copies of the GNU General Public License and the
-GNU Lesser General Public License along with the GNU MP Library. If not,
-see https://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-void
-mpn_zero (mp_ptr rp, mp_size_t n)
-{
- mp_size_t i;
-
- rp += n;
- for (i = -n; i != 0; i++)
- rp[i] = 0;
-}